forked from avinashkranjan/Amazing-Python-Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
[pull] master from avinashkranjan:master #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
c70caac
#913 Codeforces_Problem_Scraper Added
iamakkkhil 8e193c5
User IP of Driver Path Added
iamakkkhil 502e3ca
Sentiment Detector
Amit366 bc2de17
Readme Updated of CodeForces Scraper
iamakkkhil 9a9ef9a
Codeforces Problem Scraper Added
iamakkkhil d09eab2
Update requirements.txt
iamakkkhil a086be2
Resolved minor bugs
iamakkkhil 4a73d9b
Merge pull request #919 from iamakkkhil/master
avinashkranjan 9888be7
Merge pull request #925 from Amit366/Amit1
avinashkranjan File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
153 changes: 153 additions & 0 deletions
Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
import os | ||
from selenium import webdriver # Automated webdriver | ||
from PIL import Image | ||
from fpdf import FPDF # For converting images to pdf | ||
|
||
|
||
def select_difficulty():
    """
    Ask the user for the lowest and highest problem rating to scrape.

    Each bound is requested until the user types a whole number, so a typo no
    longer aborts the script with a ValueError. If the bounds are entered in
    reverse order they are swapped, because the caller builds the query string
    as "<min>-<max>".

    :return: difficulty_level[] -> [min_rating, max_rating]
    """
    print("\nEnter the Range of difficulty between 800 to 3500: ")

    difficulty_level = []
    for prompt in ("Min: ", "Max: "):
        while True:
            try:
                difficulty_level.append(int(input(prompt)))
                break
            except ValueError:
                print("Please enter a whole number.")

    if difficulty_level[0] > difficulty_level[1]:
        print("Min is greater than Max, swapping the two values.")
        difficulty_level.reverse()

    return difficulty_level
|
||
|
||
def _collect_problem_links(driver, pblms_links, limit):
    """
    Append problem URLs found on the page currently loaded in `driver`.

    :param driver: webdriver positioned on a problemset page
    :param pblms_links: list that receives the URLs (modified in place)
    :param limit: stop as soon as the list holds this many URLs
    :return: True when `limit` URLs have been collected, False otherwise
    """
    # The original code passed `"td.id.dark.left a" and "td.id.left a"`, which
    # Python evaluates to the second string only. That selector already
    # matches the "dark" rows too, since they also carry the id/left classes.
    for element in driver.find_elements_by_css_selector("td.id.left a"):
        if len(pblms_links) >= limit:
            break
        pblms_links.append(element.get_attribute("href"))
    return len(pblms_links) >= limit


def extracting_problem_links(diff_level):
    """
    Collect the URLs of Codeforces problems inside the requested rating range.

    The user is asked how many questions to scrape. The first result page is
    loaded, the pagination links are read from it, and pages are visited in
    order until enough problem URLs have been gathered or no page is left.

    :param diff_level: difficulty_level entered by the user, [min, max]
    :return pblms_links: list with the URLs of the questions to scrape; may be
        shorter than requested when fewer problems are available
    """
    no_of_questions = int(input("\nHow many Questions you want to scrape: "))

    pblms_links = []
    if no_of_questions <= 0:
        # Nothing to do; avoids starting a browser and scraping every page.
        print("\nNo Questions requested, nothing to scrape.")
        return pblms_links

    options = webdriver.ChromeOptions()
    options.headless = True
    driver = webdriver.Chrome(DRIVER_PATH, options=options)
    try:
        print("\nRequesting URL ...")
        driver.get(f"https://codeforces.com/problemset/?tags={diff_level[0]}-{diff_level[1]}")

        # ===================Getting no. of Pages to Scrape=====================
        print("\nFinding available pages to scrape....")
        page_links = [
            anchor.get_attribute("href")
            for anchor in driver.find_elements_by_css_selector("div.pagination a")
        ]
        # The last pagination anchor is skipped here and in the slice below.
        # NOTE(review): presumably it is the "next page" arrow - confirm.
        print(f"Available Pages to scrape are: {len(page_links[:-1])}")

        # Page 1 is already loaded, so it is represented by None (no
        # navigation needed); the remaining pages come from the pagination.
        for page, link in enumerate([None] + page_links[1:-1], start=1):
            print(f"\nScraping Page {page}")
            if link is not None:
                driver.get(link)

            finished = _collect_problem_links(driver, pblms_links, no_of_questions)
            print(f"URLs of Question Scraped till now: {len(pblms_links)}")

            if finished:
                print(f"\nURLs Scrapped Successfully {len(pblms_links)} out of {no_of_questions}")
                return pblms_links
    finally:
        # Always shut the headless browser down, even on errors/early return.
        driver.quit()

    # scraped all the available questions but still the count is less
    print(f"\n{len(pblms_links)} out of {no_of_questions} URLs able to scrapped !!!")
    return pblms_links
|
||
|
||
def _pdf_filename(title):
    """Turn a problem title such as "A. Watermelon" into a safe PDF file name."""
    # Drop the leading problem index. Splitting on the first ". " also copes
    # with multi-character indices ("C2. ..."), unlike a fixed title[3:].
    _index, separator, name = title.partition(". ")
    if not separator:
        name = title
    # Replace characters that are not allowed in file names on common systems.
    name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in name).strip()
    return (name or "problem") + ".pdf"


def getproblem(URLs):
    """
    Save every Codeforces problem in `URLs` as a PDF file.

    For each URL the page is opened in headless Chrome, a screenshot of the
    element with class "ttypography" is written to image.png, and that image
    is placed on a single PDF page sized to fit it (using the fpdf library).
    The PDFs are written to ./Coderforces_Problem_Scrapper/problems_pdf.

    :param URLs: list of problem page URLs to capture
    :return: None; image.png is left behind for the caller to delete
    """
    path = 'image.png'

    # Creating a Target Output Folder
    target_folder = './Coderforces_Problem_Scrapper/problems_pdf'
    os.makedirs(target_folder, exist_ok=True)

    options = webdriver.ChromeOptions()
    # Headless = True for taking a scrolling snapshot
    options.headless = True
    driver = webdriver.Chrome(DRIVER_PATH, options=options)

    try:
        for file_counter, url in enumerate(URLs, start=1):
            driver.get(url)
            # Grow the window to the full page height so the whole statement
            # is rendered before the screenshot is taken.
            required_height = driver.execute_script(
                'return document.body.parentNode.scrollHeight')
            driver.set_window_size(1366, required_height)

            title = driver.find_element_by_class_name("title").text
            filename = _pdf_filename(title)

            # Taking SS of everything within the ttypography class
            driver.find_element_by_class_name('ttypography').screenshot(path)

            # Only the size is needed; close the file right away so that
            # image.png can be overwritten/removed later.
            with Image.open(path) as cover:
                WIDTH, HEIGHT = cover.size
            MARGIN = 10
            # The PDF page is the image size plus a margin on every side.
            pdf = FPDF(unit='pt', format=[WIDTH + 2 * MARGIN, HEIGHT + 2 * MARGIN])
            pdf.add_page()
            pdf.image(path, MARGIN, MARGIN)

            destination = os.path.join(target_folder, filename)
            pdf.output(destination, "F")
            print(f'File saved in your directory {destination} ({file_counter}/{len(URLs)}) !')
    finally:
        # Always shut the headless browser down, even when a page fails.
        driver.quit()
|
||
|
||
if __name__ == "__main__":
    # DRIVER_PATH is read as a module-level name by the scraping functions.
    DRIVER_PATH = input("Enter DRIVER PATH location: ")
    try:
        diff = select_difficulty()  # Accepting difficulty level from user
        problems_link = extracting_problem_links(diff)  # scraping the required no. of links
        getproblem(problems_link)  # saving the Questions in PDF file.
    finally:
        # The temporary screenshot only exists if at least one problem was
        # captured, so removing it unconditionally would raise.
        if os.path.exists('image.png'):
            os.remove('image.png')
35 changes: 35 additions & 0 deletions
Coderforces_Problem_Scrapper/README.md
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Save any number of Problem Statements you like from Codeforces as a PDF. | ||
|
||
This python script will let you download any number of Problem Statements from Codeforces and save them as a pdf file. The script uses Selenium Webdriver and fpdf library. Selenium is used with Chrome Webdriver, so having Chrome browser is a requirement. | ||
|
||
## Setting up: | ||
|
||
- Create a virtual environment and activate it. | ||
|
||
- Install the requirements | ||
|
||
```sh | ||
$ pip install -r requirements.txt | ||
``` | ||
|
||
## Running the script: | ||
|
||
```sh | ||
$ python Codeforces_problem_scrapper.py | ||
``` | ||
|
||
## Terminal Screenshot: | ||
|
||
 | ||
|
||
The program will ask you to enter: | ||
1. DRIVER PATH | ||
2. VALID Difficulty Range of PROBLEMS. | ||
3. Number of Questions to Scrape. | ||
|
||
## PDF Output: | ||
 | ||
 | ||
|
||
## Author | ||
[ Akhil Bhalerao ](https://github.com/iamakkkhil) |
3 changes: 3 additions & 0 deletions
Coderforces_Problem_Scrapper/requirements.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
pillow | ||
fpdf | ||
selenium |
40 changes: 40 additions & 0 deletions
Script to check Sentiment/Readme.md
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# <b>Sentiment Detector</b> | ||
|
||
[](https://forthebadge.com) | ||
|
||
## Sentiment Detector Functionalities : 🚀 | ||
|
||
- In the input field write the sentence whose sentiment is to be checked | ||
- On clicking on ```check sentiment``` button it displays the percentage of neutral, positive and negative sentiments | ||
- It also displays the overall sentiment | ||
|
||
## Sentiment Detector Instructions: 👨🏻💻 | ||
|
||
### Step 1: | ||
|
||
Open Terminal 💻 | ||
|
||
### Step 2: | ||
|
||
Navigate to the directory where the Python file is located 📂 | ||
|
||
### Step 3: | ||
|
||
Run the command: `python script.py` (or `python3 script.py`) 🧐 | ||
|
||
### Step 4: | ||
|
||
Sit back and Relax. Let the Script do the Job. ☕ | ||
|
||
## Requirements | ||
|
||
- vaderSentiment | ||
- tkinter | ||
|
||
## DEMO | ||
 | ||
|
||
## Author | ||
|
||
Amit Kumar Mishra | ||
|
92 changes: 92 additions & 0 deletions
Script to check Sentiment/script.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | ||
from tkinter import * | ||
|
||
def detect_sentiment():
    """
    Analyse the text typed into `textArea` and show the result in the GUI.

    The negative, neutral and positive scores are shown as percentages in
    their entry fields, and the overall verdict (Positive / Negative /
    Neutral) is derived from the compound score using +/-0.05 as cut-offs.
    Every field is cleared before writing, so pressing the button repeatedly
    replaces the previous result instead of appending to it.
    """
    # "end-1c" leaves out the newline the Text widget always appends.
    sentence = textArea.get("1.0", "end-1c")
    sentiment_dict = SentimentIntensityAnalyzer().polarity_scores(sentence)

    if sentiment_dict['compound'] >= 0.05:
        overall_rating = "Positive"
    elif sentiment_dict['compound'] <= -0.05:
        overall_rating = "Negative"
    else:
        overall_rating = "Neutral"

    # One decimal place avoids float artefacts such as 29.400000000000002%.
    results = (
        (negativeField, f"{sentiment_dict['neg'] * 100:.1f}% Negative"),
        (neutralField, f"{sentiment_dict['neu'] * 100:.1f}% Neutral"),
        (positiveField, f"{sentiment_dict['pos'] * 100:.1f}% Positive"),
        (overallField, overall_rating),
    )
    for field, text in results:
        field.delete(0, END)
        field.insert(0, text)
|
||
def clearAll():
    """Empty the four result entry fields and the sentence text area."""
    for result_field in (negativeField, neutralField, positiveField, overallField):
        result_field.delete(0, END)

    textArea.delete(1.0, END)
|
||
|
||
# ---- Main window ----
gui = Tk()
gui.config(background="light blue")
gui.title("Sentiment Detector")
gui.geometry("500x500")

# ---- Widgets ----
enterText = Label(gui, text="Enter Your Sentence", bg="light blue")
textArea = Text(gui, height=10, width=53, font="lucida 13")
check = Button(gui, text="Check Sentiment", fg="Black", bg="light yellow", command=detect_sentiment)

negative = Label(gui, text="sentence was rated as: ", bg="light blue")
neutral = Label(gui, text="sentence was rated as: ", bg="light blue")
positive = Label(gui, text="sentence was rated as: ", bg="light blue")
overall = Label(gui, text="Sentence Overall Rated As: ", bg="light blue")

negativeField = Entry(gui)
neutralField = Entry(gui)
positiveField = Entry(gui)
overallField = Entry(gui)

clear = Button(gui, text="Clear", fg="Black", bg="light yellow", command=clearAll)
# gui.destroy closes the window and lets mainloop() return; the `exit` helper
# is meant for the interactive interpreter and is not always available.
Exit = Button(gui, text="Exit", fg="Black", bg="light yellow", command=gui.destroy)

# ---- Layout: everything is stacked in column 2, one widget per row ----
enterText.grid(row=0, column=2)
textArea.grid(row=1, column=2, padx=10, sticky=W)
check.grid(row=2, column=2)
neutral.grid(row=3, column=2)
neutralField.grid(row=4, column=2)
positive.grid(row=5, column=2)
positiveField.grid(row=6, column=2)
negative.grid(row=7, column=2)
negativeField.grid(row=8, column=2)
overall.grid(row=9, column=2)
overallField.grid(row=10, column=2)
clear.grid(row=11, column=2)
Exit.grid(row=12, column=2)

gui.mainloop()
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.