Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit beedbf2

Browse files
committed
Updated file for leetcode scrapping
1 parent 068d694 commit beedbf2

File tree

2 files changed

+32
-39
lines changed

2 files changed

+32
-39
lines changed

‎LeetCode-Scrapper/README.md‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# LeetCode Scraper
2-
This python script will let the user to scrape 'n' number of LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all) ,as provided by the user. The functionality of the script is to gain the information regarding particular codechef problem in different PDFs.
2+
This python script will let the user to scrape 'n' number of LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all), as provided by the user. The functionality of the script is to gain the information regarding particular leetcode problem in different PDFs.
33

44
## Prerequisites:
55
Download the required packages from the following command in you terminal.(Make sure you're in the same project directory)
@@ -10,7 +10,7 @@ To run this script, you need to have selenium installed and configure webdriver
1010

1111
` driver = webdriver.Chrome('/path/to/chromedriver') `
1212

13-
## To Run the script:
13+
## Running the script:
1414
After installing all the requirements,run this command in your terminal.
1515

1616
` python3 ques.py `

‎LeetCode-Scrapper/ques.py‎

Lines changed: 30 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,53 @@
11
from selenium import webdriver
2-
import os
3-
options = webdriver.ChromeOptions()
4-
options.add_argument("--headless")
52
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
63
from selenium.webdriver.support.ui import WebDriverWait
74
from selenium.webdriver.support import expected_conditions as EC
85
from selenium.webdriver.common.by import By
96
from selenium.common.exceptions import NoSuchElementException
107
from selenium.common.exceptions import TimeoutException
8+
import os
119
from fpdf import FPDF
1210

11+
options = webdriver.ChromeOptions()
12+
options.add_argument("--headless")
13+
1314

1415
capa = DesiredCapabilities.CHROME
1516
capa["pageLoadStrategy"] = "none"
1617

17-
driver = webdriver.Chrome("C:/Softwares/chromedriver_win32/chromedriver")
18-
#driver = webdriver.Chrome(desired_capabilities=capa,options=options)
18+
print("Enter Chrome Driver path")
19+
inp = input()
20+
driver = webdriver.Chrome(inp)
21+
#the base url of leetcode problem set page
1922
baseurl="https://leetcode.com/problemset/all"
2023
wait = WebDriverWait(driver, 15)
2124

22-
# map to get url from its problem difficulty
25+
#the difficulty level of all the problems
2326
problem_difficulty = {"Easy": "?difficulty=Easy", "Medium": "?difficulty=Medium", "Hard": "?difficulty=hard"}
2427

25-
# get_problems returns the name and links of the problems
26-
def get_problems(category, no_of_problems):
28+
def get_problem(category, no_of_problems):
2729

28-
# A map to store problem name and problem url
29-
problem_info = {}
30+
prblm_info = {}
3031
try:
32+
#checking if there is no network or any other issue
3133
driver.get(baseurl + '/' + category)
32-
# wait till the first element is loaded
3334
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[1]")))
3435
except TimeoutException as exception:
3536
print("Couldn't fetch problem. Network issue or page slow to render. Try again")
3637
os._exit(-1)
37-
38-
3938

4039
for problem_index in range(1, no_of_problems + 1):
40+
#set problem name
4141
problem_name = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]".format(problem_index)).text
42+
#set problem url
4243
problem_url = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]/div/a".format(problem_index)).get_attribute('href')
4344
print(problem_name," ",problem_url)
44-
problem_info[problem_name] = problem_url
45-
return problem_info
45+
prblm_info[problem_name] = problem_url
46+
return prblm_info
4647

47-
# get_problem_desciption returns content of the problem
48-
def get_problem_description(problem_url,problem_name):
48+
def get_description(problem_url,problem_name):
4949
try:
50+
#check if the element is found, and located in the correct format
5051
driver.get(problem_url)
5152
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]")))
5253
problem_title= problem_name
@@ -59,60 +60,52 @@ def get_problem_description(problem_url,problem_name):
5960
problem_test_cases+="\nOutput\n"
6061
problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text
6162

62-
63-
else:
64-
65-
63+
else:
6664
driver.execute_script("window.stop();")
6765
problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases,'url':problem_url}
6866
return problem
6967

70-
#Handling exceptions
7168
except NoSuchElementException as e:
7269
print("Couldn't scrap the element, Unable to locate it")
7370
problem=None
7471
except TimeoutException as exception:
7572
print("Couldn't scrap the element, Unable to locate it")
7673
problem=None
7774

78-
79-
80-
81-
82-
#storing the information in the pdf
83-
def convert_to_pdf(problem):
75+
def to_pdf(problem):
8476
pdf = FPDF()
8577
pdf.add_page()
8678
pdf.set_font("Arial", size = 15)
87-
# Replace character that aren't in latin-1 character set
79+
#set title
8880
title=problem["title"].encode('latin-1', 'replace').decode('latin-1')
81+
#set statement
8982
statement=problem["statement"].encode('latin-1', 'replace').decode('latin-1')
83+
#set test cases
9084
test_case=problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
85+
#set url
9186
url=problem["url"]
92-
# add sections to pdf
9387
pdf.cell(200, 10, txt =title, ln = 1, align = 'C')
9488
pdf.multi_cell(200, 10, txt =statement, align = 'L')
9589
pdf.multi_cell(200, 10, txt =test_case, align = 'L')
9690
pdf.write(5, 'Problem_Link: ')
9791
pdf.write(5,url,url)
98-
9992
title = title.rstrip()
10093
pdf.output(title+".pdf")
10194

10295

103-
#main function
10496
def main():
105-
category=input("Enter the difficulty level from the following \n Easy \n Medium \n Hard \n\n")
106-
no_of_problems=int(input("\nEnter the number of problems to be scrapped: \n"))
107-
info = get_problems(problem_difficulty[category],no_of_problems)
97+
category=input("Choose difficulty level from \n Easy \n Medium \n Hard \n\n : ")
98+
no_of_problems=int(input("Enter the number of problems to be scrapped : "))
99+
info = get_problem(problem_difficulty[category],no_of_problems)
108100
for name, url in info.items():
109-
problem=get_problem_description(url,name)
101+
problem=get_description(url,name)
110102
if(problem is not None ):
111-
convert_to_pdf(problem)
103+
to_pdf(problem)
112104
else:
113105
pass
114106

115107
if __name__ == '__main__':
116108
main()
117109

110+
#Close the driver
118111
driver.close()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /