Commit 1b51db4

Merge pull request avinashkranjan#939 from AshuKV/LeetcodeScrapper/AshuKV
Leetcode Scrapper
2 parents 389b0a2 + c614a71 commit 1b51db4

File tree

3 files changed: +132 −0 lines changed

LeetCode-Scrapper/README.md

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
# LeetCode Scraper

This Python script lets the user scrape a chosen number ('n') of LeetCode problems from any category/difficulty on [LeetCode](https://leetcode.com/problemset/all). For each scraped problem, the script gathers the problem's details and writes them to a separate PDF.

## Prerequisites:

Install the required packages with the following command in your terminal (make sure you are in the project directory):

` pip3 install -r requirements.txt `

To run this script, you need Selenium installed and a ChromeDriver configured for the Chrome browser on your $PATH. You can download ChromeDriver directly from https://chromedriver.chromium.org/downloads. Then enter the ChromeDriver path when the script prompts for it.
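
To check the driver setup before running the scraper, a minimal sketch like the one below (the driver path is an assumption; substitute your own) should print the page title of the problem set and exit cleanly:

```python
# Minimal ChromeDriver smoke test; the path below is illustrative.
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--headless")

driver = webdriver.Chrome("/usr/local/bin/chromedriver", options=options)
driver.get("https://leetcode.com/problemset/all")
print(driver.title)  # should mention LeetCode if the driver works
driver.quit()
```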

## Running the script:

After installing all the requirements, run this command in your terminal:

` python3 ques.py `

## Output:

The script generates one PDF per scraped problem in the same folder. Each PDF stores the problem's information: the problem title, problem statement, test cases, and the problem link.
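
A run might look like the following sketch (the driver path and the two problem rows are illustrative; the actual problems depend on LeetCode's current ordering):

```text
$ python3 ques.py
Enter Chrome Driver path:
/usr/local/bin/chromedriver
Choose difficulty level from
 Easy
 Medium
 Hard

 : Easy
Enter the number of problems to be scraped : 2
Two Sum   https://leetcode.com/problems/two-sum/
Palindrome Number   https://leetcode.com/problems/palindrome-number/
```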

LeetCode-Scrapper/ques.py

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
import os
from fpdf import FPDF

# Run Chrome headless so no browser window opens
options = webdriver.ChromeOptions()
options.add_argument("--headless")

# Don't block on full page loads; the explicit waits below take over
capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"

print("Enter Chrome Driver path: ")
input_driver_path = input()
driver = webdriver.Chrome(input_driver_path, options=options, desired_capabilities=capa)

# The base URL of the LeetCode problem-set page
baseurl = "https://leetcode.com/problemset/all"
wait = WebDriverWait(driver, 15)

# Query strings for each difficulty level of the problems
problem_difficulty = {"Easy": "?difficulty=Easy", "Medium": "?difficulty=Medium", "Hard": "?difficulty=Hard"}


def get_problem(category, no_of_problems):
    # Collect the name and URL of the first no_of_problems problems
    prblm_info = {}
    try:
        # Abort if there is a network issue or the page is slow to render
        driver.get(baseurl + '/' + category)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[1]")))
    except TimeoutException:
        print("Couldn't fetch problem. Network issue or page slow to render. Try again")
        os._exit(-1)

    for problem_index in range(1, no_of_problems + 1):
        # Problem name from the table row
        problem_name = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]".format(problem_index)).text
        # Problem URL from the link in the same cell
        problem_url = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]/div/a".format(problem_index)).get_attribute('href')
        print(problem_name, " ", problem_url)
        prblm_info[problem_name] = problem_url
    return prblm_info


def get_description(problem_url, problem_name):
    try:
        # Wait until the problem statement has rendered in the expected format
        driver.get(problem_url)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]")))
        problem_title = problem_name
        problem_statement = driver.find_element_by_xpath("//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]").text
        problem_test_cases = driver.find_element_by_xpath("//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/pre[1]").text

        if problem_test_cases.find("Output") == -1:
            # The first <pre> holds only the input, so fetch the output block separately
            problem_test_cases = "Input\n" + problem_test_cases
            problem_test_cases += "\nOutput\n"
            problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text
        else:
            # Everything needed has rendered, so stop loading the rest of the page
            driver.execute_script("window.stop();")
        problem = {'title': problem_title, 'statement': problem_statement, 'test_case': problem_test_cases, 'url': problem_url}
        return problem

    except NoSuchElementException:
        print("Couldn't scrape the element, unable to locate it")
        return None
    except TimeoutException:
        print("Couldn't scrape the element, unable to locate it")
        return None


def to_pdf(problem):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=15)
    # FPDF's built-in fonts are Latin-1 only, so replace unsupported characters
    title = problem["title"].encode('latin-1', 'replace').decode('latin-1')
    statement = problem["statement"].encode('latin-1', 'replace').decode('latin-1')
    test_case = problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
    url = problem["url"]
    pdf.cell(200, 10, txt=title, ln=1, align='C')
    pdf.multi_cell(200, 10, txt=statement, align='L')
    pdf.multi_cell(200, 10, txt=test_case, align='L')
    pdf.write(5, 'Problem_Link: ')
    pdf.write(5, url, url)  # the third argument turns the text into a clickable link
    title = title.rstrip()
    pdf.output("./LeetCode-Scrapper/" + title + ".pdf")


def main():
    category = input("Choose difficulty level from \n Easy \n Medium \n Hard \n\n : ")
    no_of_problems = int(input("Enter the number of problems to be scraped : "))
    info = get_problem(problem_difficulty[category], no_of_problems)
    for name, url in info.items():
        problem = get_description(url, name)
        if problem is not None:
            to_pdf(problem)


if __name__ == '__main__':
    main()
    # Close the browser once scraping is done
    driver.close()
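
Because `ques.py` guards `main()` with `__name__`, its functions can also be reused from another script. The sketch below assumes `ques.py` is on the import path and the `LeetCode-Scrapper` output folder exists relative to the working directory; note that importing `ques` still prompts for the ChromeDriver path, since the driver is created at module level:

```python
# Hypothetical batch driver reusing the functions defined in ques.py.
# Importing ques prompts for the ChromeDriver path at import time.
from ques import get_problem, get_description, to_pdf, problem_difficulty, driver

info = get_problem(problem_difficulty["Easy"], 3)  # first three Easy problems
for name, url in info.items():
    problem = get_description(url, name)
    if problem is not None:
        to_pdf(problem)

driver.close()  # shut the browser down when finished
```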

LeetCode-Scrapper/requirements.txt

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
fpdf==1.7.2
requests==2.24.0
selenium==3.141.0
urllib3==1.25.11
