Commit 1b51db4

Merge pull request avinashkranjan#939 from AshuKV/LeetcodeScrapper/AshuKV
Leetcode Scrapper
2 parents 389b0a2 + c614a71 commit 1b51db4

File tree

3 files changed: +132 −0 lines changed

LeetCode-Scrapper/README.md

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
# LeetCode Scraper

This Python script lets the user scrape a chosen number ('n') of LeetCode problems from any category/difficulty on [LeetCode](https://leetcode.com/problemset/all). For each scraped problem, the script gathers the problem's details and writes them to a separate PDF.

## Prerequisites:

Install the required packages with the following command in your terminal (make sure you are in the project directory):

` pip3 install -r requirements.txt `

To run this script, you need Selenium installed and a ChromeDriver configured for the Chrome browser on your $PATH. You can download ChromeDriver directly from https://chromedriver.chromium.org/downloads. Then enter the ChromeDriver path when the script prompts for it.
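
To check the driver setup before running the scraper, a minimal sketch like the one below (the driver path is an assumption; substitute your own) should print the page title of the problem set and exit cleanly:

```python
# Minimal ChromeDriver smoke test; the path below is illustrative.
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--headless")

driver = webdriver.Chrome("/usr/local/bin/chromedriver", options=options)
driver.get("https://leetcode.com/problemset/all")
print(driver.title)  # should mention LeetCode if the driver works
driver.quit()
```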

## Running the script:

After installing all the requirements, run this command in your terminal:

` python3 ques.py `

## Output:

The script generates one PDF per scraped problem in the same folder. Each PDF stores the problem's information: the problem title, problem statement, test cases, and the problem link.
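
A run might look like the following sketch (the driver path and the two problem rows are illustrative; the actual problems depend on LeetCode's current ordering):

```text
$ python3 ques.py
Enter Chrome Driver path:
/usr/local/bin/chromedriver
Choose difficulty level from
 Easy
 Medium
 Hard

 : Easy
Enter the number of problems to be scraped : 2
Two Sum   https://leetcode.com/problems/two-sum/
Palindrome Number   https://leetcode.com/problems/palindrome-number/
```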

LeetCode-Scrapper/ques.py

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
import os
from fpdf import FPDF

# Run Chrome headless so no browser window opens
options = webdriver.ChromeOptions()
options.add_argument("--headless")

# Don't block on full page loads; the explicit waits below take over
capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"

print("Enter Chrome Driver path: ")
input_driver_path = input()
driver = webdriver.Chrome(input_driver_path, options=options, desired_capabilities=capa)

# The base URL of the LeetCode problem-set page
baseurl = "https://leetcode.com/problemset/all"
wait = WebDriverWait(driver, 15)

# Query strings for each difficulty level of the problems
problem_difficulty = {"Easy": "?difficulty=Easy", "Medium": "?difficulty=Medium", "Hard": "?difficulty=Hard"}


def get_problem(category, no_of_problems):
    # Collect the name and URL of the first no_of_problems problems
    prblm_info = {}
    try:
        # Abort if there is a network issue or the page is slow to render
        driver.get(baseurl + '/' + category)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[1]")))
    except TimeoutException:
        print("Couldn't fetch problem. Network issue or page slow to render. Try again")
        os._exit(-1)

    for problem_index in range(1, no_of_problems + 1):
        # Problem name from the table row
        problem_name = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]".format(problem_index)).text
        # Problem URL from the link in the same cell
        problem_url = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]/div/a".format(problem_index)).get_attribute('href')
        print(problem_name, " ", problem_url)
        prblm_info[problem_name] = problem_url
    return prblm_info


def get_description(problem_url, problem_name):
    try:
        # Wait until the problem statement has rendered in the expected format
        driver.get(problem_url)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]")))
        problem_title = problem_name
        problem_statement = driver.find_element_by_xpath("//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]").text
        problem_test_cases = driver.find_element_by_xpath("//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/pre[1]").text

        if problem_test_cases.find("Output") == -1:
            # The first <pre> holds only the input, so fetch the output block separately
            problem_test_cases = "Input\n" + problem_test_cases
            problem_test_cases += "\nOutput\n"
            problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text
        else:
            # Everything needed has rendered, so stop loading the rest of the page
            driver.execute_script("window.stop();")
        problem = {'title': problem_title, 'statement': problem_statement, 'test_case': problem_test_cases, 'url': problem_url}
        return problem

    except NoSuchElementException:
        print("Couldn't scrape the element, unable to locate it")
        return None
    except TimeoutException:
        print("Couldn't scrape the element, unable to locate it")
        return None


def to_pdf(problem):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=15)
    # FPDF's built-in fonts are Latin-1 only, so replace unsupported characters
    title = problem["title"].encode('latin-1', 'replace').decode('latin-1')
    statement = problem["statement"].encode('latin-1', 'replace').decode('latin-1')
    test_case = problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
    url = problem["url"]
    pdf.cell(200, 10, txt=title, ln=1, align='C')
    pdf.multi_cell(200, 10, txt=statement, align='L')
    pdf.multi_cell(200, 10, txt=test_case, align='L')
    pdf.write(5, 'Problem_Link: ')
    pdf.write(5, url, url)  # the third argument turns the text into a clickable link
    title = title.rstrip()
    pdf.output("./LeetCode-Scrapper/" + title + ".pdf")


def main():
    category = input("Choose difficulty level from \n Easy \n Medium \n Hard \n\n : ")
    no_of_problems = int(input("Enter the number of problems to be scraped : "))
    info = get_problem(problem_difficulty[category], no_of_problems)
    for name, url in info.items():
        problem = get_description(url, name)
        if problem is not None:
            to_pdf(problem)


if __name__ == '__main__':
    main()
    # Close the browser once scraping is done
    driver.close()
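
Because `ques.py` guards `main()` with `__name__`, its functions can also be reused from another script. The sketch below assumes `ques.py` is on the import path and the `LeetCode-Scrapper` output folder exists relative to the working directory; note that importing `ques` still prompts for the ChromeDriver path, since the driver is created at module level:

```python
# Hypothetical batch driver reusing the functions defined in ques.py.
# Importing ques prompts for the ChromeDriver path at import time.
from ques import get_problem, get_description, to_pdf, problem_difficulty, driver

info = get_problem(problem_difficulty["Easy"], 3)  # first three Easy problems
for name, url in info.items():
    problem = get_description(url, name)
    if problem is not None:
        to_pdf(problem)

driver.close()  # shut the browser down when finished
```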

LeetCode-Scrapper/requirements.txt

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
fpdf==1.7.2
requests==2.24.0
selenium==3.141.0
urllib3==1.25.11
