Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit beedbf2

Browse files
committed
Updated file for leetcode scrapping
1 parent 068d694 commit beedbf2

File tree

2 files changed

+32
-39
lines changed

2 files changed

+32
-39
lines changed

‎LeetCode-Scrapper/README.md‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# LeetCode Scraper
2-
This python script will let the user to scrape 'n' number of LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all) ,as provided by the user. The functionality of the script is to gain the information regarding particular codechef problem in different PDFs.
2+
This python script will let the user to scrape 'n' number of LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all), as provided by the user. The functionality of the script is to gain the information regarding particular leetcode problem in different PDFs.
33

44
## Prerequisites:
55
Download the required packages from the following command in you terminal.(Make sure you're in the same project directory)
@@ -10,7 +10,7 @@ To run this script, you need to have selenium installed and configure webdriver
1010

1111
` driver = webdriver.Chrome('/path/to/chromedriver') `
1212

13-
## To Run the script:
13+
## Running the script:
1414
After installing all the requirements,run this command in your terminal.
1515

1616
` python3 ques.py `

‎LeetCode-Scrapper/ques.py‎

Lines changed: 30 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,53 @@
11
from selenium import webdriver
2-
import os
3-
options = webdriver.ChromeOptions()
4-
options.add_argument("--headless")
52
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
63
from selenium.webdriver.support.ui import WebDriverWait
74
from selenium.webdriver.support import expected_conditions as EC
85
from selenium.webdriver.common.by import By
96
from selenium.common.exceptions import NoSuchElementException
107
from selenium.common.exceptions import TimeoutException
8+
import os
119
from fpdf import FPDF
1210

11+
options = webdriver.ChromeOptions()
12+
options.add_argument("--headless")
13+
1314

1415
capa = DesiredCapabilities.CHROME
1516
capa["pageLoadStrategy"] = "none"
1617

17-
driver = webdriver.Chrome("C:/Softwares/chromedriver_win32/chromedriver")
18-
#driver = webdriver.Chrome(desired_capabilities=capa,options=options)
18+
print("Enter Chrome Driver path")
19+
inp = input()
20+
driver = webdriver.Chrome(inp)
21+
#the base url of leetcode problem set page
1922
baseurl="https://leetcode.com/problemset/all"
2023
wait = WebDriverWait(driver, 15)
2124

22-
# map to get url from its problem difficulty
25+
#the difficulty level of all the problems
2326
problem_difficulty = {"Easy": "?difficulty=Easy", "Medium": "?difficulty=Medium", "Hard": "?difficulty=hard"}
2427

25-
# get_problems returns the name and links of the problems
26-
def get_problems(category, no_of_problems):
28+
def get_problem(category, no_of_problems):
2729

28-
# A map to store problem name and problem url
29-
problem_info = {}
30+
prblm_info = {}
3031
try:
32+
#checking if there is no network or any other issue
3133
driver.get(baseurl + '/' + category)
32-
# wait till the first element is loaded
3334
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[1]")))
3435
except TimeoutException as exception:
3536
print("Couldn't fetch problem. Network issue or page slow to render. Try again")
3637
os._exit(-1)
37-
38-
3938

4039
for problem_index in range(1, no_of_problems + 1):
40+
#set problem name
4141
problem_name = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]".format(problem_index)).text
42+
#set problem url
4243
problem_url = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]/div/a".format(problem_index)).get_attribute('href')
4344
print(problem_name," ",problem_url)
44-
problem_info[problem_name] = problem_url
45-
return problem_info
45+
prblm_info[problem_name] = problem_url
46+
return prblm_info
4647

47-
# get_problem_desciption returns content of the problem
48-
def get_problem_description(problem_url,problem_name):
48+
def get_description(problem_url,problem_name):
4949
try:
50+
#check if the element is found, and located in the correct format
5051
driver.get(problem_url)
5152
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]")))
5253
problem_title= problem_name
@@ -59,60 +60,52 @@ def get_problem_description(problem_url,problem_name):
5960
problem_test_cases+="\nOutput\n"
6061
problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text
6162

62-
63-
else:
64-
65-
63+
else:
6664
driver.execute_script("window.stop();")
6765
problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases,'url':problem_url}
6866
return problem
6967

70-
#Handling exceptions
7168
except NoSuchElementException as e:
7269
print("Couldn't scrap the element, Unable to locate it")
7370
problem=None
7471
except TimeoutException as exception:
7572
print("Couldn't scrap the element, Unable to locate it")
7673
problem=None
7774

78-
79-
80-
81-
82-
#storing the information in the pdf
83-
def convert_to_pdf(problem):
75+
def to_pdf(problem):
8476
pdf = FPDF()
8577
pdf.add_page()
8678
pdf.set_font("Arial", size = 15)
87-
# Replace character that aren't in latin-1 character set
79+
#set title
8880
title=problem["title"].encode('latin-1', 'replace').decode('latin-1')
81+
#set statement
8982
statement=problem["statement"].encode('latin-1', 'replace').decode('latin-1')
83+
#set test cases
9084
test_case=problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
85+
#set url
9186
url=problem["url"]
92-
# add sections to pdf
9387
pdf.cell(200, 10, txt =title, ln = 1, align = 'C')
9488
pdf.multi_cell(200, 10, txt =statement, align = 'L')
9589
pdf.multi_cell(200, 10, txt =test_case, align = 'L')
9690
pdf.write(5, 'Problem_Link: ')
9791
pdf.write(5,url,url)
98-
9992
title = title.rstrip()
10093
pdf.output(title+".pdf")
10194

10295

103-
#main function
10496
def main():
105-
category=input("Enter the difficulty level from the following \n Easy \n Medium \n Hard \n\n")
106-
no_of_problems=int(input("\nEnter the number of problems to be scrapped: \n"))
107-
info = get_problems(problem_difficulty[category],no_of_problems)
97+
category=input("Choose difficulty level from \n Easy \n Medium \n Hard \n\n : ")
98+
no_of_problems=int(input("Enter the number of problems to be scrapped : "))
99+
info = get_problem(problem_difficulty[category],no_of_problems)
108100
for name, url in info.items():
109-
problem=get_problem_description(url,name)
101+
problem=get_description(url,name)
110102
if(problem is not None ):
111-
convert_to_pdf(problem)
103+
to_pdf(problem)
112104
else:
113105
pass
114106

115107
if __name__ == '__main__':
116108
main()
117109

110+
#Close the driver
118111
driver.close()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /