Commit 2282255

author

smriti raina

committed

Updated the script to add problem links

1 parent f1cae34 commit 2282255 (Copy full SHA for 2282255)

File tree

1 file changed

+33

-11

lines changed

Codechef Scrapper
- codechef.py

1 file changed

+33

-11

lines changed

`‎Codechef Scrapper/codechef.py‎`

Lines changed: 33 additions & 11 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,11 +1,13 @@`
`1`	`1`	`from selenium import webdriver`
	`2`	`+import os`
`2`	`3`	`options = webdriver.ChromeOptions()`
`3`	`4`	`options.add_argument("--headless")`
`4`	`5`	`from selenium.webdriver.common.desired_capabilities import DesiredCapabilities`
`5`	`6`	`from selenium.webdriver.support.ui import WebDriverWait`
`6`	`7`	`from selenium.webdriver.support import expected_conditions as EC`
`7`	`8`	`from selenium.webdriver.common.by import By`
`8`	`9`	`from selenium.common.exceptions import NoSuchElementException`
	`10`	`+from selenium.common.exceptions import TimeoutException`
`9`	`11`	`from fpdf import FPDF`
`10`	`12`
`11`	`13`
`@@ -14,7 +16,7 @@`
`14`	`16`
`15`	`17`	`driver = webdriver.Chrome(desired_capabilities=capa,options=options)`
`16`	`18`	`baseurl="https://www.codechef.com/problems"`
`17`		`-wait = WebDriverWait(driver, 20)`
	`19`	`+wait = WebDriverWait(driver, 15)`
`18`	`20`
`19`	`21`	`# map to get url from its problem difficulty`
`20`	`22`	`problem_difficulty = {"Beginner": "school", "Easy": "easy", "Medium": "medium", "Hard": "hard", "Challenge": "challenge"}`
`@@ -24,9 +26,15 @@ def get_problems(category, no_of_problems):`
`24`	`26`
`25`	`27`	`# A map to store problem name and problem url`
`26`	`28`	`problem_info = {}`
`27`		`- driver.get(baseurl + '/' + category)`
`28`		`- # wait till the first element is loaded`
`29`		`- wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[1]/td[1]/div/a/b")))`
	`29`	`+ try:`
	`30`	`+ driver.get(baseurl + '/' + category)`
	`31`	`+ # wait till the first element is loaded`
	`32`	`+ wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[1]/td[1]/div/a/b")))`
	`33`	`+ except TimeoutException as exception:`
	`34`	`+ print("Couldn't fetch problem. Network issue or page slow to render. Try again")`
	`35`	`+ os._exit(-1)`
	`36`	`+`
	`37`	`+`
`30`	`38`
`31`	`39`	`for problem_index in range(1, no_of_problems + 1):`
`32`	`40`	`problem_name = driver.find_element_by_xpath("//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[{}]/td[1]/div/a/b".format(problem_index)).text`
`@@ -55,13 +63,17 @@ def get_problem_description(problem_url,problem_name):`
`55`	`63`
`56`	`64`
`57`	`65`	`driver.execute_script("window.stop();")`
`58`		`- problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases}`
	`66`	`+ problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases,'url':problem_url}`
`59`	`67`	`return problem`
`60`	`68`
`61`	`69`	`#Handling exceptions`
`62`	`70`	`except NoSuchElementException as e:`
`63`		`- print("Couldn't scrap the element, Unable to locate it",e)`
	`71`	`+ print("Couldn't scrap the element, Unable to locate it")`
	`72`	`+ problem=None`
	`73`	`+ except TimeoutException as exception:`
	`74`	`+ print("Couldn't scrap the element, Unable to locate it")`
`64`	`75`	`problem=None`
	`76`	`+`
`65`	`77`
`66`	`78`
`67`	`79`
`@@ -71,10 +83,20 @@ def convert_to_pdf(problem):`
`71`	`83`	`pdf = FPDF()`
`72`	`84`	`pdf.add_page()`
`73`	`85`	`pdf.set_font("Arial", size = 15)`
`74`		`- pdf.cell(200, 10, txt = problem["title"], ln = 1, align = 'C')`
`75`		`- pdf.multi_cell(200, 10, txt =problem["statement"], align = 'L')`
`76`		`- pdf.multi_cell(200, 10, txt =problem["test_case"], align = 'L')`
`77`		`- pdf.output(problem["title"]+".pdf")`
	`86`	`+ # Replace characters that aren't in the latin-1 character set`
	`87`	`+ title=problem["title"].encode('latin-1', 'replace').decode('latin-1')`
	`88`	`+ statement=problem["statement"].encode('latin-1', 'replace').decode('latin-1')`
	`89`	`+ test_case=problem["test_case"].encode('latin-1', 'replace').decode('latin-1')`
	`90`	`+ url=problem["url"]`
	`91`	`+ # add sections to pdf`
	`92`	`+ pdf.cell(200, 10, txt =title, ln = 1, align = 'C')`
	`93`	`+ pdf.multi_cell(200, 10, txt =statement, align = 'L')`
	`94`	`+ pdf.multi_cell(200, 10, txt =test_case, align = 'L')`
	`95`	`+ pdf.write(5, 'Problem_Link: ')`
	`96`	`+ pdf.write(5,url,url)`
	`97`	`+`
	`98`	`+`
	`99`	`+ pdf.output(title+".pdf")`
`78`	`100`
`79`	`101`
`80`	`102`	`#main function`
`@@ -92,4 +114,4 @@ def main():`
`92`	`114`	`if __name__ == '__main__':`
`93`	`115`	`main()`
`94`	`116`
`95`		`-driver.close()`
	`117`	`+driver.close()`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 2282255

File tree

1 file changed

1 file changed

`‎Codechef Scrapper/codechef.py‎`

0 commit comments