Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2282255

Browse files
author
smriti raina
committed
Updated the script to add problem links
1 parent f1cae34 commit 2282255

File tree

1 file changed

+33
-11
lines changed

1 file changed

+33
-11
lines changed

‎Codechef Scrapper/codechef.py‎

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from selenium import webdriver
2+
import os
23
options = webdriver.ChromeOptions()
34
options.add_argument("--headless")
45
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
56
from selenium.webdriver.support.ui import WebDriverWait
67
from selenium.webdriver.support import expected_conditions as EC
78
from selenium.webdriver.common.by import By
89
from selenium.common.exceptions import NoSuchElementException
10+
from selenium.common.exceptions import TimeoutException
911
from fpdf import FPDF
1012

1113

@@ -14,7 +16,7 @@
1416

1517
driver = webdriver.Chrome(desired_capabilities=capa,options=options)
1618
baseurl="https://www.codechef.com/problems"
17-
wait = WebDriverWait(driver, 20)
19+
wait = WebDriverWait(driver, 15)
1820

1921
# map to get url from its problem difficulty
2022
problem_difficulty = {"Beginner": "school", "Easy": "easy", "Medium": "medium", "Hard": "hard", "Challenge": "challenge"}
@@ -24,9 +26,15 @@ def get_problems(category, no_of_problems):
2426

2527
# A map to store problem name and problem url
2628
problem_info = {}
27-
driver.get(baseurl + '/' + category)
28-
# wait till the first element is loaded
29-
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[1]/td[1]/div/a/b")))
29+
try:
30+
driver.get(baseurl + '/' + category)
31+
# wait till the first element is loaded
32+
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[1]/td[1]/div/a/b")))
33+
except TimeoutException as exception:
34+
print("Couldn't fetch problem. Network issue or page slow to render. Try again")
35+
os._exit(-1)
36+
37+
3038

3139
for problem_index in range(1, no_of_problems + 1):
3240
problem_name = driver.find_element_by_xpath("//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[{}]/td[1]/div/a/b".format(problem_index)).text
@@ -55,13 +63,17 @@ def get_problem_description(problem_url,problem_name):
5563

5664

5765
driver.execute_script("window.stop();")
58-
problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases}
66+
problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases,'url':problem_url}
5967
return problem
6068

6169
#Handling exceptions
6270
except NoSuchElementException as e:
63-
print("Couldn't scrap the element, Unable to locate it",e)
71+
print("Couldn't scrap the element, Unable to locate it")
72+
problem=None
73+
except TimeoutException as exception:
74+
print("Couldn't scrap the element, Unable to locate it")
6475
problem=None
76+
6577

6678

6779

@@ -71,10 +83,20 @@ def convert_to_pdf(problem):
7183
pdf = FPDF()
7284
pdf.add_page()
7385
pdf.set_font("Arial", size = 15)
74-
pdf.cell(200, 10, txt = problem["title"], ln = 1, align = 'C')
75-
pdf.multi_cell(200, 10, txt =problem["statement"], align = 'L')
76-
pdf.multi_cell(200, 10, txt =problem["test_case"], align = 'L')
77-
pdf.output(problem["title"]+".pdf")
86+
# Replace character that aren't in latin-1 character set
87+
title=problem["title"].encode('latin-1', 'replace').decode('latin-1')
88+
statement=problem["statement"].encode('latin-1', 'replace').decode('latin-1')
89+
test_case=problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
90+
url=problem["url"]
91+
# add sections to pdf
92+
pdf.cell(200, 10, txt =title, ln = 1, align = 'C')
93+
pdf.multi_cell(200, 10, txt =statement, align = 'L')
94+
pdf.multi_cell(200, 10, txt =test_case, align = 'L')
95+
pdf.write(5, 'Problem_Link: ')
96+
pdf.write(5,url,url)
97+
98+
99+
pdf.output(title+".pdf")
78100

79101

80102
#main function
@@ -92,4 +114,4 @@ def main():
92114
if __name__ == '__main__':
93115
main()
94116

95-
driver.close()
117+
driver.close()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /