1
1
# Scraper setup: headless Chrome driver, explicit wait, and the difficulty
# map used to build CodeChef problem-listing URLs.
from selenium import webdriver
import os

from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from fpdf import FPDF

# Run Chrome headless so no browser window is opened while scraping.
options = webdriver.ChromeOptions()
options.add_argument("--headless")

# NOTE(review): the capability lines were not visible in this chunk; a
# "none" pageLoadStrategy is assumed because the scraper later calls
# driver.execute_script("window.stop();") to halt page loads manually —
# confirm against the full file.
capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"

# NOTE(review): the `desired_capabilities` constructor argument is removed
# in Selenium >= 4.10; if the pinned selenium version is upgraded, migrate
# to options.set_capability("pageLoadStrategy", "none").
driver = webdriver.Chrome(desired_capabilities=capa, options=options)

# Landing page that lists CodeChef problems; a difficulty segment is appended.
baseurl = "https://www.codechef.com/problems"

# Explicit wait (seconds) used before scraping dynamically rendered elements.
wait = WebDriverWait(driver, 15)


# Map from the user-facing difficulty name to the URL path segment
# CodeChef uses for that category.
problem_difficulty = {"Beginner": "school", "Easy": "easy", "Medium": "medium",
                      "Hard": "hard", "Challenge": "challenge"}
@@ -24,9 +26,15 @@ def get_problems(category, no_of_problems):
24
26
25
27
# A map to store problem name and problem url
26
28
problem_info = {}
27
- driver .get (baseurl + '/' + category )
28
- # wait till the first element is loaded
29
- wait .until (EC .element_to_be_clickable ((By .XPATH , "//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[1]/td[1]/div/a/b" )))
29
+ try :
30
+ driver .get (baseurl + '/' + category )
31
+ # wait till the first element is loaded
32
+ wait .until (EC .element_to_be_clickable ((By .XPATH , "//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[1]/td[1]/div/a/b" )))
33
+ except TimeoutException as exception :
34
+ print ("Couldn't fetch problem. Network issue or page slow to render. Try again" )
35
+ os ._exit (- 1 )
36
+
37
+
30
38
31
39
for problem_index in range (1 , no_of_problems + 1 ):
32
40
problem_name = driver .find_element_by_xpath ("//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[{}]/td[1]/div/a/b" .format (problem_index )).text
@@ -55,13 +63,17 @@ def get_problem_description(problem_url,problem_name):
55
63
56
64
57
65
driver .execute_script ("window.stop();" )
58
- problem = {'title' :problem_title ,'statement' :problem_statement ,'test_case' :problem_test_cases }
66
+ problem = {'title' :problem_title ,'statement' :problem_statement ,'test_case' :problem_test_cases , 'url' : problem_url }
59
67
return problem
60
68
61
69
#Handling exceptions
62
70
except NoSuchElementException as e :
63
- print ("Couldn't scrap the element, Unable to locate it" ,e )
71
+ print ("Couldn't scrap the element, Unable to locate it" )
72
+ problem = None
73
+ except TimeoutException as exception :
74
+ print ("Couldn't scrap the element, Unable to locate it" )
64
75
problem = None
76
+
65
77
66
78
67
79
@@ -71,10 +83,20 @@ def convert_to_pdf(problem):
71
83
pdf = FPDF ()
72
84
pdf .add_page ()
73
85
pdf .set_font ("Arial" , size = 15 )
74
- pdf .cell (200 , 10 , txt = problem ["title" ], ln = 1 , align = 'C' )
75
- pdf .multi_cell (200 , 10 , txt = problem ["statement" ], align = 'L' )
76
- pdf .multi_cell (200 , 10 , txt = problem ["test_case" ], align = 'L' )
77
- pdf .output (problem ["title" ]+ ".pdf" )
86
+ # Replace character that aren't in latin-1 character set
87
+ title = problem ["title" ].encode ('latin-1' , 'replace' ).decode ('latin-1' )
88
+ statement = problem ["statement" ].encode ('latin-1' , 'replace' ).decode ('latin-1' )
89
+ test_case = problem ["test_case" ].encode ('latin-1' , 'replace' ).decode ('latin-1' )
90
+ url = problem ["url" ]
91
+ # add sections to pdf
92
+ pdf .cell (200 , 10 , txt = title , ln = 1 , align = 'C' )
93
+ pdf .multi_cell (200 , 10 , txt = statement , align = 'L' )
94
+ pdf .multi_cell (200 , 10 , txt = test_case , align = 'L' )
95
+ pdf .write (5 , 'Problem_Link: ' )
96
+ pdf .write (5 ,url ,url )
97
+
98
+
99
+ pdf .output (title + ".pdf" )
78
100
79
101
80
102
#main function
@@ -92,4 +114,4 @@ def main():
92
114
if __name__ == '__main__':
    # Run the scraper, and always shut the browser down afterwards so the
    # chromedriver process does not leak when main() raises.
    try:
        main()
    finally:
        # quit() terminates the whole WebDriver session and the driver
        # process; close() would only close the current browser window and
        # could leave chromedriver running in the background.
        driver.quit()
0 commit comments