1
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
4
+
5
# Base address of the news site; category slugs and story links are resolved
# against it.
URL = "https://www.indiatoday.in/"


def writeToCSV(topTenNews, category):
    """Write scraped news entries to ``topTen<category>News.csv``.

    The file is created in the current working directory, replacing any
    existing file of the same name. It holds a ``Date, Link, Headline``
    header row followed by one row per entry.

    Args:
        topTenNews: Iterable of ``[headline, link, date]`` sequences. ``link``
            may be site-relative (with or without a leading slash) or
            already absolute.
        category: Category name; used only to build the output file name.
    """
    file_name = "topTen" + category + "News.csv"
    # newline="" hands line-ending control to the csv module (otherwise every
    # row is followed by a blank line on Windows). An explicit UTF-8 encoding
    # keeps non-ASCII headlines from failing on platforms whose default
    # encoding cannot represent them.
    with open(file_name, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Date", "Link", "Headline"])
        for news in topTenNews:
            headline, link, date = news[0], news[1], news[2]
            # urljoin avoids the "//" that plain concatenation produces for
            # hrefs starting with "/", and leaves absolute links intact.
            writer.writerow([date, urljoin(URL, link), headline])
13
+
14
def getTopTenFromDivTag(category):
    """Scrape up to ten stories from a category page built from "detail" blocks.

    Fetches ``URL + category`` and walks every element with CSS class
    ``detail``, taking the headline from its ``<h2>`` and the link from its
    first ``<a>``.

    Args:
        category: Category slug appended to the site base URL.

    Returns:
        A list of at most ten ``[headline, link, date]`` lists. ``link`` is
        the raw ``href`` value and ``date`` is its last ten characters.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an error status.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    page = requests.get(URL + category, timeout=10)
    # Fail loudly on 4xx/5xx instead of scraping an error page into an
    # empty result.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    topTenNews = []
    for div in soup.find_all(class_="detail"):
        heading = div.find("h2")
        anchor = div.find("a")
        # Skip blocks lacking a headline or a link rather than crashing on
        # None / a missing href attribute.
        if heading is None or anchor is None or not anchor.get("href"):
            continue
        link = anchor["href"]
        # NOTE(review): presumably story URLs end in a YYYY-MM-DD date, so the
        # last ten characters are taken as the date -- confirm against the
        # live markup.
        topTenNews.append([heading.text, link, link[-10:]])
        if len(topTenNews) == 10:
            break

    return topTenNews
34
+
35
def getTopTenFromLiTag(category):
    """Scrape up to ten stories from a category page built from an "itg-listing" list.

    Fetches ``URL + category`` and reads the first ``<a>`` inside each
    ``<li>`` of every element with CSS class ``itg-listing``.

    Args:
        category: Category slug appended to the site base URL.

    Returns:
        A list of at most ten ``[headline, link, date]`` lists. ``link`` is
        the anchor's ``href`` with any leading slash removed and ``date`` is
        the last ten characters of that link.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an error status.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    page = requests.get(URL + category, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    topTenNews = []
    # Walk the parsed tree instead of slicing the stringified markup: fixed
    # offsets break as soon as an attribute is added, a URL contains "=", or
    # the list holds fewer than ten items.
    for listing in soup.find_all(class_="itg-listing"):
        for item in listing.find_all("li"):
            anchor = item.find("a")
            if anchor is None or not anchor.get("href"):
                continue
            # Drop the leading slash so the link stays in the slash-less,
            # site-relative form this function has always returned.
            link = anchor["href"].lstrip("/")
            # NOTE(review): presumably story URLs end in a YYYY-MM-DD date --
            # confirm against the live markup.
            topTenNews.append([anchor.get_text(strip=True), link, link[-10:]])
            if len(topTenNews) == 10:
                return topTenNews

    return topTenNews
58
+
59
def main():
    """Prompt for a news category, scrape its top stories and save them as CSV.

    Prints the list of supported categories, reads one category name from
    standard input (case-insensitive, surrounding whitespace ignored), picks
    the scraper for that category and writes the result with ``writeToCSV``.

    Raises:
        SystemExit: With status 1 when the entered category is not supported.
    """
    # Naming the two groups makes explicit which scraper each category is
    # routed to, instead of relying on a positional slice of one list.
    div_categories = ["india", "world", "cities", "business", "health"]
    li_categories = ["technology", "sports", "education", "lifestyle"]
    categories = div_categories + li_categories

    print("Please Choose a Category from the following list")

    for number, name in enumerate(categories, start=1):
        print("{}. {}".format(number, name.capitalize()))

    print("Example: Enter 'world' for top 10 world news")
    print()

    category = input().strip().lower()

    if category not in categories:
        print("\n Please choose a valid category!")
        # exit() is a helper injected by the site module and is not guaranteed
        # to exist; raising SystemExit always works and reports failure with
        # a non-zero status.
        raise SystemExit(1)

    if category in div_categories:
        topTenNews = getTopTenFromDivTag(category)
    else:
        topTenNews = getTopTenFromLiTag(category)

    writeToCSV(topTenNews, category)

    print("Created CSV File Successfully!")
87
+
88
# Run the interactive scraper only when executed as a script, so importing
# this module does not block on input() or hit the network.
if __name__ == "__main__":
    main()
0 commit comments