@@ -42,7 +42,6 @@ def get_movie_url(html):
42
42
43
43
# [title, link, director, country, release date, genre, rating, [share of 5-, 4-, 3-, 2-, 1-star ratings], number of votes]
44
44
def get_movie_info (url ):
45
- ans = {}
46
45
html = get_page_html (url )
47
46
soup = bs4 .BeautifulSoup (html , 'html.parser' )
48
47
content = soup .find ('div' , id = 'content' )
@@ -64,11 +63,10 @@ def get_movie_info(url):
64
63
votes = content .find ('span' , property = 'v:votes' ).text
65
64
66
65
rating_per_items = content .find ('div' , class_ = 'ratings-on-weight' ).find_all ('div' , class_ = 'item' )
67
- rating_per = [rating_per_items [0 ].find ('span' , class_ = 'rating_per' ).text ,
68
- rating_per_items [1 ].find ('span' , class_ = 'rating_per' ).text ]
66
+ rating_per = [rating_per_items [0 ].find ('span' , class_ = 'rating_per' ).text , rating_per_items [1 ].find ('span' , class_ = 'rating_per' ).text ]
69
67
70
- return {'title' : title , 'url' : url , 'director' : director , 'country' : country , 'year' : year , 'type' : type ,
71
- 'average' : average , 'votes' : votes , 'rating_per' : rating_per }
68
+ return {'title' : title , 'url' : url , 'director' : "#" . join ( director ) , 'country' : country , 'year' : year , 'type' : "#" . join ( type ) ,
69
+ 'average' : average , 'votes' : votes , 'rating_per' : "#" . join ( rating_per ) }
72
70
73
71
74
72
def main ():
@@ -93,17 +91,25 @@ def getUrls():
93
91
94
92
def writeToFile(content):
    """Append ``content`` as one record to ``doubanTop250.txt``.

    The file is opened in append mode relative to the current working
    directory, so repeated calls accumulate records in the same file.

    Args:
        content: The text to append; it is concatenated with the record
            separator, so it must be a ``str``.
    """
    filename = 'doubanTop250.txt'
    # Pin the encoding: without it the platform default codec is used, which
    # may fail on (or mis-encode) non-ASCII text such as Chinese titles.
    with open(filename, 'a', encoding='utf-8') as f:
        # NOTE(review): the separator is a newline followed by a space, kept
        # exactly as in the original -- confirm the trailing space is intended.
        f.write(content + '\n ')
98
96
99
-
100
97
if __name__ == '__main__':
    # Stage 1: obtain the URLs from getUrls() and download each of them.
    list_urls = getUrls()
    list_htmls = list(map(get_page_html, list_urls))

    # Stage 2: get_movie_url() returns a collection of links per page;
    # merge them into one flat list, keeping their order.
    movie_urls = list(map(get_movie_url, list_htmls))
    movie_url_list = []
    for url_list in movie_urls:
        movie_url_list.extend(url_list)

    # Echo every collected link before the detail pages are processed.
    for url in movie_url_list:
        print(url)

    # Stage 3: build the detail record for every link, then persist and
    # echo each record.
    movie_details = list(map(get_movie_info, movie_url_list))
    for detail in movie_details:
        writeToFile(str(detail))
        print(detail)

    # Debug aid: to check a single page, call
    # get_movie_info('https://movie.douban.com/subject/1292052/') and print it.
0 commit comments