Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c12b319

Browse files
添加代码
1 parent a4b65e1 commit c12b319

File tree

5 files changed

+964
-9
lines changed

5 files changed

+964
-9
lines changed

‎day-119/douban-movie-top250.py‎

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,6 @@ def get_movie_url(html):
4242

4343
# 【名称,链接。导演,国家,上映时间,类型,评分,[五星,四星,三星,二星,一星占比],评价人数】
4444
def get_movie_info(url):
45-
ans = {}
4645
html = get_page_html(url)
4746
soup = bs4.BeautifulSoup(html, 'html.parser')
4847
content = soup.find('div', id='content')
@@ -64,11 +63,10 @@ def get_movie_info(url):
6463
votes = content.find('span', property='v:votes').text
6564

6665
rating_per_items = content.find('div', class_='ratings-on-weight').find_all('div', class_='item')
67-
rating_per = [rating_per_items[0].find('span', class_='rating_per').text,
68-
rating_per_items[1].find('span', class_='rating_per').text]
66+
rating_per = [rating_per_items[0].find('span', class_='rating_per').text, rating_per_items[1].find('span', class_='rating_per').text]
6967

70-
return {'title': title, 'url': url, 'director': director, 'country': country, 'year': year, 'type': type,
71-
'average': average, 'votes': votes, 'rating_per': rating_per}
68+
return {'title': title, 'url': url, 'director': "#".join(director), 'country': country, 'year': year, 'type': "#".join(type),
69+
'average': average, 'votes': votes, 'rating_per': "#".join(rating_per)}
7270

7371

7472
def main():
@@ -93,17 +91,25 @@ def getUrls():
9391

9492
def writeToFile(content):
9593
filename = 'doubanTop250.txt'
96-
with open(filename,'a') as f:
94+
with open(filename,'a') as f:
9795
f.write(content + '\n')
9896

99-
10097
if __name__ == '__main__':
10198
list_urls = getUrls()
10299
list_htmls = [get_page_html(url) for url in list_urls]
103100
movie_urls = [get_movie_url(html) for html in list_htmls]
101+
movie_url_list = []
102+
for url_list in movie_urls:
103+
movie_url_list += url_list
104+
105+
for url in movie_url_list:
106+
print(url)
107+
108+
movie_details = [get_movie_info(url) for url in movie_url_list]
104109

105-
movie_details = [get_movie_info(url) for url in movie_urls[0]]
106-
107110
for detail in movie_details:
108111
writeToFile(str(detail))
112+
print(detail)
113+
109114

115+
#print(get_movie_info('https://movie.douban.com/subject/1292052/'))

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /