Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9ffea31

Browse files
add code
1 parent 1a50b5d commit 9ffea31

File tree

4 files changed

+144
-0
lines changed

4 files changed

+144
-0
lines changed

‎doudou/2020-10-13-national-day‎

-988 Bytes
Binary file not shown.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import csv
import os
import time

import requests
from bs4 import BeautifulSoup
from requests import RequestException
6+
7+
8+
# HTTP request headers sent with every page fetch.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
    # 'br' is deliberately not advertised: requests/urllib3 can only decode
    # Brotli-compressed bodies when an optional Brotli package is installed,
    # and an undecoded body would reach the HTML parser as garbage.
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'referer': 'https://piao.qunar.com/',
}

# A browser session cookie (including a session id and a CSRF token) used to
# be hard-coded here. Such values are per-user credentials and do not belong
# in source control, so the cookie is now taken from the QUNAR_COOKIE
# environment variable and only sent when that variable is set.
_cookie = os.environ.get('QUNAR_COOKIE')
if _cookie:
    headers['cookie'] = _cookie
16+
17+
# Output CSV, opened (and truncated) once at import time; the parsing code
# below writes its rows through ``writer``.
# newline='' leaves line-ending handling to the csv module.
excel_file = open('data.csv', mode='w', newline='', encoding='utf-8')
writer = csv.writer(excel_file)
# Header row: name, city, category, level, heat, address.
writer.writerow(['名称', '城市', '类型', '级别', '热度', '地址'])
20+
21+
22+
def get_page_html(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole crawl.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when ``requests`` raises a
        ``RequestException`` (timeouts included).
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    return response.text
30+
31+
32+
def parse_content(content, subject, url):
    """Extract the attractions of one result page and append them to the CSV.

    Every ``div.sight_item`` found inside the element with id ``search-list``
    becomes one row written through the module-level ``writer``, in the
    column order name, districts, subject, level, star, address.

    Args:
        content: HTML text of the page; a falsy value (``None`` or ``''``)
            means the download failed and the page is skipped.
        subject: Category the page was requested for; stored as-is in the
            row.
        url: Address the page came from; used only in diagnostic messages.
    """
    if not content:
        print('content is none ', url)
        return

    soup = BeautifulSoup(content, "html.parser")
    search_list = soup.find(id='search-list')
    if search_list is None:
        # find() returns None when the page has no such element; skip the
        # page instead of crashing on the .find_all() call below.
        print('search list not found ', url)
        return

    for item in search_list.find_all('div', class_="sight_item"):
        level = item.find('span', class_='level')
        star = item.find('span', class_='product_star_level')
        writer.writerow([
            # .get() yields '' for a missing attribute instead of KeyError,
            # so one incomplete listing does not abort the whole crawl.
            item.get('data-sight-name', ''),
            item.get('data-districts', ''),
            subject,
            level.text if level else '',
            star.text if star else '',
            item.get('data-address', ''),
        ])
49+
50+
51+
# Attraction categories to crawl; each value is substituted into the
# ``subject`` query parameter of the listing URL.
subjects = [
    '文化古迹',
    '自然风光',
    '农家度假',
    '游乐场',
    '展馆',
    '古建筑',
    '城市观光',
]
52+
53+
54+
def get_data(pages=10, delay=5):
    """Crawl the listing pages of every category in ``subjects``.

    For each category, pages ``1..pages`` are downloaded with
    ``get_page_html`` and handed to ``parse_content``, which writes the rows
    to the CSV.

    Args:
        pages: Number of result pages to request per category.
        delay: Seconds to sleep after each page before requesting the next.
    """
    for subject in subjects:
        for page in range(1, pages + 1):
            url = f'https://piao.qunar.com/ticket/list.htm?keyword=热门景点&region=&from=mps_search_suggest&subject={subject}&page={page}&sku='
            print(url)
            content = get_page_html(url)
            parse_content(content, subject, url)
            # Push buffered rows to disk after every page so an interrupted
            # crawl keeps what it has collected so far.
            excel_file.flush()
            time.sleep(delay)
63+
64+
65+
# Start the crawl only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    get_data()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
名称,城市,类型,级别,热度,地址
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import csv
2+
import pandas as pd
3+
from pyecharts.charts import Geo
4+
from pyecharts import options as opts
5+
from pyecharts.globals import ChartType, SymbolType
6+
from pyecharts.charts import Bar
7+
from pyecharts.charts import Pie
8+
9+
# Load every data row of the scraped CSV into ``data`` (a list of lists of
# strings); the first line is kept separately in ``header``.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale, and newline='' is what the csv module expects.
with open('data.csv', 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    # A default of None keeps an empty file from raising StopIteration.
    header = next(reader, None)
    data = list(reader)
15+
16+
# Cities that are left out of the charts.
# NOTE(review): presumably names the map chart cannot place - confirm.
_EXCLUDED_CITIES = {'保亭', '德宏', '湘西', '陵水', '黔东南', '黔南'}


def _parse_heat(text):
    """Convert a '热度 <number>' cell into an integer score (number x 1000).

    A cell without the '热度' marker or without a number after it scores 0
    instead of raising, because the heat column can legitimately be empty.
    ``round`` is used so float artefacts such as 289.99999999999994 do not
    truncate to the wrong integer.
    """
    _, marker, value = text.partition('热度')
    value = value.strip()
    if not marker or not value:
        return 0
    return int(round(float(value) * 1000))


# Build the rows of the analysis table: [name, city, level, star].
df_data = []
for row in data:
    if len(row) < 5:
        # Blank or truncated CSV line.
        continue
    district_parts = row[1].split('·')
    if len(district_parts) < 2:
        # No city component after the first '·'.
        continue
    city = district_parts[1]
    if city in _EXCLUDED_CITIES:
        continue
    df_data.append([row[0], city, row[3], _parse_heat(row[4])])

df = pd.DataFrame(df_data, columns=['name', 'city', 'level', 'star'])
26+
27+
28+
def show_pic_one():
    """Build a heat map of China from the summed ``star`` score of each city.

    Returns:
        The object produced by ``Geo.render_notebook()``. It is returned
        rather than discarded so that a notebook cell ending in
        ``show_pic_one()`` actually displays the chart.
    """
    stars_by_city = df.groupby(by=['city'])['star'].sum()
    # One [city, total] pair per city, the shape passed to Geo.add().
    pairs = [
        list(pair)
        for pair in zip(stars_by_city.index.tolist(), stars_by_city.tolist())
    ]

    geo = (
        Geo()
        .add_schema(maptype="china")
        .add(
            "热点图",  # chart title
            pairs,
            type_=ChartType.HEATMAP,  # map type
        )
        # Whether to show labels.
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            # Maximum value shown by the legend.
            visualmap_opts=opts.VisualMapOpts(max_=5000),
            # Title in the top-left corner.
            title_opts=opts.TitleOpts(title=""),
        )
    )

    return geo.render_notebook()
50+
51+
52+
def show_pic_two():
    """Build a bar chart of the 15 cities with the most rows in ``df``.

    Returns:
        The object produced by ``Bar.render_notebook()``. It is returned
        rather than discarded so that a notebook cell ending in
        ``show_pic_two()`` actually displays the chart.
    """
    counts = df.loc[:, 'city'].value_counts().sort_values(ascending=False)
    top = counts.head(15)

    bar = Bar()
    bar.add_xaxis(top.index.tolist())
    bar.add_yaxis("Top 15", top.tolist())
    bar.set_global_opts(title_opts=opts.TitleOpts(title=""))
    return bar.render_notebook()
62+
63+
64+
def show_pic_three():
    """Build a pie chart of the 10 attraction names with the highest summed ``star``.

    Returns:
        The object produced by ``Pie.render_notebook()``. It is returned
        rather than discarded so that a notebook cell ending in
        ``show_pic_three()`` actually displays the chart.
    """
    totals = df.groupby(by=['name'])['star'].sum().sort_values(ascending=False)
    top = totals.head(10)
    # One [name, total] pair per slice.
    slices = [list(pair) for pair in zip(top.index.tolist(), top.tolist())]

    pie = (
        Pie()
        .add("", slices)
        .set_global_opts(title_opts=opts.TitleOpts(title=""))
        # Label each slice as "<name>: <value>".
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    return pie.render_notebook()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /