Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4be3f6c

Browse files
add code
1 parent 40da46e commit 4be3f6c

File tree

5 files changed

+341
-3
lines changed

5 files changed

+341
-3
lines changed

‎doudou/2020-06-22-music-163/app.py‎

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import requests
2+
import json
3+
import matplotlib.pyplot as plt
4+
from wordcloud import WordCloud
5+
6+
7+
# Headers that make the request look like it comes from a browser.
headers = {
    'Referer': 'http://music.163.com/',
    'Host': 'music.163.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
    'Accept': '*/*',
}

# Endpoint URL and the POST form parameters.
# NOTE(review): 'params' / 'encSecKey' look like values captured from a real
# browser request; presumably they are tied to one account -- confirm.
url = 'https://music.163.com/weapi/v1/play/record?csrf_token='
data = {
    'params': 'xrJhjXYUqEWa98DVbFtw6yTygOTCOvSAypxfWNr5kpw/MEvXsRk+Av+DNF7zY9a1oA95FsmDtE3VpM422dZR6WJGDxS3/se00qFFHx6wumfLzc9mgnfB5hGkrBwF9+P/7zamjfWSOUfvvUuWhM2Gd7z2pA11lMB',
    'encSecKey': '2371bb4de91d5de7110722d3491c7cf6d3f6f5cdcbc16a5e9c7456e4b9075c1965bbd2bf4fbf02023cf63391f74b6956339cb72fa32a4413de347ffb536299f5711fe02fe60f66b77ac96a16a6bcb5ba14cf9b1609ddf8e8180d683bba5801acf'
}

# Send the POST request. The browser-like headers defined above are passed
# explicitly (they were previously built but never sent), and a timeout keeps
# the script from hanging forever on a stalled connection.
req = requests.post(url, data=data, headers=headers, timeout=10)

# Parse the JSON body once and reuse it for both printing and processing.
result = json.loads(req.text)
print(result)

# Sample output:
# {"allData":[{"playCount":0,"score":100,"song":{"name":"盛夏光年 (2013版)","id":28181110,"pst":0,"t":0,"ar":[{"id":13193,"name":"五月天","tns":...

# Collect the first listed artist of each record, for at most the first 100
# records. Slicing (rather than indexing 0..99) avoids an IndexError when the
# response holds fewer than 100 entries.
names = []
for record in result['allData'][:100]:
    names.append(record['song']['ar'][0]['name'])

text = ",".join(names)
36+
37+
38+
def show_word_cloud(text):
    """Render a word cloud for *text*, save a copy to disk and display it.

    Two clouds are generated from the same text: a high-resolution one
    (scale 2.5) that is shown with matplotlib, and a smaller one (scale 1.5)
    that is written to ``names.png`` in the current working directory.

    Args:
        text: The text to visualise; words are extracted by ``WordCloud``.
    """
    # macOS system font with CJK glyphs. Both clouds must use it: the names
    # fed in here are largely Chinese, and a cloud built without an explicit
    # font_path falls back to the library's default font, which is not
    # expected to cover CJK characters.
    font_path = '/System/Library/Fonts/PingFang.ttc'

    wc = WordCloud(font_path=font_path, background_color="white", scale=2.5,
                   contour_color="lightblue").generate(text)

    # Write the on-disk copy (previously generated without a font).
    w = WordCloud(font_path=font_path, background_color='white', scale=1.5).generate(text)
    w.to_file("names.png")

    # Show the high-resolution cloud without axes.
    plt.figure(figsize=(16, 9))
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
49+
50+
51+
show_word_cloud(text)

‎doudou/2020-07-13-lagou/analysis.py‎

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import numpy as np
2+
from pyecharts import options as opts
3+
from pyecharts.charts import Bar
4+
from pyecharts.charts import Pie
5+
from wordcloud import WordCloud
6+
import matplotlib.pyplot as plt
7+
import json
8+
import pandas as pd
9+
10+
11+
def get_data(path='data.txt'):
    """Load crawled job postings and flatten them into a list of dicts.

    Every non-blank line of the file is parsed as one JSON document, and the
    postings found under ``content -> positionResult -> result`` are
    collected.

    Args:
        path: File to read, one JSON document per line. Defaults to
            ``'data.txt'``.

    Returns:
        A list with one dict per posting. Each dict holds the keys ``city``,
        ``industryField``, ``education``, ``workYear``, ``salary``,
        ``firstType``, ``secondType``, ``thirdType``, ``skillLables`` and
        ``companyLabelList`` (in that order); the last two are list-valued in
        the input and are joined into comma-separated strings.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a document or posting lacks one of the expected keys.
    """
    scalar_fields = ('city', 'industryField', 'education', 'workYear',
                     'salary', 'firstType', 'secondType', 'thirdType')
    list_fields = ('skillLables', 'companyLabelList')

    data = []
    # The file contains Chinese text; read it as UTF-8 explicitly instead of
    # relying on the platform default encoding.
    with open(path, encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. from a trailing newline in a response).
            if not line.strip():
                continue
            result = json.loads(line)
            for item in result['content']['positionResult']['result']:
                row = {key: item[key] for key in scalar_fields}
                for key in list_fields:
                    # A null list is treated the same as an empty one.
                    row[key] = ','.join(item[key] or [])
                data.append(row)
    return data
33+
34+
35+
# Load the flattened postings straight into a DataFrame.
data = pd.DataFrame(get_data())
# Peek at the first five rows (the value is not assigned to anything).
data.head(5)
38+
39+
# City distribution: count postings per city and keep the `top` most frequent.
citys_value_counts = data['city'].value_counts()
top = 15
citys = citys_value_counts.head(top).index.tolist()
city_counts = citys_value_counts.head(top).tolist()

# Bar chart of the top cities.
bar = Bar()
bar.add_xaxis(citys)
bar.add_yaxis("", city_counts)
bar.render_notebook()

# Pie chart of the same data, as [city, count] pairs.
pie = Pie()
pie.add("", [[city, count] for city, count in zip(citys, city_counts)])
pie.set_global_opts(title_opts=opts.TitleOpts(title=""))
pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
pie.render_notebook()
60+
61+
# Industry distribution. One posting may list several comma-separated
# industries, so split every value and count the individual entries.
industrys = data['industryField'].tolist()
industry_list = [
    field
    for fields in industrys
    for field in fields.split(',')
]

industry_series = pd.Series(industry_list)
industry_value_counts = industry_series.value_counts()

industrys = industry_value_counts.head(top).index.tolist()
industry_counts = industry_value_counts.head(top).tolist()

# Pie chart of the `top` most frequent industries.
pie = Pie()
pie.add("", [[name, count] for name, count in zip(industrys, industry_counts)])
pie.set_global_opts(title_opts=opts.TitleOpts(title=""))
pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
pie.render_notebook()
78+
79+
# Education requirement distribution (all distinct values, no top-N cut).
eduction_value_counts = data['education'].value_counts()

eduction = eduction_value_counts.index.tolist()
eduction_counts = eduction_value_counts.tolist()

# Pie chart of [education level, count] pairs.
pie = Pie()
pie.add("", [[level, count] for level, count in zip(eduction, eduction_counts)])
pie.set_global_opts(title_opts=opts.TitleOpts(title=""))
pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
pie.render_notebook()
92+
93+
# Required work experience distribution (all distinct values).
work_year_value_counts = data['workYear'].value_counts()
work_year = work_year_value_counts.index.tolist()
work_year_counts = work_year_value_counts.tolist()

# Bar chart of postings per experience bracket.
bar = Bar()
bar.add_xaxis(work_year)
bar.add_yaxis("", work_year_counts)
bar.render_notebook()
104+
105+
# Required skills. Split the comma-joined skill tags into one column per tag,
# blank out the missing cells, and dump the table as plain text for WordCloud.
word_data = (
    data['skillLables']
    .str.split(',')
    .apply(pd.Series)
    .replace(np.nan, '')
)
text = word_data.to_string(header=False, index=False)

wc = WordCloud(
    font_path='/System/Library/Fonts/PingFang.ttc',
    background_color="white",
    scale=2.5,
    contour_color="lightblue",
)
wc.generate(text)

# Display the cloud without axes.
plt.figure(figsize=(16, 9))
plt.imshow(wc)
plt.axis('off')
plt.show()
117+
118+
# Company benefits. Same approach as for skills: one column per label,
# missing cells blanked, table dumped as plain text for WordCloud.
word_data = (
    data['companyLabelList']
    .str.split(',')
    .apply(pd.Series)
    .replace(np.nan, '')
)
text = word_data.to_string(header=False, index=False)

wc = WordCloud(
    font_path='/System/Library/Fonts/PingFang.ttc',
    background_color="white",
    scale=2.5,
    contour_color="lightblue",
)
wc.generate(text)

# Display the cloud without axes.
plt.figure(figsize=(16, 9))
plt.imshow(wc)
plt.axis('off')
plt.show()
130+
131+
# Salary distribution: the `top` most frequent salary ranges.
salary_value_counts = data['salary'].value_counts()
salary = salary_value_counts.head(top).index.tolist()
salary_counts = salary_value_counts.head(top).tolist()

# Bar chart with the x-axis labels rotated by 45 degrees.
bar = Bar()
bar.add_xaxis(salary)
bar.add_yaxis("", salary_counts)
bar.set_global_opts(
    xaxis_opts=opts.AxisOpts(name_rotate=0, name="薪资", axislabel_opts={"rotate": 45})
)
bar.render_notebook()

‎doudou/2020-07-13-lagou/app.py‎

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import requests
2+
import time
3+
4+
5+
def headers_to_dict(headers):
    """Parse a block of ``name: value`` header lines into a dict.

    Lines are separated by ``"\\n"``. Blank and whitespace-only lines are
    ignored, and surrounding whitespace is removed from both the name and the
    value. Only the first ``:`` of a line separates name from value, so values
    may themselves contain colons (e.g. URLs).

    Repeated ``cookie`` lines are merged into a single value joined by
    ``"; "``; for any other repeated name the last occurrence wins.

    Args:
        headers: Multi-line string with one header per line.

    Returns:
        A dict mapping header names to their values.

    Raises:
        ValueError: If a non-blank line contains no ``:``.
    """
    d_headers = {}
    for line in headers.split("\n"):
        line = line.strip()
        if not line:
            continue
        name, sep, value = line.partition(":")
        if not sep:
            raise ValueError("malformed header line (no ':'): {!r}".format(line))
        name = name.strip()
        value = value.strip()
        if name == 'cookie' and d_headers.get(name) is not None:
            d_headers[name] = d_headers[name] + "; " + value
        else:
            d_headers[name] = value
    return d_headers
16+
17+
18+
# Listing page; requested first so the site hands out cookies.
home_url = 'https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD'
# JSON endpoint that returns the actual job postings.
url = 'https://www.lagou.com/jobs/positionAjax.json?px=new&needAddtionalResult=false'
# Raw request headers, one "name: value" pair per line.
headers = (
    "\n"
    "accept: application/json, text/javascript, */*; q=0.01\n"
    "origin: https://www.lagou.com\n"
    "referer: https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD\n"
    "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36\n"
)

# Parsed form of the headers, shared by every request.
headers_dict = headers_to_dict(headers)
28+
29+
30+
def get_data_from_cloud(page):
    """Fetch one page of Python job postings and append the raw body to disk.

    The listing page is requested first so the session picks up cookies; the
    same session then posts the search to the JSON endpoint. The response
    text is passed to ``write_file`` unchanged.

    Args:
        page: Page number to request (sent as the ``pn`` form field).
    """
    params = {
        'first': 'false',
        'pn': page,
        'kd': 'python'
    }
    # The context manager closes the session when done (it was previously
    # never closed).
    with requests.Session() as s:
        # Visit the listing page first to obtain cookies.
        s.get(home_url, headers=headers_dict, timeout=3)
        # Post through the same session so those cookies are sent along.
        response = s.post(url, data=params, headers=headers_dict, timeout=3)
    result = response.text
    write_file(result)
42+
43+
44+
def write_file(content, filename='data.txt'):
    """Append *content* followed by a newline to a text file.

    Args:
        content: Text to append.
        filename: File to append to; it is created if missing. Defaults to
            ``'data.txt'``.
    """
    # Write UTF-8 explicitly: the crawled JSON contains Chinese text, and the
    # platform default encoding is not UTF-8 everywhere.
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(content + '\n')
48+
49+
50+
"""
51+
工作地点地图 : city
52+
行业分布:industryField
53+
学历要求:education
54+
工作经验:workYear
55+
薪资:salary
56+
所需技能:skillLables
57+
福利:companyLabelList
58+
类型:firstType、secondType
59+
"""
60+
def get_data(pages=76, delay=5):
    """Crawl result pages 1..*pages*, pausing between requests.

    Args:
        pages: Number of pages to fetch, starting at page 1. Defaults to 76.
        delay: Seconds to sleep after each page. Defaults to 5.
    """
    for page in range(1, pages + 1):
        get_data_from_cloud(page)
        time.sleep(delay)
65+
66+
67+
get_data()

‎doudou/2020-07-13-lagou/data.txt‎

Lines changed: 76 additions & 0 deletions
Large diffs are not rendered by default.

‎doudou/README.md‎

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@ Python技术 公众号文章代码库
1616

1717
+ [520](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-520):Python 教你花式表白小姐姐
1818

19-
+ [字符画](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-character-drawing):字符画
19+
+ [character-drawing](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-character-drawing):字符画
2020

21-
+ [迷宫](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-12-maze):迷宫
21+
+ [maze](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-12-maze):迷宫
2222

23-
+ [Python 骚操作](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-19-skills):Python 骚操作
23+
+ [python-skills](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-19-skills):Python 骚操作
24+
25+
+ [lagou](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-07-13-lagou):拉勾招聘数据分析
2426

2527
---
2628

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /