Commit ffed287

Commit message: 提交代码 ("commit code")
1 parent 275e40d commit ffed287

File tree: 3 files changed, +158 -0 lines changed

xianhuan/.DS_Store (0 bytes, binary file not shown)

xianhuan/yanxuanbriefs/briefs_ana.py (new file, 98 lines added)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import json
import pandas as pd
from pyecharts.charts import Bar, Pie
from pyecharts import options as opts
import jieba
from PIL import Image
from wordcloud import WordCloud
from matplotlib import pyplot as plt
import numpy as np
from os import path

color = []
size = []
comments = []

# briefs.txt holds one JSON record per line (written by briefs_man.py);
# collect the comment text plus the color/size SKU attributes.
with open("briefs.txt", "r", encoding="utf-8") as f:
    for line in f:
        data_obj = json.loads(line)
        comments.append(data_obj['content'])
        skuinfo = data_obj['skuInfo']
        for sku in skuinfo:
            if '颜色' in sku and '规格' not in sku:
                # Strip the label and packaging noise, then split combined colors on '+'
                filter_sku = sku.replace("颜色:", "").strip().replace("(", "").replace(")3条", "").replace("四条装", "").replace("*2", "").replace("2条", "").replace(")", "")
                color.extend(filter_sku.split('+'))
            elif '尺码' in sku and '~' not in sku:
                size.append(sku.replace('尺码:', ""))

# Color visualization
df = pd.DataFrame(color, columns=['color'])
analyse_color = df['color'].value_counts()

bar = Bar()
bar.add_xaxis(analyse_color.index.values.tolist())
bar.add_yaxis("", analyse_color.values.tolist())
bar.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-90)),
    title_opts=opts.TitleOpts(title="颜色分布"),  # "Color distribution"
    # datazoom_opts=opts.DataZoomOpts(),
)
# bar.render_notebook()
bar.render('briefs_color.html')


# Size visualization
df2 = pd.DataFrame(size, columns=['size'])
analyse_size = df2['size'].value_counts()

bar = Bar()
bar.add_xaxis(analyse_size.index.values.tolist())
bar.add_yaxis("", analyse_size.values.tolist())
bar.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
    title_opts=opts.TitleOpts(title="尺寸分布"),  # "Size distribution"
    # datazoom_opts=opts.DataZoomOpts(),
)
bar.render('briefs_size.html')


# Comment visualization (word cloud)
text = " ".join(comments)

def gen_wc_split_text(text='There is no txt', max_words=None, background_color=None,
                      font_path='/System/Library/Fonts/PingFang.ttc',  # macOS Chinese font; adjust on other platforms
                      output_path='', output_name='',
                      mask_path=None, mask_name=None,
                      width=400, height=200, max_font_size=100, axis='off'):
    # Segment the Chinese text with jieba and join the tokens with spaces
    all_seg = jieba.cut(text, cut_all=False)
    split_text = ' '
    for seg in all_seg:
        split_text = split_text + seg + ' '

    # Optional mask image that defines the word cloud shape
    mask = None
    if mask_path is not None:
        mask = np.array(Image.open(path.join(mask_path, mask_name)))

    wordcloud = WordCloud(background_color=background_color,
                          mask=mask,
                          max_words=max_words,
                          max_font_size=max_font_size,
                          width=width,
                          height=height,
                          # Without a Chinese font, CJK characters render as garbled boxes
                          font_path=font_path)
    myword = wordcloud.generate(str(split_text))
    # Display the word cloud
    plt.imshow(myword)
    plt.axis(axis)
    plt.show()

    # Save the word cloud image
    wordcloud.to_file(path.join(output_path, output_name))

gen_wc_split_text(text, output_name='briefs_comments_wc.png', output_path='./')
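
For reference, a minimal sketch of the per-line JSON shape that briefs_ana.py expects in briefs.txt, restricted to the two fields it actually reads ('content' and 'skuInfo'); the example record below is hypothetical, and the real responses from the you.163.com comment API contain additional keys:

import json

# Hypothetical record; only 'content' and 'skuInfo' are used by briefs_ana.py.
sample_line = json.dumps({
    "content": "质量不错,穿着很舒服",
    "skuInfo": ["颜色:黑色+灰色", "尺码:XL"]
}, ensure_ascii=False)

record = json.loads(sample_line)
print(record['content'])   # comment text, fed into the word cloud
print(record['skuInfo'])   # "颜色:..." / "尺码:..." strings, fed into the bar charts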

xianhuan/yanxuanbriefs/briefs_man.py (new file, 60 lines added)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import requests
import time
import json

# Fetch the product id list for a search keyword
def search_keyword(keyword):
    uri = 'https://you.163.com/xhr/search/search.json'
    query = {
        "keyword": keyword,
        "page": 1
    }
    try:
        res = requests.get(uri, params=query).json()
        result = res['data']['directly']['searcherResult']['result']
        product_id = []
        for r in result:
            product_id.append(r['id'])
        return product_id
    except:
        raise

# Fetch the comments of one product, page by page
def details(product_id):
    url = 'https://you.163.com/xhr/comment/listByItemByTag.json'
    try:
        C_list = []
        for i in range(1, 100):
            query = {
                "itemId": product_id,
                "page": i,
            }
            res = requests.get(url, params=query).json()
            # Stop once a page comes back empty
            if not res['data']['commentList']:
                break
            print("爬取第 %s 页评论" % i)  # "Crawling comment page %s"
            commentList = res['data']['commentList']
            C_list.extend(commentList)
            time.sleep(1)  # be polite to the server

        return C_list
    except:
        raise


product_id = search_keyword('男士内裤')  # keyword: "men's briefs"
r_list = []
for p in product_id:
    r_list.extend(details(p))

# Write one JSON object per line; UTF-8 is set explicitly so that
# briefs_ana.py (which reads with encoding="utf-8") works on any platform.
with open('./briefs.txt', 'w', encoding='utf-8') as f:
    for r in r_list:
        try:
            f.write(json.dumps(r, ensure_ascii=False) + '\n')
        except:
            print('出错啦')  # "Something went wrong"
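
The crawl in briefs_man.py runs at module level with a fixed keyword. As a rough usage sketch, the same two functions can be pointed at a different query before running briefs_ana.py on the resulting file; the keyword and output path below are placeholders, not part of the commit:

# Hypothetical reuse of search_keyword() and details() with a placeholder query.
ids = search_keyword('袜子')  # placeholder keyword ("socks")
records = []
for pid in ids:
    records.extend(details(pid))

with open('./socks.txt', 'w', encoding='utf-8') as f:  # placeholder output file
    for r in records:
        f.write(json.dumps(r, ensure_ascii=False) + '\n')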
