Commit ffed287

Commit message: 提交代码 ("commit code")
1 parent 275e40d commit ffed287

File tree: 3 files changed, +158 -0 lines changed

xianhuan/.DS_Store (0 bytes, binary file not shown)

xianhuan/yanxuanbriefs/briefs_ana.py (new file, 98 lines added)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import json
import pandas as pd
from pyecharts.charts import Bar, Pie
from pyecharts import options as opts
import jieba
from PIL import Image
from wordcloud import WordCloud
from matplotlib import pyplot as plt
import numpy as np
from os import path

color = []
size = []
comments = []

# briefs.txt holds one JSON record per line (written by briefs_man.py);
# collect the comment text plus the color/size SKU attributes.
with open("briefs.txt", "r", encoding="utf-8") as f:
    for line in f:
        data_obj = json.loads(line)
        comments.append(data_obj['content'])
        skuinfo = data_obj['skuInfo']
        for sku in skuinfo:
            if '颜色' in sku and '规格' not in sku:
                # Strip the label and packaging noise, then split combined colors on '+'
                filter_sku = sku.replace("颜色:", "").strip().replace("(", "").replace(")3条", "").replace("四条装", "").replace("*2", "").replace("2条", "").replace(")", "")
                color.extend(filter_sku.split('+'))
            elif '尺码' in sku and '~' not in sku:
                size.append(sku.replace('尺码:', ""))

# Color visualization
df = pd.DataFrame(color, columns=['color'])
analyse_color = df['color'].value_counts()

bar = Bar()
bar.add_xaxis(analyse_color.index.values.tolist())
bar.add_yaxis("", analyse_color.values.tolist())
bar.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-90)),
    title_opts=opts.TitleOpts(title="颜色分布"),  # "Color distribution"
    # datazoom_opts=opts.DataZoomOpts(),
)
# bar.render_notebook()
bar.render('briefs_color.html')


# Size visualization
df2 = pd.DataFrame(size, columns=['size'])
analyse_size = df2['size'].value_counts()

bar = Bar()
bar.add_xaxis(analyse_size.index.values.tolist())
bar.add_yaxis("", analyse_size.values.tolist())
bar.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
    title_opts=opts.TitleOpts(title="尺寸分布"),  # "Size distribution"
    # datazoom_opts=opts.DataZoomOpts(),
)
bar.render('briefs_size.html')


# Comment visualization (word cloud)
text = " ".join(comments)

def gen_wc_split_text(text='There is no txt', max_words=None, background_color=None,
                      font_path='/System/Library/Fonts/PingFang.ttc',  # macOS Chinese font; adjust on other platforms
                      output_path='', output_name='',
                      mask_path=None, mask_name=None,
                      width=400, height=200, max_font_size=100, axis='off'):
    # Segment the Chinese text with jieba and join the tokens with spaces
    all_seg = jieba.cut(text, cut_all=False)
    split_text = ' '
    for seg in all_seg:
        split_text = split_text + seg + ' '

    # Optional mask image that defines the word cloud shape
    mask = None
    if mask_path is not None:
        mask = np.array(Image.open(path.join(mask_path, mask_name)))

    wordcloud = WordCloud(background_color=background_color,
                          mask=mask,
                          max_words=max_words,
                          max_font_size=max_font_size,
                          width=width,
                          height=height,
                          # Without a Chinese font, CJK characters render as garbled boxes
                          font_path=font_path)
    myword = wordcloud.generate(str(split_text))
    # Display the word cloud
    plt.imshow(myword)
    plt.axis(axis)
    plt.show()

    # Save the word cloud image
    wordcloud.to_file(path.join(output_path, output_name))

gen_wc_split_text(text, output_name='briefs_comments_wc.png', output_path='./')
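
For reference, a minimal sketch of the per-line JSON shape that briefs_ana.py expects in briefs.txt, restricted to the two fields it actually reads ('content' and 'skuInfo'); the example record below is hypothetical, and the real responses from the you.163.com comment API contain additional keys:

import json

# Hypothetical record; only 'content' and 'skuInfo' are used by briefs_ana.py.
sample_line = json.dumps({
    "content": "质量不错,穿着很舒服",
    "skuInfo": ["颜色:黑色+灰色", "尺码:XL"]
}, ensure_ascii=False)

record = json.loads(sample_line)
print(record['content'])   # comment text, fed into the word cloud
print(record['skuInfo'])   # "颜色:..." / "尺码:..." strings, fed into the bar charts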

xianhuan/yanxuanbriefs/briefs_man.py (new file, 60 lines added)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import requests
import time
import json

# Fetch the product id list for a search keyword
def search_keyword(keyword):
    uri = 'https://you.163.com/xhr/search/search.json'
    query = {
        "keyword": keyword,
        "page": 1
    }
    try:
        res = requests.get(uri, params=query).json()
        result = res['data']['directly']['searcherResult']['result']
        product_id = []
        for r in result:
            product_id.append(r['id'])
        return product_id
    except:
        raise

# Fetch the comments of one product, page by page
def details(product_id):
    url = 'https://you.163.com/xhr/comment/listByItemByTag.json'
    try:
        C_list = []
        for i in range(1, 100):
            query = {
                "itemId": product_id,
                "page": i,
            }
            res = requests.get(url, params=query).json()
            # Stop once a page comes back empty
            if not res['data']['commentList']:
                break
            print("爬取第 %s 页评论" % i)  # "Crawling comment page %s"
            commentList = res['data']['commentList']
            C_list.extend(commentList)
            time.sleep(1)  # be polite to the server

        return C_list
    except:
        raise


product_id = search_keyword('男士内裤')  # keyword: "men's briefs"
r_list = []
for p in product_id:
    r_list.extend(details(p))

# Write one JSON object per line; UTF-8 is set explicitly so that
# briefs_ana.py (which reads with encoding="utf-8") works on any platform.
with open('./briefs.txt', 'w', encoding='utf-8') as f:
    for r in r_list:
        try:
            f.write(json.dumps(r, ensure_ascii=False) + '\n')
        except:
            print('出错啦')  # "Something went wrong"
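
The crawl in briefs_man.py runs at module level with a fixed keyword. As a rough usage sketch, the same two functions can be pointed at a different query before running briefs_ana.py on the resulting file; the keyword and output path below are placeholders, not part of the commit:

# Hypothetical reuse of search_keyword() and details() with a placeholder query.
ids = search_keyword('袜子')  # placeholder keyword ("socks")
records = []
for pid in ids:
    records.extend(details(pid))

with open('./socks.txt', 'w', encoding='utf-8') as f:  # placeholder output file
    for r in records:
        f.write(json.dumps(r, ensure_ascii=False) + '\n')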
