Commit 275e40d

committed

提交代码

1 parent fbf22ef commit 275e40d (Copy full SHA for 275e40d)

File tree

4 files changed

+184

-0

lines changed

.DS_Store
xianhuan
- .DS_Store
- yanxuanbra
  - ana.py
  - bra.py

4 files changed

+184

-0

lines changed

`‎.DS_Store‎`

0 Bytes

Binary file not shown.

`‎xianhuan/.DS_Store‎`

6 KB

Binary file not shown.

`‎xianhuan/yanxuanbra/ana.py‎`

Lines changed: 124 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,124 @@`
	`1`	`+#!/usr/bin/env python3`
	`2`	`+# -- coding: utf-8 --`
	`3`	`+"""`
	`4`	`+@author: 闲欢`
	`5`	`+"""`
	`6`	`+import json`
	`7`	`+import pandas as pd`
	`8`	`+from pyecharts.charts import Bar, Pie`
	`9`	`+from pyecharts import options as opts`
	`10`	`+import jieba`
	`11`	`+from PIL import Image`
	`12`	`+from wordcloud import WordCloud`
	`13`	`+from matplotlib import pyplot as plt`
	`14`	`+import numpy as np`
	`15`	`+from os import path`
	`16`	`+`
	`17`	`+size = ['XXL', 'XL', 'XS', 'S', 'M', 'L']`
	`18`	`+`
	`19`	`+color = []`
	`20`	`+size1 = []`
	`21`	`+size2 = []`
	`22`	`+comments = []`
	`23`	`+`
	`24`	`+with open("comments.txt", "r", encoding="utf-8") as f:`
	`25`	`+ for line in f:`
	`26`	`+ data_obj = json.loads(line)`
	`27`	`+ comments.append(data_obj['content'])`
	`28`	`+ skuinfo = data_obj['skuInfo']`
	`29`	`+ # skuArr = skuinfo.split(",")`
	`30`	`+ for sku in skuinfo:`
	`31`	`+ if '颜色' in sku and '内裤' not in sku:`
	`32`	`+ color.append(sku.replace("颜色:", "").strip().replace("开扣", "").replace("套头", "").replace("文胸", "").replace("套装", "").replace("(薄杯)", "").replace("(厚杯)", ""))`
	`33`	`+ elif '尺码' in sku:`
	`34`	`+ is_size1 = False`
	`35`	`+ for s in size:`
	`36`	`+ if s in sku:`
	`37`	`+ is_size1 = True`
	`38`	`+ size1.append(s)`
	`39`	`+ break`
	`40`	`+`
	`41`	`+ # 非SML这种定义尺寸的,就是简单罩杯定义的,同时去掉"适合75ABCD"这种定义的`
	`42`	`+ if not is_size1 and '适合' not in sku:`
	`43`	`+ size2.append(sku.replace('尺码:', ""))`
	`44`	`+`
	`45`	`+# 颜色可视化`
	`46`	`+df = pd.DataFrame(color, columns=['color'])`
	`47`	`+analyse_color = df['color'].value_counts()`
	`48`	`+`
	`49`	`+bar = Bar()`
	`50`	`+bar.add_xaxis(analyse_color.index.values.tolist())`
	`51`	`+bar.add_yaxis("", analyse_color.values.tolist())`
	`52`	`+bar.set_global_opts(`
	`53`	`+ xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-90)),`
	`54`	`+ title_opts=opts.TitleOpts(title="颜色分布"),`
	`55`	`+ # datazoom_opts=opts.DataZoomOpts(),`
	`56`	`+)`
	`57`	`+# bar.render_notebook()`
	`58`	`+bar.render('color.html')`
	`59`	`+`
	`60`	`+`
	`61`	`+# 尺码可视化`
	`62`	`+df2 = pd.DataFrame(size1, columns=['size'])`
	`63`	`+analyse_size = df2['size'].value_counts()`
	`64`	`+`
	`65`	`+bar = Bar()`
	`66`	`+bar.add_xaxis(analyse_size.index.values.tolist())`
	`67`	`+bar.add_yaxis("", analyse_size.values.tolist())`
	`68`	`+bar.set_global_opts(`
	`69`	`+ xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),`
	`70`	`+ title_opts=opts.TitleOpts(title="尺寸分布"),`
	`71`	`+ # datazoom_opts=opts.DataZoomOpts(),`
	`72`	`+)`
	`73`	`+bar.render('size1.html')`
	`74`	`+`
	`75`	`+df2 = pd.DataFrame(size2, columns=['size'])`
	`76`	`+analyse_size = df2['size'].value_counts()`
	`77`	`+`
	`78`	`+bar = Bar()`
	`79`	`+bar.add_xaxis(analyse_size.index.values.tolist())`
	`80`	`+bar.add_yaxis("", analyse_size.values.tolist())`
	`81`	`+bar.set_global_opts(`
	`82`	`+ xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),`
	`83`	`+ title_opts=opts.TitleOpts(title="尺寸分布"),`
	`84`	`+ # datazoom_opts=opts.DataZoomOpts(),`
	`85`	`+)`
	`86`	`+bar.render('size2.html')`
	`87`	`+`
	`88`	`+`
	`89`	`+`
	`90`	`+# 评论可视化`
	`91`	`+text = " ".join(comments)`
	`92`	`+def gen_wc_split_text(text='There is no txt', max_words=None, background_color=None,`
	`93`	`+ font_path='/System/Library/Fonts/PingFang.ttc',`
	`94`	`+ output_path='', output_name='',`
	`95`	`+ mask_path=None, mask_name=None,`
	`96`	`+ width=400, height=200, max_font_size=100, axis='off'):`
	`97`	`+ all_seg = jieba.cut(text, cut_all=False)`
	`98`	`+ split_text = ' '`
	`99`	`+ for seg in all_seg:`
	`100`	`+ split_text = split_text + seg + ' '`
	`101`	`+`
	`102`	`+ # 设置一个底图`
	`103`	`+ mask = None`
	`104`	`+ if mask_path is not None:`
	`105`	`+ mask = np.array(Image.open(path.join(mask_path, mask_name)))`
	`106`	`+`
	`107`	`+ wordcloud = WordCloud(background_color=background_color,`
	`108`	`+ mask=mask,`
	`109`	`+ max_words=max_words,`
	`110`	`+ max_font_size=max_font_size,`
	`111`	`+ width=width,`
	`112`	`+ height=height,`
	`113`	`+ # 如果不设置中文字体,可能会出现乱码`
	`114`	`+ font_path=font_path)`
	`115`	`+ myword = wordcloud.generate(str(split_text))`
	`116`	`+ # 展示词云图`
	`117`	`+ plt.imshow(myword)`
	`118`	`+ plt.axis(axis)`
	`119`	`+ plt.show()`
	`120`	`+`
	`121`	`+ # 保存词云图`
	`122`	`+ wordcloud.to_file(path.join(output_path, output_name))`
	`123`	`+`
	`124`	`+gen_wc_split_text(text, output_name='comments_wc.png', output_path='./')`

`‎xianhuan/yanxuanbra/bra.py‎`

Lines changed: 60 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,60 @@`
	`1`	`+#!/usr/bin/env python3`
	`2`	`+# -- coding: utf-8 --`
	`3`	`+"""`
	`4`	`+@author: 闲欢`
	`5`	`+"""`
	`6`	`+import requests`
	`7`	`+import time`
	`8`	`+import json`
	`9`	`+`
	`10`	`+# 获取产品列表`
	`11`	`+def search_keyword(keyword):`
	`12`	`+ uri = 'https://you.163.com/xhr/search/search.json'`
	`13`	`+ query = {`
	`14`	`+ "keyword": keyword,`
	`15`	`+ "page": 1`
	`16`	`+ }`
	`17`	`+ try:`
	`18`	`+ res = requests.get(uri, params=query).json()`
	`19`	`+ result = res['data']['directly']['searcherResult']['result']`
	`20`	`+ product_id = []`
	`21`	`+ for r in result:`
	`22`	`+ product_id.append(r['id'])`
	`23`	`+ return product_id`
	`24`	`+ except:`
	`25`	`+ raise`
	`26`	`+`
	`27`	`+# 获取评论`
	`28`	`+def details(product_id):`
	`29`	`+ url = 'https://you.163.com/xhr/comment/listByItemByTag.json'`
	`30`	`+ try:`
	`31`	`+ C_list = []`
	`32`	`+ for i in range(1, 100):`
	`33`	`+ query = {`
	`34`	`+ "itemId": product_id,`
	`35`	`+ "page": i,`
	`36`	`+ }`
	`37`	`+ res = requests.get(url, params=query).json()`
	`38`	`+ if not res['data']['commentList']:`
	`39`	`+ break`
	`40`	`+ print("爬取第 %s 页评论" % i)`
	`41`	`+ commentList = res['data']['commentList']`
	`42`	`+ C_list.extend(commentList)`
	`43`	`+ time.sleep(1)`
	`44`	`+`
	`45`	`+ return C_list`
	`46`	`+ except:`
	`47`	`+ raise`
	`48`	`+`
	`49`	`+`
	`50`	`+product_id = search_keyword('文胸')`
	`51`	`+r_list = []`
	`52`	`+for p in product_id:`
	`53`	`+ r_list.extend(details(p))`
	`54`	`+`
	`55`	`+with open('./comments.txt', 'w') as f:`
	`56`	`+ for r in r_list:`
	`57`	`+ try:`
	`58`	`+ f.write(json.dumps(r, ensure_ascii=False) + '\n')`
	`59`	`+ except:`
	`60`	`+ print('出错啦')`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 275e40d

File tree

4 files changed

4 files changed

`‎.DS_Store‎`

`‎xianhuan/.DS_Store‎`

`‎xianhuan/yanxuanbra/ana.py‎`

`‎xianhuan/yanxuanbra/bra.py‎`

0 commit comments