#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import json
from os import path

import jieba
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image
from pyecharts import options as opts
from pyecharts.charts import Bar, Pie
from wordcloud import WordCloud

# Accumulators filled while scanning the review dump; the chart and word-cloud
# sections further down read them.
color = []     # one entry per colour name found in a review's SKU info
size = []      # one entry per size label found in a review's SKU info
comments = []  # review texts, one per record

# Fragments stripped from a colour entry (brackets and pack-size annotations).
# They are applied in exactly this order: ")3条" has to be removed before the
# bare ")" and "*2" before "2条", otherwise the longer tokens no longer match.
_COLOR_NOISE = ("(", ")3条", "四条装", "*2", "2条", ")")

# briefs.txt is read as JSON Lines: one JSON object per line.
with open("briefs.txt", "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at the end of the
        # file); json.loads raises on an empty string.
        if not line.strip():
            continue
        data_obj = json.loads(line)
        comments.append(data_obj['content'])
        # A record with no (or null) SKU details still contributes its comment.
        skuinfo = data_obj.get('skuInfo') or []
        # Each SKU entry is used as a string such as "颜色:…" (colour) or
        # "尺码:…" (size).
        for sku in skuinfo:
            if '颜色' in sku and '规格' not in sku:
                # Colour entry that is not a combined "规格" (spec) entry.
                filter_sku = sku.replace("颜色:", "").strip()
                for noise in _COLOR_NOISE:
                    filter_sku = filter_sku.replace(noise, "")
                # Multi-colour packs are joined with '+': record every colour
                # separately and skip empty names left over after cleaning.
                color.extend(name for name in filter_sku.split('+') if name)
            elif '尺码' in sku and '~' not in sku:
                # Size entry; entries containing '~' are skipped
                # (presumably size ranges -- TODO confirm against the data).
                size.append(sku.replace('尺码:', ""))

# Colour visualisation: count how often each colour occurs and render the
# counts as a bar chart written to briefs_color.html.
df = pd.DataFrame(color, columns=['color'])
analyse_color = df['color'].value_counts()

bar = Bar()
# tolist() yields plain Python values for the chart data.
bar.add_xaxis(analyse_color.index.tolist())
bar.add_yaxis("", analyse_color.tolist())
bar.set_global_opts(
    # Axis labels are rotated by -90 degrees.
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-90)),
    title_opts=opts.TitleOpts(title="颜色分布"),
)
bar.render('briefs_color.html')

# Size visualisation: count how often each size label occurs and render the
# counts as a bar chart written to briefs_size.html.
df2 = pd.DataFrame(size, columns=['size'])
analyse_size = df2['size'].value_counts()

bar = Bar()
# tolist() yields plain Python values for the chart data.
bar.add_xaxis(analyse_size.index.tolist())
bar.add_yaxis("", analyse_size.tolist())
bar.set_global_opts(
    # Axis labels are left unrotated here.
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
    title_opts=opts.TitleOpts(title="尺寸分布"),
)
bar.render('briefs_size.html')

# Comment visualisation: merge every review text into one space-separated
# string that is later turned into a word cloud.
text = " ".join(comments)
def gen_wc_split_text(text='There is no txt', max_words=None, background_color=None,
                      font_path='/System/Library/Fonts/PingFang.ttc',
                      output_path='', output_name='',
                      mask_path=None, mask_name=None,
                      width=400, height=200, max_font_size=100, axis='off'):
    """Segment *text* with jieba, build a word cloud, save it and display it.

    Args:
        text: Raw text to segment and visualise.
        max_words: Passed through to ``WordCloud(max_words=...)``.
        background_color: Passed through to ``WordCloud(background_color=...)``.
        font_path: Font file used for rendering. The default is a macOS
            system font path; pass another font on other systems. Without a
            font that covers Chinese, the words may render as garbled glyphs.
        output_path: Directory the image is written to.
        output_name: File name of the image. When empty, nothing is saved.
        mask_path: Directory containing the mask image, or the full path of
            the mask image when ``mask_name`` is None. ``None`` disables the
            mask.
        mask_name: File name of the mask image inside ``mask_path``.
        width: Canvas width passed to ``WordCloud``.
        height: Canvas height passed to ``WordCloud``.
        max_font_size: Largest font size passed to ``WordCloud``.
        axis: Argument forwarded to ``plt.axis`` ('off' hides the axes).

    Returns:
        None. The function writes the image file (if requested) and opens a
        matplotlib window.
    """
    # Join jieba's segments with single spaces in one pass instead of growing
    # a string inside a loop.
    split_text = ' '.join(jieba.cut(text, cut_all=False))

    # Optional mask image.
    mask = None
    if mask_path is not None:
        if mask_name is None:
            mask_file = mask_path
        else:
            mask_file = path.join(mask_path, mask_name)
        mask = np.array(Image.open(mask_file))

    wordcloud = WordCloud(background_color=background_color,
                          mask=mask,
                          max_words=max_words,
                          max_font_size=max_font_size,
                          width=width,
                          height=height,
                          # Without a Chinese-capable font the output may be
                          # garbled.
                          font_path=font_path)
    myword = wordcloud.generate(split_text)

    # Save first: plt.show() can block until the window is closed, and the
    # file should exist even if the display step fails. An empty output_name
    # means there is no valid target file, so saving is skipped.
    if output_name:
        wordcloud.to_file(path.join(output_path, output_name))

    # Display the word cloud.
    plt.imshow(myword)
    plt.axis(axis)
    plt.show()

# Render the word cloud for all collected comments into the current directory.
gen_wc_split_text(text, output_name='briefs_comments_wc.png', output_path='./')