赞
踩
本项目先用 jieba 进行分词,然后用 wordcloud 绘制词云图。
运行环境:Python 3.8,使用的 IDE 为 PyCharm。
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from wordcloud import WordCloud
import pandas as pd
import jieba
# Build a word -> frequency table (`dic`) from the 'zhaiyao' column of the CSV.
# `text`, `stop` and `dic` stay module-level names for the rest of the script.
text = pd.read_csv('tongjifenxi-zhihu.csv')

# Stop words are stored one per line. A set gives O(1) membership tests in the
# counting loop below.
with open('中文停用词.txt', 'r', encoding='utf-8') as f:
    stop = {line.strip() for line in f}

# Single characters deleted from every entry before segmentation (targeted
# noise removal); one translate() pass replaces the chained replace() calls.
_noise_table = str.maketrans('', '', ' ),。—、的年为')

dic = {}
# dropna() skips empty cells, which pandas loads as float NaN and which would
# otherwise fail on string operations.
for summary in text['zhaiyao'].dropna():
    cleaned = str(summary).translate(_noise_table)
    for word in jieba.lcut(cleaned):  # lcut returns a list of tokens
        # Filter while counting instead of calling list.remove() during
        # iteration: removing in place shifts the list and makes the loop
        # skip the token that follows each removed stop word.
        if word in stop:
            continue
        dic[word] = dic.get(word, 0) + 1
text 的 zhaiyao 列为纯文本,下面展示 text.zhaiyao[0](即第一行)的内容:
最后处理为:
# Render the word cloud from the precomputed frequency table `dic`.

# Background image (user-supplied), converted to an array for use as the mask.
mask_array = np.array(Image.open(r'rio.png'))

# Font file handed to WordCloud for drawing the words (a Windows font path).
font_file = r'C:\Windows\Fonts\FZSTK.TTF'

# NOTE(review): per the wordcloud docs, width/height are ignored when a mask
# is supplied (the mask's shape is used instead) - kept here as in the original.
cloud_options = dict(
    background_color='white',
    width=1000,
    height=800,
    mask=mask_array,
    font_path=font_file,
)
cloud = WordCloud(**cloud_options)

# No stopwords option is passed: stop words were already filtered out when
# `dic` was built.
cloud.generate_from_frequencies(dic)

# Show the result without axes.
plt.imshow(cloud)
plt.axis('off')
plt.show()
分享几个底板背景图给大家:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。