赞
踩
源代码:点击此处下载
from wordcloud import WordCloud
from wordcloud import ImageColorGenerator
from PIL import Image
import numpy as np

# Sample text to render.
txt = "Python Java C++ JavaScript PHP Ruby Swift Kotlin Go Rust"

# Outline mask for the cloud; regions that should stay outside the outline
# are expected to be white in the image.
outline = np.array(Image.open("love.png"))

# Word-cloud configuration.
options = dict(
    width=400,
    height=400,
    font_path=None,
    repeat=True,               # whether words may appear more than once
    mask=outline,
    background_color="white",  # background colour
    max_words=150,             # maximum number of words in the cloud
)

cloud = WordCloud(**options)
cloud.generate(txt)            # build the cloud from the sample text
cloud.to_file("词云图.jpg")     # write the rendered cloud to an image file
在使用 wordcloud 库时,如果运行报错:ValueError: Only supported for TrueType fonts
解决方案:升级 Pillow 库(例如执行 pip install --upgrade pillow)后重新运行。
import jieba
# Sentence used to demonstrate jieba's three segmentation modes.
sentence = "中国是一个伟大的国家"

# Precise mode.
print(jieba.lcut(sentence))

# Full mode.
print(jieba.lcut(sentence, cut_all=True))

# Search-engine mode.
print(jieba.lcut_for_search(sentence))

# jieba.add_word(str) adds the new word `str` to the segmentation dictionary.
from collections import Counter

from wordcloud import WordCloud
from wordcloud import ImageColorGenerator
from PIL import Image
import numpy as np
from jieba import *

# Characters treated as separators; each one is replaced by a single space.
_SIGN = '''!~·@¥……*“”‘’\n(){}【】;:"'「,」。-、?\u3000\ufeff'''
_SIGN_TABLE = str.maketrans(_SIGN, ' ' * len(_SIGN))


def getText(filename):
    """Read a UTF-8 text file and replace separator characters with spaces.

    Args:
        filename: Path of the text file to read.

    Returns:
        The file content with every character of ``_SIGN`` replaced by a
        space.
    """
    # The context manager guarantees the file handle is closed.
    with open(filename, encoding='utf-8') as f:
        text = f.read()
    # One translate pass instead of one str.replace call per character.
    return text.translate(_SIGN_TABLE)


def wordCount(text, N):
    """Count segmented words and return the N most frequent ones.

    Args:
        text: Text to segment with jieba's precise mode (``lcut``).
        N: Maximum number of words to return.

    Returns:
        A dict mapping word -> number of occurrences, ordered from most to
        least frequent. Words shorter than two characters are ignored.
    """
    words = lcut(text)  # precise-mode segmentation
    # Counter starts every word at 1 on its first occurrence, and the top-N
    # selection begins with the most frequent word rather than skipping it.
    counts = Counter(word for word in words if len(word) >= 2)
    return dict(counts.most_common(N))


def drawWordCloud(data, N):
    """Render a word cloud from word counts and save it as an image.

    Args:
        data: Dict mapping word -> occurrence count.
        N: Maximum number of words drawn in the cloud.
    """
    c = WordCloud(
        width=400,
        height=400,
        # Raw string: the backslashes are path separators, not escapes.
        font_path=r"C:\Windows\Fonts\STXINGKA.TTF",
        repeat=False,  # whether words may appear more than once
        # Outline mask; regions outside the outline should be white.
        mask=np.array(Image.open("三国.png")),
        background_color="white",
        max_words=N,  # maximum number of words in the cloud
    )
    # Pass the counts themselves so that word size reflects frequency;
    # joining the keys into a string would give every word the same weight.
    c.generate_from_frequencies(data)
    c.to_file("自制结果1.jpg")  # write the rendered cloud to an image file


if __name__ == '__main__':
    N = 200
    text = getText('三国演义utf8.txt')
    result = wordCount(text, N)
    drawWordCloud(result, N)
运行结果:
十个名字:刘备、赵云、关羽、周瑜、曹操、孔明、孙权、司马懿、张飞、吕布
from collections import Counter

from wordcloud import WordCloud
from wordcloud import ImageColorGenerator
from PIL import Image
import numpy as np
from jieba import *

# Characters treated as separators; each one is replaced by a single space.
_SIGN = '''!~·@¥……*“”‘’\n(){}【】;:"'「,」。-、?\u3000\ufeff'''
_SIGN_TABLE = str.maketrans(_SIGN, ' ' * len(_SIGN))

# The ten character names whose occurrences are counted.
_NAMES = frozenset(
    ["刘备", "赵云", "关羽", "周瑜", "曹操", "孔明", "孙权", "司马懿", "张飞", "吕布"]
)


def getText(filename):
    """Read a UTF-8 text file and replace separator characters with spaces.

    Args:
        filename: Path of the text file to read.

    Returns:
        The file content with every character of ``_SIGN`` replaced by a
        space.
    """
    # The context manager guarantees the file handle is closed.
    with open(filename, encoding='utf-8') as f:
        text = f.read()
    # One translate pass instead of one str.replace call per character.
    return text.translate(_SIGN_TABLE)


def wordCount(text, N):
    """Count occurrences of the ten tracked names and return the top N.

    Args:
        text: Text to segment with jieba's precise mode (``lcut``).
        N: Maximum number of names to return.

    Returns:
        A dict mapping name -> number of occurrences, ordered from most to
        least frequent. Only names listed in ``_NAMES`` are counted.
    """
    words = lcut(text)  # precise-mode segmentation
    # Counter starts every name at 1 on its first occurrence, so a name that
    # appears once is reported as 1 rather than 0.
    counts = Counter(word for word in words if word in _NAMES)
    return dict(counts.most_common(N))


def drawWordCloud(data, N):
    """Render a word cloud from name counts and save it as an image.

    Args:
        data: Dict mapping word -> occurrence count.
        N: Maximum number of words drawn in the cloud.
    """
    c = WordCloud(
        width=400,
        height=400,
        # Raw string: the backslashes are path separators, not escapes.
        font_path=r"C:\Windows\Fonts\STXINGKA.TTF",
        repeat=False,  # whether words may appear more than once
        # Outline mask; regions outside the outline should be white.
        mask=np.array(Image.open("love.png")),
        background_color="white",
        max_words=N,  # maximum number of words in the cloud
    )
    # Pass the counts themselves so that word size reflects frequency;
    # joining the keys into a string would give every name the same weight.
    c.generate_from_frequencies(data)
    c.to_file("自制结果2.jpg")  # write the rendered cloud to an image file


if __name__ == '__main__':
    text = getText('三国演义utf8.txt')
    result = wordCount(text, 10)
    drawWordCloud(result, 10)
运行结果:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。