# 赞
# 踩
"""Report the 20 most frequent multi-character words in ``book.txt``.

The text is segmented with jieba, punctuation/whitespace tokens are dropped,
words of two or more characters are counted, and the most frequent ones are
printed as a small table.
"""
from collections import Counter
from operator import itemgetter

# Tokens that are exactly one of these characters (punctuation and
# whitespace) are discarded after segmentation.
FILTERED_TOKENS = frozenset(',。?!“ ” ;:、》《\n \u3000')

# Number of rows in the final report.
TOP_N = 20


def filter_tokens(words):
    """Return *words* without tokens that are exactly one of ``FILTERED_TOKENS``.

    A single pass over the list replaces a ``count``/``remove`` loop, which
    rescans the whole list for every token it deletes.
    """
    return [word for word in words if word not in FILTERED_TOKENS]


def count_words(words):
    """Count occurrences of every token that is at least two characters long."""
    return Counter(word for word in words if len(word) >= 2)


def top_words(counts, limit=TOP_N):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Slicing (rather than indexing ``range(limit)``) avoids an ``IndexError``
    when there are fewer than *limit* distinct words. The sort is stable, so
    words with equal counts keep their first-seen order.
    """
    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)
    return ranked[:limit]


def format_row(word, count):
    """Format one report row.

    In the format spec, the number before the colon selects the argument,
    ``^`` centres the value (``<`` would left-align, ``>`` right-align) and
    the trailing number is the minimum field width.
    """
    return '{0:^4}\t{1:^3}'.format(word, count)


def main():
    """Read ``book.txt``, segment it, and print the word-frequency report."""
    # Imported here so the helpers above can be used without jieba installed.
    import jieba

    # 1. Read the text.
    with open('book.txt', 'r', encoding='UTF-8') as fp:
        txt = fp.read()

    # 2. Segment the text into a list of tokens.
    words = jieba.lcut(txt)

    # 3. Drop punctuation and whitespace tokens.
    words = filter_tokens(words)

    # 4. Count words of two or more characters.
    counts = count_words(words)

    # 5. Print the raw (word, count) pairs, then the top entries as a table.
    # items() yields the dict's key/value pairs as tuples; list() makes them
    # printable as a plain list.
    items = list(counts.items())
    print(items)
    for word, count in top_words(counts):
        print(format_row(word, count))


if __name__ == '__main__':
    main()
# Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。