赞
踩
# Word-frequency preparation script: creates the output workbook, loads the
# source text, and removes stop words so the remaining words can be counted.
import re
import warnings

from openpyxl import Workbook

warnings.filterwarnings('ignore')

# Create the result workbook and add a worksheet with the three column
# headers (rank, word, frequency), then save it to disk straight away.
wb = Workbook()
ws1 = wb.create_sheet('词频统计')
ws1['A1'] = '排序'
ws1['B1'] = '单词'
ws1['C1'] = '词频'
wb.save('./斯宾塞自传词频统计.xlsx')
print("词频工作表创建好啦!")

# Load the source text, lowercase it, and replace every non-letter character
# with a space so that split() yields purely alphabetic tokens.
# errors='ignore' drops bytes that cannot be decoded instead of raising.
with open('spencer.txt', errors='ignore') as text_file:
    txt = text_file.read().lower()
txt = re.sub(r'[^a-zA-Z]', ' ', txt)
words = txt.split()
print("文本单词处理好啦!")

# Load the stop-word list and normalise it the same way as the text.
# Lowercasing matters: the text tokens are all lowercase, so a stop word
# written with capitals in the file would otherwise never match.
with open('stop_words.txt') as stop_file:
    stop_words = stop_file.read().lower()
stop_words = re.sub(r'[^a-zA-Z]', ' ', stop_words)
stop_words = stop_words.split()
print("停用词表处理好啦!")

# Build the set once so each membership test is O(1) rather than a scan of
# the whole stop-word list for every word in the text.
stop_word_set = set(stop_words)
words = [x for x in words if x not in stop_word_set]
print("已经从文本中排除停用词啦!")
-
- counts = {}
- for word in words:
- if word in counts:
- counts[word] = counts[word] + 1
- elif word not in counts:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。