赞
踩
数据结构:搜狗数据集(最外层文件夹)
- import os
- import jieba
-
- # 保存文件的函数
def savefile(savepath, content):
    """Write ``content`` to ``savepath``, replacing any existing file.

    The file is opened in text mode with ``encoding='ANSI'`` and
    ``errors='ignore'``, so characters the codec cannot encode are dropped
    silently instead of raising.

    NOTE(review): Python's codecs documentation lists ``ansi`` as an alias
    of the Windows-only ``mbcs`` codec; on other platforms ``open`` raises
    ``LookupError`` for it. Confirm this is only meant to run on Windows.

    Args:
        savepath: Path of the file to create or overwrite.
        content: Text to write.
    """
    # The context manager closes the handle even when write() raises.
    with open(savepath, 'w', encoding='ANSI', errors='ignore') as fp:
        fp.write(content)
-
- # 读取文件的函数
def readfile(path):
    """Return the entire text content of the file at ``path``.

    The file is opened in text mode with ``encoding='ANSI'`` and
    ``errors='ignore'``, so bytes the codec cannot decode are skipped
    silently instead of raising.

    NOTE(review): Python's codecs documentation lists ``ansi`` as an alias
    of the Windows-only ``mbcs`` codec; on other platforms ``open`` raises
    ``LookupError`` for it. Confirm this is only meant to run on Windows.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file content as a single string.
    """
    # The context manager closes the handle even when read() raises.
    with open(path, "r", encoding='ANSI', errors='ignore') as fp:
        return fp.read()
-
- ## 去除停用词的2个函数
- # 创建停用词list
def stopwordslist(filepath):
    """Load a stop-word list from a UTF-8 text file, one entry per line.

    Each line is stripped of leading and trailing whitespace. Blank lines
    are kept and appear as empty strings in the result.

    Args:
        filepath: Path of the UTF-8 encoded stop-word file.

    Returns:
        A list with one stripped string per line of the file, in file order.
    """
    # Open via a context manager so the handle is closed deterministically;
    # iterating the file object yields the same lines as readlines().
    with open(filepath, 'r', encoding='utf-8') as fp:
        return [line.strip() for line in fp]
-
- # 对句子去除停用词
- def movestopwords(sentence):
- s
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。