赞
踩
Sklearn库的学习之TF-IDF算法:
# coding:utf-8
"""Load six UTF-8 text files line by line and assemble a corpus list.

The files are read at import time; the corpus itself is only built when
the module is executed as a script.
"""
import jieba
import jieba.posseg as pseg
import os
import sys
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer


def _read_lines(path):
    """Return every line of the UTF-8 text file at *path* as a list.

    Lines keep their trailing newline characters, exactly as produced by
    iterating over the file object.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: if the content is not valid UTF-8.
    """
    # The context manager closes the handle even when reading raises; the
    # original script left three of its six handles open.
    with open(path, encoding="utf-8") as handle:
        return list(handle)


# One list of lines per input document.
onee = _read_lines(r'one.txt')
twoo = _read_lines(r'two.txt')
threee = _read_lines(r'three.txt')
fourr = _read_lines(r'four.txt')
fivee = _read_lines(r'five.txt')
sixx = _read_lines(r'six.txt')

if __name__ == "__main__":
    # NOTE(review): sixx is loaded above but is not part of the corpus,
    # exactly as in the original script — confirm whether six.txt should
    # be included here.
    corpus = onee + twoo + threee + fourr + fivee

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。