from sklearn.feature_extraction.text import TfidfVectorizer
import jieba

# Commented-out example with English documents:
# text = ['This is the first document.', 'This is the second second document.',
#         'And the third one.', 'Is this the first document?']
# tf = TfidfVectorizer(min_df=1)
# X = tf.fit_transform(text)
# names = tf.get_feature_names_out()
# print(names)
# print(X.toarray())

text = '今天天气真好,我要去北京天安门玩,要去景山攻牙之后,玩完大明劫'

# Segment with jieba in precise mode (cut_all=False)
text_list = jieba.cut(text, cut_all=False)
text_list = ",".join(text_list)

context = []
context.append(text_list)
print(context)

tf = TfidfVectorizer(min_df=1)
X = tf.fit_transform(context)
# get_feature_names() was removed in scikit-learn 1.2; use get_feature_names_out()
names = tf.get_feature_names_out()
print(names)
print(X.toarray())
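Two caveats about the snippet above. First, with only one document every term gets the same IDF, so the TF-IDF weights carry no cross-document information. Second, joining the jieba tokens with commas and relying on TfidfVectorizer's default token_pattern silently drops single-character Chinese words (the default pattern only keeps tokens of two or more word characters). A minimal sketch of a more typical setup, fitting over a small corpus and passing jieba.lcut directly as the tokenizer; the two corpus sentences here are made up for illustration, and scikit-learn >= 1.0 is assumed for get_feature_names_out:

from sklearn.feature_extraction.text import TfidfVectorizer
import jieba

# Hypothetical two-document corpus; with more than one document the IDF
# term actually differentiates common words from distinctive ones.
corpus = [
    '今天天气真好,我要去北京天安门玩',
    '明天天气不好,我不想出门',
]

# jieba.lcut returns a plain token list, so it works as a custom tokenizer.
# token_pattern=None suppresses the "token_pattern is unused" warning.
# Note: punctuation tokens also land in the vocabulary unless filtered out.
tf = TfidfVectorizer(tokenizer=jieba.lcut, token_pattern=None)
X = tf.fit_transform(corpus)

print(tf.get_feature_names_out())
print(X.toarray())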