赞
踩
import jieba
import jieba.analyse

from tqdm import tqdm
from sklearn.feature_extraction.text import TfidfVectorizer


def key_words1(data, top_k=20):
    """Print the top *top_k* keywords of a corpus using jieba's built-in TF-IDF.

    Args:
        data: iterable of document strings (presumably Chinese text —
            jieba performs the tokenization internally).
        top_k: number of keywords to print (default 20).
    """
    # Join all documents into one text; extract_tags ranks terms by
    # TF-IDF weight and returns (word, weight) pairs with withWeight=True.
    text = ' '.join(data)
    keywords = jieba.analyse.extract_tags(text, topK=top_k, withWeight=True)
    for kw in keywords:
        print(kw)


def key_words2(data, top_k=20):
    """Print the top *top_k* keywords of a corpus using sklearn's TfidfVectorizer.

    Each document is segmented with jieba first (TfidfVectorizer expects
    whitespace-separated tokens). The IDF is fitted on the individual
    documents, then the whole corpus is scored as a single document, so
    the printed weights are corpus-wide TF-IDF values.

    Args:
        data: iterable of document strings (presumably Chinese text).
        top_k: number of keywords to print (default 20).
    """
    segmented = [' '.join(jieba.lcut(doc)) for doc in tqdm(data)]
    vectorizer = TfidfVectorizer(ngram_range=(1, 1))
    vct = vectorizer.fit(segmented)
    # BUG FIX: get_feature_names() was deprecated in scikit-learn 1.0 and
    # removed in 1.2; use get_feature_names_out() when available and fall
    # back to the old name only on very old sklearn versions.
    if hasattr(vct, 'get_feature_names_out'):
        vocabs = vct.get_feature_names_out()
    else:  # scikit-learn < 1.0
        vocabs = vct.get_feature_names()
    corpus = [' '.join(segmented)]
    scores = vct.transform(corpus)  # sparse row: .indices / .data pairs
    ranked = sorted(zip(scores.indices, scores.data),
                    key=lambda pair: pair[1], reverse=True)
    for idx, weight in ranked[:top_k]:
        print(vocabs[idx], weight)
赞
踩
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。