赞
踩
早年学习时按原理写的 TF-IDF 关键词提取
# TF-IDF keyword extraction
import jieba
import jieba.analyse
import re
from collections import Counter
from numpy import *


def load_stop_words():
    """Load the stop-word list into the module-level ``stopwords`` global.

    Reads ``data/stop_word.txt`` (UTF-8, one entry per line; the path is
    relative to the current working directory) and stores the lines, with
    their trailing newline removed, in the global ``stopwords`` list.
    Blank lines are kept as empty strings and no other whitespace is
    stripped.

    Returns:
        The same list that was assigned to ``stopwords``, so callers can
        use the result directly instead of relying on the global.

    Raises:
        OSError: If the stop-word file cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
    """
    global stopwords
    with open("data/stop_word.txt", "r", encoding="utf-8") as f:
        # Text mode normalises line endings to "\n", so removing the
        # trailing "\n" is all that is needed per line.
        stopwords = [line.rstrip("\n") for line in f]
    return stopwords


# NOTE(review): the visible source was collapsed onto a single line, so the
# indentation of the function below is reconstructed, and the source is cut
# off before the function ends. Its statements are reproduced unchanged.
def jieba_tf_idf_extract_keyword(corpus_list):
    # NOTE(review): this loop only rebinds the local name `corpus`; the
    # cleaned strings are never written back to `corpus_list`, which is what
    # the extraction loop below reads -- confirm whether that is intended.
    for corpus in corpus_list:
        corpus = re.sub(r'[^\w\s]', '', corpus)
        corpus = re.sub(r'[0-9]', '', corpus)
    keyword = []
    for i in range(len(corpus_list)):
        keyword.append([])
        keywords = jieba.analyse.extract_tags(
            corpus_list[i], topK=20, withWeight=True,
            allowPOS=('n', 'nr', 'ns'))
        # NOTE(review): the visible source ends here, mid-statement; the
        # argument to append and the rest of the function are not shown.
        keyword[i].append
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。