赞
踩
pyhanlp地址:https://github.com/hankcs/pyhanlp
pip 安装完成后,import pyhanlp 会自动下载一个 .hanlp 文件到 C:\ProgramData 目录下。
如果因为国内网络原因无法自动下载,可以在这里手动下载:.hanlp下载地址
from pyhanlp import *
# 关键词提取
def extractKeyword(document, keyword_num=2):
    """Extract keywords from ``document`` by delegating to HanLP.

    Args:
        document: Text handed to ``HanLP.extractKeyword``.
        keyword_num: Forwarded as the second argument to
            ``HanLP.extractKeyword`` (defaults to 2).

    Returns:
        The result of ``HanLP.extractKeyword`` unchanged.
    """
    return HanLP.extractKeyword(document, keyword_num)
if __name__ == '__main__':
    document = ''
    # Materialize the extracted keywords as a Python list.
    keywords = list(extractKeyword(document, keyword_num=2))
    # If a single string is needed downstream, join the keywords with '+'.
    keywords = '+'.join(keywords)
其他方法
from pyhanlp import *


# Automatic summarization
def extractSummary(document, summary_num=1):
    """Return ``HanLP.extractSummary(document, summary_num)`` unchanged."""
    return HanLP.extractSummary(document, summary_num)


# Word segmentation
def segment(document):
    """Return ``HanLP.segment(document)`` unchanged."""
    return HanLP.segment(document)


# Keyword extraction
def extractKeyword(document, keyword_num=2):
    """Return ``HanLP.extractKeyword(document, keyword_num)`` unchanged."""
    return HanLP.extractKeyword(document, keyword_num)


# Phrase extraction
def extractPhrase(document, phrase_num=2):
    """Return ``HanLP.extractPhrase(document, phrase_num)`` unchanged."""
    return HanLP.extractPhrase(document, phrase_num)


def extractPhrase_forSearch(document, phrase_num=2):
    """Return ``HanLP.extractPhrase(document, phrase_num)`` unchanged.

    NOTE(review): the body is identical to ``extractPhrase``; confirm whether
    a search-specific variant was intended.
    """
    return HanLP.extractPhrase(document, phrase_num)


# Automatic summarization + keyword extraction
def summary_and_keyword(document, summary_num=1, keyword_num=2, ifsummary=True):
    """Optionally summarize ``document``, then reduce it to keywords.

    Args:
        document: Input text.
        summary_num: Passed to ``extractSummary`` when ``ifsummary`` is true.
        keyword_num: Passed to ``extractKeyword``.
        ifsummary: When true, ``document`` is first replaced by the string
            form of its summary with ``[`` and ``]`` removed.

    Returns:
        The extracted keywords joined with ``'+'`` when the (possibly
        summarized) text has at least 10 characters; otherwise that text
        itself.
    """
    # NOTE(review): block nesting was reconstructed from statement order —
    # bracket stripping belongs to the summary branch, and the length check
    # runs on every call so the function always returns a value. Confirm
    # against the original author's intent.
    if ifsummary:
        document = str(extractSummary(document, summary_num=summary_num))
        # str() of the summary result is wrapped in square brackets.
        document = document.replace('[', '')
        document = document.replace(']', '')

    if len(document) >= 10:
        keywords = list(extractKeyword(document, keyword_num=keyword_num))
        return '+'.join(keywords)
    # Text too short for keyword extraction: hand it back as is.
    return document
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。