赞
踩
今天修改了情感分析的程序,发现之前的版本有一些不足。这里用最简单的方式实现了一个对字符串(string)做情感分析的小函数,它会加载积极词典、消极词典、程度词典,以及反转词等词典。这里我没有做符号的分析和判断,因为这些东西暂时用不到,需要的童鞋可以自己添加。
由于这里不能放文件,所以我把文件放到网盘啦,哇哇哇。链接: http://pan.baidu.com/s/1mhDqfWG 密码: vm38
- import jieba
- import cPickle as pickle
# Load the sentiment lexicons used by emotion_test().
#
# SECURITY NOTE: pickle.load() can execute arbitrary code while unpickling.
# Only load lexicon files that come from a source you trust.


def _load_lexicon(path):
    """Unpickle and return the lexicon stored at *path*.

    The file is opened in binary mode because a pickle stream is binary
    data (text mode may translate newlines on some platforms and corrupt
    it), and inside a ``with`` block so the handle is always closed.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Polarity lexicons: positive and negative sentiment words.
g_pos_dict = _load_lexicon("../sentiment_dic/posdict.pkl")
g_neg_dict = _load_lexicon("../sentiment_dic/negdict.pkl")
# Degree lexicons; emotion_test() scales a sentiment word's weight by
# x4 ("most"), x3 ("very"), x2 ("more"), /2 ("ish") and /4 ("insufficient").
g_most_dict = _load_lexicon("../sentiment_dic/mostdict.pkl")
g_more_dict = _load_lexicon("../sentiment_dic/moredict.pkl")
g_very_dict = _load_lexicon("../sentiment_dic/verydict.pkl")
g_ish_dict = _load_lexicon("../sentiment_dic/ishdict.pkl")
# NOTE(review): the file name is spelled "insufficentdict" in the original
# script; it is kept as-is, presumably to match the file on disk.
g_insufficient_dict = _load_lexicon("../sentiment_dic/insufficentdict.pkl")
# Inversion (negation) words; an odd number of them flips the sign.
g_inverse_dict = _load_lexicon("../sentiment_dic/inversedict.pkl")
-
-
def _modifier_weight(modifiers):
    """Return the signed weight of a single sentiment word.

    *modifiers* is the list of UTF-8 encoded tokens that sit between the
    previous sentiment word and the current one.  The weight starts at 1
    and every degree word found scales it; an odd number of inversion
    words flips its sign.
    """
    weight = 1
    inversions = 0
    for token in modifiers:
        # Only the first matching lexicon applies to a token, so the order
        # of these checks matters if a word appears in several lexicons.
        if token in g_most_dict:
            weight *= 4.0
        elif token in g_very_dict:
            weight *= 3.0
        elif token in g_more_dict:
            weight *= 2.0
        elif token in g_ish_dict:
            weight /= 2.0
        elif token in g_insufficient_dict:
            weight /= 4.0
        elif token in g_inverse_dict:
            inversions += 1
    # Two inversions cancel each other out; only an odd count negates.
    if inversions % 2 == 1:
        weight *= -1.0
    return weight


def emotion_test(string):
    """Score the sentiment of *string* with the lexicon-based method.

    The text is segmented with ``jieba.cut``.  Each token found in the
    positive or negative lexicon contributes a weight that is adjusted by
    the degree and inversion words located between it and the previous
    sentiment word (see ``_modifier_weight``).

    Args:
        string: the text to analyse.

    Returns:
        A ``(positive_score, negative_score)`` tuple.  Both values are
        non-negative: a total that turned negative through inversion is
        moved, as a positive amount, to the opposite polarity.
    """
    # Encode every token once up front instead of re-encoding it on each
    # lookup; the lexicons are presumably keyed by UTF-8 byte strings.
    tokens = [word.encode("utf8") for word in jieba.cut(string)]

    window_start = 0  # index just after the previous sentiment word
    pos_total = 0
    neg_total = 0
    for index, token in enumerate(tokens):
        if token in g_pos_dict:
            pos_total += _modifier_weight(tokens[window_start:index])
        elif token in g_neg_dict:
            neg_total += _modifier_weight(tokens[window_start:index])
        else:
            continue
        # The next sentiment word only looks at modifiers after this one.
        window_start = index + 1

    # A negative total means the sentiment was inverted, so it really
    # belongs to the other polarity.
    if pos_total <= 0 and neg_total <= 0:
        pos_total, neg_total = -neg_total, -pos_total
    elif pos_total <= 0 and neg_total >= 0:
        neg_total -= pos_total
        pos_total = 0
    elif pos_total >= 0 and neg_total <= 0:
        pos_total -= neg_total
        neg_total = 0
    return pos_total, neg_total
-
-
# Demo: score a sample sentence and print the resulting
# (positive_score, negative_score) tuple.
print(emotion_test("我真的非常的烦心"))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。