赞
踩
NRC词典的加载方法和分词方法在之前的文章中已经介绍过,这里直接套用即可。
具体函数参考了这篇文章:《基于情感词典的文本情感分析(附代码)》(Petrichoryi的博客-CSDN博客,bosonnlp情感词典)。
# Load the negation words, one word per line.
# The file is opened read-only inside a context manager so the handle is
# always closed and no write permission is required.
with open('E:/data/否定词.txt', 'r', encoding='utf-8') as not_word_file:
    # Blank lines are dropped so an empty string is never treated as a word.
    not_word_list = [line.strip() for line in not_word_file if line.strip()]

# Load the degree adverbs. Each line is split on ','; classify_words reads
# field 0 as the adverb and field 1 as the number expressing its strength.
with open('E:/data/程度副词.txt', 'r', encoding='utf-8') as degree_file:
    # Blank lines are dropped: they would otherwise yield [''] rows that have
    # no strength field.
    degree_list = [
        line.strip().split(',') for line in degree_file if line.strip()
    ]

print(not_word_list)
print(degree_list)
- from collections import defaultdict
def classify_words(word_list, Sentiment, not_words=None, degrees=None):
    """Sort the tokens of one text into sentiment, negation and degree words.

    Args:
        word_list: Segmented tokens of the text, in order.
        Sentiment: Iterable of (hashable) sentiment words. The NRC lexicon
            carries no intensity, so every sentiment word gets the score 1.
        not_words: Optional iterable of negation words. Defaults to the
            module-level ``not_word_list``.
        degrees: Optional iterable of ``[adverb, strength]`` rows. Defaults to
            the module-level ``degree_list``. Rows with fewer than two fields
            (e.g. produced by a blank line) are ignored instead of raising
            IndexError.

    Returns:
        A tuple ``(sen_word, not_word, degree_word)`` of dicts keyed by the
        token's position in ``word_list``:
        ``sen_word[pos] == 1``, ``not_word[pos] == -1`` and
        ``degree_word[pos]`` is the strength exactly as stored in the row
        (not converted to a number here).
    """
    # Fall back to the lexicons loaded at module level (looked up at call time).
    if not_words is None:
        not_words = not_word_list
    if degrees is None:
        degrees = degree_list

    # Sets give O(1) membership tests inside the token loop.
    sentiment_words = set(Sentiment)
    negation_words = set(not_words)
    # Later rows win when an adverb is listed more than once.
    degree_dict = {row[0]: row[1] for row in degrees if len(row) >= 2}

    sen_word = {}
    not_word = {}
    degree_word = {}
    for position, word in enumerate(word_list):
        # Priority when a word appears in several lexicons:
        # degree adverb > negation word > sentiment word.
        if word in degree_dict:
            degree_word[position] = degree_dict[word]
        elif word in negation_words:
            not_word[position] = -1
        elif word in sentiment_words:
            sen_word[position] = 1
    return sen_word, not_word, degree_word
data:image/s3,"s3://crabby-images/deb9d/deb9d52e6c78f73fbfaadc6e519fd00d286664e1" alt=""
def score_sentiment(sen_word, not_word, degree_word, seg_result):
    """Compute the sentiment score of one segmented text.

    Each sentiment word contributes its own score multiplied by a weight.
    The weight is built from the modifiers found since the previous
    sentiment word (or since the start of the text): every negation word
    flips its sign and every degree adverb multiplies it by its strength.

    Fixes over the previous implementation:
      * modifiers in front of the first sentiment word are now applied
        (previously a negation before the first sentiment word was ignored);
      * the weight is reset to 1 after each sentiment word, so a modifier
        only affects the next sentiment word instead of all later ones;
      * the ineffective reassignment of the loop variable was removed.

    Args:
        sen_word: Dict mapping token position -> sentiment score.
        not_word: Dict whose keys are the positions of negation words.
        degree_word: Dict mapping token position -> degree strength
            (anything accepted by ``float``).
        seg_result: The segmented tokens; only its length is used, to bound
            the positions that are visited.

    Returns:
        The accumulated score: a float when at least one sentiment word was
        scored, otherwise the integer 0.
    """
    score = 0
    weight = 1
    for position in range(len(seg_result)):
        if position in sen_word:
            score += weight * float(sen_word[position])
            # Modifiers apply only to the next sentiment word.
            weight = 1
        elif position in not_word:
            weight *= -1
        elif position in degree_word:
            weight *= float(degree_word[position])
    # Modifiers after the last sentiment word have nothing to modify.
    return score
data:image/s3,"s3://crabby-images/deb9d/deb9d52e6c78f73fbfaadc6e519fd00d286664e1" alt=""
def sentiment_score(sentence, Sentiment):
    """Return the sentiment score of ``sentence`` for one emotion word list.

    The sentence is segmented with ``seg_depart``, its tokens are sorted into
    sentiment / negation / degree words by ``classify_words`` using
    ``Sentiment`` as the sentiment lexicon, and the result is scored by
    ``score_sentiment``.
    """
    # Step 1: segment the text into tokens.
    tokens = seg_depart(sentence)
    # Step 2: locate the sentiment, negation and degree words among the tokens.
    categories = classify_words(tokens, Sentiment)
    # Step 3: score the text from the three position dicts.
    return score_sentiment(*categories, tokens)
# Collect the NRC word lists of the eight discrete emotions, in a fixed order.
Senti_list8 = [Anger, Anticipation, Disgust, Fear, Joy, Sadness, Surprise, Trust]

# Read the sentences once instead of re-parsing the CSV for every emotion.
filename = r'E:\data\test1.csv'
# newline='' is what the csv module expects so that quoted fields containing
# line breaks are parsed correctly.
with open(filename, 'r', encoding='UTF-8', newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)  # skip the header row; tolerate an empty file
    # The text to score is the first column; blank rows are skipped.
    sentences = [row[0] for row in reader if row]

# Map emotion index (position in Senti_list8) -> list of per-sentence scores.
all_senti = dict()
for i, senti in enumerate(Senti_list8):
    senti_count = [
        sentiment_score(sentence, Sentiment=senti) for sentence in sentences
    ]
    print(senti_count)
    all_senti[i] = senti_count

# Save as CSV: one column per emotion, one row per sentence.
test = pd.DataFrame.from_dict(all_senti)
test.to_csv(r"E:\data\test1_result2.csv")
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。