当前位置:   article > 正文

基于NRC词典的离散情感分析(含有程度词和否定词)_情感词中的带程度词

情感词中的带程度词

NRC词典的加载方法和分词方法在之前的文章中已经介绍过,这里直接套用即可。

具体函数参考了这篇文章:基于情感词典的文本情感分析(附代码)_Petrichoryi的博客-CSDN博客_bosonnlp情感词典

本文在该文章的基础上,把原来的正、负两类情感替换为NRC词典中的八种离散情感,并对每一种情感逐一计算得分,具体代码如下:

  1. not_word_file = open('E:/data/否定词.txt','r+',encoding='utf-8')
  2. not_word_list = not_word_file.readlines()
  3. not_word_list = [w.strip() for w in not_word_list]
  4. #读取程度副词文件,里面包含表示程度副词程度的数字
  5. degree_file = open('E:/data/程度副词.txt','r+',encoding='utf-8')
  6. degree_list = degree_file.readlines()
  7. degree_list = [item.strip().split(',') for item in degree_list]
  8. print(not_word_list)
  9. print(degree_list)
  1. from collections import defaultdict
  2. def classify_words(word_list,Sentiment):
  3. sen_dict = defaultdict()
  4. for i in Sentiment:
  5. sen_dict[i] = 1 #因为用的是NRC词典中的情感词汇,没有具体的情感程度,都定为1
  6. degree_dict = defaultdict()
  7. for i in degree_list:
  8. degree_dict[i[0]] = i[1]
  9. sen_word = dict()
  10. not_word = dict()
  11. degree_word = dict()
  12. #分类
  13. for i in range(len(word_list)):
  14. word = word_list[i]
  15. if word in sen_dict.keys() and word not in not_word_list and word not in degree_dict.keys():
  16. # 找出分词结果中在情感字典中的词
  17. sen_word[i] = sen_dict[word]
  18. elif word in not_word_list and word not in degree_dict.keys():
  19. # 分词结果中在否定词列表中的词
  20. not_word[i] = -1
  21. elif word in degree_dict.keys():
  22. # 分词结果中在程度副词中的词
  23. degree_word[i] = degree_dict[word]
  24. return sen_word,not_word,degree_word
  1. def score_sentiment(sen_word,not_word,degree_word,seg_result):
  2. #权重初始化为1
  3. W = 1
  4. score = 0
  5. #情感词下标初始化
  6. sentiment_index = -1
  7. #情感词的位置下标集合
  8. sentiment_index_list = list(sen_word.keys())
  9. #遍历分词结果
  10. for i in range(0,len(seg_result)):
  11. #如果是情感词
  12. if i in sen_word.keys():
  13. #权重*情感词得分
  14. score += W*float(sen_word[i])
  15. #情感词下标加一,获取下一个情感词的位置
  16. sentiment_index += 1
  17. if sentiment_index < len(sentiment_index_list)-1:
  18. #判断当前的情感词与下一个情感词之间是否有程度副词或否定词
  19. for j in range(sentiment_index_list[sentiment_index],sentiment_index_list[sentiment_index+1]):
  20. #更新权重,如果有否定词,权重取反
  21. if j in not_word.keys():
  22. W = W*(-1)
  23. elif j in degree_word.keys():
  24. W *= float(degree_word[j])
  25. #定位到下一个情感词
  26. if sentiment_index < len(sentiment_index_list)-1:
  27. i = sentiment_index_list[sentiment_index+1]
  28. return score
  1. def sentiment_score(sentence,Sentiment):
  2. #1.对文档分词
  3. seg_list = seg_depart(sentence) #分词函数之前文章有写
  4. #2.将分词结果转换成字典,找出情感词、否定词和程度副词
  5. sen_word,not_word,degree_word = classify_words(seg_list,Sentiment)
  6. #3.计算得分
  7. score = score_sentiment(sen_word,not_word,degree_word,seg_list)
  8. return score
  1. #将NRC词典中所有离散情感词汇整理成二维列表
  2. Senti_list8 = [Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust]
  1. #将文档中句子的八种情绪结果转换为字典
  2. all_senti = dict()
  3. i = 0
  4. for senti in Senti_list8:
  5. filename = r'E:\data\test1.csv'
  6. senti_count = []
  7. with open(filename, 'r', encoding='UTF-8') as csvfile:
  8. next(csvfile)
  9. reader = csv.reader(csvfile)
  10. for row in reader:
  11. score = sentiment_score(row[0],Sentiment=senti)
  12. senti_count.append(score)
  13. print(senti_count)
  14. all_senti[i]=senti_count
  15. i = i+1
  1. #存为csv
  2. test = pd.DataFrame.from_dict(all_senti)
  3. test.to_csv(r"E:\data\test1_result2.csv")

声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号