赞
踩
最近做了一个分析国外读者对红楼梦评价的小项目。这部分是使用textblob库对评论进行情感分析,得到情感值,并且进行分类,生成词云。
生成直方图、条形图的数据分析过程见我的这篇文章
读入的数据是这样的格式的csv文件。包含两列:一列是评论(列名为comment),一列是url来源。
生成的结果是这样的result.csv文件
词云图片:
代码如下
"""Sentiment analysis and word-cloud generation for a CSV file of comments.

Reads a CSV file with a ``comment`` column, scores each comment with
TextBlob's polarity, labels it positive / negative / neutral, writes the
labelled rows to CSV, and renders a word cloud of all comment text.
"""
import csv
from os import listdir

import numpy as np
import pandas as pd

# NOTE: textblob and wordcloud are imported inside the functions that use
# them, so the CSV helpers stay importable when those packages are absent.

# Characters that are blanked out before the text is fed to the word cloud.
_PUNCT_TO_SPACE = str.maketrans({',': ' ', '.': ' ', '\'': ' ', '"': ' '})

# Polarity cut-offs: above the first is positive, below the second negative.
_POSITIVE_THRESHOLD = 0.05
_NEGATIVE_THRESHOLD = -0.03


def getComments(filename):
    """Load the comments and the combined word-cloud text from a CSV file.

    Args:
        filename: Path of a CSV file that has a ``comment`` column.

    Returns:
        A ``(comments, words)`` tuple. ``comments`` is a NumPy array holding
        each non-missing comment as a string; ``words`` is every comment
        joined by single spaces, with commas, periods and quote characters
        replaced by spaces.
    """
    com_file = pd.read_csv(filename)
    # Rows with an empty comment cell are read as NaN; drop them so later
    # string handling does not fail on a float.
    comments = com_file['comment'].dropna().astype(str).to_numpy()
    # Join with a space so the last word of one comment does not fuse with
    # the first word of the next one.
    words = ' '.join(comments).translate(_PUNCT_TO_SPACE)
    return comments, words


def getWordCloud(text_str, picture_name):
    """Render ``text_str`` as a word cloud and save it to ``picture_name``.

    Args:
        text_str: Whitespace-separated text to visualise.
        picture_name: Path of the image file to write.
    """
    from wordcloud import WordCloud

    wordcloud = WordCloud(
        background_color="white",
        width=1980,
        height=1080,
        margin=2,
        random_state=0,
    ).generate(text_str)
    wordcloud.to_file(picture_name)


def _classify(score):
    """Map a polarity score to its positive / negative / neutral label."""
    if score > _POSITIVE_THRESHOLD:
        return '积极'
    if score < _NEGATIVE_THRESHOLD:
        return '消极'
    return '中立'


def get_p_or_n(comments, result_path='result.csv',
               samples_path='samples.csv', sample_every=5):
    """Score every comment's polarity and write the labelled rows to CSV.

    Two files are written: ``result_path`` receives every comment, and
    ``samples_path`` receives every ``sample_every``-th comment with an
    empty ``OurJudge`` column.

    Args:
        comments: Iterable of comment strings.
        result_path: Output path for the full result table.
        samples_path: Output path for the sampled table.
        sample_every: Sampling interval for ``samples_path``; must be >= 1.

    Raises:
        ValueError: If ``sample_every`` is smaller than 1.
    """
    if sample_every < 1:
        raise ValueError("sample_every must be >= 1")

    from textblob import TextBlob

    # newline='' is what the csv module expects of its file objects;
    # without it, rows are separated by blank lines on Windows.
    with open(result_path, 'w', encoding='utf-8', newline='') as csvfile, \
            open(samples_path, 'w', encoding='utf-8',
                 newline='') as samples_file:
        writer = csv.writer(csvfile)
        writer_samples = csv.writer(samples_file)
        writer.writerow(['id', 'result', 'score', 'comment'])
        writer_samples.writerow(
            ['id', 'result', 'score', 'OurJudge', 'comment'])

        for row_id, each in enumerate(comments, start=1):
            score = TextBlob(each).sentiment.polarity
            result = _classify(score)
            writer.writerow([row_id, result, score, each])
            if row_id % sample_every == 0:
                writer_samples.writerow([row_id, result, score, '', each])


def main():
    """Run the pipeline on ``comments.csv`` in the working directory."""
    filename = "comments.csv"
    comments, words = getComments(filename)
    print(len(comments))
    getWordCloud(words, "WordCloud.png")
    get_p_or_n(comments)


if __name__ == "__main__":
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。