当前位置:   article > 正文

Python情感分析_python情感分析数据集csdn

python情感分析数据集csdn
# !/usr/bin/python3
# -*- coding:utf-8 -*-
"""
@author: JHC000abc@gmail.com
@file: test.py
@time: 2023/12/6 13:45 
@desc: 

"""
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB


class SklearnEmotion(object):
    """
    Sentiment classifier built on scikit-learn.

    At construction time the two corpus files are read (one sample per
    line) and a ``CountVectorizer`` + ``MultinomialNB`` model is fitted on
    them.  ``get_result`` then labels new sentences with "积极" (positive)
    or "消极" (negative).

    NOTE(review): ``CountVectorizer`` is used with its default settings,
    which do not perform Chinese word segmentation -- confirm that the
    corpus lines and the query sentences are tokenised the way you expect.
    """

    def __init__(self, file_pos, file_neg):
        """
        Store the corpus paths and train the model immediately.

        :param file_pos: path of the UTF-8 text file with positive samples
        :param file_neg: path of the UTF-8 text file with negative samples
        """
        self.vectorizer = None
        self.train_vectors = None
        self.clf = None
        self.file_pos = file_pos
        self.file_neg = file_neg
        self.process()

    @staticmethod
    def _read_lines(path):
        """
        Return the stripped, non-empty lines of a UTF-8 text file.

        :param path: file to read
        :return: list of str, one entry per non-blank line
        """
        with open(path, "r", encoding="utf-8") as handle:
            stripped = (line.strip() for line in handle)
            return [text for text in stripped if text]

    def read_train_data(self):
        """
        Read both corpus files and build the training set.

        Each file is read in full and independently of the other, so corpora
        of different lengths are both used completely (iterating the two
        files in lockstep would silently drop the tail of the longer one).
        Blank lines are skipped because they carry no features.

        :return: ``(x_train, y_train)`` -- the samples (positive first, then
                 negative) and the matching list of labels
        """
        pos_data = self._read_lines(self.file_pos)
        neg_data = self._read_lines(self.file_neg)

        x_train = pos_data + neg_data
        y_train = ["积极"] * len(pos_data) + ["消极"] * len(neg_data)

        return x_train, y_train

    def train_data(self, x_train, y_train):
        """
        Fit the vectorizer and the naive Bayes classifier.

        The fitted objects are stored on ``self.vectorizer``,
        ``self.train_vectors`` and ``self.clf``.

        :param x_train: list of training sentences
        :param y_train: list of labels, same length as ``x_train``
        :return: None
        """
        self.vectorizer = CountVectorizer()
        self.train_vectors = self.vectorizer.fit_transform(x_train)

        # Train the multinomial naive Bayes classifier on the count vectors.
        self.clf = MultinomialNB()
        self.clf.fit(self.train_vectors, y_train)

    def process(self):
        """
        Read the training data and train the model.

        :return: None
        """
        x_train, y_train = self.read_train_data()
        self.train_data(x_train, y_train)

    def get_result(self, sentens_list):
        """
        Classify sentences with the trained model.

        This is a generator: vectorising and predicting happen when the
        first result is requested.

        :param sentens_list: iterable of sentences to classify
        :return: yields one ``{sentence: predicted_label}`` dict per input,
                 in input order
        """
        # Materialise once so any iterable (not only indexable sequences)
        # can be both vectorised and paired with its prediction.
        sentences = list(sentens_list)
        test_vectors = self.vectorizer.transform(sentences)
        predictions = self.clf.predict(test_vectors)
        for sentence, prediction in zip(sentences, predictions):
            yield {sentence: prediction}




if __name__ == '__main__':
    # Demo run: train on the two local dictionary files, then print the
    # predicted label for a few sample phrases.
    file_pos = R"C:\Users\v_jiaohaicheng\Downloads\pos_all_dict.txt"
    file_neg = R"C:\Users\v_jiaohaicheng\Downloads\neg_all_dict.txt"
    se = SklearnEmotion(file_pos, file_neg)
    samples = ["我喜欢你", "脑残", "大傻叉"]
    for result in se.get_result(samples):
        print(result)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91

file_pos = R"C:\Users\v_jiaohaicheng\Downloads\pos_all_dict.txt"
file_neg = R"C:\Users\v_jiaohaicheng\Downloads\neg_all_dict.txt"

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/码创造者/article/detail/893826
推荐阅读
相关标签
  

闽ICP备14008679号