赞
踩
要做一个用到中文文本情感分析的项目,查找了多种资料,在网上看了很多博客后终于完成。对自己帮助最大的两篇博客为【python机器学习】中文情感分析,以及 Python开发之Sklearn的模型和CountVectorizer Transformer保存和使用中模型的加载与保存。之后又参照之前的手写数字识别,采用svm、决策树、朴素贝叶斯、knn等算法分别训练模型,采用了data1.csv作为数据集。
"""Chinese text sentiment analysis utilities.

Helpers for labeling star ratings, scoring sentiment with SnowNLP,
segmenting Chinese text with jieba, loading stop-word lists, and
training a multinomial naive-Bayes model.
"""
import os
import pickle

import jieba
import joblib
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
# Discrete (multinomial) naive Bayes for bag-of-words counts.
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from snownlp import SnowNLP


def make_label(star):
    """Binarize a star rating: ratings above 3 are positive (1), otherwise negative (0)."""
    return 1 if star > 3 else 0


def snow_result(comment):
    """Return SnowNLP's sentiment prediction for *comment*: 1 if score >= 0.5, else 0."""
    s = SnowNLP(comment)
    return 1 if s.sentiments >= 0.5 else 0


def chinese_word_cut(mytext):
    """Segment Chinese text with jieba and join tokens with spaces.

    The space-joined form is what CountVectorizer expects as input.
    """
    return " ".join(jieba.cut(mytext))


def get_custom_stopwords(stop_words_file):
    """Load a stop-word list from a UTF-8 file, one word per line.

    Returns the lines as a list; a trailing newline in the file yields a
    trailing empty string, matching the original behavior.
    """
    with open(stop_words_file, 'r', encoding='UTF-8') as f:
        stopwords = f.read()
    # The original copied the list via an identity comprehension; the
    # split result is already a fresh list, so return it directly.
    return stopwords.split('\n')


def nb_mode_train(x_train, y_train, Vectorizer):
    """Train a multinomial naive-Bayes classifier and ensure the model directory exists.

    NOTE(review): the visible source ends after creating 'testModel'; the
    original presumably persisted the model and Vectorizer here (e.g. via
    joblib.dump) — confirm against the full source. *Vectorizer* is unused
    in the visible portion.
    """
    nb = MultinomialNB()
    nb.fit(x_train, y_train)
    # Create the output directory for saved models if it doesn't exist yet.
    dirs = 'testModel'
    if not os.path.exists(dirs):
        os.makedirs(dirs)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。