当前位置:   article > 正文

python 文本情感分类_python 中文文本情感分类

python 中文文本情感分类

      对于一个简单的文本情感分类来说,其实就是一个二分类问题,这篇博客主要讲述如何使用scikit-learn来做文本情感分类。分类主要分为两步:1)训练,主要根据训练集来学习分类模型的规则;2)分类,先用已知标签的测试集评估分类的准确率等指标,如果效果还可以,就可以用该模型对无标注的待测样本进行预测。

     首先介绍一下我的样本集:样本是已经分好词的酒店评论,第一列为标签,第二列为评论,前半部分为积极评论,后半部分为消极评论,格式如下:

  

下面实现了SVM,NB,逻辑回归,决策树,随机森林,KNN 等几种分类方法,主要代码如下:

  1. #coding:utf-8
  from matplotlib import pyplot
  import scipy as sp
  import numpy as np
  try:
      # train_test_split lives in sklearn.model_selection in current scikit-learn.
      from sklearn.model_selection import train_test_split
  except ImportError:
      # Fallback for old scikit-learn releases that only ship cross_validation.
      from sklearn.cross_validation import train_test_split
  from sklearn.feature_extraction.text import CountVectorizer
  from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.metrics import precision_recall_curve
  from sklearn.metrics import classification_report
  from numpy import *
  11. #========SVM========#
  def SvmClass(x_train, y_train):
      """Train a linear-kernel support vector classifier and return it.

      Args:
          x_train: Training feature matrix, passed straight to ``SVC.fit``.
          y_train: Training labels, one per row of ``x_train``.

      Returns:
          The fitted ``SVC`` instance.
      """
      from sklearn.svm import SVC

      # Use a linear kernel instead of the library default ('rbf');
      # probability=True is required so that predict_proba() is available.
      model = SVC(kernel='linear', probability=True)
      # Supervised estimators are trained with fit(X, y); unsupervised ones
      # take fit(X) only.
      model.fit(x_train, y_train)
      return model
  18. #=====NB=========#
  def NbClass(x_train, y_train):
      """Train a multinomial naive Bayes classifier and return it.

      Args:
          x_train: Training feature matrix.
          y_train: Training labels, one per row of ``x_train``.

      Returns:
          The fitted ``MultinomialNB`` instance.
      """
      from sklearn.naive_bayes import MultinomialNB

      # alpha is the smoothing parameter of MultinomialNB.
      model = MultinomialNB(alpha=0.01)
      model.fit(x_train, y_train)
      return model
  23. #========Logistic Regression========#
  def LogisticClass(x_train, y_train):
      """Train an L2-penalised logistic regression classifier and return it.

      Args:
          x_train: Training feature matrix.
          y_train: Training labels, one per row of ``x_train``.

      Returns:
          The fitted ``LogisticRegression`` instance.
      """
      from sklearn.linear_model import LogisticRegression

      model = LogisticRegression(penalty='l2')
      model.fit(x_train, y_train)
      return model
  29. #========KNN========#
  def KnnClass(x_train, y_train):
      """Train a k-nearest-neighbours classifier and return it.

      The classifier is built with the library's default settings.

      Args:
          x_train: Training feature matrix.
          y_train: Training labels, one per row of ``x_train``.

      Returns:
          The fitted ``KNeighborsClassifier`` instance.
      """
      from sklearn.neighbors import KNeighborsClassifier

      model = KNeighborsClassifier()
      model.fit(x_train, y_train)
      return model
  35. #========Decision Tree ========#
  def DccisionClass(x_train, y_train):
      """Train a decision tree classifier and return it.

      The classifier is built with the library's default settings.

      Args:
          x_train: Training feature matrix.
          y_train: Training labels, one per row of ``x_train``.

      Returns:
          The fitted ``DecisionTreeClassifier`` instance.
      """
      from sklearn.tree import DecisionTreeClassifier

      model = DecisionTreeClassifier()
      model.fit(x_train, y_train)
      return model
  41. #========Random Forest Classifier ========#
  def random_forest_class(x_train, y_train):
      """Train a random forest classifier and return it.

      Args:
          x_train: Training feature matrix.
          y_train: Training labels, one per row of ``x_train``.

      Returns:
          The fitted ``RandomForestClassifier`` instance.
      """
      from sklearn.ensemble import RandomForestClassifier

      # n_estimators sets how many weak learners (trees) the forest contains.
      forest = RandomForestClassifier(n_estimators=8)
      forest.fit(x_train, y_train)
      return forest
  47. #========准确率召回率 ========#
  def Precision(clf):
      """Print accuracy and a per-class precision/recall report for ``clf``.

      The evaluation data is not passed in: this function reads the
      module-level ``x_test`` and ``y_test`` that the script's ``__main__``
      section assigns before calling it.

      Args:
          clf: A fitted classifier exposing ``predict`` and ``predict_proba``.

      Returns:
          None. Everything is written to standard output.
      """
      from sklearn.metrics import accuracy_score

      # Predict once and reuse the result for both accuracy figures below.
      doc_class_predicted = clf.predict(x_test)
      # Fraction of predictions that equal the true labels.
      print(np.mean(doc_class_predicted == y_test))

      # Precision/recall report: take the probability of the second class
      # (column 1) and threshold it at 0.5.
      # NOTE(review): target_names assumes label 0 is negative and label 1 is
      # positive - confirm against the training file.
      answer = clf.predict_proba(x_test)[:, 1]
      report = answer > 0.5
      print(classification_report(y_test, report, target_names=['neg', 'pos']))
      print("--------------------")
      print('准确率: %.2f' % accuracy_score(y_test, doc_class_predicted))
  if __name__ == '__main__':
      # Each line of train2.txt is parsed as: first character = integer label,
      # remainder of the line = the (already word-segmented) review text.
      data = []
      labels = []
      # Read the file once; label and text are taken from the same line so the
      # two lists cannot drift out of step.
      with open("train2.txt", "r") as file:
          for line in file:
              if not line.strip():
                  # A blank line has no label character; int() would raise.
                  continue
              labels.append(int(line[0:1]))
              data.append(line[1:])
      x = np.array(data)

      # Convert the raw text into TF-IDF weighted term vectors.
      count_vec = TfidfVectorizer(binary=False)
      # Split the data set: 80% for training, 20% for testing.
      # x_test / y_test must stay module-level names: Precision() reads them.
      x_train, x_test, y_train, y_test = train_test_split(x, labels, test_size=0.2)
      # Fit the vocabulary on the training split only, then reuse it for the
      # test split.
      x_train = count_vec.fit_transform(x_train)
      x_test = count_vec.transform(x_test)

      # Train and evaluate every classifier in turn, printing a banner first.
      for banner, trainer in (
          ('**************支持向量机************ ', SvmClass),
          ('**************朴素贝叶斯************ ', NbClass),
          ('**************最近邻KNN************ ', KnnClass),
          ('**************逻辑回归************ ', LogisticClass),
          ('**************决策树************ ', DccisionClass),
          ('**************随机森林************ ', random_forest_class),
      ):
          print(banner)
          Precision(trainer(x_train, y_train))

结果如下:


对于整体代码和语料的下载,可以去下载

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/花生_TL007/article/detail/417397
推荐阅读
相关标签
  

闽ICP备14008679号