
Machine Learning: Softmax Regression (Python)


Softmax Regression (Multi-Class Classification)
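Softmax regression generalizes logistic regression from two classes to C classes: the model produces one score per class for each sample, and the softmax function turns the score vector into a probability distribution over the classes. For reference, the standard softmax model and cross-entropy loss underlying the code below are

$$\hat{y}_{ic} = \mathrm{softmax}(x_i^\top \Theta)_c = \frac{e^{x_i^\top \theta_c}}{\sum_{j=1}^{C} e^{x_i^\top \theta_j}}, \qquad J(\Theta) = -\frac{1}{m}\sum_{i=1}^{m}\sum_{c=1}^{C} y_{ic} \log \hat{y}_{ic},$$

where $y_{ic}$ is the one-hot encoding of sample $i$'s true label and $\theta_c$ is the weight column of class $c$.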

logistic_regression_mulclass.py

import numpy as np
import matplotlib.pyplot as plt


class LogisticRegression_MulClass:
    """
    Logistic regression for multi-class problems via the softmax function,
    trained with (mini-batch) gradient descent + regularization and a
    cross-entropy loss.
    """

    def __init__(self, fit_intercept=True, normalize=True, alpha=0.05, eps=1e-10,
                 max_epochs=300, batch_size=20, l1_ratio=None, l2_ratio=None, en_rou=None):
        """
        :param fit_intercept: whether to fit the bias (intercept) term
        :param normalize: whether to standardize the features
        :param alpha: learning rate
        :param eps: early-stopping tolerance; training stops once the absolute
                    difference between two consecutive training losses is below eps
        :param max_epochs: maximum number of training epochs
        :param batch_size: batch size; 1 gives stochastic gradient descent, the
                           training-set size gives batch gradient descent, and
                           anything in between gives mini-batch gradient descent
        :param l1_ratio: LASSO (L1) penalty coefficient
        :param l2_ratio: ridge (L2) penalty coefficient
        :param en_rou: elastic-net coefficient trading off L1 against L2
        """
        self.fit_intercept = fit_intercept  # constant term of the linear model, i.e. the bias theta0
        self.normalize = normalize  # whether to standardize the data
        self.alpha = alpha  # learning rate
        self.eps = eps  # early-stopping tolerance
        if l1_ratio is not None and l1_ratio < 0:
            raise ValueError("Penalty coefficients must be non-negative")
        self.l1_ratio = l1_ratio  # L1 penalty coefficient; assigned even when None so later checks work
        if l2_ratio is not None and l2_ratio < 0:
            raise ValueError("Penalty coefficients must be non-negative")
        self.l2_ratio = l2_ratio  # ridge (L2) penalty coefficient
        if en_rou is not None and not 0 <= en_rou <= 1:
            raise ValueError("The elastic-net coefficient must lie in [0, 1]")
        self.en_rou = en_rou  # elastic-net coefficient trading off L1 against L2
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.theta = None  # trained weight coefficients
        if normalize:
            self.feature_mean, self.feature_std = None, None  # feature means and standard deviations
        self.n_samples, self.n_classes = 0, 0  # number of samples and number of classes
        self.train_loss, self.test_loss = [], []  # training/test losses recorded during training
    def init_theta_params(self, n_features, n_classes):
        """
        Initialize the parameters; if the intercept is fitted, the bias is
        initialized here as well (as the last row of theta).
        :param n_features: number of features per sample
        :param n_classes: number of classes
        :return: theta of shape n_features * n_classes
        """
        self.theta = np.random.randn(n_features, n_classes) * 0.05

    @staticmethod
    def one_hot_encoding(target):
        """
        One-hot encode the class labels.
        :param target: 1-D array of integer class labels
        :return: n * n_classes matrix with a single 1 per row
        """
        class_labels = np.unique(target)  # distinct class labels
        target_y = np.zeros((len(target), len(class_labels)), dtype=np.int64)
        for i, label in enumerate(target):
            target_y[i, label] = 1  # set the column of the sample's class to 1
        return target_y
    @staticmethod
    def softmax_func(x):
        """
        Softmax function; the inputs are shifted to avoid overflow/underflow.
        :param x: array of shape batch_size * n_classes
        :return: array of shape batch_size * n_classes
        """
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))  # subtract each row's max to avoid overflow
        exp_sum = np.sum(exps, axis=1, keepdims=True)
        return exps / exp_sum

    @staticmethod
    def sign_func(z_values):
        """
        Sign function, used for the L1 penalty gradient.
        :param z_values: model coefficients, 2-D array
        :return: elementwise sign of z_values
        """
        sign = np.zeros(z_values.shape)
        sign[z_values > 0] = 1.0
        sign[z_values < 0] = -1.0
        return sign

    @staticmethod
    def cal_cross_entropy(y_test, y_prob):
        """
        Compute the cross-entropy loss.
        :param y_test: true labels, 2-D array n * c, where c is the number of classes
        :param y_prob: predicted class probabilities, n * c
        :return: mean loss over the samples
        """
        loss = -np.sum(y_test * np.log(y_prob + 1e-08), axis=1)
        # the complementary term below makes this a binary-style cross entropy
        # summed over classes, slightly larger than the canonical softmax loss
        loss -= np.sum((1 - y_test) * np.log(1 - y_prob + 1e-08), axis=1)
        return np.mean(loss)
    def fit(self, x_train, y_train, x_test=None, y_test=None):
        """
        Preprocess the samples and solve for the model coefficients by gradient descent.
        :param x_train: training samples, m*k
        :param y_train: training targets, m*c after one-hot encoding
        :param x_test: test samples, n*k
        :param y_test: test targets, n*c after one-hot encoding
        :return:
        """
        y_train = self.one_hot_encoding(y_train)
        self.n_classes = y_train.shape[1]  # number of classes
        if y_test is not None:
            y_test = self.one_hot_encoding(y_test)
        if self.normalize:
            self.feature_mean = np.mean(x_train, axis=0)  # feature means
            self.feature_std = np.std(x_train, axis=0) + 1e-8  # feature standard deviations
            x_train = (x_train - self.feature_mean) / self.feature_std  # standardize
            if x_test is not None:
                x_test = (x_test - self.feature_mean) / self.feature_std  # standardize
        if self.fit_intercept:
            x_train = np.c_[x_train, np.ones((len(y_train), 1))]  # append a column of ones for the bias
            if x_test is not None and y_test is not None:
                x_test = np.c_[x_test, np.ones((len(y_test), 1))]  # append a column of ones for the bias
        self.init_theta_params(x_train.shape[1], self.n_classes)  # initialize parameters
        # train the model by gradient descent
        self._fit_gradient_desc(x_train, y_train, x_test, y_test)
    def _fit_gradient_desc(self, x_train, y_train, x_test=None, y_test=None):
        """
        Gradient descent with regularization, in three flavors:
        (1) batch_size == 1: stochastic gradient descent
        (2) batch_size == sample size: batch gradient descent
        (3) batch_size < sample size: mini-batch gradient descent
        :return:
        """
        train_sample = np.c_[x_train, y_train]  # join features and targets so samples shuffle together
        # np.c_ concatenates arrays horizontally, np.r_ vertically
        # theta is updated per batch; which variant runs depends on batch_size
        for epoch in range(self.max_epochs):
            self.alpha *= 0.95  # decay the learning rate each epoch
            np.random.shuffle(train_sample)  # shuffle the samples to simulate randomness
            batch_nums = train_sample.shape[0] // self.batch_size  # number of batches
            for idx in range(batch_nums):
                # take one batch: stochastic (1), batch (n) or mini-batch (<n) gradient
                batch_xy = train_sample[self.batch_size * idx: self.batch_size * (idx + 1)]
                # split features and targets; note the targets are no longer a single column
                batch_x, batch_y = batch_xy[:, :x_train.shape[1]], batch_xy[:, x_train.shape[1]:]
                # compute the weight update, including the bias term
                y_prob_batch = self.softmax_func(batch_x.dot(self.theta))  # predicted probabilities for the batch
                # (b * c).T dot (b * k) = c * k --> transposed to k * c
                delta = ((y_prob_batch - batch_y).T.dot(batch_x) / self.batch_size).T
                # compute and add the regularization part; the bias (last row) is not penalized
                dw_reg = np.zeros(shape=(x_train.shape[1] - 1, self.n_classes))
                if self.l1_ratio and self.l2_ratio is None:
                    # LASSO regression, L1 penalty
                    dw_reg = self.l1_ratio * self.sign_func(self.theta[:-1, :])
                if self.l2_ratio and self.l1_ratio is None:
                    # ridge regression, L2 penalty
                    dw_reg = 2 * self.l2_ratio * self.theta[:-1, :]
                if self.en_rou and self.l1_ratio and self.l2_ratio:
                    # elastic net
                    dw_reg = self.l1_ratio * self.en_rou * self.sign_func(self.theta[:-1, :])
                    dw_reg += 2 * self.l2_ratio * (1 - self.en_rou) * self.theta[:-1, :]
                delta[:-1, :] += dw_reg / self.batch_size  # add the regularization gradient
                self.theta = self.theta - self.alpha * delta
            # record the cross-entropy loss during training
            y_train_prob = self.softmax_func(x_train.dot(self.theta))  # current predictions on the training set
            train_cost = self.cal_cross_entropy(y_train, y_train_prob)  # training cross-entropy loss
            self.train_loss.append(train_cost)  # mean cross-entropy loss
            if x_test is not None and y_test is not None:
                y_test_prob = self.softmax_func(x_test.dot(self.theta))  # current predictions on the test set
                test_cost = self.cal_cross_entropy(y_test, y_test_prob)
                self.test_loss.append(test_cost)  # mean cross-entropy loss
            # stop early once two consecutive mean losses differ by less than eps
            if epoch > 10 and np.abs(self.train_loss[-1] - self.train_loss[-2]) <= self.eps:
                break
    def get_params(self):
        """
        Return the trained coefficients of the linear model.
        :return: (weight, bias)
        """
        if self.fit_intercept:  # the bias term exists
            weight, bias = self.theta[:-1, :], self.theta[-1, :]
        else:
            weight, bias = self.theta, np.array([0])
        if self.normalize:  # coefficients were learned on standardized data
            weight = weight / self.feature_std.reshape(-1, 1)  # map the weights back to the original scale
            bias = bias - weight.T.dot(self.feature_mean)
        return weight, bias

    def predict_prob(self, x_test):
        """
        Predict class probabilities for the test samples; column j holds the
        probability of class j.
        :param x_test: test samples, ndarray: n * k
        :return: n * n_classes probability matrix
        """
        if self.normalize:
            x_test = (x_test - self.feature_mean) / self.feature_std  # standardize the test data
        if self.fit_intercept:
            # the bias term exists, append a column of ones
            x_test = np.c_[x_test, np.ones(shape=x_test.shape[0])]
        y_prob = self.softmax_func(x_test.dot(self.theta))
        return y_prob

    def predict(self, x):
        """
        Predict the class of each sample.
        :param x: samples to predict
        :return: predicted class indices
        """
        y_prob = self.predict_prob(x)
        # for each sample, return the index of the class with the highest probability
        return np.argmax(y_prob, axis=1)
    def plt_loss_curve(self, lab=None, is_show=True):
        """
        Plot the cross-entropy loss curves.
        :param lab: label describing the model, used in the plot title
        :param is_show: whether to create a figure and call plt.show(); pass
                        False when drawing into an existing subplot
        :return:
        """
        if is_show:
            plt.figure(figsize=(8, 6))
        plt.plot(self.train_loss, "k-", lw=1, label="Train Loss")
        if self.test_loss:
            plt.plot(self.test_loss, "r--", lw=1.2, label="Test Loss")
        plt.xlabel("Training Epochs", fontdict={"fontsize": 12})
        plt.ylabel("The Mean of Cross Entropy Loss", fontdict={"fontsize": 12})
        plt.title("%s: The Loss Curve of Cross Entropy" % lab)
        plt.legend(frameon=False)
        plt.grid(ls=":")
        if is_show:
            plt.show()
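A note on the update rule in `_fit_gradient_desc`: for the softmax cross-entropy loss stated at the top of this article, the gradient over a batch $X_b$ of $b$ samples has the compact form

$$\frac{\partial J}{\partial \Theta} = \frac{1}{b}\, X_b^\top (\hat{Y}_b - Y_b),$$

which is exactly what `delta = ((y_prob_batch - batch_y).T.dot(batch_x) / self.batch_size).T` computes (a $k \times c$ matrix, one column per class). One caveat worth knowing: `cal_cross_entropy` also adds the complementary term $-(1-y)\log(1-\hat{y})$, a binary-style variant, so the loss values it reports sit above the canonical softmax cross-entropy that the gradient update actually descends.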

test_logistic_reg_mulclass.py

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from logistic_regression_mulclass import LogisticRegression_MulClass
from performance_metrics import ModelPerformanceMetrics

iris = load_iris()  # load the dataset
X, y = iris.data, iris.target
X = StandardScaler().fit_transform(X)  # standardize
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0, stratify=y)

lg_lr = LogisticRegression_MulClass(alpha=0.5, l1_ratio=0.5,
                                    batch_size=5, normalize=False, max_epochs=1000, eps=1e-15)
lg_lr.fit(X_train, y_train, X_test, y_test)

print("Parameters of the L1-regularized model:")
theta = lg_lr.get_params()
fn = iris.feature_names
for i, w in enumerate(theta[0]):
    print(fn[i], ":", w)
print("theta0:", theta[1])
print("=" * 70)

y_test_prob = lg_lr.predict_prob(X_test)  # predicted probabilities
y_test_labels = lg_lr.predict(X_test)  # predicted class labels

plt.figure(figsize=(12, 8))
plt.subplot(221)
lg_lr.plt_loss_curve(lab="L1", is_show=False)
pm = ModelPerformanceMetrics(y_test, y_test_prob)
print(pm.cal_classification_report())
pr_values = pm.precision_recall_curve()  # precision-recall metrics
plt.subplot(222)
pm.plt_pr_curve(pr_values, is_show=False)  # PR curves
roc_values = pm.roc_metrics_curve()  # ROC metrics
plt.subplot(223)
pm.plt_roc_curve(roc_values, is_show=False)  # ROC curves
plt.subplot(224)
cm = pm.cal_confusion_matrix()
pm.plt_confusion_matrix(cm, label_names=iris.target_names, is_show=False)
plt.tight_layout()
plt.show()
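The test script depends on a custom `performance_metrics` module (the `ModelPerformanceMetrics` class) that is not listed in this article. As a fallback, here is a minimal sketch, assuming only scikit-learn is installed, that evaluates the same model with sklearn's built-in metrics and compares it against sklearn's own logistic regression; treat it as a sanity check rather than a replacement for the full report above.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from logistic_regression_mulclass import LogisticRegression_MulClass

iris = load_iris()
X = StandardScaler().fit_transform(iris.data)
X_train, X_test, y_train, y_test = train_test_split(
    X, iris.target, test_size=0.3, random_state=0, stratify=iris.target)

# the model implemented in this article
model = LogisticRegression_MulClass(alpha=0.5, l1_ratio=0.5, batch_size=5,
                                    normalize=False, max_epochs=1000, eps=1e-15)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("custom model accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=iris.target_names))

# scikit-learn's reference implementation for comparison
sk_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
print("sklearn accuracy:", sk_model.score(X_test, y_test))

If the two accuracies differ wildly, the learning-rate schedule or the penalty strength of the custom model is the first thing to revisit.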
