当前位置:   article > 正文

手写 LASSO 回归的 Python 实现(LASSO 求解 Python 代码)

lasso求解python代码
  1. import numpy as np
  2. from matplotlib.font_manager import FontProperties
  3. from sklearn.datasets import make_regression
  4. from sklearn.model_selection import train_test_split
  5. import matplotlib.pyplot as plt
  6. class Lasso():
  7. def __init__(self):
  8. pass
  9. # 数据准备
  10. def prepare_data(self):
  11. # 生成样本数据
  12. X, y = make_regression(n_samples=40, n_features=80, random_state=0, noise=0.5)
  13. # 划分数据集
  14. X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
  15. return X_train, X_test, y_train.reshape(-1,1), y_test.reshape(-1,1)
  16. # 参数初始化
  17. def initialize_params(self, dims):
  18. w = np.zeros((dims, 1))
  19. b = 0
  20. return w, b
  21. # 定义L1损失函数
  22. def l1_loss(self, X, y, w, b, alpha):
  23. num_train = X.shape[0] # 样本数
  24. num_feature = X.shape[1] # 特征数
  25. y_hat = np.dot(X, w) + b # 回归预测数据
  26. # 计算损失
  27. loss = np.sum((y_hat - y) ** 2) / num_train + alpha * np.sum(np.abs(w))
  28. # 计算梯度,即参数的变化
  29. dw = np.dot(X.T, (y_hat - y)) / num_train + alpha * np.sign(w)
  30. db = np.sum((y_hat - y)) / num_train
  31. return y_hat, loss, dw, db
  32. def lasso_train(self, X, y, learning_rate, epochs, alpha):
  33. loss_list = []
  34. w, b = self.initialize_params(X.shape[1])
  35. # 归一化特征
  36. X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
  37. for i in range(1, epochs):
  38. y_hat, loss, dw, db = self.l1_loss(X, y, w, b, alpha)
  39. # 更新参数
  40. w += -learning_rate * dw
  41. b += -learning_rate * db
  42. loss_list.append(loss)
  43. # if i % 300 == 0:
  44. # print('epoch %d loss %f' % (i, loss))
  45. params = {
  46. 'w': w,
  47. 'b': b
  48. }
  49. grads = {
  50. 'dw': dw,
  51. 'db': db
  52. }
  53. return loss, loss_list, params, grads
  54. # 根据计算的得到的参数进行预测
  55. def predict(self, X, params):
  56. w = params['w']
  57. b = params['b']
  58. y_pred = np.dot(X, w) + b
  59. return y_pred
  60. if __name__ == '__main__':
  61. lasso = Lasso()
  62. X_train, X_test, y_train, y_test = lasso.prepare_data()
  63. alphas=np.arange(0.01,0.11,0.01)
  64. wc=[]#统计参数w中绝对值小于0.1的个数,模拟稀疏度
  65. for alpha in alphas:
  66. # 参数:训练集x,训练集y,学习率,迭代次数,正则化系数
  67. loss, loss_list, params, grads = lasso.lasso_train(X_train, y_train, 0.02, 3000,alpha)
  68. w=np.squeeze(params['w'])
  69. count=np.sum(np.abs(w)<1e-1)
  70. wc.append(count)
  71. # 设置中文字体
  72. plt.rcParams['font.sans-serif'] = ['SimHei']
  73. plt.rcParams['axes.unicode_minus'] = False
  74. plt.figure(figsize=(10, 8))
  75. plt.plot(alphas, wc, 'o-')
  76. plt.xlabel('正则项系数',fontsize=15)
  77. plt.ylabel('参数w矩阵的稀疏度',fontsize=15)
  78. plt.show()

本文内容由网友自发贡献,转载请注明出处:https://www.wpsshop.cn/w/IT小白/article/detail/598916
推荐阅读
相关标签
  

闽ICP备14008679号