- """
- 创建 100行1列的 x,y数据
- """
-
-
- # 解析解 求解模型的方法
- # numpy是做数值计算的
- import numpy as np
- # matplotlib 是关于绘图的
- import matplotlib.pyplot as plt
-
-
- # 回归,有监督的机器学习 X,y
- X= np.random.rand(100,1)
-
- # 这里要模拟出来的数据y是代表真实的数据,所以也就是y_hat+error 预期值和误差
- # 假设误差 服从正太分布
- # standard normal distribution 标准正太分布 期望为μ=0 方差1
- y= 2 +4*X +np.random.randn(100,1)
-
- # 为了去求解w0截距项,我们给X矩阵一开始加上一列全为1的X0
- # np.c_[] 拼接函数
- X_b = np.c_[np.ones((100,1)),X]
-
- # 实现解析解的公式来求解θ
- θ = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
- print(θ)
-
- # 使用模型去做预测
- # 新建数据
- X_new = np.array([[0],
- [2]])
- X_new_b = np.c_[np.ones((2,1)),X_new]
- print(X_new_b)
-
- y_predict = X_new_b.dot(θ)
- print(y_predict)
-
-
- # 绘图进行展示真实的数据点和我们预测用的模型
- plt.plot(X_new,y_predict,'r-')
- plt.plot(X,y,'b.')
- plt.axis([0,2,0,10])
- plt.show()
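As a quick sanity check on the closed-form result (a minimal sketch added here, not part of the original tutorial), np.linalg.lstsq solves the same least-squares problem without forming the explicit inverse, which is numerically more stable:

# Hypothetical cross-check: lstsq minimizes ||X_b @ theta - y||^2 directly,
# avoiding the explicit matrix inverse used above.
theta_lstsq, residuals, rank, sv = np.linalg.lstsq(X_b, y, rcond=None)
print(theta_lstsq)  # should closely match theta_best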
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

X1 = 2*np.random.rand(100, 1)
X2 = 2*np.random.rand(100, 1)
X = np.c_[X1, X2]

y = 4 + 3*X1 + 5*X2 + np.random.randn(100, 1)

reg = LinearRegression(fit_intercept=True)  # fit_intercept: whether to fit an intercept
reg.fit(X, y)                               # fit the model on X, y
print(reg.intercept_, reg.coef_)            # intercept_: the intercept; coef_: the other coefficients

X_new = np.array([[0, 0],
                  [2, 1],
                  [2, 4]])
y_predict = reg.predict(X_new)

# Plot the true data points together with the model's predictions.
# With two features this is only a rough illustration: the x axis shows
# the first feature only, so the red line is not the fitted plane itself.
plt.plot(X_new[:, 0], y_predict, 'r-')
plt.plot(X1, y, 'b.')
plt.axis([0, 2, 0, 25])
plt.show()
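For intuition, predict() here is nothing more than the linear formula w0 + X·w. A small hedged check (added for illustration, not in the original):

# Manual prediction: y_hat = w0 + X @ w. reg.coef_ has shape (1, 2) because
# y was fit as a column vector, so transpose it for the dot product.
manual_predict = reg.intercept_ + X_new @ reg.coef_.T
print(np.allclose(manual_predict, y_predict))  # expected: True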
import numpy as np

# Create the dataset X, y
np.random.seed(1)  # fix the random seed so runs are reproducible
X = np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)  # the "true" values
X_b = np.c_[np.ones((100, 1)), X]

# Hyperparameters
learning_rate = 0.001  # learning rate; the constant factor of the mean-gradient (1/m) is folded into it
n_iteration = 10000    # number of iterations

# Step 1: initialize theta = (w0, ..., wn) from the standard normal distribution
theta = np.random.randn(2, 1)

# Step 4: convergence check. Rather than setting a threshold, simply run a
# fairly large fixed number of iterations, which is enough to converge here.
for _ in range(n_iteration):
    # Step 2: compute the gradient, gradient = X^T (X theta - y)
    gradient = X_b.T.dot(X_b.dot(theta) - y)
    # Step 3: gradient descent update, theta_{t+1} = theta_t - eta * gradient
    theta = theta - learning_rate * gradient

print(theta)
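For completeness, a hedged sketch of the threshold-based alternative mentioned in the comment above: stop once the gradient norm falls below a tolerance (the tolerance value here is an arbitrary assumption):

# Variant with an explicit stopping criterion (tolerance chosen arbitrarily).
theta = np.random.randn(2, 1)
tolerance = 1e-8
for _ in range(n_iteration):
    gradient = X_b.T.dot(X_b.dot(theta) - y)
    if np.linalg.norm(gradient) < tolerance:
        break  # gradient is effectively zero: converged early
    theta = theta - learning_rate * gradient
print(theta)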
- """
- 小批量梯度下降
- """
- import numpy as np
-
- # 创建数据集X,y
- X = np.random.rand(100,1)
- y = 4+ 3*X + np.random.randn(100,1)
- X_b = np.c_[np.ones((100,1)),X]
-
- # 创建超参数
- learn_rating = 0.0001
- n_epochs = 10000 # 迭代轮次
- m = 100 # 样本数
- batch_size = 10 # 每一批次使用多少条样本
- num_batches = int(m/batch_size) # 每一轮次 需要多少批次才能执行完一个完整样本
- # 第一步 随机初始化θ
- theta = np.random.randn(2,1)
- for epoch in range(n_epochs):
- # 在双层for循环之间,每个轮次开始分批次迭代之前 打乱数据索引顺序, 目的 保证随机性
- arr = np.arange(len(X_b)) # 返回索引列表
- np.random.shuffle(arr) # 把索引打乱顺序
- X_b = X_b[arr]
- y = y[arr]
- for i in range(num_batches):
- # random_index = np.random.randint(m)
- x_batch = X_b[i*batch_size:i*batch_size+batch_size]
- y_batch = y[i*batch_size:i*batch_size+batch_size]
- # 第二步 求梯度 gradient = (Xθ-y)Xj
- gradient = x_batch.T.dot(x_batch.dot(theta)-y_batch)
-
- # 第三步 用梯度下降法公式去调整θ值 公式: θt+1 =θt-η*gradient
- theta = theta - learn_rating*gradient
-
- print(theta)
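One caveat: int(m/batch_size) floors the division, so when m is not a multiple of batch_size the leftover samples are never visited. A sketch of a ceiling-based variant that covers the tail (slicing past the end of an array is safe in NumPy; the extra pass below is only to illustrate the slicing):

# Ceil instead of floor so a final, smaller batch covers the leftover samples.
num_batches = int(np.ceil(m / batch_size))
for i in range(num_batches):
    x_batch = X_b[i*batch_size : (i+1)*batch_size]  # the last slice may be short
    y_batch = y[i*batch_size : (i+1)*batch_size]
    gradient = x_batch.T.dot(x_batch.dot(theta) - y_batch)
    theta = theta - learning_rate * gradient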
- """
- 随机梯度: 随机一个样本进行测试
- """
-
- import numpy as np
-
- # 创建数据集X,y
- X = np.random.rand(100,1)
- y= 4 + 3*X + np.random.randn(100,1)
- X_b = np.c_[np.ones((100,1)),X]
-
-
- # 创建超参数
- learning_rating = 0.001 # 学习率
- n_epochs = 10000 # 迭代轮次
- m =100 # 100个样本
-
-
-
- # 第一步 初始化θ
- theta = np.random.randn(2,1)
- for epoch in range(n_epochs):
- arr = np.arange(len(X_b))
- np.random.shuffle(arr)
- X_b = X_b[arr]
- y = y[arr]
- for i in range(m):
- # random_index = np.random.randint(m)
- xi = X_b[i:i+1]
- yi = y[i:i+1]
- # 第二部 求梯度 gradient =(Xθ-y)Xj
- gradient = xi.T.dot(xi.dot(theta)-yi)
-
- # 3、应用梯度下降法的公式去调整θ值 公式: θt+1 =θt-η*gradient
- theta = theta - learning_rating*gradient
-
-
- print(theta)
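In practice SGD is usually paired with a decaying learning rate so the single-sample updates settle down near the optimum. A minimal sketch of one common schedule; the constants t0 and t1 are arbitrary assumptions, not values from this tutorial:

# Hypothetical decay schedule: eta = t0 / (t + t1) shrinks the step over time.
t0, t1 = 5, 500

def learning_schedule(t):
    return t0 / (t + t1)

theta = np.random.randn(2, 1)
for epoch in range(n_epochs):
    for i in range(m):
        xi = X_b[i:i+1]
        yi = y[i:i+1]
        gradient = xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(epoch * m + i)  # step count so far
        theta = theta - eta * gradient
print(theta)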
"""
Min-max normalization
"""
import numpy as np
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()  # create the scaler
temp = np.array([1, 2, 3, 4, 5, 5])
# Normalize to [0, 1]; reshape(-1, 1) because sklearn expects a 2D column
result = scaler.fit_transform(temp.reshape(-1, 1))
print(result)
- """
- 代码实现标准归一化
- """
- from sklearn.preprocessing import StandardScaler
- import numpy as np
- temp = np.array([1,2,3,5,5])
- # 创建对象
- scaler = StandardScaler()
- # fit()会将某一列的均值和方差求出来
- scaler.fit(temp.reshape(-1,1))
- # 查看均值
- scaler.mean_
- # 查看标准差
- scaler.var_
- # 转换数据
- scaler.transform(temp.reshape(-1,1))
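What transform() does is just (x - mean) / std; a quick check against the manual computation (added for illustration; note var_ would need a square root, so np.ndarray.std is used instead):

# Manual equivalent of StandardScaler: subtract the mean, divide by the
# (population) standard deviation, matching sklearn's ddof=0 convention.
manual = (temp - temp.mean()) / temp.std()
print(np.allclose(manual.reshape(-1, 1), scaler.transform(temp.reshape(-1, 1))))  # True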
- """
- lasso 回归 损失函数 +L2正则项
- """
- import numpy as np
- from sklearn.linear_model import Lasso
- from sklearn.linear_model import SGDRegressor # 随机梯度下降
- X = np.random.rand(100,1)
- y = 4 + 3*X + np.random.randn(100,1)
-
- # lasso_reg = Lasso(alpha=0.04,max_iter=30000)
- # lasso_reg.fit(X,y)
-
- # print(lasso_reg.predict([[1.5]]))
- # print(lasso_reg.intercept_) # 查看截距项W0
- # print(lasso_reg.coef_) # 查看其他的参数 W1
- sgd_reg = SGDRegressor(penalty='l1',max_iter=30000)
- # sgd_reg.fit(X,y.ravel())
- sgd_reg.fit(X,y.reshape(-1,))
- print(sgd_reg.predict([[1.5]]))
- print(sgd_reg.intercept_)
- print(sgd_reg.coef_)
- """
- ridge岭回归 损失函数+L2正则项
- Ridge(alpha=0.4, solver='sag')
- alpha : L2 正则项系数
- solver : 梯度训练方法
- """
-
- import numpy as np
- from sklearn.linear_model import Ridge
- from sklearn.linear_model import SGDRegressor # 随机梯度下降
-
- X = np.random.rand(100,1)
- y = 4 + 3*X +np.random.randn(100,1) # 真实值
-
- # # 创建Ridge岭对象
- # ridge_reg = Ridge(alpha=0.4, solver='sag')
- # ridge_reg.fit(X,y) # 把训练集里面的对象 训练好
- # print(ridge_reg.predict([[1.5]])) # 预测值
- # print(ridge_reg.intercept_) # 获取截距项 W0
- # print(ridge_reg.coef_) # 其他系数 W1
-
- # 创建SGDRegressor 对象
- sgd_reg = SGDRegressor(penalty='l2',max_iter=1000)
- sgd_reg.fit(X,y)
- print(sgd_reg.predict([[1.5]]))
- print(sgd_reg.intercept_)
- print(sgd_reg.coef_)
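Ridge also admits a closed-form solution, theta = (X^T X + alpha*I)^{-1} X^T y. A rough sketch on the bias-augmented matrix; for simplicity it penalizes the intercept as well, which sklearn does not, so the numbers will only roughly agree:

# Closed-form ridge on the bias-augmented matrix. Penalizing the intercept
# too is a simplification, so this only approximates sklearn's Ridge.
alpha = 0.4
X_b = np.c_[np.ones((100, 1)), X]
ridge_theta = np.linalg.inv(X_b.T.dot(X_b) + alpha * np.eye(2)).dot(X_b.T).dot(y)
print(ridge_theta)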
- """
- 损失函数 + L1 正则项+L2正则项
- """
-
- import numpy as np
- from sklearn.linear_model import ElasticNet
- from sklearn.linear_model import SGDRegressor # 随机梯度下降
- X = np.random.rand(100,1)
- y = 4 + 3*X + np.random.randn(100,1)
-
- # elastic_reg = ElasticNet(alpha=0.04,max_iter=100000,l1_ratio=0.15) # l1_ratio 指的是权衡L1 L2正则项哪个重要
- # elastic_reg.fit(X,y)
- # print(elastic_reg.predict([[1.5]]))
- # print(elastic_reg.intercept_)
- # print(elastic_reg.coef_)
-
- sgd_reg = SGDRegressor(penalty='elasticnet',max_iter=30000)
- sgd_reg.fit(X,y)
- print(sgd_reg.predict([[1.5]]))
- print(sgd_reg.intercept_)
- print(sgd_reg.coef_)
- """
- 多项式回归 升维
- """
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.preprocessing import PolynomialFeatures # 对特征进行预处理
- from sklearn.linear_model import LinearRegression
- from sklearn.metrics import mean_squared_error # 回归评估指标 mse ;评估升维的效果
-
-
- np.random.seed(42) # 随机种子 目的 :运行多次 数据是一样的
- m =100 # 样本
- X = 6*np.random.rand(m,1) -3
- y = 0.5*X**2 +X +2 + np.random.randn(m,1)
-
- # # 画图
- plt.plot(X,y,'b.')
-
-
- # 训练集
- X_train = X[:80]
- y_train = y[:80]
- # 测试集
- X_test =X[80:]
- y_test = y[80:]
-
-
- d = {1: 'g-', 2:'r+', 10:'y*'}
- for i in d:
- # 把训练集和测试集升维多项式回归
- poly_features = PolynomialFeatures(degree=i, include_bias=True) # include_bias 是否创建截距项
- X_poly_train = poly_features.fit_transform(X_train)
- X_poly_test = poly_features.fit_transform(X_test)
- print(X_train[0])
- print(X_poly_train[0])
- print(X_train.shape)
- print(X_poly_train.shape)
-
- # 创建模型
- lin_reg = LinearRegression(fit_intercept=False) # fit_intercept 是否计算截距
- lin_reg.fit(X_poly_train,y_train)
- print(lin_reg.intercept_,lin_reg.coef_)
-
- # 看看是否随着degree 的增加升维,是否过拟合了
- y_train_predict = lin_reg.predict(X_poly_train)
- y_test_predict = lin_reg.predict(X_poly_test)
-
- plt.plot(X_poly_train[:,1],y_train_predict,d[i])
-
- print(mean_squared_error(y_test,y_test_predict))
- print(mean_squared_error(y_train,y_train_predict))
- # plt.show()
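The loop above chains PolynomialFeatures and LinearRegression by hand; sklearn's Pipeline bundles the two steps into a single estimator. A sketch of the equivalent degree-2 setup:

# Equivalent setup as a Pipeline: fit_transform + fit collapse into one fit().
from sklearn.pipeline import make_pipeline

poly_reg = make_pipeline(
    PolynomialFeatures(degree=2, include_bias=False),
    LinearRegression(),
)
poly_reg.fit(X_train, y_train)
print(mean_squared_error(y_test, poly_reg.predict(X_test)))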