$$
f_{\vec{w},b}(\vec{x}) = \vec{w} \cdot \vec{x} + b = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + b = \sum_{j=1}^{n} w_j x_j + b
$$
where $\vec{w} = (w_1, w_2, \dots, w_n)$ is the weight vector, $b$ is the bias, and $n$ is the number of features. The mean squared error cost function over the $m$ training samples is:
$$
J(\vec{w},b) = \frac{1}{2m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right]^2 = \frac{1}{2m} \sum_{i=1}^{m} \left[ \vec{w} \cdot \vec{x}^{(i)} + b - y^{(i)} \right]^2
$$
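As a quick, vectorized illustration of the model and cost function above (separate from the full training script later in this post), a minimal NumPy sketch could look like this; the array shapes and values are made up for the example:

```python
import numpy as np

# Minimal sketch: vectorized prediction f_{w,b}(x) and cost J(w,b).
# X is an (m, n) matrix of samples, y an (m,) target vector,
# w an (n,) weight vector, b a scalar bias (values below are illustrative).
def predict(X, w, b):
    return X @ w + b                        # w . x^(i) + b for every sample at once

def cost(X, y, w, b):
    m = X.shape[0]
    err = predict(X, w, b) - y              # f_{w,b}(x^(i)) - y^(i)
    return np.sum(err ** 2) / (2 * m)       # J(w,b) = (1 / 2m) * sum of squared errors

X = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
y = np.array([10.0, 20.0])
print(cost(X, y, np.zeros(3), 0.0))         # 125.0 with all-zero parameters
```

Gradient descent then minimizes this cost by repeatedly applying the updates below.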
$$
\begin{aligned}
&\text{repeat} \ \{ \\
&\qquad tmp\_w_1 = w_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] x_1^{(i)} \\
&\qquad tmp\_w_2 = w_2 - \alpha \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] x_2^{(i)} \\
&\qquad \dots \\
&\qquad tmp\_w_n = w_n - \alpha \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] x_n^{(i)} \\
&\qquad tmp\_b = b - \alpha \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] \\
&\qquad \text{simultaneously update every parameter} \\
&\} \ \text{until converge}
\end{aligned}
$$
```python
import numpy as np
import matplotlib.pyplot as plt


# Mean squared error cost function J(w, b)
def cost_function(X, y, w, b):
    m = X.shape[0]  # number of training samples
    cost_sum = 0.0
    for i in range(m):
        f_wb_i = np.dot(w, X[i]) + b
        cost = (f_wb_i - y[i]) ** 2
        cost_sum += cost
    return cost_sum / (2 * m)


# Gradients dJ/dw and dJ/db
def compute_gradient(X, y, w, b):
    m = X.shape[0]  # number of training samples (rows)
    n = X.shape[1]  # number of features per sample (columns)
    dj_dw = np.zeros((n,))
    dj_db = 0.0
    for i in range(m):  # for each sample
        f_wb_i = np.dot(w, X[i]) + b
        for j in range(n):  # for each feature
            dj_dw[j] += (f_wb_i - y[i]) * X[i, j]
        dj_db += (f_wb_i - y[i])
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    return dj_dw, dj_db


# Gradient descent
def linear_regression(X, y, w, b, learning_rate=0.01, epochs=1000):
    J_history = []  # cost recorded at every iteration
    for epoch in range(epochs):
        dj_dw, dj_db = compute_gradient(X, y, w, b)
        # w and b must be updated simultaneously
        w = w - learning_rate * dj_dw
        b = b - learning_rate * dj_db
        J_history.append(cost_function(X, y, w, b))  # record the cost of this iteration
    return w, b, J_history


# Draw a scatter plot
def draw_scatter(x, y, title):
    plt.xlabel("X-axis", size=15)
    plt.ylabel("Y-axis", size=15)
    plt.title(title, size=20)
    plt.scatter(x, y)


# Print training targets and predictions side by side for comparison
def print_contrast(train, prediction, n):
    print("train prediction")
    for i in range(n):
        print(np.round(train[i], 4), np.round(prediction[i], 4))


# Execution starts here
if __name__ == '__main__':
    # Training set
    data = np.loadtxt("./data.txt", delimiter=',', skiprows=1)
    X_train = data[:, :4]  # columns 0-3 of the training set are X = (x0, x1, x2, x3)
    y_train = data[:, 4]   # column 4 of the training set is y
    w = np.zeros((X_train.shape[1],))  # weights
    b = 0.0                # bias
    epochs = 1000          # number of iterations
    learning_rate = 1e-7   # learning rate
    J_history = []         # cost recorded at every iteration

    # Fit the linear regression model
    w, b, J_history = linear_regression(X_train, y_train, w, b, learning_rate, epochs)
    print(f"result: w = {np.round(w, 4)}, b = {b:0.4f}")  # print the result

    # Compare the training targets y_train with the predictions y_hat
    # (I am being lazy here and reusing the training set as a test set -- do not do this in practice!)
    y_hat = np.zeros(X_train.shape[0])
    for i in range(X_train.shape[0]):
        y_hat[i] = np.dot(w, X_train[i]) + b
    print_contrast(y_train, y_hat, y_train.shape[0])

    # Plot the cost of every epoch
    x_axis = list(range(0, epochs))
    draw_scatter(x_axis, J_history, "Cost Function in Every Epoch")
    plt.show()
```
Feature engineering means turning the original features into new features by combining or transforming them, as in the sketch below.
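For instance, if two raw features were the width and depth of a plot of land, their product could serve as a new "area" feature. A minimal sketch (the feature meanings here are made up for illustration):

```python
import numpy as np

# Hypothetical raw features: column 0 = width, column 1 = depth (illustrative only).
X_raw = np.array([[30.0, 20.0],
                  [25.0, 40.0]])

# Engineer a new feature by combining existing ones: area = width * depth.
area = (X_raw[:, 0] * X_raw[:, 1]).reshape(-1, 1)

# Append the engineered feature as an extra column of the design matrix.
X_engineered = np.hstack([X_raw, area])
print(X_engineered)   # each row is now (width, depth, area)
```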
Mean normalization:

$$
x_j^{(i)} := \frac{x_j^{(i)} - \mu_j}{\max(x_j) - \min(x_j)}
$$
where $\vec{x}^{(i)} = (x_1^{(i)}, x_2^{(i)}, \dots, x_j^{(i)}, \dots, x_n^{(i)})$ and $\mu_j$ is the mean of all the $x_j$ values, that is
$$
\mu_j = \frac{1}{m} \sum_{i=1}^{m} x_j^{(i)}
$$
Z-score normalization:

$$
x_j^{(i)} := \frac{x_j^{(i)} - \mu_j}{\sigma_j}
$$
where $\vec{x}^{(i)} = (x_1^{(i)}, x_2^{(i)}, \dots, x_j^{(i)}, \dots, x_n^{(i)})$ and $\sigma_j$ is the standard deviation (std) of all the $x_j$ values, that is
$$
\sigma_j = \sqrt{\frac{1}{m} \sum_{i=1}^{m} \left[ x_j^{(i)} - \mu_j \right]^2}
$$
```python
import numpy as np
import matplotlib.pyplot as plt


# Mean normalization
def mean_normalize_features(X):
    mu = np.mean(X, axis=0)  # per-column mean (axis=0 -> one value per feature)
    X_mean = (X - mu) / (np.max(X, axis=0) - np.min(X, axis=0))
    return X_mean


# Z-score normalization
def zscore_normalize_features(X):
    mu = np.mean(X, axis=0)      # per-column mean (axis=0 -> one value per feature)
    sigma = np.std(X, axis=0)    # per-column standard deviation
    X_zscore = (X - mu) / sigma  # subtract the mean, then divide by the standard deviation
    return X_zscore


# Mean squared error cost function J(w, b)
def cost_function(X, y, w, b):
    m = X.shape[0]  # number of training samples
    cost_sum = 0.0
    for i in range(m):
        f_wb_i = np.dot(w, X[i]) + b
        cost = (f_wb_i - y[i]) ** 2
        cost_sum += cost
    return cost_sum / (2 * m)


# Gradients dJ/dw and dJ/db
def compute_gradient(X, y, w, b):
    m = X.shape[0]  # number of training samples (rows)
    n = X.shape[1]  # number of features per sample (columns)
    dj_dw = np.zeros((n,))
    dj_db = 0.0
    for i in range(m):  # for each sample
        f_wb_i = np.dot(w, X[i]) + b
        for j in range(n):  # for each feature
            dj_dw[j] += (f_wb_i - y[i]) * X[i, j]
        dj_db += (f_wb_i - y[i])
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    return dj_dw, dj_db


# Gradient descent
def linear_regression(X, y, w, b, learning_rate=0.01, epochs=1000):
    J_history = []  # cost recorded at every iteration
    for epoch in range(epochs):
        dj_dw, dj_db = compute_gradient(X, y, w, b)
        # w and b must be updated simultaneously
        w = w - learning_rate * dj_dw
        b = b - learning_rate * dj_db
        J_history.append(cost_function(X, y, w, b))  # record the cost of this iteration
    return w, b, J_history


# Draw a scatter plot
def draw_scatter(x, y, title):
    plt.xlabel("X-axis", size=15)
    plt.ylabel("Y-axis", size=15)
    plt.title(title, size=20)
    plt.scatter(x, y)


# Print training targets and predictions side by side for comparison
def print_contrast(train, prediction, n):
    print("train prediction")
    for i in range(n):
        print(np.round(train[i], 4), np.round(prediction[i], 4))


# Execution starts here
if __name__ == '__main__':
    # Training set
    data = np.loadtxt("./data.txt", delimiter=',', skiprows=1)
    X_train = data[:, :4]  # columns 0-3 of the training set are X = (x0, x1, x2, x3)
    y_train = data[:, 4]   # column 4 of the training set is y
    w = np.zeros((X_train.shape[1],))  # weights
    b = 0.0                # bias
    epochs = 1000          # number of iterations
    learning_rate = 0.01   # learning rate (can be much larger after normalization)
    J_history = []         # cost recorded at every iteration

    # Z-score normalization
    X_norm = zscore_normalize_features(X_train)
    # y_norm = zscore_normalize_features(y_train)
    print(f"X_norm = {np.round(X_norm, 4)}")
    # print(f"y_norm = {np.round(y_norm, 4)}")

    # Fit the linear regression model
    w, b, J_history = linear_regression(X_norm, y_train, w, b, learning_rate, epochs)
    print(f"result: w = {np.round(w, 4)}, b = {b:0.4f}")  # print the result

    # Compare the training targets y_train with the predictions y_hat
    # (I am being lazy here and reusing the training set as a test set -- do not do this in practice!)
    y_hat = np.zeros(X_train.shape[0])
    for i in range(X_train.shape[0]):
        # Note: test inputs must be normalized in the same way!
        y_hat[i] = np.dot(w, X_norm[i]) + b
    print_contrast(y_train, y_hat, y_train.shape[0])

    # Plot the cost of every epoch
    x_axis = list(range(0, epochs))
    draw_scatter(x_axis, J_history, "Cost Function in Every Epoch")
    plt.show()
```
$$
J(\vec{w},b) = \frac{1}{2m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right]^2 + \frac{\lambda}{2m} \sum_{j=1}^{n} w_j^2
$$
Here the first term is the mean squared error and the second is the regularization term, which pushes the weights $w_j$ toward smaller values: the larger the chosen $\lambda$, the smaller the resulting $w_j$.
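A minimal sketch of how the cost_function defined earlier could be extended with this regularization term; the parameter name lambda_ is my own choice and is not part of the original script:

```python
import numpy as np

# Regularized mean squared error cost (sketch).
# lambda_ is the regularization strength; the bias b is not regularized.
def cost_function_reg(X, y, w, b, lambda_=1.0):
    m = X.shape[0]
    cost_sum = 0.0
    for i in range(m):
        f_wb_i = np.dot(w, X[i]) + b
        cost_sum += (f_wb_i - y[i]) ** 2
    mse = cost_sum / (2 * m)                      # mean squared error term
    reg = (lambda_ / (2 * m)) * np.sum(w ** 2)    # regularization term
    return mse + reg
```

The gradient descent updates change accordingly: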
$$
\begin{aligned}
&\text{repeat} \ \{ \\
&\qquad tmp\_w_1 = w_1 - \alpha \left( \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] x_1^{(i)} + \frac{\lambda}{m} w_1 \right) \\
&\qquad tmp\_w_2 = w_2 - \alpha \left( \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] x_2^{(i)} + \frac{\lambda}{m} w_2 \right) \\
&\qquad \dots \\
&\qquad tmp\_w_n = w_n - \alpha \left( \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] x_n^{(i)} + \frac{\lambda}{m} w_n \right) \\
&\qquad tmp\_b = b - \alpha \frac{1}{m} \sum_{i=1}^{m} \left[ f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right] \\
&\qquad \text{simultaneously update every parameter} \\
&\} \ \text{until converge}
\end{aligned}
$$
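In code, only compute_gradient needs to change: each dJ/dw_j gains an extra (λ/m)·w_j term, while dJ/db stays the same because b is not regularized. A sketch under the same assumptions as above (lambda_ is again my own parameter name):

```python
import numpy as np

# Gradient of the regularized cost (sketch), extending compute_gradient above.
def compute_gradient_reg(X, y, w, b, lambda_=1.0):
    m, n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.0
    for i in range(m):
        err = np.dot(w, X[i]) + b - y[i]
        for j in range(n):
            dj_dw[j] += err * X[i, j]
        dj_db += err
    dj_dw = dj_dw / m + (lambda_ / m) * w   # extra (lambda/m) * w_j term for each weight
    dj_db = dj_db / m                       # bias gradient unchanged (b is not regularized)
    return dj_dw, dj_db
```

Plugging cost_function_reg and compute_gradient_reg into the linear_regression loop above would give regularized linear regression.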