import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Fix the random seed so the results are reproducible
np.random.seed(0)

# Generate the independent variable x: 100 points uniformly drawn from [0, 100]
x = np.random.uniform(0, 100, 100)

# Assume y depends linearly on x plus some noise:
# y = 3 * x + 5 + noise, where the noise is Gaussian with mean 0 and standard deviation 10
noise = np.random.normal(0, 10, 100)
y = 3 * x + 5 + noise

# Store the data in a DataFrame and save it to a CSV file
data = pd.DataFrame({'X': x, 'Y': y})
data.to_csv('linear_regression_data.csv', index=False)

# Prepend a column of ones (the intercept term x0 = 1)
data.insert(0, 'ones', 1)

# Scatter plot of the raw data
plt.scatter(x, y)
plt.title('Scatter Plot of Linear Regression Data')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.show()

# Data preparation: design matrix X = [ones, x] and target column vector y
X = data.iloc[:, 0:-1].values                 # shape (100, 2)
y = data.iloc[:, -1].values.reshape(100, 1)   # shape (100, 1)

# Cost function J(theta)
def cost_func(X, y, theta):
    inner = np.power(X @ theta - y, 2)
    return np.sum(inner) / (2 * len(X))

# Initial parameters
theta = np.zeros((2, 1))
print(theta)
cost0 = cost_func(X, y, theta)
print("cost0:")
print(cost0)

# Learning rate and number of iterations
alpha = 0.000001
count = 1000000

# Batch gradient descent
def gradient_descent(X, y, alpha, count):
    global theta
    costs = []
    for i in range(count):
        theta = theta - (X.T @ (X @ theta - y)) * alpha / len(X)
        nowcost = cost_func(X, y, theta)
        costs.append(nowcost)
        if i % 100 == 0:
            print(nowcost)
    return theta, costs

theta_ans, cost_ans = gradient_descent(X, y, alpha, count)

# Plot the cost against the iteration count
fig, ax = plt.subplots()
ax.plot(np.arange(count), cost_ans)
ax.set(xlabel='count', ylabel='cost')
plt.show()

# Plot the fitted line; the grid covers the range of x (column 1 of X), not of y
x_line = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
y_line = theta_ans[0, 0] + theta_ans[1, 0] * x_line  # theta[0] is the intercept, theta[1] the slope
print("b:")
print(theta_ans[0, 0])
print("k:")
print(theta_ans[1, 0])
fig, ax = plt.subplots()
ax.scatter(X[:, 1], y, label='training')       # training points: column 1 of X against y
ax.plot(x_line, y_line, 'r', label='predict')  # fitted line
ax.legend()
ax.set(xlabel='X', ylabel='Y')
plt.show()
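For reference, the quantities this script computes can be written compactly. X is the design matrix with a leading column of ones, m = len(X), and the update below is exactly the line inside gradient_descent:

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( x^{(i)} \theta - y^{(i)} \right)^{2}
          = \frac{1}{2m} \left\lVert X\theta - y \right\rVert^{2},
\qquad
\theta \leftarrow \theta - \frac{\alpha}{m} X^{\top} \left( X\theta - y \right)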
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

np.random.seed(0)

# Size of the data set
num_samples = 1000

# Generate the features
area = np.random.normal(loc=1500, scale=300, size=num_samples)   # floor area, mean 1500, std 300
year = np.random.randint(1950, 2023, size=num_samples)           # construction year, between 1950 and 2022
num_rooms = np.random.randint(2, 6, size=num_samples)            # number of rooms, between 2 and 5

# Generate the target (house price) with the linear relation
# price = 100 * area + 500 * (2022 - year) - 300 * num_rooms + noise
noise = np.random.normal(loc=0, scale=10000, size=num_samples)
price = 100 * area + 500 * (2022 - year) - 300 * num_rooms + noise

# Z-score standardisation
def z_score_normalization(feature):
    mean = np.mean(feature)
    std = np.std(feature)
    return (feature - mean) / std

# Standardise each feature
area_normalized = z_score_normalization(area)
year_normalized = z_score_normalization(year)
num_rooms_normalized = z_score_normalization(num_rooms)

print("Normalized Area:", area_normalized)
print("Normalized Year:", year_normalized)
print("Normalized Number of Rooms:", num_rooms_normalized)

# Build the DataFrame and save the data set
data = pd.DataFrame({
    'Area': area_normalized,
    'Year': year_normalized,
    'NumRooms': num_rooms_normalized,
    'Price': price
})
data.to_csv('linear_regression_data1.csv', index=False)
data.insert(0, 'ones', 1)  # intercept term x0 = 1

# Data preparation
X = data.iloc[:, :-1].values                 # feature matrix
y = data.iloc[:, -1].values.reshape(-1, 1)   # target column

# Split into training and test sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Cost function J(theta)
def cost_func(X, y, theta):
    inner = (X @ theta - y) ** 2
    return np.sum(inner) / (2 * len(X))

# Initial parameters
theta = np.zeros((X.shape[1], 1))

# Learning rate and number of iterations
alpha = 0.0001
count = 1000000

# Batch gradient descent
def gradient_descent(X, y, alpha, count):
    global theta
    costs = []
    for i in range(count):
        theta = theta - (X.T @ (X @ theta - y)) * alpha / len(X)
        nowcost = cost_func(X, y, theta)
        costs.append(nowcost)
        if i % 100 == 0:
            print(nowcost)
    return theta, costs

theta_ans, cost_ans = gradient_descent(X_train, y_train, alpha, count)

# Plot the cost against the iteration count
fig, ax = plt.subplots()
ax.plot(np.arange(count), cost_ans)
ax.set(xlabel='count', ylabel='cost')
plt.show()

print("θ0:")
print(theta_ans[0, 0])
print("θ1:")
print(theta_ans[1, 0])
print("θ2:")
print(theta_ans[2, 0])
print("θ3:")
print(theta_ans[3, 0])

# Evaluate the model on the test set
test_cost = cost_func(X_test, y_test, theta_ans)
print("Cost on the test set:", test_cost)
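As a quick sanity check (not part of the original script), the closed-form least-squares solution can be compared with the theta found by gradient descent; this sketch reuses the X_train / y_train / X_test / y_test arrays and cost_func defined above.

import numpy as np

# Closed-form ordinary least squares ("normal equation"): theta = (X^T X)^+ X^T y.
# This is only a cross-check; the post itself solves the problem with gradient descent.
theta_closed = np.linalg.pinv(X_train.T @ X_train) @ X_train.T @ y_train

print("closed-form theta:      ", theta_closed.ravel())
print("gradient-descent theta: ", theta_ans.ravel())
print("closed-form cost on the test set:", cost_func(X_test, y_test, theta_closed))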
import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def bce_loss(pred, target):
    """
    Binary cross-entropy loss
    :param pred: predictions
    :param target: ground truth
    :return: mean loss (scalar)
    """
    return -np.mean(target * np.log(pred) + (1 - target) * np.log(1 - pred))


class LogisticRegression:
    """
    Logistic regression
    """
    def __init__(self, x, y, val_x, val_y, epoch=100, lr=0.1, normalize=True, regularize=None, scale=0, show=True):
        """
        :param x: samples, (sample_number, dimension)
        :param y: labels, (sample_number, 1)
        :param epoch: number of training iterations
        :param lr: learning rate
        """
        self.theta = None
        self.loss = []
        self.val_loss = []
        self.n = x.shape[0]
        self.d = x.shape[1]
        self.epoch = epoch
        self.lr = lr
        t = np.ones(shape=(self.n, 1))
        self.normalize = normalize
        if self.normalize:
            self.x_std = x.std(axis=0)
            self.x_mean = x.mean(axis=0)
            self.y_mean = y.mean(axis=0)
            self.y_std = y.std(axis=0)
            x = (x - self.x_mean) / self.x_std
        self.y = y
        self.x = np.concatenate((t, x), axis=1)  # prepend the intercept column
        # self.val_x = (val_x - val_x.mean(axis=0)) / val_x.std(axis=0)
        self.val_x = val_x
        self.val_y = val_y
        self.regularize = regularize
        self.scale = scale
        self.show = show

    def init_theta(self):
        """
        Initialise the parameters
        :return: theta (1, d+1)
        """
        self.theta = np.zeros(shape=(1, self.d + 1))

    def gradient_descent(self, pred):
        """
        One gradient-descent step
        """
        # error (n, 1)
        error = pred - self.y
        # term (d+1, 1) -> (1, d+1)
        term = np.matmul(self.x.T, error)
        term = term.T
        if self.regularize == "L2":
            # L2 penalty, excluding the intercept: re = [0, scale/n * theta_1, ...], shape (1, d+1)
            re = self.scale / self.n * self.theta[0, 1:]
            re = np.expand_dims(np.array(re), axis=0)
            re = np.concatenate((np.array([[0]]), re), axis=1)
            self.theta = self.theta - self.lr * (term / self.n + re)  # update parameters
        else:
            self.theta = self.theta - self.lr * (term / self.n)

    def validation(self, x, y):
        if self.normalize:
            x = (x - x.mean(axis=0)) / x.std(axis=0)
        outputs = self.get_prob(x)
        curr_loss = bce_loss(outputs, y)
        if self.regularize == "L2":
            curr_loss += self.scale / self.n * np.sum(self.theta[0, 1:] ** 2)
        self.val_loss.append(curr_loss)
        predicted = np.expand_dims(np.where(outputs[:, 0] > 0.5, 1, 0), axis=1)
        count = np.sum(predicted == y)
        if self.show:
            print("Accuracy on Val set: {:.2f}%\tLoss on Val set: {:.4f}".format(
                count / y.shape[0] * 100, curr_loss))

    def test(self, x, y):
        outputs = self.get_prob(x)
        predicted = np.expand_dims(np.where(outputs[:, 0] > 0.5, 1, 0), axis=1)
        count = np.sum(predicted == y)
        return count / y.shape[0]

    def train(self):
        """
        Train the logistic regression model
        :return: parameter matrix theta (1, d+1); training losses; validation losses
        """
        self.init_theta()
        for i in range(self.epoch):
            # z (n, 1); theta (1, d+1); self.x.T (d+1, n)
            z = np.matmul(self.theta, self.x.T).T
            pred = sigmoid(z)
            curr_loss = bce_loss(pred, self.y)
            if self.regularize == "L2":
                curr_loss += self.scale / self.n * np.sum(self.theta[0, 1:] ** 2)
            self.loss.append(curr_loss)
            self.gradient_descent(pred)
            if self.show:
                print("Epoch: {}/{}, Train Loss: {:.4f}".format(i + 1, self.epoch, curr_loss))
            self.validation(self.val_x, self.val_y)
        if self.normalize:
            # map theta back to the scale of the raw (unnormalised) features
            y_mean = float(np.mean(z))
            self.theta[0, 1:] = self.theta[0, 1:] / self.x_std.T
            self.theta[0, 0] = y_mean - np.dot(self.theta[0, 1:], self.x_mean.T)
        return self.theta, self.loss, self.val_loss

    def get_prob(self, x):
        """
        Predicted probabilities
        :param x: input samples (n, d)
        :return: predictions (n, 1)
        """
        t = np.ones(shape=(x.shape[0], 1))
        x = np.concatenate((t, x), axis=1)
        pred = sigmoid(np.matmul(self.theta, x.T))
        return pred.T

    def get_inner_product(self, x):
        t = np.ones(shape=(x.shape[0], 1))
        x = np.concatenate((t, x), axis=1)
        return np.matmul(self.theta, x.T)

    def predict(self, x):
        prob = self.get_prob(x)
        return np.expand_dims(np.where(prob[:, 0] > 0.5, 1, 0), axis=1)
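A minimal usage sketch for the class above; the synthetic two-blob data, the split, and the hyper-parameters here are illustrative assumptions, not part of the original assignment.

import numpy as np

# Hypothetical synthetic data: two Gaussian blobs, labels in {0, 1} with shape (n, 1).
np.random.seed(0)
n = 300
x_pos = np.random.randn(n // 2, 2) + 2
x_neg = np.random.randn(n // 2, 2) - 2
x_all = np.vstack((x_pos, x_neg))
y_all = np.vstack((np.ones((n // 2, 1)), np.zeros((n // 2, 1))))

# shuffle and split into train / validation / test
idx = np.random.permutation(n)
x_all, y_all = x_all[idx], y_all[idx]
x_train, y_train = x_all[:200], y_all[:200]
x_val, y_val = x_all[200:250], y_all[200:250]
x_test, y_test = x_all[250:], y_all[250:]

model = LogisticRegression(x_train, y_train, x_val, y_val,
                           epoch=200, lr=0.1, normalize=True,
                           regularize="L2", scale=0.1, show=False)
theta, train_loss, val_loss = model.train()
print("theta:", theta)
print("test accuracy:", model.test(x_test, y_test))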
The derivation of the formulas and of the overall approach is shown in the figure below:
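Since the original figure is not reproduced here, these are the relations the code below implements, written in the code's own sign convention (it propagates −δ, so the final update adds rather than subtracts, and the bias column is excluded from the regularisation):

% forward pass (a bias unit a_0^{(l)} = 1 is prepended to every non-output layer)
a^{(1)} = x, \qquad a^{(l+1)} = \sigma\!\left(\Theta^{(l)} a^{(l)}\right)

% output-layer "error" in the flipped sign convention
\delta^{(L)} = y - a^{(L)}

% hidden-layer error, using \sigma'(z) = a\,(1 - a)
\delta^{(l)} = \left(\Theta^{(l)}\right)^{\top} \delta^{(l+1)} \odot a^{(l)}\left(1 - a^{(l)}\right)

% gradient accumulation over the batch and the parameter update
\Delta^{(l)} \mathrel{+}= \delta^{(l+1)} \left(a^{(l)}\right)^{\top}, \qquad
\Theta^{(l)} \leftarrow \Theta^{(l)} + \alpha\left(\frac{\Delta^{(l)}}{m} - \gamma\,\Theta^{(l)}\right)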
The code is as follows:
import copy
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

"""
mse:  mean squared error
ce:   turn the multi-class problem into several binary problems (binary cross-entropy)
soft: multi-class cross-entropy, applying softmax to the last layer's output
Only the ce path is implemented below; mse_list and soft_list are kept as placeholders.
"""
mse_list = []
ce_list = []
soft_list = []

# activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# derivative of the activation, expressed in terms of the activation value a = sigmoid(z)
def sigmoid_derivative(x):
    return x * (1 - x)

# mean squared error
def mse_function(y, pred_y):
    return (np.sum(pow((pred_y - y), 2)) / len(pred_y)) / 2

# cross-entropy when the multi-class problem is split into binary problems
def ce_function(y, pred_y):
    return -np.sum(y * np.log(pred_y) + (1 - y) * np.log(1 - pred_y))

# multi-class cross-entropy
def soft_function(y, pred_y):
    pred_y = softmax(pred_y)
    return -np.sum(y * np.log(pred_y))

def softmax(pred_y):
    denominator = np.sum(np.exp(pred_y))
    return np.exp(pred_y) / denominator


class NeuralNetwork:
    def __init__(self, layer, times, alpha, epsilon):
        self.layer = layer
        self.times = times
        self.alpha = alpha
        # initialise the hidden- and output-layer weights, e.g. (100, 65) and (10, 101)
        self.ce_weights = []
        for tier in range(len(layer) - 1):
            self.ce_weights.append(
                np.random.rand(layer[tier + 1], layer[tier] + 1) * 2 * epsilon - epsilon)
        self.mse_weights = copy.deepcopy(self.ce_weights)
        self.soft_weights = copy.deepcopy(self.ce_weights)

    # forward propagation: returns the output (activation) of every layer
    def forward_propagation(self, feature_one, for_weights):
        activators = [feature_one.reshape(1, -1)]  # the input layer's activation is x itself, (1, 65)
        for forward_layer in range(len(for_weights)):
            # a_{l+1} = sigmoid(z_{l+1}) = sigmoid(a_l @ theta_l.T)
            activator = sigmoid(np.dot(activators[forward_layer], for_weights[forward_layer].T))
            if forward_layer < len(for_weights) - 1:
                # for every non-output layer, prepend a bias unit whose output is fixed to 1
                activator = np.append(np.array([1]), activator)  # (1, 101)
            # activators: (1, 65) (1, 101) (1, 10)
            activators.append(activator.reshape(1, -1))
        return activators

    # back-propagation with the cross-entropy loss
    def ce_back_propagation(self, activators, target_one, back_weights):
        # per-sample layer errors δ
        deltas_error = [0 for _ in range(len(back_weights))]
        # δ_L = a_L - y; the code propagates -δ, so the sign flips in the final update
        error = target_one - activators[-1]
        deltas = [error]
        for j in range(len(back_weights) - 1, 0, -1):
            delta = np.dot(back_weights[j].T, deltas[-1].T).T * sigmoid_derivative(activators[j])
            deltas.append(delta)
        deltas.reverse()
        # deltas: (1, 101) (1, 10)
        # accumulate the gradients Δ
        for j in range(len(back_weights)):
            de_error = np.dot(deltas[j].reshape(-1, 1), activators[j])
            if j < len(back_weights) - 1:
                de_err = de_error[1:]  # drop the row belonging to the bias unit
            else:
                de_err = de_error
            deltas_error[j] = de_err
        return deltas_error

    # update the parameters
    def update_parameters(self, deltas_error_list, up_weights, data_num, gamma):
        for ly in range(len(up_weights)):
            # the bias column takes no regularisation, so it is split off
            deltas_error_list_regular = deltas_error_list[ly][:, 1:]  # Δ without the bias column
            weights_regular = up_weights[ly][:, 1:]                   # weights without the bias column
            # Δ already carries the flipped sign, so the penalty gamma * theta is subtracted here
            d_part = deltas_error_list_regular / data_num - gamma * weights_regular
            der = np.hstack(
                (((deltas_error_list[ly][:, 0].reshape(-1, 1)) / data_num), d_part))
            # normally theta = theta - Δ, but δ was negated above, so '-' becomes '+'
            up_weights[ly] = up_weights[ly] + self.alpha * der
        return up_weights

    # regularised cross-entropy over all samples, appended to ce_list
    # (this helper is not in the original listing; it is reconstructed from the way it is called)
    def compute_error(self, feature_x, target, m, gamma):
        ce_error = 0
        for i in range(m):
            activators = self.forward_propagation(feature_x[i], self.ce_weights)
            ce_error += ce_function(target[i], activators[-1])
        ce_regu = [np.sum(np.power(w, 2)) for w in self.ce_weights]
        ce_list.append((ce_error + gamma * np.sum(ce_regu) / 2) / m)

    def fit(self, feature, target, gamma, batch_num):
        feature_x0 = np.ones((np.shape(feature)[0], 1))
        feature_x = np.hstack((feature_x0, feature))  # shape (1257, 65)
        m = len(feature_x)
        iters = 0

        # first pass: forward and backward propagation over all samples, then one update
        ce_error = 0
        ce_deltas_error_list = [np.zeros_like(w) for w in self.ce_weights]
        for i in range(m):
            # forward propagation: activations of every layer and the initial ce value
            activators = self.forward_propagation(feature_x[i], self.ce_weights)
            ce = ce_function(target[i], activators[-1])
            ce_error = ce_error + ce
            # back-propagation with the cross-entropy loss: accumulate Δ
            ce_deltas_error = self.ce_back_propagation(activators, target[i], self.ce_weights)
            ce_deltas_error_list = [acc + d for acc, d in zip(ce_deltas_error_list, ce_deltas_error)]

        # record the regularised ce error before the first weight update
        ce_regu = []
        for w in range(len(self.ce_weights)):
            ce_regu.append(np.sum(np.power(self.ce_weights[w], 2)))
        ce_regular = (ce_error + gamma * np.sum(ce_regu) / 2) / m
        ce_list.append(ce_regular)

        # update every layer's parameters from the back-propagation result
        self.ce_weights = self.update_parameters(ce_deltas_error_list, self.ce_weights, m, gamma)

        # record the regularised ce error over all samples after the first update
        self.compute_error(feature_x, target, m, gamma)

        # mini-batch updates
        print("batch----------------------------------------------------------")
        while iters < self.times:
            rand_index = np.random.randint(0, m, size=(1, batch_num))[0]
            feature_batch = feature_x[rand_index]
            target_batch = target[rand_index]
            ce_error = 0
            ce_deltas_error_list = [np.zeros_like(w) for w in self.ce_weights]
            for i in range(batch_num):
                # forward propagation on a batch sample
                ce_activators = self.forward_propagation(feature_batch[i], self.ce_weights)
                ce = ce_function(target_batch[i], ce_activators[-1])
                ce_error = ce_error + ce
                # back-propagation: accumulate Δ over the batch
                ce_deltas_error = self.ce_back_propagation(ce_activators, target_batch[i], self.ce_weights)
                ce_deltas_error_list = [acc + d for acc, d in zip(ce_deltas_error_list, ce_deltas_error)]
            # update every layer's parameters from this batch
            self.ce_weights = self.update_parameters(ce_deltas_error_list, self.ce_weights, batch_num, gamma)
            # record the regularised ce error over all samples after this update
            self.compute_error(feature_x, target, m, gamma)
            iters += 1
            if iters % 500 == 0:
                print(iters)
        return self.ce_weights


# prediction on the test set
def predict(feature, target, target_lb, ce_w, gamma):
    feature_x0 = np.ones((np.shape(feature)[0], 1))
    feature_x = np.hstack((feature_x0, feature))  # shape (540, 65)
    data_num = len(feature_x)
    ce_error = 0
    ce_predict_value_list = []
    for i in range(data_num):
        ce_activators = nn.forward_propagation(feature_x[i], ce_w)
        # the predicted digit is the index of the largest output, used for the accuracy
        ce_pred = ce_activators[-1]
        ce_index_value = np.argmax(ce_pred)
        ce_predict_value_list.append(ce_index_value)
        # accumulate the cross-entropy
        ce = ce_function(target_lb[i], ce_activators[-1])
        ce_error = ce_error + ce
    # regularised ce error
    ce_regu = []
    for w in range(len(ce_w)):
        ce_regu.append(np.sum(np.power(ce_w[w], 2)))
    ce_regular = (ce_error + gamma * np.sum(ce_regu) / 2) / data_num
    # accuracy
    ce_judge = np.array(ce_predict_value_list) == np.array(target)
    ce_prec = np.sum(ce_judge) / len(ce_judge)
    print(ce_judge, sep="\n")
    return ce_regular, ce_prec


def plot(ce):
    # kept from the original: lets matplotlib render CJK glyphs and the minus sign
    mpl.rcParams['font.family'] = 'SimHei'
    mpl.rcParams['axes.unicode_minus'] = False
    plt.figure()
    # plot the error every 50 iterations
    ce_re_part = [ce[i - 1] for i in range(1, len(ce)) if i % 50 == 0]
    ce_re_part.insert(0, ce[0])
    x_data = [i for i in range(len(ce)) if i % 50 == 0]
    plt.plot(x_data, ce_re_part, lw=1, c='green', marker='o', ms=4, label="binary cross-entropy")
    plt.xlabel("iterations")
    plt.ylabel("error")
    plt.title("Handwritten digit prediction - neural network")
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # load the handwritten digits dataset
    digits = datasets.load_digits()
    # min-max normalisation of the pixel values
    range_value = np.max(digits.data) - np.min(digits.data)
    data = (digits.data - np.min(digits.data)) / range_value
    # split into training and test sets
    train_feature, test_feature, train_target, test_target = train_test_split(
        data, digits.target, test_size=0.3)
    train_target_lb = LabelBinarizer().fit_transform(train_target)
    test_target_lb = LabelBinarizer().fit_transform(test_target)

    layer = [64, 100, 10]
    times = 8000     # number of iterations
    alphas = 0.02    # learning rate
    epsilon = 1      # weights initialised in [-epsilon, epsilon]
    nn = NeuralNetwork(layer, times, alphas, epsilon)  # a three-layer network
    gamma = 0.0001   # regularisation coefficient
    batch_num = 20

    ce_weights = nn.fit(train_feature, train_target_lb, gamma, batch_num)
    ce_re, ce_precision = predict(test_feature, test_target, test_target_lb, ce_weights, gamma)
    print("ce_re:{0}".format(ce_re), sep="\n")
    print("ce_precision:{0}".format(ce_precision), sep="\n")
    plot(ce_list)
Tip: there is still a small bug somewhere; it is probably a shape mismatch in one of the matrix operations.
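As a rough point of comparison while the bug hunt continues (this is not part of the original assignment), scikit-learn's built-in multilayer perceptron can be trained on the same split produced in the main block above; the hidden-layer size mirrors layer = [64, 100, 10].

from sklearn.neural_network import MLPClassifier

# Hypothetical cross-check, reusing train_feature / train_target / test_feature / test_target
# from the __main__ block of the network above.
clf = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic',
                    alpha=0.0001, max_iter=500, random_state=0)
clf.fit(train_feature, train_target)
print("sklearn MLP test accuracy:", clf.score(test_feature, test_target))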
To be completed.
To be completed.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

dataset = pd.read_csv('watermelon.csv', delimiter=",")
data = dataset.values
print(dataset)

def distance(x1, x2):
    # squared Euclidean distance
    return sum((x1 - x2) ** 2)

def Kmeans(D, K, maxIter):
    m, n = np.shape(D)
    if K >= m:
        return D
    # randomly pick K distinct samples as the initial centroids
    initSet = set()
    curK = K
    while curK > 0:
        randomInt = random.randint(0, m - 1)
        if randomInt not in initSet:
            curK -= 1
            initSet.add(randomInt)
    U = D[list(initSet), :]  # mean vectors, i.e. the centroids
    C = np.zeros(m)
    curIter = maxIter  # maximum number of iterations
    while curIter > 0:
        curIter -= 1
        # assign each sample to its nearest centroid
        for i in range(m):
            p = 0
            minDistance = distance(D[i], U[0])
            for j in range(1, K):
                if distance(D[i], U[j]) < minDistance:
                    p = j
                    minDistance = distance(D[i], U[j])
            C[i] = p
        # recompute the centroids
        newU = np.zeros((K, n))
        cnt = np.zeros(K)
        for i in range(m):
            newU[int(C[i])] += D[i]
            cnt[int(C[i])] += 1
        changed = 0
        # if any centroid moved, keep iterating; otherwise stop
        for i in range(K):
            newU[i] /= cnt[i]
            for j in range(n):
                if U[i, j] != newU[i, j]:
                    changed = 1
                    U[i, j] = newU[i, j]
        if changed == 0:
            return U, C, maxIter - curIter
    return U, C, maxIter - curIter

U, C, iter = Kmeans(data, 3, 20)

f1 = plt.figure(1)
plt.title('watermelon')
plt.xlabel('density')
plt.ylabel('ratio')
plt.scatter(data[:, 0], data[:, 1], marker='o', color='g', s=50)
plt.scatter(U[:, 0], U[:, 1], marker='o', color='r', s=100)
m, n = np.shape(data)
# draw a dashed line from each sample to its centroid
for i in range(m):
    plt.plot([data[i, 0], U[int(C[i]), 0]], [data[i, 1], U[int(C[i]), 1]], "c--", linewidth=0.3)
plt.show()
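As a quick sanity check (not in the original code), the same data can be clustered with scikit-learn's KMeans; its centroids should land close to the U returned by the hand-written loop above.

from sklearn.cluster import KMeans

# Hypothetical cross-check, reusing `data` loaded from watermelon.csv above.
km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(data)
print("sklearn centroids:\n", km.cluster_centers_)
print("hand-written centroids:\n", U)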