
Notes: Implementing a GRU from scratch in PyTorch

A PyTorch GRU implementation
import math
import pickle

import numpy as np
import torch
import torch.nn as nn
from sklearn import metrics
from torch.utils.data import DataLoader, TensorDataset

import Config  # local module providing the EarlyStopping helper used below
batch_size = 256      # number of samples per batch
input_size = 63       # dimensionality of the input features
hidden_size = 32      # dimensionality of the GRU hidden state
workers = 2           # number of worker processes for data loading
learning_rate = 1e-2  # learning rate; the higher it is, the larger the gradient-descent steps
epochs = 50           # total number of training epochs
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")  # put the data on the GPU if available, otherwise the CPU
datatype = "mm3"
save_model_dir = 'xxx' + datatype + '.pth'  # path where the model checkpoint is saved
testepoch = 10        # number of repeated test runs in main()
def get_data():
    # Load the dataset; the inputs have shape (totalsize, timestep, features).
    x_torch = pickle.load(open('dataset/x.p', 'rb'))
    y_torch = pickle.load(open('dataset/y.p', 'rb'))
    print(x_torch.shape)
    print(y_torch.shape)
    # Split into training, validation, and test sets at a ratio of 8:1:1.
    train_ratio = 0.8
    valid_ratio = 0.1
    test_ratio = 0.1
    N = len(x_torch)
    training_x = x_torch[: int(train_ratio * N)]
    validing_x = x_torch[int(train_ratio * N): int((train_ratio + valid_ratio) * N)]
    testing_x = x_torch[int((train_ratio + valid_ratio) * N):]
    training_y = y_torch[: int(train_ratio * N)]
    validing_y = y_torch[int(train_ratio * N): int((train_ratio + valid_ratio) * N)]
    testing_y = y_torch[int((train_ratio + valid_ratio) * N):]
    train_deal_dataset = TensorDataset(training_x, training_y)
    test_deal_dataset = TensorDataset(testing_x, testing_y)
    valid_deal_dataset = TensorDataset(validing_x, validing_y)
    train_loader = DataLoader(dataset=train_deal_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=workers)
    test_loader = DataLoader(dataset=test_deal_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             drop_last=True,
                             num_workers=workers)
    valid_loader = DataLoader(dataset=valid_deal_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=workers)
    return train_loader, test_loader, valid_loader
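The pickled dataset files are not included with this post. For anyone who wants to run the script end to end, the snippet below writes a random stand-in dataset with the (totalsize, timestep, features) layout that get_data() expects; the sizes and the binary labels are made-up assumptions, not the original data.

import os
import pickle
import torch

# Hypothetical stand-in data: 1000 samples, 48 time steps, 63 features
# (matching input_size = 63 above), with binary labels for BCELoss.
os.makedirs('dataset', exist_ok=True)
x_fake = torch.randn(1000, 48, 63)
y_fake = torch.randint(0, 2, (1000,))
pickle.dump(x_fake, open('dataset/x.p', 'wb'))
pickle.dump(y_fake, open('dataset/y.p', 'wb'))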
# The GRU built on PyTorch's official nn.GRU.
class GRUofficial(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUofficial, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Specify the input size, hidden size, number of GRU layers, whether batch
        # comes first in the shape, and whether the RNN is bidirectional.
        self.GRUofficialAPI = nn.GRU(input_size=input_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     batch_first=True,
                                     bidirectional=False)
        self.outlinear = nn.Sequential(
            nn.Linear(hidden_size, output_size),
            nn.Sigmoid()
        )  # output layer

    def forward(self, input):
        # Move the data to the device and cast to float.
        input = input.to(device).float()  # (batch_size, seq_len, input_size)
        out, h = self.GRUofficialAPI(input)
        '''
        out's first dimension is the batch (number of samples),
        the second is the number of time steps (one h per step),
        and the third is the size of each hidden state, i.e. num_directions * hidden_size.
        In short, out stacks the hidden state emitted by the RNN cell at every step:
        out[0][0] is the hidden state of the first sample at the first time step,
        out[0][1] is the hidden state of the first sample at the second time step.
        h has shape [num_layers * num_directions, batch_size, hidden_size];
        for a single-layer unidirectional GRU that is [1, batch_size, hidden_size].
        '''
        # Since h has shape [1, batch_size, hidden_size], squeeze() removes the leading 1.
        # To add a dimension use unsqueeze: if h has shape [A, B], h.unsqueeze(dim=1)
        # gives [A, 1, B] (dim counts from 0).
        output = self.outlinear(h.squeeze())  # feed the last hidden state through a Sigmoid-activated linear layer, since this is a classification task
        # print(output.shape)  # output now has shape (batch_size, output_size)
        return output
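As a quick sanity check of the shapes (the batch size of 8 and sequence length of 20 below are arbitrary), feeding a random batch through GRUofficial should produce one sigmoid score per sample:

# Hypothetical smoke test for GRUofficial; 8 and 20 are arbitrary sizes.
m = GRUofficial(input_size=63, hidden_size=32, output_size=1).to(device)
dummy = torch.randn(8, 20, 63)   # (batch_size, seq_len, input_size)
print(m(dummy).shape)            # torch.Size([8, 1]) after h.squeeze()

One caveat with h.squeeze(): for a batch of one it would also drop the batch dimension; h.squeeze(0) removes only the leading num_layers * num_directions axis.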
# The hand-written GRU.
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Trainable parameter matrices.
        self.w_xr = torch.nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.w_hr = torch.nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.w_xz = torch.nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.w_hz = torch.nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.w_xh = torch.nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.w_hh = torch.nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_r = torch.nn.Parameter(torch.Tensor(hidden_size))
        self.b_z = torch.nn.Parameter(torch.Tensor(hidden_size))
        self.b_h = torch.nn.Parameter(torch.Tensor(hidden_size))
        self.outlinear = nn.Sequential(
            nn.Linear(hidden_size, output_size),
            nn.Sigmoid()
        )  # output layer
        self.reset_parameters()  # initialize the parameters

    def reset_parameters(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            torch.nn.init.uniform_(weight, -stdv, stdv)

    def forward(self, input):
        input = input.to(device).float()  # (batch_size, seq_len, input_size)
        batch_size = input.size(0)  # size of the batch
        step_size = input.size(1)   # number of time steps
        # Initialize the hidden state h to zeros.
        h = torch.zeros(batch_size, self.hidden_size).to(device)
        # Collect the h produced at every time step.
        lisths = []
        # Compute one time step at a time.
        for i in range(step_size):
            # Take the input at time step i.
            x = input[:, i, :]
            # ------------------------------ GRU core equations ------------------------------
            # x has shape (batch_size, input_size) and w_xz has shape (input_size, hidden_size);
            # torch.mm is matrix multiplication, so torch.mm(x, self.w_xz) has shape (batch_size, hidden_size).
            z = torch.sigmoid(torch.mm(x, self.w_xz) + torch.mm(h, self.w_hz) + self.b_z)
            r = torch.sigmoid(torch.mm(x, self.w_xr) + torch.mm(h, self.w_hr) + self.b_r)
            h_tilde = torch.tanh(torch.mm(x, self.w_xh) + torch.mm(r * h, self.w_hh) + self.b_h)
            h = (1 - z) * h + z * h_tilde
            # ------------------------------ GRU core equations ------------------------------
            # h has shape (batch_size, hidden_size).
            # Store the h of every time step in the list.
            lisths.append(h)
        # torch.stack turns the list of tensors into a single tensor; dim=1 stacks along
        # the second dimension because that is where seq_len lives,
        # so hs has shape (batch_size, seq_len, hidden_size).
        hs = torch.stack(lisths, dim=1)  # the hidden states computed by all cells
        # At this point h is the hidden state of the last time step, which serves as the final output.
        output = self.outlinear(h)  # feed it through a Sigmoid-activated linear layer, since this is a classification task
        # output now has shape (batch_size, output_size)
        return output
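For reference, the loop above implements the following equations, written here to match the code's row-vector convention:

z_t = \sigma(x_t W_{xz} + h_{t-1} W_{hz} + b_z)
r_t = \sigma(x_t W_{xr} + h_{t-1} W_{hr} + b_r)
\tilde{h}_t = \tanh(x_t W_{xh} + (r_t \odot h_{t-1}) W_{hh} + b_h)
h_t = (1 - z_t) \odot h_{t-1} + z_t \odot \tilde{h}_t

Note that this is the variant in which z_t gates the candidate state; PyTorch's nn.GRU uses the opposite convention, h_t = (1 - z_t) \odot \tilde{h}_t + z_t \odot h_{t-1}, and applies the reset gate as r_t \odot (h_{t-1} W + b) rather than (r_t \odot h_{t-1}) W. The two classes are therefore equivalent architectures but not parameter-for-parameter identical.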
model = GRUofficial(input_size, hidden_size, 1)
# model = GRU(input_size, hidden_size, 1)
# Move the model onto the device.
model.to(device)
# Use Adam as the optimizer (created inside train_model).
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train_model(model, train_loader, valid_loader):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Array of training losses.
    train_loss_array = []
    # Early stopping; its implementation lives in Config.
    # Early stopping means training halts once the validation loss stops decreasing.
    Early_stopping = Config.EarlyStopping()
    for epoch in range(epochs):
        device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
        # If the model contains BatchNorm or Dropout layers, call model.train() during
        # training: it lets BatchNorm use the mean and variance of each batch, and lets
        # Dropout train on a random subset of connections when updating the parameters.
        model.train()
        for i, data in enumerate(train_loader):
            # i indexes the batches that the training data was split into.
            # Unpack the inputs and labels and move them to the device.
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels = labels.float()
            # Forward pass.
            out = model(inputs)  # (256, 1)
            out = out.to(device).squeeze()  # (256, 1) -> (256,)
            # Binary classification uses binary cross-entropy (BCE) as the loss.
            lossF = torch.nn.BCELoss(reduction='mean').to(device)
            # Compute the loss.
            batch_loss = lossF(out, labels)
            # Backward pass.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
        # Halve the learning rate every four epochs, so an overly large step size
        # does not keep overshooting the optimum back and forth.
        if epoch % 4 == 0:
            for p in optimizer.param_groups:
                p['lr'] *= 0.5
        if (epoch + 1) % 1 == 0:  # print the result every epoch
            print('Epoch: {}, Train Loss: {}'.format(epoch + 1, batch_loss.detach().data))
            train_loss_array.append(batch_loss.detach().data)
        # Run over the validation set after every epoch (on the CPU here;
        # the model's forward still moves its inputs to the global device).
        device = torch.device("cpu")
        # If the model contains BatchNorm or Dropout layers, call model.eval() during
        # validation/testing: BatchNorm then uses the statistics accumulated over the
        # training data (its mean and variance stay fixed), and Dropout uses all
        # connections instead of randomly dropping neurons.
        model.eval()
        valid_losses = []
        for i, data in enumerate(valid_loader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels = labels.float()
            # Forward pass.
            out = model(inputs)
            out = out.to(device).squeeze()
            lossF = torch.nn.BCELoss(reduction='mean').to(device)
            batch_loss = lossF(out, labels)
            # No backward pass on the validation set.
            valid_losses.append(batch_loss.detach().data)
        valid_loss = np.average(valid_losses)
        print('Epoch: {}, Valid Loss: {}'.format(epoch + 1, valid_loss))
        state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch, 'percent': 123, 'result': 444}
        Early_stopping(valid_loss, model, state, save_model_dir)
        # Stop early once the condition is met; the condition used here is that the
        # validation loss has not decreased for four consecutive epochs.
        if Early_stopping.early_stop:
            print("Early stopping")
            break
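The Config module referenced above is not shown in the post. Judging only from how it is used, namely the call Early_stopping(valid_loss, model, state, save_model_dir), the early_stop flag, and the four-epoch patience mentioned in the comment, a minimal sketch of such a class might look like the following; everything beyond that call signature is an assumption:

# Hypothetical stand-in for Config.EarlyStopping, reconstructed from its call
# site; only __call__'s arguments and .early_stop are implied by the code above.
class EarlyStopping:
    def __init__(self, patience=4):
        self.patience = patience   # epochs to wait for the validation loss to improve
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, valid_loss, model, state, save_path):
        if self.best_loss is None or valid_loss < self.best_loss:
            # The validation loss improved: save the checkpoint and reset the counter.
            self.best_loss = valid_loss
            self.counter = 0
            torch.save(state, save_path)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True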
def test_model(model, test_loader):
    device = torch.device("cpu")
    # Switch to evaluation mode; together with torch.no_grad() below, no gradients
    # are computed and the parameters are not updated.
    model.eval()
    test_loss_array = []
    # Collect the model outputs and the labels in lists; they are needed later to compute AUROC and AUPRC.
    outs = list()
    labelss = list()
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels = labels.float()
            # print(inputs.shape)
            # Forward pass.
            out = model(inputs)
            out = out.to(device).squeeze()
            lossF = torch.nn.BCELoss(reduction='mean').to(device)
            batch_loss = lossF(out, labels)
            outs.extend(list(out.numpy()))
            labelss.extend(list(labels.numpy()))
            print('Test loss: {}'.format(float(batch_loss.data)))
            test_loss_array.append(float(batch_loss.data))
    # Convert to numpy arrays.
    outs = np.array(outs)
    labelss = np.array(labelss)
    auroc = metrics.roc_auc_score(labelss, outs)
    (precisions, recalls, thresholds) = metrics.precision_recall_curve(labelss, outs)
    auprc = metrics.auc(recalls, precisions)
    return auroc, auprc
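To make the final metrics concrete, here is the same AUROC/AUPRC recipe applied to a tiny made-up set of scores (the numbers are illustrative only):

# Toy illustration of the metric computation used in test_model.
y_true = np.array([0, 0, 1, 1, 0, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])
print(metrics.roc_auc_score(y_true, y_score))                  # AUROC
precisions, recalls, _ = metrics.precision_recall_curve(y_true, y_score)
print(metrics.auc(recalls, precisions))                        # AUPRC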
def main():
    # Load the data.
    train_loader, test_loader, valid_loader = get_data()
    # Train the model.
    train_model(model, train_loader, valid_loader)
    # Load the saved checkpoint.
    checkpoint = torch.load(save_model_dir)
    model.load_state_dict(checkpoint['model'])
    aurocs = []
    auprcs = []
    for i in range(testepoch):
        train_loader, test_loader, valid_loader = get_data()
        auroc, auprc = test_model(model, test_loader)
        aurocs.append(auroc)
        auprcs.append(auprc)
    auroc_mean = np.mean(aurocs)
    auroc_std = np.std(aurocs, ddof=1)
    auprc_mean = np.mean(auprcs)
    auprc_std = np.std(auprcs, ddof=1)
    print("AUROC mean: " + str(auroc_mean) + ", standard deviation: " + str(auroc_std))
    print("AUPRC mean: " + str(auprc_mean) + ", standard deviation: " + str(auprc_std))
    return


if __name__ == '__main__':
    main()
