赞
踩
# Basic tensor creation/manipulation snippets (PyTorch tutorial).
import torch
import numpy as np  # fix: np was used below without being imported

torch.empty(5, 3)                      # uninitialized 5x3 tensor
torch.rand(5, 3)                       # uniform random values in [0, 1)
torch.zeros(5, 3, dtype=torch.long)    # all-zero matrix
x = torch.tensor([5.5, 3])             # build a tensor from a Python list
x = x.new_ones(5, 3, dtype=torch.double)    # 5x3 matrix of ones
x = torch.randn_like(x, dtype=torch.float)  # random tensor with x's shape

y = torch.rand(5, 3)  # fix: y was used below without ever being defined
x + y
torch.add(x, y)       # equivalent to x + y
x[:, 1]               # indexing: second column

x = torch.randn(4, 4)  # standard-normal random 4x4
y = x.view(16)         # reshape into a flat vector of 16
z = x.view(-1, 8)      # -1 lets torch infer that dimension
print(x.size(), y.size(), z.size())  # tensor shapes

# NumPy <-> tensor bridge (both directions share the underlying memory).
a = torch.ones(5)
b = a.numpy()           # tensor -> NumPy array
a = np.ones(5)
b = torch.from_numpy(a)  # NumPy array -> tensor
# Autograd demo: build a small graph and backpropagate through it.
# Method 1: request gradient tracking at construction time
x = torch.randn(3,4,requires_grad=True)
# Method 2: flip the flag after construction
x = torch.randn(3,4)
x.requires_grad=True
b = torch.randn(3,4,requires_grad=True)
t = x+b
y = t.sum() # scalar, e.g. tensor(-7.2647, grad_fn=<SumBackward0>)
y.backward(retain_graph=True) # gradients accumulate by default across backward() calls
b.grad
# requires_grad propagates to results of tracked operands:
x.requires_grad,b.requires_grad,t.requires_grad # (True, True, True)
# leaf check: t is computed, x was created directly by the user
t.is_leaf,x.is_leaf # (False, True)
# Purpose: shuffle the data and split it into mini-batches.
def get_data(train_ds: TensorDataset, valid_ds: TensorDataset, bs: int): return ( DataLoader(train_ds, batch_size=bs, shuffle=True), DataLoader(valid_ds, batch_size=bs * 2) ) # 把训练集和测试集的特征和标签打包 train_ds = TensorDataset( torch.from_numpy(X_train).float(), torch.from_numpy(Y_train).float() ) valid_ds = TensorDataset( torch.from_numpy(X_test).float(), torch.from_numpy(Y_test).float() ) # 把训练集和测试集分包 bs = 16 train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
import warnings import sklearn from sklearn import preprocessing import torch.nn.functional as F import torch from sklearn import datasets from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split from torch import nn import numpy as np import matplotlib.pyplot as plt # 其实线性回归就是一个不加激活函数的全链接层 from torch.utils.data import DataLoader, TensorDataset from tqdm import tqdm warnings.filterwarnings("ignore") class LinearRegressionModel(nn.Module): def __init__(self, input_dim, output_dim): super(LinearRegressionModel, self).__init__() self.linear = nn.Linear(input_dim, output_dim) self.loss_func = nn.MSELoss() # MSE损失函数 数据记得标准化,数值要求在[-1,1]之间 self.opt = torch.optim.SGD(self.parameters(), lr=0.03) # SGD优化器 self.val_loss = None def forward(self, x): x = self.linear(x) return x def loss_batch(self, xb, yb, opt=None): loss = self.loss_func(self(xb), yb) # 计算损失 if opt is not None: loss.backward() # 反向传播 opt.step() # 更新权重参数 opt.zero_grad() # 每一次迭代 梯度要清零 return loss.item(), len(xb) def fit(self, steps, train_dl: DataLoader, valid_dl: DataLoader): with tqdm(total=steps) as pbar: for step in range(steps): self.train() # 开始训练的过程 for xb, yb in train_dl: self.loss_batch(xb, yb, self.opt) self.eval() # 开始测试的过程 with torch.no_grad(): losses, nums = zip( *[self.loss_batch(xb, yb) for xb, yb in valid_dl] ) self.val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums) pbar.set_description("model") pbar.set_postfix(step=step, val_loss=self.val_loss) pbar.update(1) pbar.write('当前数据损失:{}'.format(self.val_loss))
def get_data(train_ds: TensorDataset, valid_ds: TensorDataset, bs: int): return ( DataLoader(train_ds, batch_size=bs, shuffle=True), DataLoader(valid_ds, batch_size=bs * 2) ) if __name__ == '__main__': # 生成一个线性回归问题 # X, Y = make_regression(n_features=1, n_samples=506, n_informative=2, n_targets=1, noise=1.2) # n_features(default=100) 表示每一个样本有多少特征值 # n_samples (default=100)表示样本的个数 # n_informative(default=10)有信息的特征数量,也就是用来构造线性模型,生成输出的特征数量 # n_targets(default=1)回归目标的数量,也就是对应于一个样本输出向量y的维度。默认输出是标量 # noise(default=0.0)施加于输出的高斯噪声的标准差(越大越离散) # 获取波士顿房价数据 loaded_data = datasets.load_boston() X = loaded_data.data Y = loaded_data.target # 处理数据 X = sklearn.preprocessing.StandardScaler().fit_transform(X) # 数据标准化 X_train, X_test, Y_train, Y_test = train_test_split( X, Y.reshape(-1, 1), test_size=0.01 ) train_ds = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(Y_train).float()) valid_ds = TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(Y_test).float()) bs = 16 train_dl, valid_dl = get_data(train_ds, valid_ds, bs) # 训练模型 model = LinearRegressionModel(13, 1) model.fit(1000, train_dl, valid_dl) pass
import pandas as pd from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.linear_model import LinearRegression from sklearn import preprocessing from torch.utils.data import TensorDataset from torch.utils.data import DataLoader import torch from torch import nn import numpy as np import torch.nn.functional as F import warnings from sklearn.datasets import make_blobs from torch import optim from tqdm import tqdm warnings.filterwarnings("ignore") # torch.nn.functional(一般用于没有可学习的参数) 和 nn.Module (一般用于有可学习的参数) # 创建一个model来更简化代码 # 必须继承mm.Module 且在其构造函数中需要调用nn.Module的构造函数 # 无需写反向传播函数,nn.Module 能够利用 autograd 自动实现反向传播 # Module 中的可学习参数可以通过named_parameters()或者parameters()返回迭代器 class Mnist_NN(nn.Module): def __init__(self, input_dim, output_dim): super().__init__() self.hidden1 = nn.Linear(input_dim, 128) self.hidden2 = nn.Linear(128, 256) self.out = nn.Linear(256, output_dim) self.loss_func = F.cross_entropy # 损失函数 self.opt = optim.SGD(self.parameters(), lr=0.001) # 随机梯度下降 self.val_loss = None def forward(self, x): x = F.relu(self.hidden1(x)) x = F.relu(self.hidden2(x)) x = self.out(x) return x def loss_batch(self, xb, yb, opt=None): loss = self.loss_func(self(xb), yb) if opt is not None: loss.backward() opt.step() opt.zero_grad() return loss.item(), len(xb) def fit(self, steps, train_dl: DataLoader, valid_dl: DataLoader): with tqdm(total=steps) as pbar: for step in range(steps): self.train() # 开始训练的过程 for xb, yb in train_dl: self.loss_batch(xb, yb, self.opt) self.eval() # 开始测试的过程 with torch.no_grad(): losses, nums = zip( *[self.loss_batch(xb, yb) for xb, yb in valid_dl] ) self.val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums) pbar.set_description("model") pbar.set_postfix(step=step, val_loss=self.val_loss) pbar.update(1) pbar.write('当前数据损失:{}'.format(self.val_loss)) def predict(self, X): return self(X)
def get_data(train_ds: TensorDataset, valid_ds: TensorDataset, bs: int): return ( DataLoader(train_ds, batch_size=bs, shuffle=True), DataLoader(valid_ds, batch_size=bs * 2) ) if __name__ == '__main__': # 生成一个分类问题 x, y = make_blobs(n_features=10, n_samples=100, centers=3, random_state=3) # n_features 表示每一个样本有多少特征值 # n_samples 表示样本的个数 # centers 是聚类中心点的个数,可以理解为label的种类数 # random_state 是随机种子,可以固定生成的数据 # cluster_std 设置每个类别的方差,若要指定需要传入长度为centers的列表 # 处理数据 x_train, x_test, y_train, y_test = train_test_split(x, y) # 分类 - (训练集 , 测试集) train_ds = TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train).long()) valid_ds = TensorDataset(torch.from_numpy(x_test), torch.from_numpy(y_test).long()) bs = 16 torch.set_default_tensor_type(torch.DoubleTensor) train_dl, valid_dl = get_data(train_ds, valid_ds, bs) # 训练模型 model = Mnist_NN(10, 3) model.fit(100, train_dl, valid_dl) pass
from torch.utils.data import Dataset


class OneData(Dataset):
    """Skeleton for a custom map-style Dataset.

    Fill in __getitem__ to return one sample (and label) and __len__
    to report the dataset size.
    """

    def __init__(self):
        pass

    def __getitem__(self, index):
        # Fix: DataLoader calls dataset[index]; the original signature
        # took no index argument and raised TypeError on any access.
        pass

    def __len__(self):
        pass
# Persist only the trained weights (state_dict), not the whole module.
torch.save(model.state_dict(),'data/model.pkl')
# Restore them into an already-constructed model of the same architecture.
model.load_state_dict(torch.load('data/model.pkl'))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。