import torch
# ^^^ pyforest auto-imports - don't write above this line
| 超参数 | 二分类 | 多分类 |
| --- | --- | --- |
| 输入层形状(in_features) | 与特征数量相同(例如:心脏病预测中的年龄、性别、身高、体重) | 与二分类相同 |
| 隐藏层(hidden layers) | 视具体问题而定,最小值=1,最大值=无限制 | 与二分类相同 |
| 每个隐藏层的神经元数量 | 视具体问题而定,一般为10到512个 | 与二分类相同 |
| 输出层形状(out_features) | 1(一类或者另一类) | 每类一个 |
| 隐藏层激活函数 | ReLU(修正线性单元) | 与二分类相同 |
| 输出激活函数 | sigmoid(torch.sigmoid) | softmax(torch.softmax) |
| 损失函数 | 二元交叉熵(PyTorch 中为 torch.nn.BCELoss) | 交叉熵(torch.nn.CrossEntropyLoss) |
| 优化器 | SGD(随机梯度下降)、Adam | |

# 优化器举例
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam([var1, var2], lr=0.0001)
from sklearn.datasets import make_circles
# Number of samples to generate: 1000
n_samples = 1000
# Create a dataset of two concentric circles (one class per circle)
X, y = make_circles(n_samples,
noise=0.03, # add a little noise to the points
random_state=42) # fix the random seed so every run produces the same data
print(f"\n前 5个 X 特征:\n{X[:5]}")
print(f"\n前 5个 y 标签:\n{y[:5]}")
前 5个 X 特征:
[[ 0.75424625 0.23148074]
[-0.75615888 0.15325888]
[-0.81539193 0.17328203]
[-0.39373073 0.69288277]
[ 0.44220765 -0.89672343]]
前 5个 y 标签:
[1 1 1 1 0]
import pandas as pd
# Gather both feature columns and the label into one DataFrame so the data
# can be inspected as a table.
circles = pd.DataFrame({
    "X1": X[:, 0],  # first feature column
    "X2": X[:, 1],  # second feature column
    "标签": y,  # class label
})
X1 | X2 | 标签 | |
0 | 0.754246 | 0.231481 | 1 |
1 | -0.756159 | 0.153259 | 1 |
2 | -0.815392 | 0.173282 | 1 |
3 | -0.393731 | 0.692883 | 1 |
4 | 0.442208 | -0.896723 | 0 |
5 | -0.479646 | 0.676435 | 1 |
6 | -0.013648 | 0.803349 | 1 |
7 | 0.771513 | 0.147760 | 1 |
8 | -0.169322 | -0.793456 | 1 |
9 | -0.121486 | 1.021509 | 0 |
1 500
0 500
Name: 标签, dtype: int64
# 可视化
import matplotlib.pyplot as plt
# Scatter the two features against each other, coloured by class label
# (same colouring as the later scatter plot of this dataset).
plt.scatter(x=X[:, 0],
            y=X[:, 1],
            c=y,
            cmap=plt.cm.RdBu)
X.shape, y.shape
((1000, 2), (1000,))
X_sample = X[0]
y_sample = y[0]
print(f"一个X采样的值 X: {X_sample} and the same for y: {y_sample}")
print(f"一个X采样的值的形状X: {X_sample.shape} and the same for y: {y_sample.shape}")
一个X采样的值 X: [0.75424625 0.23148074] and the same for y: 1
一个X采样的值的形状X: (2,) and the same for y: ()
import torch
X = torch.from_numpy(X).type(torch.float)
y = torch.from_numpy(y).type(torch.float)
# View the first five samples
X[:5], y[:5]
(tensor([[ 0.7542, 0.2315],
[-0.7562, 0.1533],
[-0.8154, 0.1733],
[-0.3937, 0.6929],
[ 0.4422, -0.8967]]),
tensor([1., 1., 1., 1., 0.]))
from sklearn.model_selection import train_test_split
# Split features AND labels together; passing only X would return two arrays
# and the four-way unpacking below would fail.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,  # 20% test set, 80% training set
    random_state=42,  # seed so the split is reproducible
)
len(X_train), len(X_test), len(y_train), len(y_test)
(800, 200, 800, 200)
1.设置与设备无关的代码(如果可用,我们的模型可以在 CPU 或 GPU 上运行)。
# 1.设置与设备无关的代码
import torch
from torch import nn
# Make device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
class CircleModelV0(nn.Module):
    """Two stacked linear layers mapping 2 input features to 1 raw logit.

    There is no activation between the layers, so the model as a whole is
    still a linear function of its input.
    """

    def __init__(self):
        # nn.Module must be initialised before any sub-module is assigned as
        # an attribute; without this call the assignments below raise.
        super().__init__()
        # 2 input features (X) -> 5 hidden units
        self.layer_1 = nn.Linear(in_features=2, out_features=5)
        # 5 hidden units -> 1 output feature (same shape as y)
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Run ``x`` through layer_1, then layer_2, and return the result."""
        return self.layer_2(self.layer_1(x))
# 4. 创建一个模型的实例,把他放在目标设备
model_0 = CircleModelV0().to(device)
(layer_1): Linear(in_features=2, out_features=5, bias=True)
(layer_2): Linear(in_features=5, out_features=1, bias=True)
# https://playground.tensorflow.org/
# 您也可以使用nn.Sequential.为什么不总使用nn.Sequential,因为它是 按照顺序 来计算的。
# Same architecture as CircleModelV0, expressed with nn.Sequential.
# The model is moved to `device` because the code that follows feeds it
# inputs that have been moved with `.to(device)`.
model_0 = nn.Sequential(
    nn.Linear(in_features=2, out_features=5),
    nn.Linear(in_features=5, out_features=1),
).to(device)
(0): Linear(in_features=2, out_features=5, bias=True)
(1): Linear(in_features=5, out_features=1, bias=True)
untrained_preds = model_0(X_test.to(device))
print(f"Length of predictions: {len(untrained_preds)}, Shape: {untrained_preds.shape}")
print(f"Length of test samples: {len(y_test)}, Shape: {y_test.shape}")
print(f"\nFirst 10 predictions:\n{untrained_preds[:10]}")
print(f"\nFirst 10 test labels:\n{y_test[:10]}")
Length of predictions: 200, Shape: torch.Size([200, 1]) Length of test samples: 200, Shape: torch.Size([200]) First 10 predictions: tensor([[-0.1415], [-0.1357], [-0.0911], [-0.1561], [ 0.0132], [ 0.0160], [-0.0502], [-0.0144], [-0.0956], [-0.1341]], grad_fn=<SliceBackward>) First 10 test labels: tensor([1., 0., 1., 0., 1., 1., 0., 0., 1., 0.])
| 损失函数/优化器 | 问题类型 | PyTorch代码 |
| --- | --- | --- |
| 随机梯度下降(SGD)优化器 | 分类、回归等等 | torch.optim.SGD() |
| Adam优化器 | 分类、回归等等 | torch.optim.Adam() |
| 二元交叉熵损失 | 二分类 | torch.nn.BCEWithLogitsLoss 或者 torch.nn.BCELoss |
| 交叉熵损失 | 多分类 | torch.nn.CrossEntropyLoss |
| 平均绝对误差(MAE)或L1损失 | 回归 | torch.nn.L1Loss |
| 均方误差(MSE)或L2损失 | 回归 | torch.nn.MSELoss |
# Create a loss function and an optimizer.
# loss_fn = nn.BCELoss()  # BCELoss = no sigmoid built-in
loss_fn = nn.BCEWithLogitsLoss()  # BCEWithLogitsLoss = sigmoid built-in

# Create the optimizer. The learning rate matches the other SGD optimizers
# in this file (lr=0.1).
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)
# 评估指标视为衡量模型的正确程度。
def accuracy_fn(y_true, y_pred):
    """Return the percentage of predictions that equal their label.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, compared element-wise with
            ``y_true``.

    Returns:
        Accuracy as a float percentage in the range 0-100.
    """
    # torch.eq() marks the positions where the two tensors are equal.
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc
y_logits = model_0(X_test.to(device))[:5]
[ 0.0132]], grad_fn=<SliceBackward>)
y_pred_probs = torch.sigmoid(y_logits)
[0.5033]], grad_fn=<SigmoidBackward>)
# 为了在预测标签中转换我们的预测概率,我们可以对 sigmoid 激活函数的输出进行四舍五入
# Find the predicted labels (round the prediction probabilities)
y_preds = torch.round(y_pred_probs)
# In full
y_pred_labels = torch.round(torch.sigmoid(model_0(X_test.to(device))[:5]))
# Check for equality
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))
# Get rid of extra dimension
tensor([True, True, True, True, True])
tensor([0., 0., 0., 0., 1.], grad_fn=<SqueezeBackward0>)
tensor([1., 0., 1., 0., 1.])
torch.manual_seed(42)

# Set the number of epochs
epochs = 100

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Build training and evaluation loop
for epoch in range(epochs):
    ### Training
    model_0.train()

    # 1. Forward pass (model outputs raw logits).
    # squeeze() removes extra `1` dimensions; this won't work unless model
    # and data are on the same device.
    y_logits = model_0(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> pred probs -> pred labels

    # 2. Calculate loss/accuracy.
    # With nn.BCELoss the logits would need torch.sigmoid() applied first:
    #     loss = loss_fn(torch.sigmoid(y_logits), y_train)
    loss = loss_fn(y_logits, y_train)  # nn.BCEWithLogitsLoss works with raw logits
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_0.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_0(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        # 2. Calculate loss/accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Print out what's happening every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.69443, Accuracy: 45.00% | Test loss: 0.69336, Test acc: 45.50%
Epoch: 10 | Loss: 0.69395, Accuracy: 47.38% | Test loss: 0.69309, Test acc: 48.00%
Epoch: 20 | Loss: 0.69367, Accuracy: 48.00% | Test loss: 0.69302, Test acc: 48.50%
Epoch: 30 | Loss: 0.69349, Accuracy: 48.38% | Test loss: 0.69303, Test acc: 47.00%
Epoch: 40 | Loss: 0.69337, Accuracy: 48.38% | Test loss: 0.69308, Test acc: 47.50%
Epoch: 50 | Loss: 0.69328, Accuracy: 49.00% | Test loss: 0.69316, Test acc: 46.00%
Epoch: 60 | Loss: 0.69321, Accuracy: 49.62% | Test loss: 0.69324, Test acc: 45.00%
Epoch: 70 | Loss: 0.69316, Accuracy: 49.38% | Test loss: 0.69332, Test acc: 43.50%
Epoch: 80 | Loss: 0.69313, Accuracy: 48.62% | Test loss: 0.69340, Test acc: 49.00%
Epoch: 90 | Loss: 0.69310, Accuracy: 48.62% | Test loss: 0.69347, Test acc: 48.50%
import requests
from pathlib import Path
# Download helper functions from Learn PyTorch repo (if not already downloaded)
# Download helper_functions.py only when it is not already present.
if Path("helper_functions.py").is_file():
    print("helper_functions.py already exists, skipping download")
else:
    print("Downloading helper_functions.py")
    request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
    # Fail loudly on an HTTP error instead of saving an error page as a
    # Python module that is imported right afterwards.
    request.raise_for_status()
    with open("helper_functions.py", "wb") as f:
        f.write(request.content)
from helper_functions import plot_predictions, plot_decision_boundary
helper_functions.py already exists, skipping download
# Plot decision boundaries for training and test sets
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plot_decision_boundary(model_0, X_train, y_train)
plt.subplot(1, 2, 2)
plot_decision_boundary(model_0, X_test, y_test)
# 尝试解决模型的欠拟合问题。
# 可以手动调整的参数,他们被称为:超参数
class CircleModelV1(nn.Module):
    """Three stacked linear layers (2 -> 10 -> 10 -> 1) with no activations."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        self.layer_2 = nn.Linear(in_features=10, out_features=10)  # one extra layer
        self.layer_3 = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):  # note: always make sure forward is spelt correctly!
        # Equivalent to the step-by-step form
        #     z = self.layer_1(x); z = self.layer_2(z); z = self.layer_3(z)
        # written as a single nested call.
        return self.layer_3(self.layer_2(self.layer_1(x)))


model_1 = CircleModelV1().to(device)
model_1
(layer_1): Linear(in_features=2, out_features=10, bias=True)
(layer_2): Linear(in_features=10, out_features=10, bias=True)
(layer_3): Linear(in_features=10, out_features=1, bias=True)
loss_fn = nn.BCEWithLogitsLoss() # takes raw logits, so no sigmoid is needed on the inputs
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.1)
torch.manual_seed(42)

epochs = 1000  # Train for longer

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    ### Training
    # Switch back to training mode: the testing section below leaves the
    # model in eval mode at the end of every iteration.
    model_1.train()

    # 1. Forward pass
    y_logits = model_1(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> prediction probabilities -> prediction labels

    # 2. Calculate loss/accuracy
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_1.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_1(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        # 2. Calculate loss/accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Print out what's happening every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.69396, Accuracy: 50.88% | Test loss: 0.69261, Test acc: 51.00%
Epoch: 100 | Loss: 0.69305, Accuracy: 50.38% | Test loss: 0.69379, Test acc: 48.00%
Epoch: 200 | Loss: 0.69299, Accuracy: 51.12% | Test loss: 0.69437, Test acc: 46.00%
Epoch: 300 | Loss: 0.69298, Accuracy: 51.62% | Test loss: 0.69458, Test acc: 45.00%
Epoch: 400 | Loss: 0.69298, Accuracy: 51.12% | Test loss: 0.69465, Test acc: 46.00%
Epoch: 500 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69467, Test acc: 46.00%
Epoch: 600 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
Epoch: 700 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
Epoch: 800 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
Epoch: 900 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
# Parameters of the straight line used to generate the data.
weight, bias = 0.7, 0.3
# Range and spacing of the input values.
start, end, step = 0, 1, 0.01

# Generate the data: a column of inputs and the matching line values.
X_regression = torch.unsqueeze(torch.arange(start, end, step), dim=1)
y_regression = bias + weight * X_regression  # linear regression formula

# Look at the first five input/target pairs.
X_regression[:5], y_regression[:5]
100 (tensor([[0.0000], [0.0100], [0.0200], [0.0300], [0.0400]]), tensor([[0.3000], [0.3070], [0.3140], [0.3210], [0.3280]]))
# Split the data into a training set and a test set.
train_split = int(len(X_regression) * 0.8)  # first 80% of the samples are used for training

# Features
X_train_regression = X_regression[:train_split]
X_test_regression = X_regression[train_split:]
# Targets
y_train_regression = y_regression[:train_split]
y_test_regression = y_regression[train_split:]
# Check the lengths of each split
80 80 20 20
# Same layer layout as model_1, but with a single input feature for the
# regression data. Moved to `device` because the training code moves the
# regression tensors there before calling the model.
model_2 = nn.Sequential(
    nn.Linear(in_features=1, out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=1),
).to(device)
(0): Linear(in_features=1, out_features=10, bias=True)
(1): Linear(in_features=10, out_features=10, bias=True)
(2): Linear(in_features=10, out_features=1, bias=True)
# Use nn.L1Loss() (the same as mean absolute error) as the loss function and torch.optim.SGD() as the optimizer.
# Loss function
loss_fn = nn.L1Loss()
# Create the optimizer
optimizer = torch.optim.SGD(model_2.parameters(), lr=0.1)
torch.manual_seed(42)

# Set the number of epochs
epochs = 1000

# Put data to target device
X_train_regression, y_train_regression = X_train_regression.to(device), y_train_regression.to(device)
X_test_regression, y_test_regression = X_test_regression.to(device), y_test_regression.to(device)

for epoch in range(epochs):
    ### Training
    # Switch back to training mode: the testing section below leaves the
    # model in eval mode at the end of every iteration.
    model_2.train()

    # 1. Forward pass
    y_pred = model_2(X_train_regression)

    # 2. Calculate loss (no accuracy since it's a regression problem, not classification)
    loss = loss_fn(y_pred, y_train_regression)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_2.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_pred = model_2(X_test_regression)
        # 2. Calculate the loss
        test_loss = loss_fn(test_pred, y_test_regression)

    # Print out what's happening
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train loss: {loss:.5f}, Test loss: {test_loss:.5f}")
Epoch: 0 | Train loss: 0.75986, Test loss: 0.54143
Epoch: 100 | Train loss: 0.09309, Test loss: 0.02901
Epoch: 200 | Train loss: 0.07376, Test loss: 0.02850
Epoch: 300 | Train loss: 0.06745, Test loss: 0.00615
Epoch: 400 | Train loss: 0.06107, Test loss: 0.02004
Epoch: 500 | Train loss: 0.05698, Test loss: 0.01061
Epoch: 600 | Train loss: 0.04857, Test loss: 0.01326
Epoch: 700 | Train loss: 0.06109, Test loss: 0.02127
Epoch: 800 | Train loss: 0.05600, Test loss: 0.01425
Epoch: 900 | Train loss: 0.05571, Test loss: 0.00603
# 好的,与model_1分类数据不同,看起来model_2损失实际上正在下降。
# Make predictions (inference): evaluation mode, gradient tracking disabled.
model_2.eval()
with torch.no_grad():
    y_preds = model_2(X_test_regression)
# Plot data and predictions with data on the CPU (matplotlib can't handle data on the GPU)
# (try removing .cpu() from one of the below and see what happens)
# 生成和可视化数据
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles
n_samples = 1000

# Re-create the circles dataset with the same noise level and seed as the
# first make_circles call in this file.
X, y = make_circles(n_samples=n_samples,
                    noise=0.03,
                    random_state=42)

# Trailing semicolon suppresses the returned artist's repr in a notebook cell.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdBu);
# 现在让我们使用 80% 的数据用于训练和 20% 的数据用于测试,将其拆分为训练集和测试集。
# Convert the data to tensors, then split it into training and test sets.
import torch
from sklearn.model_selection import train_test_split

# Turn data into tensors
X = torch.from_numpy(X).type(torch.float)
y = torch.from_numpy(y).type(torch.float)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

X_train[:5], y_train[:5]
(tensor([[ 0.6579, -0.4651],
[ 0.6319, -0.7347],
[-1.0086, -0.1240],
[-0.9666, -0.2256],
[-0.1666, 0.7994]]),
tensor([1., 0., 0., 0., 1.]))
# Build a model with a non-linear activation function.
from torch import nn


class CircleModelV2(nn.Module):
    """Three linear layers (2 -> 10 -> 10 -> 1) with ReLU between them."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        self.layer_2 = nn.Linear(in_features=10, out_features=10)
        self.layer_3 = nn.Linear(in_features=10, out_features=1)
        self.relu = nn.ReLU()  # <- add in ReLU activation function
        # Can also put sigmoid in the model.
        # This would mean you don't need to use it on the predictions.
        # self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Intersperse the ReLU activation function between layers; the
        # final layer's output is returned as raw logits.
        return self.layer_3(self.relu(self.layer_2(self.relu(self.layer_1(x)))))


model_3 = CircleModelV2().to(device)
print(model_3)
(layer_1): Linear(in_features=2, out_features=10, bias=True)
(layer_2): Linear(in_features=10, out_features=10, bias=True)
(layer_3): Linear(in_features=10, out_features=1, bias=True)
(relu): ReLU()
# Set up the loss function and the optimizer
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model_3.parameters(), lr=0.1)
torch.manual_seed(42)

epochs = 1000

# Put all data on target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    ### Training
    # Switch back to training mode: the testing section below leaves the
    # model in eval mode at the end of every iteration.
    model_3.train()

    # 1. Forward pass
    y_logits = model_3(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> prediction probabilities -> prediction labels

    # 2. Calculate loss and accuracy
    loss = loss_fn(y_logits, y_train)  # BCEWithLogitsLoss calculates loss using logits
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_3.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_3(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))  # logits -> prediction probabilities -> prediction labels
        # 2. Calculate loss and accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Print out what's happening
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.69295, Accuracy: 50.00% | Test Loss: 0.69319, Test Accuracy: 50.00%
Epoch: 100 | Loss: 0.69115, Accuracy: 52.88% | Test Loss: 0.69102, Test Accuracy: 52.50%
Epoch: 200 | Loss: 0.68977, Accuracy: 53.37% | Test Loss: 0.68940, Test Accuracy: 55.00%
Epoch: 300 | Loss: 0.68795, Accuracy: 53.00% | Test Loss: 0.68723, Test Accuracy: 56.00%
Epoch: 400 | Loss: 0.68517, Accuracy: 52.75% | Test Loss: 0.68411, Test Accuracy: 56.50%
Epoch: 500 | Loss: 0.68102, Accuracy: 52.75% | Test Loss: 0.67941, Test Accuracy: 56.50%
Epoch: 600 | Loss: 0.67515, Accuracy: 54.50% | Test Loss: 0.67285, Test Accuracy: 56.00%
Epoch: 700 | Loss: 0.66659, Accuracy: 58.38% | Test Loss: 0.66322, Test Accuracy: 59.00%
Epoch: 800 | Loss: 0.65160, Accuracy: 64.00% | Test Loss: 0.64757, Test Accuracy: 67.50%
Epoch: 900 | Loss: 0.62362, Accuracy: 74.00% | Test Loss: 0.62145, Test Accuracy: 79.00%
# 做预测
model_3.eval()
with torch.no_grad():
    y_preds = torch.round(torch.sigmoid(model_3(X_test))).squeeze()
# Predictions were made on X_test, so compare them with the test labels
# (y_test), not with the first rows of the full label tensor `y`.
y_preds[:10], y_test[:10]  # want preds in same format as truth labels
(tensor([1., 0., 1., 0., 0., 1., 0., 0., 1., 0.]),
tensor([1., 1., 1., 1., 0., 1., 1., 1., 1., 0.]))
# Plot decision boundaries for training and test sets
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plot_decision_boundary(model_1, X_train, y_train) # model_1 = no non-linearity
plt.subplot(1, 2, 2)
plot_decision_boundary(model_3, X_test, y_test) # model_3 = has non-linearity
A = torch.arange(-10, 10, 1, dtype=torch.float32)
tensor([-10., -9., -8., -7., -6., -5., -4., -3., -2., -1., 0., 1.,
2., 3., 4., 5., 6., 7., 8., 9.])
# 手动创建一个ReLu函数
def relu(x):
    """Hand-written ReLU: element-wise maximum of 0 and ``x``.

    Args:
        x: Input tensor (torch.maximum requires tensor arguments).

    Returns:
        Tensor with every negative entry of ``x`` replaced by 0.
    """
    return torch.maximum(torch.tensor(0), x)
# 调用这个函数
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 3., 4., 5., 6., 7.,
8., 9.])
# 手动创建一个sigmoid函数
def sigmoid(x):
    """Hand-written sigmoid: ``1 / (1 + exp(-x))`` applied element-wise.

    Args:
        x: Input tensor.

    Returns:
        Tensor of the same shape as ``x``.
    """
    return 1 / (1 + torch.exp(-x))
tensor([4.5398e-05, 1.2339e-04, 3.3535e-04, 9.1105e-04, 2.4726e-03, 6.6929e-03,
1.7986e-02, 4.7426e-02, 1.1920e-01, 2.6894e-01, 5.0000e-01, 7.3106e-01,
8.8080e-01, 9.5257e-01, 9.8201e-01, 9.9331e-01, 9.9753e-01, 9.9909e-01,
9.9966e-01, 9.9988e-01])
# 1.使用 make_blobs() 创建一些多类数据。
# 2.将数据转换为张量(make_blobs() 默认返回 NumPy 数组)。
# 3.使用 train_test_split() 将数据拆分为训练集和测试集。
# 4.可视化数据。
# Import dependencies
import torch
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# Set the hyperparameters for data creation
NUM_CLASSES = 4  # number of classes
NUM_FEATURES = 2  # number of features
RANDOM_SEED = 42  # random seed

# 1. Create multi-class data
X_blob, y_blob = make_blobs(
    n_samples=1000,  # number of samples
    n_features=NUM_FEATURES,  # X features
    centers=NUM_CLASSES,  # y labels
    cluster_std=1.5,  # spread the clusters out a little (try changing this to 1.0, the default)
    random_state=RANDOM_SEED,
)

# 2. Turn data into tensors
X_blob = torch.from_numpy(X_blob).type(torch.float)
y_blob = torch.from_numpy(y_blob).type(torch.LongTensor)
print(X_blob[:5], y_blob[:5])  # print the first five samples

# 3. Split into train and test sets
X_blob_train, X_blob_test, y_blob_train, y_blob_test = train_test_split(
    X_blob,
    y_blob,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

# 4. Plot the data
plt.figure(figsize=(10, 7))
plt.scatter(X_blob[:, 0], X_blob[:, 1], c=y_blob, cmap=plt.cm.RdYlBu);
tensor([[-8.4134, 6.9352],
[-5.7665, -6.4312],
[-6.0421, -6.7661],
[ 3.9508, 0.6984],
[ 4.2505, -0.2815]]) tensor([3, 2, 2, 1, 1])
device = "cuda" if torch.cuda.is_available() else "cpu"
from torch import nn


# Build the model
class BlobModel(nn.Module):
    """Stack of three linear layers for multi-class classification."""

    def __init__(self, input_features, output_features, hidden_units=8):
        """Initializes all required hyperparameters for a multi-class classification model.

        Args:
            input_features (int): Number of input features to the model.
            output_features (int): Number of output features of the model
                (i.e. the number of classes).
            hidden_units (int): Number of neurons in each hidden layer, default 8.
        """
        super().__init__()
        # Three linear layers; the ReLU layers are left commented out as an
        # experiment for the reader.
        self.linear_layer_stack = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            # nn.ReLU(),  # <- does our dataset require non-linear layers? (try uncommenting and see if the results change)
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            # nn.ReLU(),  # <- does our dataset require non-linear layers? (try uncommenting and see if the results change)
            nn.Linear(in_features=hidden_units, out_features=output_features),  # how many classes are there?
        )

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        return self.linear_layer_stack(x)


# Create an instance of BlobModel and send it to the target device.
model_4 = BlobModel(input_features=NUM_FEATURES,
                    output_features=NUM_CLASSES,
                    hidden_units=8).to(device)
model_4
(linear_layer_stack): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): Linear(in_features=8, out_features=8, bias=True)
(2): Linear(in_features=8, out_features=4, bias=True)
# Create the loss function
loss_fn = nn.CrossEntropyLoss()
# Create an SGD optimizer. The learning rate matches the other SGD
# optimizers in this file (lr=0.1).
optimizer = torch.optim.SGD(model_4.parameters(), lr=0.1)
tensor([[-1.2711, -0.6494, -1.4740, -0.7044],
[ 0.2210, -1.5439, 0.0420, 1.1531],
[ 2.8698, 0.9143, 3.3169, 1.4027],
[ 1.9576, 0.3125, 2.2244, 1.1324],
[ 0.5458, -1.2381, 0.4441, 1.1804]], grad_fn=<SliceBackward>)
# 检查一下特征的维度和分类的类别
model_4(X_blob_train.to(device))[0].shape, NUM_CLASSES
(torch.Size([4]), 4)
# 输出从logits—(经过softmax变化)—>prediction probabilities—(经过argmax(dim=1))—>prediction labels
# torch.softmax(y_logits, dim=1).argmax(dim=1)
# Raw prediction logits. model_4 is the blob classifier, so it is fed the
# blob test set (X_blob_test), not the circles test set (X_test).
y_logits = model_4(X_blob_test.to(device))
# Convert the logits to prediction probabilities
y_pred_probs = torch.softmax(y_logits, dim=1)
tensor([[ 0.2341, -0.3357, 0.2307, 0.2534],
[ 0.1198, -0.3702, 0.0998, 0.1887],
[ 0.3790, -0.2037, 0.4095, 0.2689],
[ 0.1936, -0.3733, 0.1807, 0.2496],
[ 0.1338, -0.1378, 0.1487, 0.0247]], grad_fn=<SliceBackward>)
tensor([[0.2792, 0.1579, 0.2782, 0.2846],
[0.2729, 0.1672, 0.2675, 0.2924],
[0.2869, 0.1602, 0.2958, 0.2570],
[0.2769, 0.1571, 0.2733, 0.2928],
[0.2722, 0.2075, 0.2763, 0.2441]], grad_fn=<SliceBackward>)
# 每一类的概率相加起来等于1.
tensor(1., grad_fn=<SumBackward0>)
# 这些预测概率本质上是在说明模型认为目标X样本(输入)映射到每个类的程度
tensor([0.2792, 0.1579, 0.2782, 0.2846], grad_fn=<SelectBackward>)
# 对于多类分类问题,要将 logits 转换为预测概率,您可以使用 softmax 激活函数 ( torch.softmax)。
# Set the random seed
torch.manual_seed(42)

# Number of passes over the dataset
epochs = 100

# Put data on the target device
X_blob_train, y_blob_train = X_blob_train.to(device), y_blob_train.to(device)
X_blob_test, y_blob_test = X_blob_test.to(device), y_blob_test.to(device)

for epoch in range(epochs):
    ### Training
    model_4.train()

    # 1. Forward pass (model outputs raw logits)
    y_logits = model_4(X_blob_train)
    # go from logits -> prediction probabilities -> prediction labels
    y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1)

    # 2. Calculate loss and accuracy
    loss = loss_fn(y_logits, y_blob_train)
    acc = accuracy_fn(y_true=y_blob_train, y_pred=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_4.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_4(X_blob_test)
        test_pred = torch.softmax(test_logits, dim=1).argmax(dim=1)
        # 2. Calculate test loss and accuracy
        test_loss = loss_fn(test_logits, y_blob_test)
        test_acc = accuracy_fn(y_true=y_blob_test, y_pred=test_pred)

    # Print out what's happening
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Acc: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.02564, Acc: 99.25% | Test Loss: 0.01499, Test Acc: 99.50%
Epoch: 10 | Loss: 0.02555, Acc: 99.25% | Test Loss: 0.01485, Test Acc: 99.50%
Epoch: 20 | Loss: 0.02547, Acc: 99.25% | Test Loss: 0.01472, Test Acc: 99.50%
Epoch: 30 | Loss: 0.02539, Acc: 99.25% | Test Loss: 0.01460, Test Acc: 99.50%
Epoch: 40 | Loss: 0.02531, Acc: 99.25% | Test Loss: 0.01448, Test Acc: 99.50%
Epoch: 50 | Loss: 0.02524, Acc: 99.25% | Test Loss: 0.01437, Test Acc: 99.50%
Epoch: 60 | Loss: 0.02517, Acc: 99.25% | Test Loss: 0.01427, Test Acc: 99.50%
Epoch: 70 | Loss: 0.02510, Acc: 99.25% | Test Loss: 0.01417, Test Acc: 99.50%
Epoch: 80 | Loss: 0.02504, Acc: 99.25% | Test Loss: 0.01407, Test Acc: 99.50%
Epoch: 90 | Loss: 0.02498, Acc: 99.25% | Test Loss: 0.01398, Test Acc: 99.50%
# Make predictions: evaluation mode, gradient tracking disabled.
model_4.eval()
with torch.no_grad():
    y_logits = model_4(X_blob_test)
# 查看前十条的预测情况
tensor([[ 6.1082, 15.6307, -20.3789, -13.3291],
[ 7.1512, -18.6590, 5.0168, 14.9331],
[ -8.2533, -19.4368, 28.1576, 16.7717],
[ 2.4859, 11.6492, -11.9552, -9.7569],
[ 11.5548, 4.4087, -19.5344, -4.4759],
[ 7.9877, -23.1785, 7.3417, 18.6040],
[ -8.8218, -14.5741, 25.1713, 12.7964],
[ 10.1165, -1.6657, -12.6617, 0.6567],
[ -8.2441, -27.4645, 34.4467, 23.4065],
[ 10.4595, 0.5678, -14.9170, -1.2161]])
# Convert the raw prediction logits (y_logits) into prediction probabilities
y_pred_probs = torch.softmax(y_logits, dim=1)
# Convert the prediction probabilities into prediction labels
y_preds = y_pred_probs.argmax(dim=1)
# Compare the first 10 predictions with the first 10 test labels, then report accuracy on the whole test set
print(f"Predictions: {y_preds[:10]}\nLabels: {y_blob_test[:10]}")
print(f"Test accuracy: {accuracy_fn(y_true=y_blob_test, y_pred=y_preds)}%")
Predictions: tensor([1, 3, 2, 1, 0, 3, 2, 0, 2, 0])
Labels: tensor([1, 3, 2, 1, 0, 3, 2, 0, 2, 0])
Test accuracy: 99.5%
plt.figure(figsize=(12, 6))  # figure size
plt.subplot(1, 2, 1)  # 1 row, 2 columns, first panel
plt.title("Train")  # panel title
plot_decision_boundary(model_4, X_blob_train, y_blob_train)
plt.subplot(1, 2, 2)  # 1 row, 2 columns, second panel
plt.title("Test")  # panel title
plot_decision_boundary(model_4, X_blob_test, y_blob_test)

# 26. More classification evaluation metrics
# Accuracy: torchmetrics.Accuracy() or sklearn.metrics.accuracy_score()
# 精确率
# 召回率
# F1分数
# 混淆矩阵
# 分类报告
# 1.sklearn.metrics.classification_report()
# 安装torchmetrics
!pip -q install torchmetrics
from torchmetrics import Accuracy

# Create the metric and make sure it lives on the target device.
# Current torchmetrics releases require the task type (and, for multiclass,
# the number of classes) to be given explicitly.
torchmetrics_accuracy = Accuracy(task="multiclass", num_classes=NUM_CLASSES).to(device)

# Compute the accuracy
torchmetrics_accuracy(y_preds, y_blob_test)
