算法工程师的修养 | PyTorch 的 nn 详解
对您的一点要求
用 Tensor 白手起家实现一个网络
准备数据集
from pathlib import Path

import requests

# Local cache location for the pickled MNIST archive.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

# Download only once; later runs reuse the file that is already on disk.
if not (PATH / FILENAME).exists():
    response = requests.get(URL + FILENAME)
    # Fail loudly on an HTTP error instead of caching an error page as the dataset.
    response.raise_for_status()
    content = response.content
    # write_bytes opens, writes and closes the file, so no handle is leaked.
    (PATH / FILENAME).write_bytes(content)
python
import pickle
import gzip
# NOTE: pickle can execute arbitrary code while loading; only unpickle archives
# obtained from a source you trust.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    # The archive unpacks into three (inputs, labels) pairs; the third is ignored.
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
每张图片尺寸是 28 x 28,被存储为长度为 784(=28x28)的向量。可以利用 `matplotlib` 查看其中一张图像;在展示之前,需要先把它 reshape 为 2D。
```python
from matplotlib import pyplot
import numpy as np
# Each sample is stored flat; restore the 28 x 28 grid before plotting it in grayscale.
pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
# Shape of the whole training array (samples, values per sample).
print(x_train.shape)
import torch
# Convert the four arrays to torch tensors in a single pass.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)
# n: number of training samples, c: number of values per sample.
n, c = x_train.shape
print(x_train, y_train)
print(x_train.shape)
# Smallest and largest label value present in the training labels.
print(y_train.min(), y_train.max())
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]) tensor([5, 0, 4, ..., 8, 4, 8])
torch.Size([50000, 784])
tensor(0) tensor(9)
开始搭建网络(不借助外力 nn 什么的)
import math
# Scale the random weights by 1/sqrt(784) first and switch on gradient tracking
# afterwards, so the scaling step itself is not recorded by autograd.
weights = (torch.randn(784, 10) / math.sqrt(784)).requires_grad_()
# The bias starts at zero and is tracked by autograd as well.
bias = torch.zeros(10).requires_grad_()
def log_softmax(x):
return x - x.exp().sum(-1).log().unsqueeze(-1)
def model(xb):
    """Apply the linear map ``xb @ weights + bias`` followed by ``log_softmax``.

    Args:
        xb: Mini-batch of inputs; it is matrix-multiplied with the
            module-level ``weights``.

    Returns:
        The log-softmax of the linear scores, one row per input row.
    """
    scores = xb @ weights + bias
    return log_softmax(scores)
bs = 64  # batch size
xb = x_train[:bs]  # first mini-batch of inputs
preds = model(xb)  # predictions for that mini-batch
preds[0], preds.shape  # notebook-style expression; has no effect when run as a script
print(preds[0], preds.shape)
tensor([-1.9398, -2.3529, -2.2999, -2.6261, -2.6767, -1.3650, -2.3081, -2.7904,
-2.9199, -3.0043], grad_fn=<SelectBackward>) torch.Size([64, 10])
def nll(input, target):
    """Return the mean negative log-likelihood of the target classes.

    Args:
        input: 2-D tensor of log-probabilities, indexed as ``[row, class]``.
        target: 1-D integer tensor holding the class index for every row.

    Returns:
        A scalar tensor: minus the mean of ``input[i, target[i]]`` over all rows.
    """
    # Pair each row index with that row's target class to pick one entry per row.
    rows = range(target.shape[0])
    return -input[rows, target].mean()
# Use the hand-written negative log-likelihood as the loss for now.
loss_func = nll

yb = y_train[:bs]  # labels matching the first mini-batch
print(loss_func(preds, yb))
tensor(2.3554, grad_fn=<NegBackward>)
def accuracy(out, yb):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        out: 2-D tensor of per-class scores, one row per sample.
        yb: 1-D tensor of true class indices, one per row of ``out``.

    Returns:
        A scalar float tensor in ``[0, 1]``.
    """
    predicted = out.argmax(dim=1)
    # Booleans are cast to float so that the mean is the hit rate.
    return (predicted == yb).float().mean()
# Accuracy of the still-untrained model on the first mini-batch.
print(accuracy(preds, yb))
tensor(0.1562)
训练循环的每一次迭代包括以下步骤:
- 选择一个小批量的数据(大小为 bs);
- 使用模型进行预测;
- 计算损失;
- 调用 loss.backward() 更新模型参数的梯度,在这里是指权重(weights)和偏置(bias)。
from IPython.core.debugger import set_trace
lr = 0.5  # learning rate
epochs = 2  # how many epochs to train for

for epoch in range(epochs):
    # (n - 1) // bs + 1 is ceil(n / bs): the final, possibly shorter batch is included.
    for i in range((n - 1) // bs + 1):
        # set_trace()  # uncomment to step through a single iteration
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            # Gradients accumulate across backward() calls, so reset them.
            weights.grad.zero_()
            bias.grad.zero_()

# Loss and accuracy on the last mini-batch seen by the loop.
print(loss_func(model(xb), yb), accuracy(model(xb), yb))
tensor(0.0815, grad_fn=<NegBackward>) tensor(1.)
利用 nn 重构神经网络
使用 torch.nn.functional 重构损失函数和激活函数
import torch.nn.functional as F
# F.cross_entropy applies log-softmax and negative log-likelihood in one call,
# so the model below returns raw scores without an activation.
loss_func = F.cross_entropy


def model(xb):
    """Return the raw class scores ``xb @ weights + bias`` (no activation)."""
    return xb @ weights + bias


print(loss_func(model(xb), yb), accuracy(model(xb), yb))
tensor(0.0815, grad_fn=<NllLossBackward>) tensor(1.)
使用 nn.Module 重构神经网络
from torch import nn
class Mnist_Logistic(nn.Module):
    """Single linear layer with explicitly declared weight and bias parameters."""

    def __init__(self):
        """Create a 784 x 10 weight matrix scaled by 1/sqrt(784) and a zero bias."""
        super().__init__()
        # nn.Parameter registers the tensors so that parameters() yields them.
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        """Return the raw class scores ``xb @ weights + bias`` for a mini-batch."""
        return xb @ self.weights + self.bias
# Instantiate the module; calling the instance runs its forward() method.
model = Mnist_Logistic()
# Loss of the freshly initialised model on the current xb/yb mini-batch.
print(loss_func(model(xb), yb))
tensor(2.3154, grad_fn=<NllLossBackward>)
# Parameter update that spells out every parameter by name.
with torch.no_grad():
    weights -= weights.grad * lr
    bias -= bias.grad * lr
    # Reset the accumulated gradients for the next backward() call.
    weights.grad.zero_()
    bias.grad.zero_()
def fit():
    """Train the module-level ``model`` with plain mini-batch gradient descent.

    Reads ``epochs``, ``n``, ``bs``, ``x_train``, ``y_train``, ``loss_func`` and
    ``lr`` from module scope and updates the model's parameters in place.
    Returns nothing.
    """
    for epoch in range(epochs):
        # ceil(n / bs) batches, so the final shorter batch is not dropped.
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            # Update every registered parameter without recording the step.
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()
# Train the model in place.
fit()
# xb/yb inside fit() are local to it, so this uses the module-level xb/yb.
print(loss_func(model(xb), yb))
tensor(0.0830, grad_fn=<NllLossBackward>)
利用 nn.Linear 进行重构
利用 nn.Linear 进行重构
class Mnist_Logistic(nn.Module):
    """Single linear layer built on ``nn.Linear`` (784 inputs, 10 outputs)."""

    def __init__(self):
        """Create the linear layer; nn.Linear owns its weight and bias."""
        super().__init__()
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        """Return the raw class scores produced by the linear layer."""
        return self.lin(xb)
实例化定义好的模型,依旧用同样方法检测一下
# Fresh, untrained instance of the model.
model = Mnist_Logistic()
# Loss before any training step has been taken.
print(loss_func(model(xb), yb))
tensor(2.3734, grad_fn=<NllLossBackward>)
# Train with the same fit() as before; it works on the module-level model.
fit()
# Loss after training, on the module-level xb/yb mini-batch.
print(loss_func(model(xb), yb))
tensor(0.0822, grad_fn=<NllLossBackward>)
利用 optim 进行重构优化器
from torch import optim
def get_model():
    """Create a fresh ``Mnist_Logistic`` and an SGD optimizer over its parameters.

    Returns:
        A ``(model, optimizer)`` tuple; the optimizer uses the module-level ``lr``
        as its learning rate.
    """
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=lr)
model, opt = get_model()
print(loss_func(model(xb), yb))  # loss before training

for epoch in range(epochs):
    # ceil(n / bs) batches, so the final shorter batch is not dropped.
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()  # the optimizer applies the update to every parameter it was given
        opt.zero_grad()  # clear gradients so they do not accumulate into the next batch

print(loss_func(model(xb), yb))  # loss after training
tensor(2.3110, grad_fn=<NllLossBackward>)
tensor(0.0816, grad_fn=<NllLossBackward>)
使用 Dataset 进行重构
一个 Dataset 需要提供两个函数:`__len__` 函数(由 Python 的标准 `len` 函数调用)和 `__getitem__` 函数(接受索引参数,然后根据索引返回对应的数据)。
from torch.utils.data import TensorDataset
# Pair inputs and labels so that one slice returns both halves of a mini-batch.
# The dataset must exist before the loop below indexes it.
train_ds = TensorDataset(x_train, y_train)

model, opt = get_model()

for epoch in range(epochs):
    # ceil(n / bs) batches, so the final shorter batch is not dropped.
    for i in range((n - 1) // bs + 1):
        xb, yb = train_ds[i * bs: i * bs + bs]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
tensor(0.0819, grad_fn=<NllLossBackward>)
使用 DataLoader 重构数据加载器
from torch.utils.data import DataLoader
# Wrap inputs and labels in one dataset so they are indexed together.
train_ds = TensorDataset(x_train, y_train)
# The loader hands out mini-batches of bs samples from that dataset.
train_dl = DataLoader(train_ds, batch_size=bs)
from torch.utils.data import DataLoader
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)

model, opt = get_model()

for epoch in range(epochs):
    # The loader yields (inputs, labels) mini-batches, so no manual slicing is needed.
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
输出
tensor(0.0822, grad_fn=<NllLossBackward>)
添加验证
# Training data: shuffle=True asks the loader to reshuffle the samples.
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
# Validation data: not shuffled, and served in batches twice as large
# (presumably affordable because no gradients are kept during validation).
valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)
model, opt = get_model()

for epoch in range(epochs):
    # Training mode; only matters for layers that behave differently in training
    # and evaluation, but it is set here so the loop stays correct if such layers
    # are added.
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    # Evaluation mode, with gradient tracking switched off for the validation pass.
    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)

    # Mean of the per-batch mean losses; exact only if all batches have equal size.
    print(epoch, valid_loss / len(valid_dl))
0 tensor(0.3134)
1 tensor(0.4116)
猜您喜欢:
CVPR 2021 | GAN的说话人驱动、3D人脸论文汇总
附下载 |《TensorFlow 2.0 深度学习算法实战》
评论