PyTorch Computer Vision

0 PyTorch 中 Computer vision 的库

  • torchvision:PyTorch Computer Vision基本库
  • torchvision.datasets:获取数据和数据加载函数
  • torchvision.models:获得预训练模型,可以利用它来解决自己的问题
  • torchvision.transforms:操作视觉数据(图片)使得它能够很好的适用于机器学习模型
  • torch.utils.data.Datasets:PyTorch的基本数据集类
  • torch.utils.data.DataLoader:从数据集上创建一个python可迭代的对象
# 导入 PyTorch 包
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# 导入 torchvision 包
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

# 导入可视化包
import matplotlib.pyplot as plt

# 查看包的版本
print(torch.__version__)
print(torchvision.__version__)

2.3.1+cu118
0.18.1+cpu

1 获得一个数据集

我们这里使用的数据集是 FashionMNIST

# 创建训练数据
train_data = datasets.FashionMNIST(root='data', # 存放数据的路径
                                   train=True, # 我们需要的是训练数据集
                                   download=True, # 是否需要下载数据
                                   transform=transforms.ToTensor(), # 我们需要如何转换数据
                                   target_transform=None #我们需要如何转换标签
                                   )

# 创建测试数据
test_data = datasets.FashionMNIST(root='data',
                                  train=False,
                                  download=True,
                                  transform=transforms.ToTensor(),
                                  target_transform=None
                                  )
len(train_data), len(test_data)

(60000, 10000)

# 查看一下数据
train_data[0]

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000],

0.0000, 0.0000, 0.0000, 0.0000]]]),
9)
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings…

可以看出这里的数据分为两部分,前面是图片数据被转换成了 tensor, 后面的 9 就是标签

image, label = train_data[0]
image, label

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000],

[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000]]]),
9)
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings…

class_names = train_data.classes
class_names

[‘T-shirt/top’,
‘Trouser’,
‘Pullover’,
‘Dress’,
‘Coat’,
‘Sandal’,
‘Shirt’,
‘Sneaker’,
‘Bag’,
‘Ankle boot’]

class_to_idx = train_data.class_to_idx
class_to_idx

{‘T-shirt/top’: 0,
‘Trouser’: 1,
‘Pullover’: 2,
‘Dress’: 3,
‘Coat’: 4,
‘Sandal’: 5,
‘Shirt’: 6,
‘Sneaker’: 7,
‘Bag’: 8,
‘Ankle boot’: 9}

train_data.targets

tensor([9, 0, 0, …, 3, 0, 5])

1.1 查看数据的输入和输出形状

# 查看图片的形状
print(f"Image shape:{image.shape}")
print(f"Image label:{class_names[label]}")

Image shape:torch.Size([1, 28, 28])
Image label:Ankle boot

1.2 可视化数据

print(f"Image shape:{image.shape}")
plt.imshow(image.squeeze())
plt.title(label)

Image shape:torch.Size([1, 28, 28])
Text(0.5, 1.0, ‘9’)
在这里插入图片描述

plt.imshow(image.squeeze(), cmap="gray")
plt.title(class_names[label]);
plt.axis(False);

在这里插入图片描述

# 绘制更多的图像
# torch.manual_seed(42)
fig = plt.figure(figsize=(9,9))
rows, cols = 4, 4
for i in range(1, rows*cols + 1):
    random_idx = torch.randint(0, len(train_data), size=[1]).item()
    image, label = train_data[random_idx]
    fig.add_subplot(rows, cols, i)
    plt.imshow(image.squeeze(), cmap="gray")
    plt.title(class_names[label])
    plt.axis(False);

在这里插入图片描述
思考一下,为这些衣服的图片创建模型,模型中只包含纯粹的线性方法吗?还是我们需要非线性的方法?

train_data, test_data

(Dataset FashionMNIST
Number of datapoints: 60000
Root location: data
Split: Train
StandardTransform
Transform: ToTensor(),
Dataset FashionMNIST
Number of datapoints: 10000
Root location: data
Split: Test
StandardTransform
Transform: ToTensor())

2 准备 DataLoader

DataLoader 可以将我们的数据集转换为 python 的可迭代对象,更具体地说,就是将我们庞大的数据转换为小一点的数据,batches(mini-batches)
For more on mini-batches, see here: https://youtu.be/l4lSUAcvHFs

# 设置 batch 的超参数
BATCH_SIZE = 32

# 将数据集转换为可迭代对象 batch
train_dataloader = DataLoader(dataset=train_data,
                              shuffle=True, # 将顺序打断,以免模型学习的是顺序
                              batch_size=BATCH_SIZE)
test_dataloader = DataLoader(dataset=test_data,
                             shuffle=False, # 测试数据集模型没有见过,顺序的影响不大
                             batch_size=BATCH_SIZE)

train_dataloader, test_dataloader

(<torch.utils.data.dataloader.DataLoader at 0x1e9adcf8670>,
<torch.utils.data.dataloader.DataLoader at 0x1e9add46cd0>)

# 查看我们创建的数据
print(f"DataLoaders:{train_dataloader, test_dataloader}")
print(f"Length of train_dataloader:{len(train_dataloader)} batches of {BATCH_SIZE}...")
print(f"Length of test_dataloader:{len(test_dataloader)} batches of {BATCH_SIZE}...")

DataLoaders:(<torch.utils.data.dataloader.DataLoader object at 0x000001E9ADCF8670>, <torch.utils.data.dataloader.DataLoader object at 0x000001E9ADD46CD0>)
Length of train_dataloader:1875 batches of 32…
Length of test_dataloader:313 batches of 32…

# 查看 train_dataloader 具体数据
train_features_batch, train_labels_batch = next(iter(train_dataloader))
train_features_batch.shape, train_labels_batch.shape

(torch.Size([32, 1, 28, 28]), torch.Size([32]))

# 可视化一个样例
torch.manual_seed(42)
random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
image, label = train_features_batch[random_idx], train_labels_batch[random_idx]
plt.imshow(image.squeeze(), cmap="gray")
plt.title(class_names[label])
plt.axis(False)
print(f"Image size:{image.shape}")
print(f"Label:{label}, label size:{label.shape}")

Image size:torch.Size([1, 28, 28])
Label:4, label size:torch.Size([])

在这里插入图片描述

3 Model 0: 创建一个 baseline model

当我们开始创建一系列机器学习实验的时候,最好的方式是从一个基准模型开始。
一个基准模型就是一个简单的基本模型,其他的改进就是从这个模型开始的。
换句话说,从一个简单模型开始,添加复杂性和难度,慢慢增加

# 创建 flatten 层,展平,就是将数据展平展开
flatten_model = nn.Flatten()

# 获取一个单一的样本
x = train_features_batch[0]

# Flatten 样本
output = flatten_model(x)

print(f"Shape before flattening:{x.shape}")
print(f"Shape after flattening:{output.shape}")

Shape before flattening:torch.Size([1, 28, 28])
Shape after flattening:torch.Size([1, 784])

可以看出这个结果就是 28*28

class FashionMNISTModelV0(nn.Module):
    def __init__(self,
                 input_shape:int,
                 hidden_units:int,
                 output_shape:int
                 ):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape, 
                      out_features=hidden_units),
            nn.Linear(in_features=hidden_units, 
                      out_features=output_shape)
        )
    def forward(self, x):
        return self.layer_stack(x)
torch.manual_seed(42)

model_0 = FashionMNISTModelV0(
    input_shape=28*28,
    hidden_units=10,
    output_shape=len(class_names)
).to("cpu")

model_0

FashionMNISTModelV0(
(layer_stack): Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=10, bias=True)
(2): Linear(in_features=10, out_features=10, bias=True)
)
)

dummy_x = torch.rand([1, 1, 28, 28]) # batch_size, color_channel, width, height
model_0(dummy_x)

tensor([[-0.0315, 0.3171, 0.0531, -0.2525, 0.5959, 0.2112, 0.3233, 0.2694,
-0.1004, 0.0157]], grad_fn=)

model_0.state_dict()

OrderedDict([(‘layer_stack.1.weight’,
tensor([[ 0.0273, 0.0296, -0.0084, …, -0.0142, 0.0093, 0.0135],
[-0.0188, -0.0354, 0.0187, …, -0.0106, -0.0001, 0.0115],
[-0.0008, 0.0017, 0.0045, …, -0.0127, -0.0188, 0.0059],
…,
[-0.0116, 0.0273, -0.0344, …, 0.0176, 0.0283, -0.0011],
[-0.0230, 0.0257, 0.0291, …, -0.0187, -0.0087, 0.0001],
[ 0.0176, -0.0147, 0.0053, …, -0.0336, -0.0221, 0.0205]])),
(‘layer_stack.1.bias’,
tensor([-0.0093, 0.0283, -0.0033, 0.0255, 0.0017, 0.0037, -0.0302, -0.0123,
0.0018, 0.0163])),
(‘layer_stack.2.weight’,
tensor([[ 0.0614, -0.0687, 0.0021, 0.2718, 0.2109, 0.1079, -0.2279, -0.1063,
0.2019, 0.2847],
[-0.1495, 0.1344, -0.0740, 0.2006, -0.0475, -0.2514, -0.3130, -0.0118,
0.0932, -0.1864],
[ 0.2488, 0.1500, 0.1907, 0.1457, -0.3050, -0.0580, 0.1643, 0.1565,
-0.2877, -0.1792],
[ 0.2305, -0.2618, 0.2397, -0.0610, 0.0232, 0.1542, 0.0851, -0.2027,
0.1030, -0.2715],
[-0.1596, -0.0555, -0.0633, 0.2302, -0.1726, 0.2654, 0.1473, 0.1029,
0.2252, -0.2160],
[-0.2725, 0.0118, 0.1559, 0.1596, 0.0132, 0.3024, 0.1124, 0.1366,
-0.1533, 0.0965],
[-0.1184, -0.2555, -0.2057, -0.1909, -0.0477, -0.1324, 0.2905, 0.1307,

[-0.1310, 0.0645, -0.1171, 0.2168, -0.0245, -0.2820, 0.0736, 0.2621,
0.0012, -0.0810]])),
(‘layer_stack.2.bias’,
tensor([-0.0087, 0.1791, 0.2712, -0.0791, 0.1685, 0.1762, 0.2825, 0.2266,
-0.2612, -0.2613]))])
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings…

3.1 设置损失函数、优化器和评估指标

多分类问题: CrossEntropyLoss()
优化器: SGD()
评估指标:accuracy

import requests
from pathlib import Path

# 下载 helper functions from PyTorch repo
if Path("helper_functions.py").is_file():
    print("helper_functions.py is already exists, skipping download...")
else:
    print("Downloading helper_functions.py")
    request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
    with open("helper_functions.py", "wb") as f:
        f.write(request.content)

helper_functions.py is already exists, skipping download…

from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model_0.parameters(),
                      lr=0.1)

3.2 创建一个函数来给我们的实验计时

要跟踪的东西是:

  1. 模型的性能(loss和accuracy值)
  2. 它运行的速度
from timeit import default_timer as timer
def print_train_time(start:float,
                     end:float,
                     device:torch.device=None):
    """Prints difference between start and end time."""
    total_time = end - start
    print(f"Train time on {device}:{total_time:.3f} seconds")
    return total_time
start_time = timer()
# 一些代码
end_time = timer()
print_train_time(start=start_time,
                 end=end_time,
                 device="cpu")

Train time on cpu:0.000 seconds
1.3100000018084756e-05

3.3 在批量数据集上创建训练和测试循环

Note:由于我们在batches上进行计算,所以optimizer会一次性更新每一个batch而不是每一个epoch

# 进度条柱子,引入tqdm
from tqdm.auto import tqdm

如果报错的话可能是没有安装这个包,执行下面这个操作即可

pip install tqdm

Requirement already satisfied: tqdm in d:\anaconda3\envs\limu\lib\site-packages (4.66.4)
Requirement already satisfied: colorama in d:\anaconda3\envs\limu\lib\site-packages (from tqdm) (0.4.6)
Note: you may need to restart the kernel to use updated packages.

我这是已经安装过的提示。

torch.manual_seed(42)
train_time_start_on_cpu = timer()

# 设置训练周期(这里采用少量训练周期,可以花费更少的时间看看我们的模型性能如何)
epochs=3

# 这里加了tqdm,等会就知道它的作用啦
for epoch in tqdm(range(epochs)):
    print(f"Epoch:{epoch}\n-------")
    # Training
    train_loss = 0
    # 在每一个训练batch中加入循环进行遍历
    for batch, (X,y) in enumerate(train_dataloader):
        model_0.train()
        y_pred = model_0(X)
        
        loss = loss_fn(y_pred, y)
        train_loss += loss # 这是所有batch的loss,会进行累加
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 400 == 0:
            print(f"Look at {batch * len(X)}/{len(train_dataloader)} samples.")
            
    train_loss /= len(train_dataloader)
    
    # Testing
    test_loss, test_acc = 0, 0
    model_0.eval()
    with torch.inference_mode():
        for X_test, y_test in test_dataloader:
            test_pred = model_0(X_test)
            test_loss += loss_fn(test_pred, y_test)
            test_acc += accuracy_fn(y_true=y_test,
                                    y_pred=test_pred.argmax(dim=1))
        test_loss /= len(test_dataloader)
        test_acc /= len(test_dataloader)
        
    print(f"\nTrain loss:{train_loss:.4f} | Test loss:{test_loss:.4f} | Tess acc:{test_acc:.2f}%")
    
# 计算训练时间
train_time_end_on_cpu = timer()
total_train_time_on_model_0 = print_train_time(start=train_time_start_on_cpu,
                                               end=train_time_end_on_cpu,
                                               device=str(next(model_0.parameters()).device))

在这里插入图片描述

Epoch:0
Look at 0/1875 samples.
Look at 12800/1875 samples.
Look at 25600/1875 samples.
Look at 38400/1875 samples.
Look at 51200/1875 samples.

Train loss:0.5904 | Test loss:0.5095 | Tess acc:82.04%

Epoch:1
Look at 0/1875 samples.
Look at 12800/1875 samples.
Look at 25600/1875 samples.
Look at 38400/1875 samples.
Look at 51200/1875 samples.

Train loss:0.4763 | Test loss:0.4799 | Tess acc:83.20%

Epoch:2

Look at 0/1875 samples.
Look at 12800/1875 samples.
Look at 25600/1875 samples.
Look at 38400/1875 samples.
Look at 51200/1875 samples.

Train loss:0.4550 | Test loss:0.4766 | Tess acc:83.43%
Train time on cpu:24.777 seconds

4 预测获得model 0 的结果

torch.manual_seed(42)
device="cpu"
def eval_model(model:torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               accuracy_fn,
               device=device):
    """Return a dictionary containing the results of model predictioning on data_loader."""
    loss, acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for X, y in tqdm(data_loader):
            # 将数据与设备无关
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss += loss_fn(y_pred, y)
            acc += accuracy_fn(y_true=y,
                               y_pred=y_pred.argmax(dim=1))
        loss /= len(data_loader)
        acc /= len(data_loader)
        
    return {"model_name":model.__class__.__name__,
            "model_loss":loss.item(),
            "model_acc":acc}

# 计算model 0在测试数据集上的结果

model_0_results = eval_model(model=model_0,
                             data_loader=test_dataloader,
                             loss_fn=loss_fn,
                             accuracy_fn=accuracy_fn,
                             device=device)

model_0_results

在这里插入图片描述
这里就是很需要注意这个per batch的loss和acc的计算。
ok,BB,如果文档对您有用的话,记得给俺点一个赞赞

谢谢啦!

点赞(0) 打赏

评论列表 共有 0 条评论

暂无评论

微信公众账号

微信扫一扫加关注

发表
评论
返回
顶部