pytorch

PyTorch tutorial

To follow this tutorial you need basic Python knowledge as a prerequisite.

Reference video

Managing environments with conda

Installing conda

Note: on a Raspberry Pi, conda may fail to install and report the error Illegal instruction "$CONDA_EXEC" constructor --prefix "$PREFIX" --extract-conda-pkgs

The cause is that the latest conda build is not supported there; consider installing an older version with the following commands:

sudo wget http://repo.continuum.io/miniconda/Miniconda3-py39_4.9.2-Linux-aarch64.sh

sudo /bin/bash Miniconda3-py39_4.9.2-Linux-aarch64.sh

Creating an environment

Open the Anaconda Prompt.

conda create -n pytorch python=3.11
(create / environment name / Python version)
(base) C:\Users\CJL>conda create -n pytorch python=3.11
Collecting package metadata (current_repodata.json): done
Solving environment: done


==> WARNING: A newer version of conda exists. <==
current version: 23.7.4
latest version: 23.9.0

Please update conda by running

$ conda update -n base -c defaults conda

Or to minimize the number of packages updated during conda update use

conda install conda=23.9.0



## Package Plan ##

environment location: D:\AI\anaconda\envs\pytorch

added / updated specs:
- python=3.11


The following packages will be downloaded:

package | build
---------------------------|-----------------
openssl-3.0.11 | h2bbff1b_2 7.4 MB
wheel-0.41.2 | py311haa95532_0 163 KB
------------------------------------------------------------
Total: 7.6 MB

The following NEW packages will be INSTALLED:

bzip2 pkgs/main/win-64::bzip2-1.0.8-he774522_0
ca-certificates pkgs/main/win-64::ca-certificates-2023.08.22-haa95532_0
libffi pkgs/main/win-64::libffi-3.4.4-hd77b12b_0
openssl pkgs/main/win-64::openssl-3.0.11-h2bbff1b_2
pip pkgs/main/win-64::pip-23.2.1-py311haa95532_0
python pkgs/main/win-64::python-3.11.5-he1021f5_0
setuptools pkgs/main/win-64::setuptools-68.0.0-py311haa95532_0
sqlite pkgs/main/win-64::sqlite-3.41.2-h2bbff1b_0
tk pkgs/main/win-64::tk-8.6.12-h2bbff1b_0
tzdata pkgs/main/noarch::tzdata-2023c-h04d1e81_0
vc pkgs/main/win-64::vc-14.2-h21ff451_1
vs2015_runtime pkgs/main/win-64::vs2015_runtime-14.27.29016-h5e58377_2
wheel pkgs/main/win-64::wheel-0.41.2-py311haa95532_0
xz pkgs/main/win-64::xz-5.4.2-h8cc25b3_0
zlib pkgs/main/win-64::zlib-1.2.13-h8cc25b3_0


Proceed ([y]/n)? y


Downloading and Extracting Packages

Preparing transaction: done
Verifying transaction: done
Executing transaction: done
#
# To activate this environment, use
#
# $ conda activate pytorch
#
# To deactivate an active environment, use
#
# $ conda deactivate

Activate the environment named pytorch

conda activate pytorch

Use pip list to see all packages currently installed. Then install the CPU-only build of PyTorch:

conda install pytorch torchvision torchaudio cpuonly -c pytorch

Type jupyter notebook to launch Jupyter.

Shift+Enter runs the current cell.

Hands-on with the Dataset class

Read the list of files in a folder:

import os             # Python's standard library for OS functionality
dir_path = "D:\\AI\\doc\\TrainData\\hymenoptera_data\\hymenoptera_data\\train\\ants"
imp_path_list = os.listdir(dir_path)
print(f"The file list in dir_path is {imp_path_list}")
The file list in dir_path is ['0013035.jpg', '1030023514_aad5c608f9.jpg', '1095476100_3906d8afde.jpg', '1099452230_d1949d3250.jpg', '116570827_e9c126745d.jpg', '1225872729_6f0856588f.jpg', .....(ellipsis added by hand), 'VietnameseAntMimicSpider.jpg']

Loading an image

from PIL import Image
img_path="D:/AI/doc/TrainData/hymenoptera_data/hymenoptera_data/train/ants/0013035.jpg"
img=Image.open(img_path)
img.show() # display the image

Implementing the class

from torch.utils.data import Dataset
import os  # Python's standard library for OS functionality
from PIL import Image

class MyData(Dataset):

    def __init__(self, root_dir, label_dir):
        '''
        Constructor
        :param root_dir: root directory of the dataset
        :param label_dir: path of the images (relative to the root directory)
        '''
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)  # join the two paths
        self.img_path = os.listdir(self.path)  # list the files in that path

    def __getitem__(self, idx):
        img_name = self.img_path[idx]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = Image.open(img_item_path)  # read the image
        label = self.label_dir
        return img, label

    def __len__(self):
        '''
        Return the number of files in the file list
        :return: number of files
        '''
        return len(self.img_path)


# ants
root_dir = "D:\\AI\\doc"
label_dir = "TrainData\\hymenoptera_data\\hymenoptera_data\\train\\ants"
ants_dataset = MyData(root_dir, label_dir)
img, path = ants_dataset[0]
img.show()  # opens the image
print(path)

# bees
bees_label_dir = "TrainData\\hymenoptera_data\\hymenoptera_data\\train\\bees"
bees_dataset = MyData(root_dir, bees_label_dir)
img, path = bees_dataset[0]
img.show()  # opens the image
print(path)

# concatenate the datasets
train_dataset = ants_dataset + bees_dataset
print(f"ants dataset length {len(ants_dataset)}, bees dataset length {len(bees_dataset)}, combined length {len(train_dataset)}")
TrainData\hymenoptera_data\hymenoptera_data\train\ants
TrainData\hymenoptera_data\hymenoptera_data\train\bees
ants dataset length 124, bees dataset length 121, combined length 245
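
The + between the two datasets works because Dataset.__add__ returns a torch.utils.data.ConcatDataset. As a minimal sketch (reusing ants_dataset and bees_dataset from the snippet above), the same concatenation can be written explicitly:

from torch.utils.data import ConcatDataset

# Equivalent to ants_dataset + bees_dataset: indices 0..123 map to the ants
# dataset, indices 124..244 to the bees dataset.
train_dataset = ConcatDataset([ants_dataset, bees_dataset])
img, label = train_dataset[len(ants_dataset)]  # the first bee sample
print(label)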

Using TensorBoard

Basic usage

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("logs")
for i in range(100):
    writer.add_scalar("y=x^2", i**2, i)
writer.close()

Run tensorboard --logdir=20231011/logs --port=6007 in the console to view the plotted curve.

Adding images

image_path = "D:\\AI\\doc\\TrainData\\data\\train\\ants_image\\0013035.jpg"
from PIL import Image
# load the image
img = Image.open(image_path)
print(type(img))
import numpy as np
# convert the image to a numpy array so that the function below can consume it
img_array = np.array(img)
print(type(img_array))
print(img_array.shape)
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("ants1")
writer.add_image("test01", img_array, 1, dataformats='HWC')  # 1 is the step
# open the second image
image_path = "D:\\AI\\doc\\TrainData\\data\\train\\ants_image\\6240329_72c01e663e.jpg"
img = Image.open(image_path)
img_array = np.array(img)
print(type(img_array))
print(img_array.shape)
writer.add_image("test01", img_array, 2, dataformats='HWC')  # 2 is the step

writer.close()
<class 'PIL.JpegImagePlugin.JpegImageFile'>
<class 'numpy.ndarray'>
(512, 768, 3) # note: (H, W, C) = (height, width, channels); the 3 channels come last
<class 'numpy.ndarray'>
(369, 500, 3)

Run tensorboard --logdir=20231011/ants1 --port=6007 in the console, open the page, and drag the slider to see the two different images.

Using transforms

Basic usage

from PIL import Image
from torchvision import transforms
image_path = "D:\\AI\\doc\\TrainData\\data\\train\\ants_image\\0013035.jpg"
img = Image.open(image_path)

tensor_trans = transforms.ToTensor()  # define a transform
tensor_img = tensor_trans(img)        # apply the transform to img

print(tensor_img)
tensor([[[0.3137, 0.3137, 0.3137,  ..., 0.3176, 0.3098, 0.2980],
[0.3176, 0.3176, 0.3176, ..., 0.3176, 0.3098, 0.2980],
[0.3216, 0.3216, 0.3216, ..., 0.3137, 0.3098, 0.3020],
...,
[0.3412, 0.3412, 0.3373, ..., 0.1725, 0.3725, 0.3529],
[0.3412, 0.3412, 0.3373, ..., 0.3294, 0.3529, 0.3294],
[0.3412, 0.3412, 0.3373, ..., 0.3098, 0.3059, 0.3294]],

[[0.5922, 0.5922, 0.5922, ..., 0.5961, 0.5882, 0.5765],
[0.5961, 0.5961, 0.5961, ..., 0.5961, 0.5882, 0.5765],
[0.6000, 0.6000, 0.6000, ..., 0.5922, 0.5882, 0.5804],
...,
[0.6275, 0.6275, 0.6235, ..., 0.3608, 0.6196, 0.6157],
[0.6275, 0.6275, 0.6235, ..., 0.5765, 0.6275, 0.5961],
[0.6275, 0.6275, 0.6235, ..., 0.6275, 0.6235, 0.6314]],

[[0.9137, 0.9137, 0.9137, ..., 0.9176, 0.9098, 0.8980],
[0.9176, 0.9176, 0.9176, ..., 0.9176, 0.9098, 0.8980],
[0.9216, 0.9216, 0.9216, ..., 0.9137, 0.9098, 0.9020],
...,
[0.9294, 0.9294, 0.9255, ..., 0.5529, 0.9216, 0.8941],
[0.9294, 0.9294, 0.9255, ..., 0.8863, 1.0000, 0.9137],
[0.9294, 0.9294, 0.9255, ..., 0.9490, 0.9804, 0.9137]]])

The tensor data type carries, on top of the raw pixel values, the extra attributes that the neural-network machinery relies on (gradient-related information, device, dtype, and so on).
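
A quick way to see what a tensor carries beyond the raw pixel values is to inspect a few of its attributes. A minimal sketch, reusing the image path from the snippet above:

from PIL import Image
from torchvision import transforms

img = Image.open("D:\\AI\\doc\\TrainData\\data\\train\\ants_image\\0013035.jpg")
t = transforms.ToTensor()(img)

print(t.shape)          # torch.Size([3, 512, 768]): channels first, unlike the numpy HWC layout
print(t.dtype)          # torch.float32, values scaled to [0, 1]
print(t.device)         # cpu (or cuda:0 once moved to the GPU)
print(t.requires_grad)  # False until you opt in to autograd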

from PIL import Image
from torchvision import transforms
image_path = "D:\\AI\\doc\\TrainData\\data\\train\\ants_image\\0013035.jpg"
img = Image.open(image_path)

tensor_trans = transforms.ToTensor()  # define a transform
tensor_img = tensor_trans(img)        # apply the transform to img
# reusing the previous snippet
import cv2
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("test02")
writer.add_image("Tensor.jpg", tensor_img)
writer.close()

Run tensorboard --logdir=20231011/test02 in the console to see the image in the browser.

Common transforms (1: Normalize)

The normalization operation

from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

writer = SummaryWriter("test03")
img = Image.open("CJL.jpg")

# ToTensor
trans_totensor = transforms.ToTensor()
img_tensor = trans_totensor(img)
writer.add_image("ToTensor", img_tensor)

# Normalize
trans_norm = transforms.Normalize([1, 2, 3], [3, 2, 1])  # per-channel means and per-channel standard deviations; output[channel] = (input[channel] - mean[channel]) / std[channel]
img_norm = trans_norm(img_tensor)
writer.add_image("Normalize", img_norm, 1)
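
As a quick sanity check of that formula, apply the same Normalize transform to a single made-up pixel (the mean/std values match the snippet above; the pixel value 0.5 is only illustrative):

import torch
from torchvision import transforms

trans_norm = transforms.Normalize([1, 2, 3], [3, 2, 1])
pixel = torch.tensor([0.5, 0.5, 0.5]).reshape(3, 1, 1)  # one pixel, three channels

print(trans_norm(pixel).flatten())
# by hand: (0.5-1)/3 = -0.1667, (0.5-2)/2 = -0.75, (0.5-3)/1 = -2.5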

Use Compose to chain multiple operations

from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

writer = SummaryWriter("test04")
img = Image.open("CJL.jpg")

# ToTensor
trans_totensor = transforms.ToTensor()
# Resize
trans_resize = transforms.Resize([300, 500])  # (h, w)
# compose
combine = transforms.Compose([trans_resize, trans_totensor])  # use Compose to chain multiple operations

img_1 = combine(img)
writer.add_image("img_1", img_1, 1)

You can see that the image has been squashed to the new size.

Using a public dataset

Here the handwritten-digit dataset (MNIST) is used as the example.

import torchvision
from torch.utils.tensorboard import SummaryWriter
dataset_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])  # define the transform

train_set = torchvision.datasets.MNIST(root="D:\\AI\\doc\\20231011\\MNIST_data", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.MNIST(root="D:\\AI\\doc\\20231011\\MNIST_data", train=False, transform=dataset_transform, download=True)

print("class list", test_set.classes)
img, target = test_set[0]
print("class of img", test_set.classes[target])

# writer
writer = SummaryWriter("test05")
for i in range(10):
    img, idx = test_set[i]
    writer.add_image("test", img, i)
writer.close()
class list ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
class of img 7 - seven

Using DataLoader

In DataLoader, num_workers is the number of worker processes used for loading; the default is 0 (only the main process is used). On Windows, setting it to a value greater than 0 reportedly raises errors unless the dataloading code is guarded properly, as shown in the sketch after the example below.

import torchvision.datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

test_data = torchvision.datasets.CIFAR10("./dataset", download=True, train=False,
                                         transform=torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)
# the first image and its target in the test dataset
img, target = test_data[0]
print(img.shape)
print(target)

writer = SummaryWriter("dataloader")
for epoch in range(2):
    step = 0
    for data in test_loader:
        imgs, targets = data
        writer.add_images("Epoch:{}".format(epoch), imgs, step)
        step += 1


writer.close()
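
If you do want num_workers > 0 on Windows, the usual fix is to put the dataloading loop behind an if __name__ == '__main__': guard so that the worker processes can be spawned safely. A minimal sketch under that assumption, using the same CIFAR10 test set as above:

import torchvision
from torch.utils.data import DataLoader

def main():
    test_data = torchvision.datasets.CIFAR10("./dataset", train=False, download=True,
                                             transform=torchvision.transforms.ToTensor())
    # two worker processes load batches in the background
    test_loader = DataLoader(test_data, batch_size=4, shuffle=True, num_workers=2)
    for imgs, targets in test_loader:
        pass  # training / logging would go here

if __name__ == '__main__':  # required on Windows when num_workers > 0
    main()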

Basic neural-network skeleton: using nn.Module

torch.nn is a wrapper around torch.nn.functional; the former contains the latter and is easier to use.

torch.nn.Module is the skeleton of every neural network; every network you build should inherit from this class.

Official documentation

The basic template for defining a neural network

import torch.nn as nn
import torch.nn.functional as F

# define a Model class that inherits from nn.Module
class Model(nn.Module):
    # implement the following two methods
    def __init__(self):
        super().__init__()  # this call is required
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    # forward pass
    def forward(self, x):
        # x goes through a convolution (conv1), then a non-linearity (relu)
        x = F.relu(self.conv1(x))
        # x goes through another convolution (conv2), then a non-linearity (relu)
        return F.relu(self.conv2(x))
Flow: input → forward → output

You can use PyCharm's code generation feature (menu Code / Generate…) and choose Override Methods to auto-complete these methods.

Example code

import torch
from torch import nn


class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

    def forward(self, input):
        output = input + 1
        return output

# create a network instance
tudui = Tudui()
x = torch.tensor(1.0)
output = tudui(x)
print(output)

Output

tensor(2.)

The convolution operation

Official documentation

The focus here is nn.Conv2d.

Convolution

Stride is the distance the kernel moves in a single step.

import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])
# reshape input and kernel (four numbers are required: minibatch, in_channels, iH, iW)
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
print(input.shape)
print(kernel.shape)

output = F.conv2d(input, kernel, stride=1)
print("stride=1\n", output)
output = F.conv2d(input, kernel, stride=2)
print("stride=2\n", output)
torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 3, 3])
stride=1
tensor([[[[10, 12, 12],
[18, 16, 16],
[13, 9, 3]]]])
stride=2
tensor([[[[10, 12],
[13, 3]]]])

Setting padding

Convolution with padding

padding defaults to 0. Setting padding=1 adds a one-pixel border of zeros around the input, so the 5×5 input with the 3×3 kernel and stride 1 now produces a 5×5 output.

# input and kernel are the same as above
output = F.conv2d(input, kernel, stride=1, padding=1)
print(output)
tensor([[[[ 1,  3,  4, 10,  8],
[ 5, 10, 12, 12, 6],
[ 7, 18, 16, 16, 8],
[11, 13, 9, 3, 4],
[14, 13, 9, 7, 4]]]])

Neural networks: convolution layers

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset", transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x


tudui = Tudui()
writer = SummaryWriter("./logs/nn_conv2d_logs")
step = 0
for data in dataloader:
    imgs, targets = data
    # imgs: torch.Size([64, 3, 32, 32])
    output = tudui(imgs)
    # output: torch.Size([64, 6, 30, 30])
    writer.add_images("input", imgs, step)
    # reshape because the output has 6 channels and cannot be displayed directly,
    # so we (not rigorously) reinterpret it as 3-channel images
    output = torch.reshape(output, (-1, 3, 30, 30))
    # -1 is a placeholder that lets pytorch infer the size of that dimension
    writer.add_images("output", output, step)
    step += 1

writer.close()

Run tensorboard --logdir=logs/nn_conv2d_logs to view the result.

Neural networks: max pooling

Max pooling with different ceil_mode settings

import torch
from torch import nn
from torch.nn import MaxPool2d

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)


class Tudui(nn.Module):
    def __init__(self, ceil_mode, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=ceil_mode)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


# ceil_mode = True
tudui1 = Tudui(True)
output1 = tudui1(input)
print(output1)
# ceil_mode = False
tudui2 = Tudui(False)
output2 = tudui2(input)
print(output2)
torch.Size([1, 1, 5, 5])
tensor([[[[2., 3.],
[5., 1.]]]])
tensor([[[[2.]]]])

With kernel_size=3 the stride defaults to the kernel size, so ceil_mode=True keeps the partially covered windows and yields a 2×2 output, while ceil_mode=False drops them and yields 1×1. One intuition for max pooling is lossy compression, like downscaling a 1080p video to 720p.

It is very common in neural-network training because it reduces the amount of computation.

import torch
import torchvision.datasets
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class Tudui(nn.Module):
    def __init__(self, ceil_mode, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=ceil_mode)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


tudui2 = Tudui(False)

writer = SummaryWriter("./logs/nn_maxpool")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = tudui2(imgs)
    writer.add_images("output", output, step)
    step += 1
writer.close()

Run tensorboard --logdir=logs/nn_maxpool to see the effect.

Max pooling result

You can see that the images become blurrier.

Neural networks: non-linear activations

Two functions, ReLU and Sigmoid, are used as examples; both map every element of the input to a new value.

ReLU:

import torch
import torchvision.datasets
from torch import nn
from torch.nn import MaxPool2d, ReLU
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))

class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.relu = ReLU()

    def forward(self, input):
        output = self.relu(input)
        return output


tudui2 = Tudui()
output = tudui2(input)
print(output)
tensor([[[[1., 0.],
[0., 3.]]]])

You can see that ReLU performs x = max(x, 0) element-wise.

Sigmoid (maps each element x to 1 / (1 + e^(-x))):

import torch
import torchvision.datasets
from torch import nn
from torch.nn import MaxPool2d, ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter


dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.relu = Sigmoid()

    def forward(self, input):
        output = self.relu(input)
        return output


tudui2 = Tudui()

writer = SummaryWriter("./logs/nn_relu")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = tudui2(imgs)
    writer.add_images("output", output, step)
    step += 1
writer.close()

Run tensorboard --logdir=logs/nn_relu to view the result.

Sigmoid

Neural networks: linear layers and other layers

import torch
import torchvision.datasets
from torch import nn
from torch.nn import MaxPool2d, ReLU, Sigmoid, Linear
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.linear1 = Linear(196608, 10)
        # 64*3*32*32 = 196608

    def forward(self, input):
        output = self.linear1(input)
        return output


tudui = Tudui()
for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    output = torch.flatten(imgs)  # flatten imgs into a 1-D tensor
    print(output.shape)
    output = tudui(output)
    print(output.shape)
torch.Size([64, 3, 32, 32])
torch.Size([196608])
torch.Size([10])
...
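
Note that torch.flatten(imgs) above flattens the whole batch into one long vector, which is why the Linear layer needs 196608 = 64*3*32*32 inputs. The more common pattern keeps the batch dimension and flattens each image separately; a small sketch of that variant (sizes assumed to be the CIFAR10 ones used above):

import torch
from torch import nn

imgs = torch.ones((64, 3, 32, 32))       # a stand-in CIFAR10 batch
flat = torch.flatten(imgs, start_dim=1)  # keep dim 0: shape [64, 3072]
linear = nn.Linear(3 * 32 * 32, 10)      # one 10-class output per image
print(linear(flat).shape)                # torch.Size([64, 10])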

Neural networks: building a real model and using Sequential

The CIFAR10 model architecture

CIFAR10 model architecture

The output size of Conv2d is given by

H_out = floor((H_in + 2*padding[0] - dilation[0]*(kernel_size[0]-1) - 1) / stride[0] + 1)
W_out = floor((W_in + 2*padding[1] - dilation[1]*(kernel_size[1]-1) - 1) / stride[1] + 1)

For the first convolution of the model, the input satisfies H_in = W_in = 32, N = 1, C_in = 3.

kernel_size[0] = kernel_size[1] = 5, so the unknown parameters are padding and stride.

With a stride other than 1 the required padding would become unreasonably large, so set stride[0] = stride[1] = 1.

Solving for an output size of 32 then gives padding[0] = padding[1] = 2.
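
A quick check of that choice against the formula above (dilation = 1), written as a small helper; the function name is only for this sketch:

import math

def conv2d_out(size_in, kernel, stride=1, padding=0, dilation=1):
    # output height/width of Conv2d along one spatial dimension
    return math.floor((size_in + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1)

# first convolution of the CIFAR10 model: 32x32 input, 5x5 kernel, stride 1, padding 2
print(conv2d_out(32, kernel=5, stride=1, padding=2))  # 32, the spatial size is preserved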

import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear


class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.conv1 = Conv2d(3, 32, 5, padding=2)
        self.maxpool1 = MaxPool2d(2)
        self.conv2 = Conv2d(32, 32, 5, padding=2)
        self.maxpool2 = MaxPool2d(2)
        self.conv3 = Conv2d(32, 64, 5, padding=2)
        self.maxpool3 = MaxPool2d(2)
        self.flatten = Flatten()
        self.linear1 = Linear(1024, 64)
        self.linear2 = Linear(64, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.conv3(x)
        x = self.maxpool3(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.linear2(x)
        return x

tudui = Tudui()
input = torch.ones((64, 3, 32, 32))
output = tudui(input)
# check that the network dimensions are correct
print(output.shape)
torch.Size([64, 10])

Simplifying with Sequential

import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter


class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


tudui = Tudui()
input = torch.ones((64, 3, 32, 32))
output = tudui(input)
# check that the network dimensions are correct
print(output.shape)
# visualize the network
writer = SummaryWriter("./logs/mm_seq")
writer.add_graph(tudui, input)
writer.close()
torch.Size([64, 10])

Run tensorboard --logdir=logs/mm_seq to visualize the network.

Network visualization

Loss functions and backpropagation

Loss function examples

import torch
from torch.nn import *

inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)

inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))

loss1 = L1Loss(reduction='sum')
result = loss1(inputs, targets)
print(result)
loss2 = L1Loss(reduction='mean')
result = loss2(inputs, targets)
print(result)
loss3 = MSELoss()
result = loss3(inputs,targets)
print(result)
tensor(2.)
tensor(0.6667)
tensor(1.3333)

inputs corresponds to the actual output of the network and targets to the expected output. The loss scores the network: the smaller the loss, the better the network performs.

The code uses three loss functions.

For L1Loss(reduction='sum'), the computation is abs(1-1) + abs(2-2) + abs(3-5) = 2.

For L1Loss(reduction='mean'), the computation is (abs(1-1) + abs(2-2) + abs(3-5)) / 3 = 2/3 = 0.6667.

For MSELoss(), the computation is ((1-1)**2 + (2-2)**2 + (3-5)**2) / 3 = 4/3 = 1.3333.

import torch
from torch.nn import *
from math import *

x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
loss = CrossEntropyLoss()
result = loss(x, y)
print(result)
print(-0.2 + log(exp(0.1) + exp(0.2) + exp(0.3)))
tensor(1.1019)
1.1019428482292442

The fourth loss function, CrossEntropyLoss, is more involved; see the official documentation. For a single sample with raw scores x and target class c it computes -x[c] + log(sum_j exp(x[j])), which is exactly what the manual check -0.2 + log(exp(0.1) + exp(0.2) + exp(0.3)) reproduces.

Adding a loss function and backpropagation to the network

import torch
import torchvision.datasets
from torch.nn import *
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class Tudui(Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = CrossEntropyLoss()
tudui = Tudui()
for data in dataloader:
    imgs, targets = data
    outputs = tudui(imgs)
    # print(imgs)
    # print(targets)
    """
    imgs:
    tensor([[[[0.6196, 0.6235, 0.6471,  ..., 0.5373, 0.4941, 0.4549],
              [0.5961, 0.5922, 0.6235,  ..., 0.5333, 0.4902, 0.4667],
              [0.5922, 0.5922, 0.6196,  ..., 0.5451, 0.5098, 0.4706],
              ...,
    ...
    targets:
    tensor([3, 8, 8, 0, 6, 6, 1, 6, 3, 1, 0, 9, 5, 7, 9, 8, 5, 7, 8, 6, 7, 0, 4, 9,
            5, 2, 4, 0, 9, 6, 6, 5, 4, 5, 9, 2, 4, 1, 9, 5, 4, 6, 5, 6, 0, 9, 3, 9,
            7, 6, 9, 8, 0, 3, 8, 8, 7, 7, 4, 6, 7, 3, 6, 3])
    """
    result_loss = loss(outputs, targets)
    print(result_loss)
    # backpropagation
    result_loss.backward()
tensor(2.3176, grad_fn=<NllLossBackward0>)
tensor(2.2948, grad_fn=<NllLossBackward0>)
tensor(2.3078, grad_fn=<NllLossBackward0>)
...

Optimizers

import torch
import torchvision.datasets
from torch.nn import *
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("logs/nn_optim")
dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class Tudui(Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = CrossEntropyLoss()
tudui = Tudui()
# create the optimizer
optim = torch.optim.SGD(tudui.parameters(), lr=0.01)
for epoch in range(60):  # train for 60 epochs for a better result
    running_loss = 0.0  # loss of this epoch
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets)
        # print(result_loss)
        # zero the gradients: those from the previous iteration are useless for this one
        optim.zero_grad()
        # backpropagation computes the gradient of every node
        result_loss.backward()
        # update the model parameters
        optim.step()
        # update running_loss
        running_loss += result_loss
    print(running_loss)
    writer.add_scalar("loss", running_loss, epoch)
writer.close()

Run tensorboard --logdir=logs/nn_optim to view the training curve.

Training curve

Using and modifying existing network models

The code below takes the pretrained vgg16 model, modifies it, and after training it can classify CIFAR10.

import torch
import torchvision
from torch import nn
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# use a pretrained model
vgg16 = torchvision.models.vgg16(pretrained=True)
dataset = torchvision.datasets.CIFAR10('./dataset', train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
# add one layer that maps the 1000-class output to 10 classes
vgg16.classifier.add_module('add_linear', nn.Linear(1000, 10))
dataloader = DataLoader(dataset, batch_size=64)
writer = SummaryWriter("logs/model_pretrained")
loss = CrossEntropyLoss()
optim = torch.optim.SGD(vgg16.parameters(), lr=0.01)
for epoch in range(5):  # training takes a long time, so this number is kept small
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = vgg16(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        running_loss += result_loss
    print(epoch, ' ', running_loss)
    writer.add_scalar("loss", running_loss, epoch)
# save the trained model
torch.save(vgg16, "vgg16_method1.pth")
writer.close()

Run tensorboard --logdir=logs/model_pretrained to view the training curve.

Saving and loading models

import torch
import torchvision

vgg16 = torchvision.models.vgg16(pretrained=False)
# save the model
# method 1: model structure + parameters
torch.save(vgg16, "vgg16_method1.pth")

# method 2: parameters only (officially recommended)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
# how to load a model saved with method 1
model1 = torch.load("vgg16_method1.pth")
print(model1)
# how to load a model saved with method 2
model2 = torchvision.models.vgg16(pretrained=False)
model2.load_state_dict(torch.load("vgg16_method2.pth"))
print(model2)
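
One caveat with method 1 that the snippet does not show: the whole module is pickled by reference to its class, so the class definition must be importable (or defined in the script) when torch.load is called. A minimal sketch with a hypothetical custom module, under the same torch version used in this tutorial:

import torch
from torch import nn

class Tiny(nn.Module):  # hypothetical custom model, defined only for this sketch
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

torch.save(Tiny(), "tiny_method1.pth")
# works here because Tiny is defined in this file; another script would have to
# import (or redefine) Tiny before calling torch.load on this checkpoint
model = torch.load("tiny_method1.pth")
print(model)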

A complete model-training workflow

This example trains on CIFAR10.

CIFAR10 model architecture

First build the model skeleton, defined in model.py

import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential


# this is the same code as in "Neural networks: building a real model and using Sequential"
class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


# used to test that the network is correct
if __name__ == '__main__':
    tudui = Tudui()
    input = torch.ones((64, 3, 32, 32))
    output = tudui(input)
    print(output.shape)
torch.Size([64, 10])

train.py

import torchvision.datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# import the Tudui model skeleton
from model import *

# prepare the datasets
train_data = torchvision.datasets.CIFAR10(root="dataset", train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
# len() gives the length of a dataset
print("Length of the training dataset: {}".format(len(train_data)))  # Ctrl+D duplicates the current line
print("Length of the test dataset: {}".format(len(test_data)))
# load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
# create the network model
tudui = Tudui()
# loss function
loss_fn = nn.CrossEntropyLoss()
# optimizer
learning_rate = 0.01  # learning rate
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)  # SGD is stochastic gradient descent
# a few training bookkeeping parameters
# number of training steps so far
total_train_step = 0
# number of test rounds so far
total_test_step = 0
# number of epochs
epoch = 100  # 10

# add tensorboard
writer = SummaryWriter("./logs/train")

for i in range(epoch):
    print("-------- Epoch {} begins --------".format(i + 1))

    # training step
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)

        # let the optimizer update the model
        # zero the gradients: those from the previous iteration are useless for this one
        optimizer.zero_grad()
        # backpropagation computes the gradient of every node
        loss.backward()
        # update the model parameters
        optimizer.step()

        total_train_step += 1
        if total_train_step % 100 == 0:
            print("Training step: {}, Loss: {}".format(total_train_step, loss.item()))  # .item() turns a tensor into a plain number
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # test step
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no tuning during testing, so no gradients are needed
        for data in test_dataloader:
            imgs, targets = data
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("Loss on the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / len(test_data)))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / len(test_data), total_test_step)
    total_test_step += 1
    # save the result of this epoch
    torch.save(tudui, "./train_save/tudui_{}.pth".format(i + 1))
writer.close()

Run train.py, then tensorboard --logdir=logs/train to view the training curves.

train1

train2

Training on the GPU

Modify the following pieces

if torch.cuda.is_available():
    tudui = tudui.cuda()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()
if torch.cuda.is_available():  # both the training and the test loop need this change
    imgs = imgs.cuda()
    targets = targets.cuda()

You can use Google's free GPU compute at https://colab.research.google.com/

Under Edit / Notebook settings, choose the T4 GPU hardware accelerator to run Python code on a GPU.

Verification

import torch
print(torch.__version__)
print(torch.cuda.is_available())
2.1.0+cu121
True
!nvidia-smi
Tue Jan 30 02:57:01 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |
| N/A 44C P8 10W / 70W | 3MiB / 15360MiB | 0% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+

+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| No running processes found |
+---------------------------------------------------------------------------------------+

There is 16 GB of GPU memory, which is a very good setup.

Running on the GPU in Colab

import torch
import torchvision.datasets
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from time import time
# the Tudui model skeleton is defined inline below

# prepare the datasets
train_data = torchvision.datasets.CIFAR10(root="dataset", train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
# len() gives the length of a dataset
print("Length of the training dataset: {}".format(len(train_data)))  # Ctrl+D duplicates the current line
print("Length of the test dataset: {}".format(len(test_data)))
# load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
# create the network model
class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

tudui = Tudui()
if torch.cuda.is_available():
    tudui = tudui.cuda()
# loss function
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()
# optimizer
learning_rate = 0.01  # learning rate
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)  # SGD is stochastic gradient descent
# a few training bookkeeping parameters
# number of training steps so far
total_train_step = 0
# number of test rounds so far
total_test_step = 0
# number of epochs
epoch = 100  # 10

# add tensorboard
writer = SummaryWriter("./logs/train")
start_time = time()
for i in range(epoch):
    print("-------- Epoch {} begins --------".format(i + 1))

    # training step
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)

        # let the optimizer update the model
        # zero the gradients: those from the previous iteration are useless for this one
        optimizer.zero_grad()
        # backpropagation computes the gradient of every node
        loss.backward()
        # update the model parameters
        optimizer.step()

        total_train_step += 1
        if total_train_step % 100 == 0:
            end_time = time()
            print(f"training time {end_time - start_time}s")
            print("Training step: {}, Loss: {}".format(total_train_step, loss.item()))  # .item() turns a tensor into a plain number
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # test step
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no tuning during testing, so no gradients are needed
        for data in test_dataloader:
            imgs, targets = data
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("Loss on the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / len(test_data)))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / len(test_data), total_test_step)
    total_test_step += 1
    # save the result of this epoch
    torch.save(tudui, "./tudui_{}.pth".format(i + 1))
writer.close()

Alternatively, written this way instead:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# if torch.cuda.is_available():
#     tudui = tudui.cuda()
tudui = tudui.to(device)
# if torch.cuda.is_available():
#     loss_fn = loss_fn.cuda()
loss_fn = loss_fn.to(device)
# if torch.cuda.is_available():
#     imgs = imgs.cuda()
#     targets = targets.cuda()
imgs = imgs.to(device)
targets = targets.to(device)

A complete model-validation approach

Classes

import torchvision.transforms
from PIL import Image
from torch import nn
from torch.nn import *
import torch

image_path = "./imgs/frog.png"
image = Image.open(image_path)
image.show()
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                             torchvision.transforms.ToTensor()])
image = transform(image)


class Tudui(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

classify = {0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer', 5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}
model = torch.load("./train_save/tudui_40.pth")
image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    output = model(image)
print(output)
print(classify[output.argmax(1).item()])
tensor([[ -5.6745,  -6.9874,   2.7819,   4.9582,   9.3090,   3.5353,  11.1875,
-2.3855, -6.4177, -11.6776]])
frog

I tested several images; apart from confusing dogs and cats, the model performs reasonably well overall.

Note: if the model was trained on a GPU but the environment used for validation has no GPU support, the load call must be written as model = torch.load("./train_save/tudui_40.pth", map_location=torch.device('cpu'))

Case study 1: handwritten digit recognition

import torch
import torchvision.datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch import nn
from torch.nn import Sequential, Conv2d, Linear, MaxPool2d, Flatten
from time import time

# use cuda if this machine supports it, otherwise the cpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # data preparation
# load the datasets
dataset_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
train_set = torchvision.datasets.MNIST(root="D:\\AI\\doc\\20231011\\dataset", train=True,
                                       transform=dataset_transform, download=True)
test_set = torchvision.datasets.MNIST(root="D:\\AI\\doc\\20231011\\dataset", train=False,
                                      transform=dataset_transform, download=True)
img, target = test_set[0]
# inspect one image of the dataset
print(f"image shape: {img.shape}, class: {target}")
# image shape: torch.Size([1, 28, 28]), class: 7
# wrap the datasets with DataLoader
batch_size = 64
train_set = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=False)
test_set = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=False)


# # create the network model
class neural_network(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.model = Sequential(
            Conv2d(1, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            Flatten(),
            Linear(3136, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        return self.model(x)


# instantiate the network
net1 = neural_network().to(device)
# net1 = torch.load("./save/01/99.pth").to(device)
# loss function
loss_fn = nn.CrossEntropyLoss().to(device)
# optimizer
learning_rate = 0.01  # learning rate
optimizer = torch.optim.SGD(net1.parameters(), lr=learning_rate)  # SGD is stochastic gradient descent
# tensorboard
writer = SummaryWriter("./logs/02")

# # bookkeeping counters
# number of training steps so far
total_train_step = 0
# number of test rounds so far
total_test_step = 0
# number of epochs
epoch = 500

# # start training
# record the time
start_time = time()
for i in range(epoch):
    print("-------- Epoch {} begins --------".format(i))
    # switch to training mode
    net1.train()
    # take batches from the training set
    # train_accuracy = 0
    for imgs, targets in train_set:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = net1(imgs)
        loss = loss_fn(outputs, targets)
        # # let the optimizer update the model
        # zero the gradients: those from the previous iteration are useless for this one
        optimizer.zero_grad()
        # backpropagation computes the gradient of every node
        loss.backward()
        # update the model parameters
        optimizer.step()
        accuracy = (outputs.argmax(1) == targets).sum()
        # train_accuracy += accuracy / batch_size

        total_train_step += 1
        if total_train_step % 100 == 0 or total_train_step < 300:
            end_time = time()
            print(f"training time {end_time - start_time}s")
            print("Training step: {}, Loss: {}".format(total_train_step, loss.item()))  # .item() turns a tensor into a plain number
            writer.add_scalar("train_loss", loss.item() / len(train_set), total_train_step)
            writer.add_images("train_imgs", imgs, total_train_step)
            writer.add_scalar("train_accuracy", accuracy / batch_size, total_train_step)

    # switch to evaluation mode
    net1.eval()
    total_test_loss = 0
    test_accuracy = 0
    with torch.no_grad():  # no tuning during testing, so no gradients are needed
        for imgs, targets in test_set:
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = net1(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum() / batch_size
            test_accuracy += accuracy
    print("Loss on the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(test_accuracy / len(test_set)))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", test_accuracy / len(test_set), total_test_step)
    total_test_step += 1
    # save checkpoints
    if i % 10 == 9:
        torch.save(net1, "./save/02/{}.pth".format(i))
writer.close()

Run tensorboard --logdir=logs/02 to view the training curves.

train_loss
test_loss
train_accuracy
train_imgs
test_accuracy

The C++ version of torch (libtorch)

Installation

Find the download link on the PyTorch website https://pytorch.org/ , then download and extract the archive.

Installation

Below, the extracted path is assumed to be /usr/lib/libtorch.

VS Code support

In the VS Code settings, add the following paths to the include path:

/usr/lib/libtorch/include/torch/csrc/api/include
/usr/lib/libtorch/include/

Building with CMake

hello.cpp

#include <torch/torch.h>
#include <iostream>

int main()
{
    // check whether a GPU is available
    if (torch::cuda::is_available())
    {
        std::cout << "CUDA is available! Training on GPU." << std::endl;
        // allocate the tensors on the GPU
        torch::Tensor a = torch::rand({2, 3}, torch::device(torch::kCUDA));
        torch::Tensor b = torch::rand({3, 4}, torch::device(torch::kCUDA));
        torch::Tensor c = a.mm(b);
        std::cout << c << std::endl;
    }
    else
    {
        std::cout << "CUDA is not available! Training on CPU." << std::endl;
        // fall back to the CPU
        torch::Tensor tensor = torch::rand({2, 3});
        std::cout << tensor << std::endl;
    }
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(hello)

find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_executable(hello hello.cpp)
target_link_libraries(hello "${TORCH_LIBRARIES}")
set_property(TARGET hello PROPERTY CXX_STANDARD 17)

The project layout is as follows

.
├── build
├── CMakeLists.txt
└── hello.cpp

Build

cd build
cmake -DCMAKE_PREFIX_PATH=/usr/lib/libtorch ..
make # or: cmake --build . --config Release
