# Basic Setup

  1. Common packages

    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader
    import torch.optim as optim  # conventional alias; "optimizer" would clash with the variable defined later
    import pandas as pd  # data processing and analysis
    import cv2  # computer vision and image processing
    import matplotlib.pyplot as plt  # plotting library for static, interactive, and animated figures
    import seaborn as sns  # high-level data visualization built on matplotlib
    import sklearn  # downstream machine-learning analysis and metric computation
  2. Common hyperparameters

    batch_size = 16
    lr = 1e-4
    max_epochs = 100
  3. Specifying the device

    # Option 1: restrict the visible GPUs once (before any CUDA call); later GPU code needs no per-tensor handling
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # Option 2: call .to(device) on every tensor and model that should run on the GPU
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
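    # A minimal, illustrative sketch of Option 2 (`layer` and `x` are example names):
    layer = nn.Linear(784, 256).to(device)  # move the layer's parameters onto the chosen device
    x = torch.rand(2, 784).to(device)       # inputs must live on the same device as the parameters
    out = layer(x)                          # the forward pass now runs on `device`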

# Data Loading

  1. Define your own Dataset class, subclassing torch.utils.data.Dataset

  2. Define the __init__ method, which receives external arguments and sets up the sample collection

  3. Define the __getitem__ method, which reads one element of the sample collection at a time, optionally applies transforms, and returns the data needed for training/validation

  4. Define the __len__ method, which returns the number of samples in the dataset

  5. Use torch.utils.data.DataLoader to read the data in batches (skeleton below; a concrete example follows it)

    class MyTrainDataset(Dataset):
        def __init__(self, **kwargs):
            pass
        def __getitem__(self, idx):
            # fetch and transform the sample at position idx
            pass
        def __len__(self):
            # number of samples in the dataset
            pass
    class MyValDataset(Dataset):
        pass

    # DataLoader takes a Dataset *instance*, not the class itself
    train_loader = DataLoader(MyTrainDataset(), batch_size=batch_size, num_workers=4, shuffle=True, drop_last=True)
    val_loader = DataLoader(MyValDataset(), batch_size=batch_size, num_workers=4, shuffle=False)
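  6. A concrete minimal Dataset, as an illustrative sketch wrapping in-memory tensors (real datasets typically load files lazily inside __getitem__)

    class TensorPairDataset(Dataset):
        def __init__(self, X, y):
            self.X = X  # features, shape (N, ...)
            self.y = y  # labels, shape (N,)
        def __getitem__(self, idx):
            return self.X[idx], self.y[idx]
        def __len__(self):
            return len(self.X)

    X, y = torch.rand(100, 784), torch.randint(0, 10, (100,))
    loader = DataLoader(TensorPairDataset(X, y), batch_size=batch_size, shuffle=True)
    xb, yb = next(iter(loader))
    print(xb.shape, yb.shape)  # torch.Size([16, 784]) torch.Size([16])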

# Model Construction

  1. Build the network as a subclass of torch.nn.Module, overriding __init__() and forward(); calling the module via __call__() invokes forward() to perform the forward computation

    class MLP(nn.Module):
        def __init__(self, **kwargs):
            super(MLP, self).__init__(**kwargs)
            self.hidden = nn.Linear(784, 256)
            self.act = nn.ReLU()
            self.output = nn.Linear(256, 10)

        def forward(self, x):
            a = self.act(self.hidden(x))
            return self.output(a)

    X = torch.rand(2, 784)  # random input tensor
    net = MLP()
    print(net)  # print the network structure
    print(net(X))  # forward computation
  2. Common layers (a composition sketch follows the list)

    • nn.Linear(in_features, out_features, bias=True): fully connected layer with learnable weights and bias
    • nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0): convolution layer with learnable kernels and bias
    • nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): batch normalization with learnable scale and shift parameters
    • nn.LayerNorm(normalized_shape, eps=1e-05, elementwise_affine=True): layer normalization
    • nn.MaxPool2d(kernel_size, stride=None, padding=0): max pooling layer
    • nn.AvgPool2d(kernel_size, stride=None, padding=0): average pooling layer
    • nn.Dropout(p=0.5, inplace=False): dropout layer
    • nn.ReLU(inplace=False): ReLU activation
    • nn.Sigmoid(): Sigmoid activation
    • nn.Tanh(): Tanh activation
    • nn.Softmax(dim=None): Softmax activation
    • nn.Flatten(): flattens each sample to 1-D (the batch dimension is kept by default)
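  3. Composing the common layers with nn.Sequential, a minimal illustrative sketch (shapes assume single-channel 28×28 inputs such as MNIST)

    cnn = nn.Sequential(
        nn.Conv2d(1, 16, kernel_size=3, padding=1),  # 1x28x28 -> 16x28x28
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(2),                             # 16x28x28 -> 16x14x14
        nn.Flatten(),                                # -> 16*14*14 = 3136 features per sample
        nn.Dropout(0.5),
        nn.Linear(16 * 14 * 14, 10),
    )
    print(cnn(torch.rand(2, 1, 28, 28)).shape)  # torch.Size([2, 10])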

# Model Initialization

  1. Common initialization methods

    • torch.nn.init.uniform_(tensor, a=0.0, b=1.0): uniform-distribution initialization
    • torch.nn.init.normal_(tensor, mean=0.0, std=1.0): normal-distribution initialization
    • torch.nn.init.constant_(tensor, val): constant initialization
    • torch.nn.init.zeros_(tensor): zero initialization
    • torch.nn.init.ones_(tensor): all-ones initialization
    • torch.nn.init.eye_(tensor): identity-matrix initialization
    • torch.nn.init.xavier_normal_(tensor, gain=1.0): Xavier normal initialization
    • torch.nn.init.xavier_uniform_(tensor, gain=1.0): Xavier uniform initialization
    • torch.nn.init.kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'): Kaiming normal initialization
    • torch.nn.init.kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'): Kaiming uniform initialization
  2. Wrapping initialization in a function

    def init_weights(m):
        # apply a different scheme to each layer type (the values are for demonstration)
        if isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight.data, mean=0.1)  # N(0.1, 1.0); the second positional argument is the mean, not the std
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias.data)
        elif isinstance(m, nn.Conv2d):
            torch.nn.init.zeros_(m.weight.data)  # demonstration only; zero conv weights are a poor choice in practice
            if m.bias is not None:
                torch.nn.init.constant_(m.bias.data, 0.3)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    net.apply(init_weights)  # recursively applies init_weights to every submodule
    print(net.hidden.weight.data)  # MLP is not indexable, so inspect a named submodule rather than net[0]

# Loss Functions

  1. L1 loss: $L_n = |y_n - \hat{y}_n|$

    loss = nn.L1Loss(reduction='mean')  # reduction='none' (per-element), 'sum' (sum over all elements), 'mean' (average, default)
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.randn(3, 5)
    output = loss(input, target)
    output.backward()
    print(output)
  2. MSE loss: $L_n = (y_n - \hat{y}_n)^2$

    loss = nn.MSELoss(reduction='mean')  # reduction='none'/'sum'/'mean'
    output = loss(input, target)  # same usage as L1Loss above, reusing input and target
    output.backward()

# Optimizers

  1. torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9): stochastic gradient descent with momentum

    $$v_t = \beta\, v_{t-1} + (1 - \beta)\,\nabla L(\theta_{t-1}), \qquad \theta_t = \theta_{t-1} - \alpha\, v_t$$

  2. torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999)): Adam optimizer

    $$\begin{aligned} m_t &= \beta_1\, m_{t-1} + (1 - \beta_1)\,\nabla L(\theta_{t-1}) \\ v_t &= \beta_2\, v_{t-1} + (1 - \beta_2)\,\big(\nabla L(\theta_{t-1})\big)^2 \\ \theta_t &= \theta_{t-1} - \alpha\,\frac{m_t}{\sqrt{v_t} + \epsilon} \end{aligned}$$

  3. torch.optim.AdamW(model.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=0.01): AdamW, Adam with decoupled weight decay

    $$\theta_t = \theta_{t-1} - \alpha\,\frac{m_t}{\sqrt{v_t} + \epsilon} - \alpha \cdot \text{weight\_decay} \cdot \theta_{t-1}$$

  4. Typical usage in a training loop

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(max_epochs):
        ...
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss = ...
        loss.backward()        # backpropagate to compute new gradients
        optimizer.step()       # update the parameters

# Training and Evaluation

  1. Setting the model state

    • model.train() switches to training mode: Dropout randomly zeroes a fraction of activations, and BatchNorm normalizes each input with the current batch's statistics, computing the batch mean and variance and standardizing to zero mean and unit variance (a quick check of the Dropout behavior follows the bullets)
    • model.eval() switches to evaluation mode: Dropout passes activations through unchanged, and BatchNorm normalizes inputs with the running mean and variance accumulated during training
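    # An illustrative check of the Dropout difference between the two modes:
    drop = nn.Dropout(p=0.5)
    x = torch.ones(8)
    drop.train()
    print(drop(x))  # about half the entries zeroed; survivors scaled by 1/(1-p) = 2
    drop.eval()
    print(drop(x))  # identity: the input passes through unchanged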
  2. Training loop example

    model.train()
    train_loss = 0
    for data, label in train_loader:
        data, label = data.cuda(), label.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
    train_loss /= len(train_loader.dataset)
    print(f'Epoch: {epoch}, Train Loss: {train_loss}')
  3. Evaluation loop example

    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, label in val_loader:
            data, label = data.cuda(), label.cuda()
            output = model(data)
            val_loss += criterion(output, label).item() * data.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(label.view_as(pred)).sum().item()
    val_loss /= len(val_loader.dataset)
    val_acc = correct / len(val_loader.dataset)
    print(f'Epoch: {epoch}, Val Loss: {val_loss}, Val Acc: {val_acc}')
  4. For image classification, classification_report() from sklearn.metrics computes per-class precision, recall, F1 score, and related metrics

    from sklearn.metrics import classification_report
    print(classification_report(label.cpu(), pred.cpu(), target_names=class_names))
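    # Note: `label` and `pred` above hold only the last batch; a sketch that
    # scores the whole validation set (variable names are illustrative):
    all_preds, all_labels = [], []
    with torch.no_grad():
        for data, label in val_loader:
            all_preds.append(model(data.cuda()).argmax(dim=1).cpu())
            all_labels.append(label)
    print(classification_report(torch.cat(all_labels).numpy(),
                                torch.cat(all_preds).numpy(),
                                target_names=class_names))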

# Visualization

  1. Print basic model information: print(model)

  2. Print the model structure with layer shapes: from torchinfo import summary; summary(model, input_size=(batch_size, 3, 224, 224))

  3. Visualize the training process with TensorBoard

    from tensorboardX import SummaryWriter
    writer = SummaryWriter('logs')
    writer.add_scalar('Loss/train', train_loss, epoch)
    writer.add_scalar('Loss/val', val_loss, epoch)
    writer.add_scalar('Accuracy/val', val_acc, epoch)
    for name, param in model.named_parameters():
        writer.add_histogram(name, param, epoch)  # add_histogram logs tensors; passing the whole model would fail
    writer.add_image('input', data[0], epoch)
    writer.close()
    # then launch the dashboard from a shell: tensorboard --logdir=logs --port=6006