Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
点
点头人工智能课程-v6.0-影像
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
靓靓
点头人工智能课程-v6.0-影像
Commits
1691dd0c
Commit
1691dd0c
authored
Aug 03, 2025
by
前钰
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
f6391abd
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
225 additions
and
0 deletions
+225
-0
train_optuna.py
4-模型改进/4.4-模型参数调优/train_optuna.py
+225
-0
No files found.
4-模型改进/4.4-模型参数调优/train_optuna.py
0 → 100644
View file @
1691dd0c
import
argparse
# 用于解析命令行参数
import
argparse
# 用于解析命令行参数
import
torch
import
optuna
from
torch.optim
import
Adam
,
SGD
import
torch.optim
as
optim
# PyTorch中的优化器
from
torch.utils.data
import
DataLoader
# PyTorch中用于加载数据的工具
from
tqdm
import
tqdm
# 用于在循环中显示进度条
from
torch.optim.lr_scheduler
import
CosineAnnealingLR
# 余弦退火学习率调度器
import
torch.nn.functional
as
F
# PyTorch中的函数库
from
torchvision
import
datasets
# PyTorch中的视觉数据集
import
torchvision.transforms
as
transforms
# PyTorch中的数据变换操作
from
tensorboardX
import
SummaryWriter
# 用于创建TensorBoard日志的工具
import
os
# Python中的操作系统相关功能
from
utils
import
AverageMeter
,
accuracy
# 自定义工具模块,用于计算模型的平均值和准确度
from
models
import
model_dict
# 自定义模型字典,包含了各种模型的定义
import
numpy
as
np
# NumPy库,用于数值计算
import
time
# Python中的时间相关功能
import
random
# Python中的随机数生成器
def _str2bool(value):
    """Parse a command-line boolean flag value.

    argparse's ``type=bool`` is a well-known trap: any non-empty string is
    truthy, so ``--pre_trained False`` used to yield ``True``.  This converter
    accepts the common spellings explicitly and rejects anything else.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "1", "yes", "y"):
        return True
    if lowered in ("false", "0", "no", "n"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean, got {!r}".format(value))


# Command-line interface for the Optuna tuning script.
parser = argparse.ArgumentParser()
# Backbone name; used as a key into the project's model_dict.
parser.add_argument("--model_names", type=str, default="resnet18")
# Whether to start from pretrained weights.  FIX: was ``type=bool``, which
# made every non-empty string (including "False") parse as True.
parser.add_argument("--pre_trained", type=_str2bool, default=False)
# Number of target classes.
parser.add_argument("--classes_num", type=int, default=4)
# Dataset folder name.
parser.add_argument("--dataset", type=str, default="new_COVID_19_Radiography_Dataset")
# Mini-batch size.
parser.add_argument("--batch_size", type=int, default=256)
# Number of training epochs per run.
parser.add_argument("--epoch", type=int, default=3)
# Base learning rate.
parser.add_argument("--lr", type=float, default=0.001)
# SGD momentum.
parser.add_argument("--momentum", type=float, default=0.9)
# L2 weight decay (note: accessed in code as ``args.weight_decay``).
parser.add_argument("--weight-decay", type=float, default=1e-4)
# Random seed for reproducibility.
parser.add_argument("--seed", type=int, default=33)
# Which GPU to expose via CUDA_VISIBLE_DEVICES.
parser.add_argument("--gpu-id", type=int, default=0)
# Print/log training info every N epochs.
parser.add_argument("--print_freq", type=int, default=1)
# Suffix for the experiment output folder.
parser.add_argument("--exp_postfix", type=str, default="seed33")
# Name of the text file the training log is written to.
parser.add_argument("--txt_name", type=str, default="lr0.001_wd1e-4_yuan_batch_256")
# Optional path to a pretrained weight file.
parser.add_argument("--weights", type=str, default=None, help="预训练权重文件路径")
# Example: python train.py --model_names vgg --batch_size 64 --lr 0.001
args = parser.parse_args()
def seed_torch(seed=74):
    """Fix every relevant RNG so experiments are reproducible.

    Seeds Python's ``random``, the interpreter hash seed, NumPy, and torch
    (CPU plus all CUDA devices), and switches cuDNN into deterministic mode.
    Determinism disables cuDNN autotuning, which costs some speed but only
    changes results in the last few decimal places.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)  # disable hash randomization
    random.seed(seed)                         # Python stdlib RNG
    np.random.seed(seed)                      # NumPy RNG
    torch.manual_seed(seed)                   # torch CPU RNG
    torch.cuda.manual_seed(seed)              # current CUDA device (no-op on CPU)
    torch.cuda.manual_seed_all(seed)          # every device, for multi-GPU runs
    # cuDNN trades exact repeatability for speed; turn that trade-off off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    print('random seed has been fixed')
# --- one-time environment / output-folder setup ---------------------------
seed_torch(seed=args.seed)  # fix all RNGs with the seed from the CLI

# Restrict this process to the requested GPU (only matters under CUDA).
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

# Experiment artifacts land in ./report/<dataset>/<model>/<postfix>/
exp_name = args.exp_postfix
exp_path = f"./report/{args.dataset}/{args.model_names}/{exp_name}"
os.makedirs(exp_path, exist_ok=True)
# dataloader ---------------------------------------------------------------
# Per-channel normalization statistics of the X-ray dataset (grayscale
# replicated to 3 channels, hence identical values per channel).
_NORM_MEAN = (0.3738, 0.3738, 0.3738)
_NORM_STD = (0.3240, 0.3240, 0.3240)

# Training pipeline: resize, light augmentation, then tensor + normalize.
transform_train = transforms.Compose([
    transforms.Resize([256, 256]),            # upscale so a 224 crop has slack
    transforms.RandomCrop(224),               # random 224x224 crop
    transforms.RandomHorizontalFlip(p=0.5),   # mirror left<->right half the time
    transforms.RandomRotation(degrees=15),    # rotate within +/-15 degrees
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Evaluation pipeline: deterministic resize + normalize only (no augmentation).
transform_test = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])
# Image folders laid out as <root>/<split>/<class>/<image>.
_DATA_ROOT = 'new_COVID_19_Radiography_Dataset'
trainset = datasets.ImageFolder(root=os.path.join(_DATA_ROOT, 'train'),
                                transform=transform_train)
testset = datasets.ImageFolder(root=os.path.join(_DATA_ROOT, 'val'),
                               transform=transform_test)

# Loaders: shuffle only the training split; 4 worker processes load batches
# in parallel, and pinned host memory speeds up host->GPU copies.
train_loader = DataLoader(trainset,
                          batch_size=args.batch_size,
                          num_workers=4,
                          shuffle=True,
                          pin_memory=True)
test_loader = DataLoader(testset,
                         batch_size=args.batch_size,
                         num_workers=4,
                         shuffle=False,
                         pin_memory=True)
# train
def train_one_epoch(model, optimizer, train_loader):
    """Run one optimization pass over ``train_loader``.

    Returns ``(mean_loss, mean_top1_accuracy)``, each a sample-weighted
    average over the whole epoch.
    """
    model.train()
    loss_meter = AverageMeter()  # sample-weighted running mean of the loss
    acc_meter = AverageMeter()   # sample-weighted running mean of top-1 acc

    for inputs, targets in tqdm(train_loader, desc="train"):
        # Move the batch to the GPU when one is available; non_blocking lets
        # the copy overlap with compute when pin_memory is enabled.
        if torch.cuda.is_available():
            inputs = inputs.cuda(non_blocking=True)
            targets = targets.cuda(non_blocking=True)

        out = model(inputs)
        loss = F.cross_entropy(out, targets)  # classification loss

        # Weight each update by the batch size so the epoch averages are
        # true per-sample means, independent of (possibly uneven) batches.
        batch = inputs.size(0)
        loss_meter.update(loss.item(), n=batch)
        acc_meter.update(accuracy(out, targets)[0].item(), n=batch)

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()        # backpropagate
        optimizer.step()       # apply the parameter update

    return loss_meter.avg, acc_meter.avg
def evaluation(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` without updating any weights.

    Returns ``(mean_loss, mean_top1_accuracy)``, each a sample-weighted
    average over the whole loader.
    """
    model.eval()  # evaluation mode: no dropout, frozen batch-norm stats
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    with torch.no_grad():  # gradients are never needed during evaluation
        for img, label in tqdm(test_loader, desc="Evaluating"):
            if torch.cuda.is_available():
                img = img.cuda()
                label = label.cuda()

            out = model(img)
            batch = img.size(0)  # samples in this batch, used as the weight
            acc_meter.update(accuracy(out, label)[0].item(), batch)
            loss_meter.update(F.cross_entropy(out, label).item(), batch)

    return loss_meter.avg, acc_meter.avg
def train(model, optimizer, train_loader, test_loader, scheduler):
    """Full training loop over ``args.epoch`` epochs.

    Trains, evaluates after every epoch, checkpoints the best model to
    ``<exp_path>/ckpt/best.pth``, logs progress to ``<exp_path>/<txt_name>.txt``,
    and steps the LR scheduler once per epoch.

    Returns the best validation accuracy seen (new; the original returned
    None, so this is backward compatible).

    Fixes vs. original: the log file is now opened with ``with`` so it is
    closed even if an epoch raises; the per-epoch message had a broken
    format ("test loss{:.2f}" missing a colon) and "traninng" was a typo.
    """
    since = time.time()  # wall-clock start, for total-time reporting
    best_acc = -1        # sentinel lower than any real accuracy

    log_path = os.path.join(exp_path, "{}.txt".format(args.txt_name))
    with open(log_path, "w") as f:  # FIX: context manager, no leaked handle
        for epoch in range(args.epoch):
            # One pass over the training set, then a validation pass.
            train_losses, train_acces = train_one_epoch(model, optimizer, train_loader)
            test_losses, test_acces = evaluation(model, test_loader)

            # Checkpoint whenever validation accuracy improves.
            if test_acces > best_acc:
                best_acc = test_acces
                state_dict = dict(epoch=epoch + 1,
                                  model=model.state_dict(),
                                  acc=test_acces)
                name = os.path.join(exp_path, "ckpt", "best.pth")
                os.makedirs(os.path.dirname(name), exist_ok=True)
                torch.save(state_dict, name)

            scheduler.step()  # advance the LR schedule once per epoch

            # args.print_freq controls how often progress is printed/logged
            # (1 = every epoch, 5 = every fifth epoch, ...).
            if (epoch + 1) % args.print_freq == 0:
                msg = "epoch:{} model:{} train loss:{:.2f} acc:{:.2f} test loss:{:.2f} acc:{:.2f}\n".format(
                    epoch + 1,
                    args.model_names,
                    train_losses,
                    train_acces,
                    test_losses,
                    test_acces,
                )
                print(msg)
                f.write(msg)
                f.flush()  # make progress visible even if the run is killed

        # Final summary: best accuracy and total training time.
        msg_best = "model:{} best acc:{:.2f}\n".format(args.model_names, best_acc)
        time_elapsed = "training time: {}".format(time.time() - since)
        print(msg_best)
        f.write(msg_best)
        f.write(time_elapsed)

    return best_acc
# Optuna optimization target.
def objective(trial):
    """Optuna objective: train one hyper-parameter configuration end-to-end
    and return its validation accuracy (higher is better).

    BUG FIX: the original wrapped ``train(...)`` in
    ``for epoch in range(args.epoch)``, but ``train`` already runs the full
    ``args.epoch``-epoch loop internally — so every trial trained
    ``args.epoch ** 2`` epochs and stepped the cosine scheduler far past its
    ``T_max``.  ``train`` is now called exactly once per trial.
    """
    # Sample the search space for this trial.
    model_name = trial.suggest_categorical("model_name", ["resnet18", "resnet50"])
    lr = trial.suggest_categorical("lr", [0.001, 0.01])
    optimizer_name = trial.suggest_categorical("optimizer", ["adam", "sgd"])

    # Build the chosen backbone and place it on GPU when available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model_dict[model_name](num_classes=args.classes_num,
                                   pretrained=args.pre_trained).to(device)

    # Build the chosen optimizer.
    if optimizer_name == "adam":
        optimizer = Adam(model.parameters(), lr=lr,
                         weight_decay=args.weight_decay)
    else:
        optimizer = SGD(model.parameters(), lr=lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        nesterov=True)

    # Cosine annealing over exactly the number of epochs train() will run.
    scheduler = CosineAnnealingLR(optimizer, T_max=args.epoch)

    # Single full training run (train() loops over args.epoch internally,
    # evaluates each epoch, and checkpoints the best model).
    train(model, optimizer, train_loader, test_loader, scheduler)

    # Report the trained model's validation accuracy as the trial score.
    _, acc = evaluation(model, test_loader)
    return acc
if __name__ == "__main__":
    # Search for the hyper-parameter combination that maximizes validation
    # accuracy, running the objective for 5 trials.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=5)

    # Report the best combination found.  (Message is Chinese for
    # "best parameter combination".)
    print(" 最佳参数组合:")
    best_params = study.best_trial.params
    print(best_params)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment