目前正在写一个训练框架,需要有以下几个功能:
1.保存模型
2.断点继续训练
3.加载模型
4.tensorboard 查询训练记录的功能
命令:
tensorboard --logdir=runs --host=192.168.112.5
效果:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
# 假设有一个训练数据集和其对应的标签
# 例如:
data = torch.tensor([[1.0], [2.0], [3.0]])
labels = torch.tensor([[2.0], [4.0], [6.0]])
class MYdataset(torch.utils.data.Dataset):
def __init__(self, features, labels):
self.features = features
self.labels = labels
def __getitem__(self, index):
return self.features[index], self.labels[index]
def __len__(self):
return len(self.features)
BATCH_SIZE= 1
train_ds = MYdataset(data, labels)
train_dl = torch.utils.data.DataLoader(
train_ds,
batch_size=BATCH_SIZE,
shuffle = True
)
# 定义模型、损失函数和优化器
model = nn.Linear(1, 1)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
# 创建summary writer
writer = SummaryWriter('runs/loss_visualization')
for epoch in range(10): # 假设我们只训练10个周期
for i, (data,labels) in enumerate(train_dl): # 假设train_loader是数据加载器
# 前向传播
outputs = model(data)
loss = criterion(outputs, labels)
# 反向传播
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 将损失记录到TensorBoard中
print("Loss:",loss.item())
writer.add_scalar('Loss', loss.item(), epoch * len(train_dl) + i)
# 关闭summary writer
writer.close()