“Which kind of deep learning is the hardest?”
“Hmm, transfer learning, I guess?”
“Well, it depends, but... probably transfer learning?”
“So it's transfer learning, isn't it?”
Contents
What is domain adaptation?
Domain adaptation methods
Discrepancy-based methods
Adversarial methods
Code implementation
Modified DDC
DANN
Humans are good at transferring what they have learned from one domain to another, for example from riding a bicycle to riding a motorcycle, or from badminton to tennis; this ability reflects the wisdom of learning by analogy. In computer science, transfer learning aims to give computers the same ability: using knowledge that has already been learned to solve new problems, especially when data is scarce. It is an important step toward general artificial intelligence.
What is domain adaptation?
Domain adaptation means learning the discrepancy between a source domain and a target domain so that a model trained on the source domain can be transferred to the target domain and perform better there. The technique is particularly useful when labeled data in the target domain is scarce or expensive to obtain. Typically the source domain has a large amount of labeled data, while the target domain has a large amount of unlabeled data and only a small amount of labeled data.
Domain adaptation methods
There are many domain adaptation approaches: feature-based adaptation, instance-based adaptation, and model-parameter-based adaptation. The most commonly used is the first, feature-based adaptation, which in turn contains many methods; here only DDC and DANN are introduced.
Discrepancy-based methods
DDC is a classic method for unsupervised DA. It uses MMD (Maximum Mean Discrepancy): choose a kernel that maps both source-domain and target-domain data into a reproducing kernel Hilbert space, compute the mean of each domain's mapped data in that space, and use the difference between the two means as the distance between the domains.
The loss function of this method has two parts: the classification cross-entropy loss on the source domain, and the MMD discrepancy between the source and target domains.
The network looks almost like an ordinary classification network; the only extra part is the MMD branch. My understanding: the source-domain data is labeled, so we can compute a classification loss and learn to classify the source domain well; minimizing the MMD loss at the same time reduces the discrepancy between source and target. If the source domain is classified well and the target domain differs little from the source, then, rounding things off, the target domain should also be classified well.
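For reference, with a feature map φ into an RKHS H, the (squared) MMD between the source features X_s and target features X_t, and the resulting DDC-style training objective, can be written as follows (λ is a trade-off weight; in the code below it is implicitly 1):

$$\mathrm{MMD}^2(X_s, X_t) = \left\| \frac{1}{n_s}\sum_{i=1}^{n_s}\phi(x_i^s) - \frac{1}{n_t}\sum_{j=1}^{n_t}\phi(x_j^t) \right\|_{\mathcal{H}}^2, \qquad \mathcal{L} = \mathcal{L}_{\mathrm{cls}} + \lambda\,\mathrm{MMD}^2(X_s, X_t)$$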
Adversarial methods
For example, the basic idea of RevGrad (ICML 2015) is borrowed from GANs: let a generator (the feature extractor) produce features and let a discriminator judge whether a feature comes from the source or the target domain. If the discriminator cannot tell them apart, the source and target domains are aligned in that feature space, i.e., their discrepancy is small.
This can be trained with the usual GAN min-max scheme, or with the Gradient Reversal Layer (GRL) proposed in the paper: a GRL is inserted between the feature extractor and the domain discriminator. In the forward pass it behaves like a normal network, so the discriminator is trained to minimize its loss and get better at telling the domains apart; in the backward pass the GRL negates the gradient, so the feature extractor is pushed in the opposite direction and learns features that the discriminator cannot distinguish as source or target.
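In formulas, the GRL R_λ acts as the identity in the forward pass and scales the gradient by -λ in the backward pass, so the discriminator descends the domain-classification loss while the feature extractor ascends it:

$$R_\lambda(x) = x, \qquad \frac{\partial R_\lambda(x)}{\partial x} = -\lambda I$$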
Code implementation
The dataset is the Case Western Reserve University (CWRU) bearing dataset. The code below implements transfer-learning domain adaptation for bearing fault diagnosis across different operating conditions.
All of the code below was written by the author; if you spot any mistakes, please point them out, thanks!
# standard-library imports
import sys
import os
# PaddlePaddle imports
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.metric import Accuracy
from paddle.io import DataLoader
from paddle.optimizer.lr import CosineAnnealingDecay
# other third-party libraries
import numpy as np
import pandas as pd
import random
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn import preprocessing
from datetime import datetime
# fix the random seeds so results are reproducible across runs
seed = 102
paddle.seed(seed)
np.random.seed(seed)
random.seed(seed)
AXIS = "_DE_time" # 用驱动端的数据
FAULT_LABEL_DICT = {
"B007":0,
"B014":1,
"B021":2,
"B028":3,
"IR007":4,
"IR014":5,
"IR021":6,
"IR028":7,
"OR007@12":8,
"OR007@3":9,
"OR007@6":10,
"OR014@6":11,
"OR021@12":12,
"OR021@3":13,
"OR021@6":14,
"OR014@3":15,
"normal":16
}
class CWRUDataset(paddle.io.Dataset):
"""
Bearing fault dataset, subclassing paddle.io.Dataset.
"""
def __init__(self, data_dir, time_steps=1024, window=128, mode='train', val_rate=0.3, test_rate=0.5, \
noise=False, snr=None):
"""
Constructor: defines how the raw data is read and how it is split into train / val / test sets.
time_steps: length (number of points) of each sample
window: stride between the start points of adjacent samples (smaller values mean more overlap)
mode: which split to return, one of 'train', 'val', 'test'
val_rate: fraction of all samples held out for validation + test
test_rate: fraction of that held-out part that becomes the test set
noise: whether to add noise to the samples
snr: signal-to-noise ratio (in dB) of the added noise
"""
super(CWRUDataset, self).__init__()
self.time_steps = time_steps
self.mode = mode
self.noise = noise
self.snr = snr
self.window = window
self.feature_all, self.label_all = self.transform(data_dir)
# split into a training set and a held-out set (validation + test)
train_feature, val_feature, train_label, val_label = \
train_test_split(self.feature_all, self.label_all, test_size=val_rate, random_state=seed)
# standardize using statistics fitted on the training set
train_feature, val_feature = self.standardization(train_feature, val_feature)
# split the held-out set into validation and test sets
val_feature, test_feature, val_label, test_label = \
train_test_split(val_feature, val_label, test_size=test_rate, random_state=seed)
if self.mode == 'train':
self.feature = train_feature
self.label = train_label
elif self.mode == 'val':
self.feature = val_feature
self.label = val_label
elif self.mode == 'test':
self.feature = test_feature
self.label = test_label
else:
raise Exception("mode can only be one of ['train', 'val', 'test']")
def transform(self, data_dir) :
"""
Read every .mat file under data_dir and build the (feature, label) arrays.
"""
feature, label = [], []
# list the .mat files in the directory
matList = os.listdir(data_dir)
for mat in matList:
# fault type encoded in the file name
matType = mat.split("_")[-3]
# numeric label for this fault type
lab = FAULT_LABEL_DICT[matType]
# full path of the .mat file
matPath = os.path.join(data_dir, mat)
# a .mat file contains signals recorded at several sensor positions;
# keep only the drive-end (AXIS) channel
mat_data = loadmat(matPath)
mat_data_keys = list(mat_data.keys())
for key in mat_data_keys:
if AXIS in key:
index = key
break
mat_data = mat_data[index]
# ---------------------------------------------------------------
#start, end = 0, self.time_steps
# slide a window of length self.time_steps over the signal with stride self.window to build samples
for i in range(0, len(mat_data) - self.time_steps, self.window):
sub_mat_data = mat_data[i: (i+self.time_steps)].reshape(-1,)
# optionally add noise to the sample
if self.noise:
sub_mat_data = self.awgn(sub_mat_data, self.snr)
feature.append(sub_mat_data)
label.append(lab)
return np.array(feature, dtype='float32'), np.array(label, dtype="int64")
def __getitem__(self, index):
"""
Return the (feature, label) pair at the given index.
"""
feature = self.feature[index]
# reshape the 1-D signal into a square "image" and replicate it to 3 channels to match ResNet's input
n = int(np.sqrt(len(feature)))
# rearrange feature into an n x n 2-D array
feature = np.reshape(feature, (n, n))
# add a channel dimension: (1, n, n)
feature = feature[np.newaxis,:]
# replicate 3 times to get shape (3, n, n)
feature = np.concatenate((feature, feature, feature), axis=0)
label = self.label[index]
feature = feature.astype('float32')
label = np.array([label], dtype="int64")
return feature, label
def __len__(self):
"""
Return the total number of samples in this split.
"""
return len(self.feature)
def awgn(self, data, snr, seed=seed):
"""
Add white Gaussian noise at the given SNR (in dB).
"""
np.random.seed(seed)
snr = 10 ** (snr / 10.0)
xpower = np.sum(data ** 2) / len(data)
npower = xpower / snr
noise = np.random.randn(len(data)) * np.sqrt(npower)
return np.array(data + noise)
def standardization(self, train_data, val_data):
"""
Standardize with statistics fitted on the training data.
"""
scalar = preprocessing.StandardScaler().fit(train_data)
train_data = scalar.transform(train_data)
val_data = scalar.transform(val_data)
return train_data, val_data
# build the datasets
sourceValRate = 0.25
targetValRate = 0.6
sourceDir = "/home/aistudio/data/data290814/12k_Drive_End/0"
targetDir = "/home/aistudio/data/data290814/12k_Drive_End/1"
sourceTrain = CWRUDataset(sourceDir, mode='train', val_rate=sourceValRate)
targetTrain = CWRUDataset(targetDir, mode='train', val_rate=targetValRate)
sourceVal = CWRUDataset(sourceDir, mode='val', val_rate=sourceValRate)
targetVal = CWRUDataset(targetDir, mode='val', val_rate=targetValRate)
sourceTest = CWRUDataset(sourceDir, mode='test', val_rate=sourceValRate)
targetTest = CWRUDataset(targetDir, mode='test', val_rate=targetValRate)
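As a quick sanity check (assuming the CWRU .mat files are laid out under the directories above), you can look at one sample; with time_steps=1024 each signal is reshaped into a 32x32 "image" and replicated to 3 channels:

# minimal sanity check of the dataset output
feature, label = sourceTrain[0]
print(feature.shape, feature.dtype)   # expected: (3, 32, 32) float32
print(label.shape, label.dtype)       # expected: (1,) int64
print(len(sourceTrain), len(targetTrain))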
Modified DDC
The deep-learning framework used here is PaddlePaddle. In this DDC implementation I did not use plain MMD but MK-MMD (multi-kernel MMD), which can be seen as a strengthened version of MMD, as follows.
def guassian_kernel(source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
'''
Multi-kernel (or single-kernel) Gaussian kernel matrix: given sample sets x and y, compute and return the corresponding Gaussian kernel matrix.
Params:
source: array of shape (b1, n) with samples from distribution X
target: array of shape (b2, n) with samples from distribution Y
kernel_mul: factor used to spread the bandwidths around the base bandwidth, e.g. bandwidth/kernel_mul, bandwidth, bandwidth*kernel_mul
kernel_num: number of Gaussian kernels
fix_sigma: if set, use this fixed bandwidth (single-kernel MMD)
Return:
sum(kernel_val): the sum of the individual kernel matrices
'''
# stack the two sample sets, X samples on top and Y samples below, giving (b1+b2, n) samples in total
n_samples = int(source.shape[0]) + int(target.shape[0])
total = np.concatenate((source, target), axis=0)
# reshape to (1, b1+b2, n) and broadcast the last two dims to (b1+b2, b1+b2, n), i.e. copy along rows
total0 = np.expand_dims(total, axis=0)
total0 = np.broadcast_to(total0, [int(total.shape[0]), int(total.shape[0]), int(total.shape[1])])
# reshape to (b1+b2, 1, n) and broadcast the last two dims to (b1+b2, b1+b2, n), i.e. copy along columns
total1 = np.expand_dims(total, axis=1)
total1 = np.broadcast_to(total1, [int(total.shape[0]), int(total.shape[0]), int(total.shape[1])])
# entry (i, j, :) of total0 - total1 is the difference between row i and row j of total
# summing the squares over the last axis gives the squared L2 distance, i.e. the numerator in the Gaussian kernel exponent
L2_distance_square = np.sum(np.square(total0 - total1), axis=2)
# choose the bandwidth (sigma) of the Gaussian kernel
if fix_sigma:
bandwidth = fix_sigma
else:
bandwidth = np.sum(L2_distance_square) / (n_samples ** 2 - n_samples)
# multi-kernel MMD
# take kernel_num bandwidth values centered on the base bandwidth and spaced by factors of kernel_mul (e.g. with a base of 1 and kernel_num=5: [0.25, 0.5, 1, 2, 4])
bandwidth /= kernel_mul ** (kernel_num // 2)
bandwidth_list = [bandwidth * (kernel_mul ** i) for i in range(kernel_num)]
# print(bandwidth_list)
# the Gaussian kernel, evaluated once per bandwidth
kernel_val = [np.exp(-L2_distance_square / bandwidth_temp) for bandwidth_temp in bandwidth_list]
# the final kernel matrix is the sum of the individual kernel matrices
return sum(kernel_val)  # merge the multiple kernels
def MK_MMD(source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
'''
Compute the MK-MMD distance between source-domain and target-domain samples.
Params:
source: array of shape (b1, n) with samples from distribution X
target: array of shape (b2, n) with samples from distribution Y
kernel_mul: factor used to spread the bandwidths around the base bandwidth
kernel_num: number of Gaussian kernels
fix_sigma: if set, use this fixed bandwidth (single-kernel MMD)
Return:
loss: the MK-MMD loss
'''
batch_size = int(source.shape[0])  # assumes the source and target batch sizes are the same
kernels = guassian_kernel(source, target,kernel_mul=kernel_mul, kernel_num=kernel_num, fix_sigma=fix_sigma)
# combine the four blocks of the joint kernel matrix: K_ss + K_tt - K_st - K_ts
loss = 0
for i in range(batch_size):
s1, s2 = i, (i + 1) % batch_size
t1, t2 = s1 + batch_size, s2 + batch_size
loss += kernels[s1, s2] + kernels[t1, t2]
loss -= kernels[s1, t2] + kernels[s2, t1]
# average over the batch to obtain the MK-MMD estimate
n_loss = loss / float(batch_size)
return n_loss
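One caveat: the two functions above operate on NumPy arrays, so when MK_MMD is called on Paddle tensors inside the model below, the returned value is a plain float and the MMD term contributes no gradient to the feature extractor. A minimal sketch of a differentiable version written with Paddle ops (hypothetical name mk_mmd_paddle, same interface and kernel settings) might look like this:

def mk_mmd_paddle(source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
    # differentiable MK-MMD on paddle tensors; mirrors guassian_kernel + MK_MMD above
    n_samples = source.shape[0] + target.shape[0]
    total = paddle.concat([source, target], axis=0)                      # (b1+b2, n)
    total0 = total.unsqueeze(0).expand([n_samples, n_samples, total.shape[1]])
    total1 = total.unsqueeze(1).expand([n_samples, n_samples, total.shape[1]])
    l2_dist = paddle.sum((total0 - total1) ** 2, axis=2)                 # pairwise squared L2 distances
    if fix_sigma:
        bandwidth = fix_sigma
    else:
        bandwidth = paddle.sum(l2_dist) / (n_samples ** 2 - n_samples)
    bandwidth = bandwidth / (kernel_mul ** (kernel_num // 2))
    kernels = sum(paddle.exp(-l2_dist / (bandwidth * kernel_mul ** k)) for k in range(kernel_num))
    b = source.shape[0]                                                  # assumes equal source/target batch sizes
    loss = paddle.zeros([1])
    for i in range(b):
        s1, s2 = i, (i + 1) % b
        t1, t2 = s1 + b, s2 + b
        loss += kernels[s1, s2] + kernels[t1, t2] - kernels[s1, t2] - kernels[s2, t1]
    return loss / float(b)

Swapping MK_MMD for mk_mmd_paddle inside the model's forward would let the MMD term actually pull the source and target feature distributions together during training.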
For the model, a ResNet-18 serves as the feature extractor; the same backbone processes both the source and the target batch, which amounts to weight sharing, and fully connected layers on top serve as the classifier.
from paddle.vision.models import resnet18
class ResNetDoMain(nn.Layer):
def __init__(self, num_classes):
super(ResNetDoMain, self).__init__()
self.backbone = resnet18()
self.fc1 = nn.Sequential(nn.Linear(1000, 512), nn.ReLU(), nn.Dropout(0.5))
self.fc2 = nn.Linear(512, num_classes)
def forward(self, source, target):
sourceFeature = self.backbone(source)
targetFeature = self.backbone(target)
MKDloss = MK_MMD(sourceFeature, targetFeature)
outputs = self.fc1(sourceFeature)
outputs = self.fc2(outputs)
if not self.training:
outputs = paddle.nn.functional.softmax(outputs)
return outputs, MKDloss
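A quick shape check of the model (the 3-channel 32x32 input matches what CWRUDataset produces; the 17 classes come from FAULT_LABEL_DICT). This is only a hypothetical smoke test, not part of the training script:

# forward two dummy batches through the model
net = ResNetDoMain(num_classes=len(FAULT_LABEL_DICT))
dummy_src = paddle.randn([4, 3, 32, 32])
dummy_tar = paddle.randn([4, 3, 32, 32])
outputs, mkd = net(dummy_src, dummy_tar)
print(outputs.shape)   # expected: [4, 17]
print(float(mkd))      # MK-MMD value between the two feature batches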
# training batch size
batch_size = 16
# number of training epochs
epoch = 50
# base learning rate
base_lr = 0.001
# save the model every saveEpoch epochs
saveEpoch = 2
# number of fault classes
classNum = len(FAULT_LABEL_DICT)
# directory for saved models
modelSaveDir = "./work/models"
# directory for training logs
modelLogDir = "./work/logs"
# current date and time
now = datetime.now()
# path of the training log file
modelLogPath = os.path.join(modelLogDir, "e{}_b{}_lr{}_t{}|{}.txt".format(epoch, batch_size, base_lr, now.date(),now.time()))
# create the (empty) training log file
with open(modelLogPath, "w") as _:
pass
# load the data with DataLoader
sourceTrainLoader = DataLoader(sourceTrain, batch_size=batch_size, shuffle=True, drop_last=True)
targetTrainLoader = DataLoader(targetTrain, batch_size=batch_size, shuffle=True, drop_last=True)
sourceValLoader = DataLoader(sourceVal, batch_size=batch_size, drop_last=True)
targetValLoader = DataLoader(targetVal, batch_size=batch_size, drop_last=True)
net = ResNetDoMain(classNum)
# learning-rate decay schedule, here CosineAnnealingDecay as an example
# T_max: number of epochs over which the learning rate decays from the base value down to eta_min
# eta_min: minimum learning rate, default 0
lr_scheduler = CosineAnnealingDecay(learning_rate=base_lr, T_max=35, eta_min=0, verbose=True)
opt = paddle.optimizer.Adam(learning_rate=lr_scheduler, parameters=net.parameters())
# zip stops at the shorter of the two loaders, so each epoch pairs batches only up to the length of the shorter loader;
# the remaining batches of the longer loader are simply not used in that epoch.
for i in range(epoch):
with open(modelLogPath, "a") as f:
net.train()
for batch_id, ((src, srcLab), (tar, tarLab)) in enumerate(zip(sourceTrainLoader, targetTrainLoader)):
src = paddle.to_tensor(src)
tar = paddle.to_tensor(tar)
srcLab = paddle.to_tensor(srcLab).reshape([-1])
tarLab = paddle.to_tensor(tarLab)
srcClass, MKDloss = net(src, tar)
# forward pass: srcClass has shape [batch_size, classNum], srcLab has shape [batch_size]
# total loss: source-domain classification loss plus the MK-MMD discrepancy
loss = F.cross_entropy(srcClass, srcLab) + MKDloss
# mean over the batch
avg_loss = paddle.mean(loss)
if batch_id % 40 == 0:
now = datetime.now()
print("{}|{}-TRAIN INFO:: EPOCH:{} BATCHID:{} LOSS:{}".format(now.date(),now.time(),i, batch_id, avg_loss.numpy()))
f.write("{}|{}-TRAIN INFO:: EPOCH:{} BATCHID:{} LOSS:{}".format(now.date(),now.time(),i, batch_id, avg_loss.numpy()))
f.write("\n")
# backward pass, parameter update, and gradient reset
avg_loss.backward()
opt.step()
opt.clear_grad()
now = datetime.now()
print("{}|{}-INFO:: START EVAL".format(now.date(),now.time()))
f.write("{}|{}-INFO:: START EVAL".format(now.date(),now.time()))
f.write("\n")
net.eval()
with paddle.no_grad():
srcAcc = []
tarAcc = []
for batch_id, ((src, srcLab), (tar, tarLab)) in enumerate(zip(sourceValLoader, targetValLoader)):
src = paddle.to_tensor(src)
tar = paddle.to_tensor(tar)
srcLab = paddle.to_tensor(srcLab).reshape([-1])
tarLab = paddle.to_tensor(tarLab).reshape([-1])
# classify the source batch (src)
srcClass, MKDloss = net(src, tar)
# classify the target batch (tar)
tarClass, MKDloss = net(tar, src)
# paddle.argmax gives the predicted class index for each sample
srcClass = paddle.argmax(srcClass, axis=1)
tarClass = paddle.argmax(tarClass, axis=1)
srcResult = list(srcClass==srcLab)
tarResult = list(tarClass==tarLab)
srcAcc.append(srcResult.count(True)/len(srcResult))
tarAcc.append(tarResult.count(True)/len(tarResult))
srcAcc = np.array(srcAcc)
tarAcc = np.array(tarAcc)
sacc = srcAcc.mean()
tacc = tarAcc.mean()
now = datetime.now()
print("{}|{}-VAL INFO:: EPOCH:{} SRC-ACC:{} TAR-ACC:{}".format(now.date(),now.time(), i, sacc, tacc))
f.write("{}|{}-VAL INFO:: EPOCH:{} SRC-ACC:{} TAR-ACC:{}".format(now.date(),now.time(), i, sacc, tacc))
f.write("\n")
if i % saveEpoch == 0:
paddle.save(net.state_dict(), os.path.join(modelSaveDir, 'epoch{}_sacc{}_tacc{}.pdparams'.format(i, sacc, tacc)))
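The test splits (sourceTest / targetTest) created earlier are never used in the loop above. A minimal sketch for evaluating a saved checkpoint on the target-domain test set (the checkpoint path is a placeholder you need to fill in) could look like this:

# hypothetical evaluation of a saved DDC checkpoint on the target test set
testLoader = DataLoader(targetTest, batch_size=batch_size, drop_last=True)
ckpt_path = os.path.join(modelSaveDir, "<saved checkpoint>.pdparams")  # replace with an actual file
net = ResNetDoMain(classNum)
net.set_state_dict(paddle.load(ckpt_path))
net.eval()
correct, total = 0, 0
with paddle.no_grad():
    for tar, tarLab in testLoader:
        tarLab = tarLab.reshape([-1])
        logits, _ = net(tar, tar)          # the model expects (source, target); feed the target batch twice
        pred = paddle.argmax(logits, axis=1)
        correct += int(paddle.sum(paddle.cast(pred == tarLab, 'int64')))
        total += tarLab.shape[0]
print("target test accuracy:", correct / total)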
DANN
from paddle.nn import Sequential, Conv2D, BatchNorm1D, BatchNorm2D, ReLU, MaxPool2D, Linear
from paddle.vision.models import resnet18
# Gradient Reversal Layer (GRL)
class GradientReverseLayer(paddle.autograd.PyLayer):
@staticmethod
def forward(ctx, x, coeff):
ctx.coeff = coeff
return x.clone()  # use clone() so the original tensor is not modified
@staticmethod
def backward(ctx, grad_output):
# Return only one tensor, as PaddlePaddle expects only one output
grad_input = -ctx.coeff * grad_output
return grad_input
# Domain discriminator
class Discriminator(nn.Layer):
def __init__(self):
super(Discriminator, self).__init__()
self.layer = nn.Sequential(
nn.Linear(1000, 512),
nn.BatchNorm1D(512),
nn.ReLU(),
nn.Linear(512, 512),
nn.BatchNorm1D(512),
nn.ReLU(),
nn.Linear(512, 512),
nn.BatchNorm1D(512),
nn.ReLU(),
nn.Linear(512, 1),
)
def forward(self, x):
# Apply the gradient reversal layer with coefficient
x = GradientReverseLayer.apply(x, 1.0)
x = self.layer(x)
return x
# Feature extractor
class Feature(nn.Layer):
def __init__(self):
super(Feature, self).__init__()
self.backbone = resnet18()
def forward(self, x):
x = self.backbone(x)
return x
# Label classifier
class Classifier(nn.Layer):
def __init__(self, classNum):
super(Classifier, self).__init__()
self.layer = Sequential(
Linear(1000, 512),
ReLU(),
Linear(512, 512),
ReLU(),
Linear(512, classNum),
)
def forward(self, h):
c = self.layer(h)
return c
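A small check that the GRL really flips gradients (using GradientReverseLayer as defined above): the gradient with respect to the input should equal -coeff everywhere.

# verify the gradient reversal: d(sum(y))/dx should be -coeff for every element
x = paddle.randn([2, 1000])
x.stop_gradient = False
y = GradientReverseLayer.apply(x, 0.5)
y.sum().backward()
print(x.grad[0, :5])   # expected: every entry equal to -0.5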
# training batch size
batch_size = 16
# number of training epochs
epoch = 50
# base learning rate
base_lr = 0.001
# save the models every saveEpoch epochs
saveEpoch = 2
# number of fault classes
classNum = len(FAULT_LABEL_DICT)
# directory for saved models
modelSaveDir = "./work/DANN-models"
# directory for training logs
modelLogDir = "./work/DANN-logs"
# current date and time
now = datetime.now()
# path of the training log file
modelLogPath = os.path.join(modelLogDir, "e{}_b{}_lr{}_t{}|{}.txt".format(epoch, batch_size, base_lr, now.date(),now.time()))
# create the (empty) training log file
with open(modelLogPath, "w") as _:
pass
# load the data with DataLoader
sourceTrainLoader = DataLoader(sourceTrain, batch_size=batch_size, shuffle=True, drop_last=True)
targetTrainLoader = DataLoader(targetTrain, batch_size=batch_size, shuffle=True, drop_last=True)
sourceValLoader = DataLoader(sourceVal, batch_size=batch_size, drop_last=True)
targetValLoader = DataLoader(targetVal, batch_size=batch_size, drop_last=True)
# initialize the networks
feature = Feature()
classifier = Classifier(classNum)
discriminator = Discriminator()
# Example of a per-module parameter list with different learning rates (not used here).
# In Paddle, model parameters are accessed via model.parameters(), which returns a generator.
# params_list = [
#     {"params": feature.parameters(), "lr": 0.5 * base_lr},
#     {"params": classifier.parameters(), "lr": base_lr},
#     {"params": discriminator.parameters(), "lr": base_lr},
# ]
# learning-rate decay schedule, here CosineAnnealingDecay as an example
# T_max: number of epochs over which the learning rate decays from the base value down to eta_min
# eta_min: minimum learning rate, default 0
lr_scheduler = CosineAnnealingDecay(learning_rate=base_lr, T_max=35, eta_min=0, verbose=True)
featureOpt = paddle.optimizer.Adam(learning_rate=lr_scheduler, parameters=feature.parameters())
classifierOpt = paddle.optimizer.Adam(learning_rate=lr_scheduler, parameters=classifier.parameters())
discriminatorOpt = paddle.optimizer.Adam(learning_rate=lr_scheduler, parameters=discriminator.parameters())
class_criterion = paddle.nn.loss.CrossEntropyLoss()
domain_criterion = paddle.nn.BCEWithLogitsLoss()
for i in range(epoch):
with open(modelLogPath, "a") as f:
feature.train()
classifier.train()
discriminator.train()
for batch_id, ((src, srcLab), (tar, tarLab)) in enumerate(zip(sourceTrainLoader, targetTrainLoader)):
# number of samples in the source batch
batch_num = src.shape[0]
# concatenate source and target data so batch_norm sees both; otherwise its mean/var statistics could be skewed (the two domains have different statistics)
mixed_data = paddle.concat([src, tar], axis=0)
domain_label = paddle.zeros([src.shape[0] + tar.shape[0], 1])
# label source-domain samples as 1
domain_label[:src.shape[0]] = 1
# compute model outputs and losses
logits = feature(mixed_data)
outputs = classifier(logits)
domain_outputs = discriminator(logits)
# classification loss (computed on the source samples only)
clsLoss= class_criterion(outputs[:batch_num, :], srcLab)
clsLoss = paddle.mean(clsLoss)
clsLoss.backward(retain_graph=True)
classifierOpt.step()
featureOpt.step()
classifierOpt.clear_grad()
featureOpt.clear_grad()
# domain-discrimination loss
dmaLoss = domain_criterion(domain_outputs, domain_label)
dmaLoss = paddle.mean(dmaLoss)  # make sure dmaLoss is a scalar
# print(dmaLoss)
dmaLoss.backward()
discriminatorOpt.step()
discriminatorOpt.clear_grad()
if batch_id % 20 == 0:
now = datetime.now()
print("{}|{}-TRAIN INFO:: EPOCH:{} BATCHID:{} CLSLOSS:{} DOMAINLOSS:{}".format(now.date(), now.time(), i, batch_id, clsLoss.numpy(), dmaLoss.numpy()))
f.write("{}|{}-TRAIN INFO:: EPOCH:{} BATCHID:{} CLSLOSS:{} DOMAINLOSS:{}".format(now.date(), now.time(), i, batch_id, clsLoss.numpy(), dmaLoss.numpy()))
f.write("\n")
now = datetime.now()
print("{}|{}-INFO:: START EVAL".format(now.date(),now.time()))
f.write("{}|{}-INFO:: START EVAL".format(now.date(),now.time()))
f.write("\n")
feature.eval()
classifier.eval()
discriminator.eval()
with paddle.no_grad():
srcAcc = []
tarAcc = []
doMainAcc = []
for batch_id, ((src, srcLab), (tar, tarLab)) in enumerate(zip(sourceValLoader, targetValLoader)):
srcLab = paddle.to_tensor(srcLab).reshape([-1])
tarLab = paddle.to_tensor(tarLab).reshape([-1])
# number of samples in the source batch
batch_num = src.shape[0]
# concatenate source and target data so batch_norm sees both; otherwise its mean/var statistics could be skewed (the two domains have different statistics)
mixed_data = paddle.concat([src, tar], axis=0)
domain_label = paddle.zeros([src.shape[0] + tar.shape[0], 1])
# label source-domain samples as 1
domain_label[:src.shape[0]] = 1
domain_label = domain_label.reshape([-1])
# inference
logits = feature(mixed_data)
outputs = classifier(logits)
domain_outputs = discriminator(logits)
# paddle.argmax gives the predicted class index for each sample
srcClass = paddle.argmax(outputs[:batch_num,:], axis=1)
tarClass = paddle.argmax(outputs[batch_num:,:], axis=1)
# the discriminator has a single logit: predict domain 1 when the logit is positive
doMainClass = paddle.cast(domain_outputs.reshape([-1]) > 0, domain_label.dtype)
srcResult = list(srcClass==srcLab)
tarResult = list(tarClass==tarLab)
doMainResult = list(doMainClass==domain_label)
srcAcc.append(srcResult.count(True)/len(srcResult))
tarAcc.append(tarResult.count(True)/len(tarResult))
doMainAcc.append(doMainResult.count(True)/len(doMainResult))
srcAcc = np.array(srcAcc)
tarAcc = np.array(tarAcc)
doMainAcc = np.array(doMainAcc)
sacc = srcAcc.mean()
tacc = tarAcc.mean()
dacc = doMainAcc.mean()
now = datetime.now()
print("{}|{}-VAL INFO:: EPOCH:{} SRC-ACC:{} TAR-ACC:{} DOMAIN-ACC:{}".format(now.date(),now.time(), i, sacc, tacc, dacc))
f.write("{}|{}-VAL INFO:: EPOCH:{} SRC-ACC:{} TAR-ACC:{} DOMAIN-ACC:{}".format(now.date(),now.time(), i, sacc, tacc, dacc))
if i % saveEpoch == 0:
paddle.save(feature.state_dict(), os.path.join(modelSaveDir, 'feature-epoch{}_sacc{}_tacc{}.pdparams'.format(i, sacc, tacc)))
paddle.save(classifier.state_dict(), os.path.join(modelSaveDir, 'classifier-epoch{}_sacc{}_tacc{}.pdparams'.format(i, sacc, tacc)))
paddle.save(discriminator.state_dict(), os.path.join(modelSaveDir, 'dis-epoch{}_sacc{}_tacc{}.pdparams'.format(i, sacc, tacc)))
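In the original RevGrad/DANN paper the GRL coefficient is not fixed at 1.0 but ramped up with the training progress p in [0, 1] as lambda_p = 2 / (1 + exp(-10 p)) - 1, which stabilizes early training. A small helper (hypothetical name grl_coeff) if you want to try that schedule; the call site would replace the fixed 1.0 in Discriminator.forward:

import math

def grl_coeff(cur_epoch, total_epoch, gamma=10.0):
    # DANN-style ramp-up: close to 0 at the start of training, approaching 1 at the end
    p = cur_epoch / float(total_epoch)
    return 2.0 / (1.0 + math.exp(-gamma * p)) - 1.0

# usage: x = GradientReverseLayer.apply(x, grl_coeff(i, epoch))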
Learning resources
飞桨AI Studio星河社区 (PaddlePaddle AI Studio): https://aistudio.baidu.com/education/group/info/1978
Project: https://aistudio.baidu.com/projectdetail/8240469