DCGAN简介
生成对抗网络(Generative Adversarial Networks, GANs)由两个模型组成:一个是生成器,用于生成以假乱真的数据;另一个是判别器,用于判断输入的数据是真实样本还是生成器的输出。生成器为判别器提供训练数据,帮助判别器提高准确率;判别器则给出对生成样本的预测结果,为生成器提供优化的方向。其实在1990年前后,对抗的思想就已经应用于无监督人工神经网络领域,通过最小化另一个程序最大化的目标函数来求解问题。
生成器的输入通常是一些随机向量,然后去生成接近真实的训练数据。为了生成逼真的数据,生成器的输出会作为判别器的输入,生成器会收到来自判别器的反馈(判别器的识别结果)。生成器可以通过判别器的反馈,知道自己生成结果跟真实数据的差距,从而不断的提高自己,同时判别器也从生成器源源不断获得训练数据,不断提高自己的鉴别能力。
生成器的目标函数
$$\text{minimize}\ E_{x \sim P_G(x^*)} [\log(1 - D(x))], \qquad D(x) \to 1$$
- 假设真实样本标签为 1, 生成样本标签为 0;
- $z \sim P_z$ : 随机噪声
- $P_G(G(z)) = P_G(x^*)$ : 生成数据的分布
- $D(x)$ : 判别器输出结果
判别器的目标函数
- 准确分辨出真实样本 : $x \sim P_r(x)$
$$\text{maximize}\ E_{x \sim P_r(x)} [\log D(x)], \qquad D(x) \to 1$$
- 准确分辨出生成样本 :
$$\text{maximize}\ E_{x \sim P_G(x^*)} [\log(1 - D(x))], \qquad D(x) \to 0$$
综合目标函数
$$\min_G \max_D L(D, G) = E_{x \sim P_r(x)} [\log D(x)] + E_{x \sim P_G(x^*)} [\log(1 - D(x))]$$
DCGAN实现
import os
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.layers import Conv2D,LeakyReLU,Input,BatchNormalization,Flatten,Conv2DTranspose,Activation,Dense,Reshape,Dropout
from tqdm import tqdm
生成模型
def create_generator(alpha=0.2):
    """Build the DCGAN generator as a Keras functional model.

    A 128-dimensional input vector is projected by a bias-free dense layer
    to 28 * 28 * 128 units and reshaped to (28, 28, 128). Three 5x5
    transposed convolutions with 128 filters follow (strides 1, 2 and 2),
    each one followed by LeakyReLU and batch normalisation. A final 3x3
    convolution with three filters and a tanh activation produces the
    output image.

    Args:
        alpha: Value passed to every LeakyReLU layer as ``alpha``.

    Returns:
        A ``Model`` mapping the input vector to the tanh-activated output.
    """
    def activate_and_normalise(tensor):
        # Every hidden stage ends with the same LeakyReLU -> BatchNorm pair.
        tensor = LeakyReLU(alpha=alpha)(tensor)
        return BatchNormalization()(tensor)

    noise = Input(shape=(128,))
    features = Dense(units=28 * 28 * 128, use_bias=False)(noise)
    features = activate_and_normalise(features)
    features = Reshape((28, 28, 128))(features)

    # One stride-1 stage followed by two stride-2 stages.
    for stride in ((1, 1), (2, 2), (2, 2)):
        features = Conv2DTranspose(
            filters=128,
            strides=stride,
            kernel_size=(5, 5),
            padding='same',
            use_bias=False,
        )(features)
        features = activate_and_normalise(features)

    rgb = Conv2D(
        filters=3, kernel_size=(3, 3), strides=(1, 1), padding='same'
    )(features)
    image = Activation('tanh')(rgb)
    return Model(noise, image)
# Module-level generator instance; train_step and the default argument of
# generate_and_save_images both refer to it.
generator = create_generator()
# Adam optimiser (learning rate 1e-4) used to update the generator's weights.
generator_opt = Adam(learning_rate=1e-4)
判别模型
def create_discriminator(alpha=0.2, dropout=0.2):
    """Build the DCGAN discriminator as a Keras functional model.

    The network takes a (112, 112, 3) image and applies four 5x5, stride-2
    convolutions with 128, 64, 64 and 64 filters. Each convolution is
    followed by LeakyReLU and dropout. The result is flattened and mapped
    by a single-unit dense layer with no activation, so the output is a
    raw logit.

    Args:
        alpha: Value passed to every LeakyReLU layer as ``alpha``.
        dropout: Rate passed to every Dropout layer.

    Returns:
        A ``Model`` mapping an image to one unactivated score.
    """
    image = Input(shape=(112, 112, 3))

    features = image
    for n_filters in (128, 64, 64, 64):
        features = Conv2D(
            filters=n_filters,
            kernel_size=(5, 5),
            strides=(2, 2),
            padding='same',
        )(features)
        features = LeakyReLU(alpha=alpha)(features)
        features = Dropout(rate=dropout)(features)

    flat = Flatten()(features)
    logit = Dense(units=1)(flat)
    return Model(image, logit)
# Module-level discriminator instance; train_step refers to it directly.
discriminator = create_discriminator()
# Adam optimiser (learning rate 1e-4) used to update the discriminator's weights.
discriminator_opt = Adam(learning_rate=1e-4)
损失函数
# Shared binary cross-entropy. from_logits=True because the discriminator
# ends in a Dense(1) layer with no activation.
loss = BinaryCrossentropy(from_logits=True)
def discriminator_loss(real, fake):
    """Return the discriminator loss for one batch.

    Args:
        real: Discriminator outputs for real images.
        fake: Discriminator outputs for generated images.

    Returns:
        The sum of the cross-entropy of ``real`` against all-ones targets
        and the cross-entropy of ``fake`` against all-zeros targets.
    """
    real_targets = tf.ones_like(real)
    fake_targets = tf.zeros_like(fake)
    return loss(real_targets, real) + loss(fake_targets, fake)
def generator_loss(fake):
    """Return the generator loss for one batch.

    Args:
        fake: Discriminator outputs for generated images.

    Returns:
        The cross-entropy of ``fake`` against all-ones targets, which is
        small when the discriminator scores generated images as real.
    """
    wanted = tf.ones_like(fake)
    return loss(wanted, fake)
Training Step
单个训练步骤:
- 生成随机噪声向量
- 根据随机向量生成图像
- 判断真实图像和生成图像的真伪
- 计算生成损失和判别损失
- 计算生成模型的损失函数对于模型参数的梯度
- 更新生成模型的参数
- 计算判别模型的损失函数对于模型参数的梯度
- 更新判别模型的参数
@tf.function
def train_step(images, batch_size, noise_dim=128):
    """Run one optimisation step for both the generator and the discriminator.

    Noise is sampled and turned into images by the generator. The
    discriminator scores the real and the generated batch, both losses are
    computed under their own gradient tape, and each network is updated
    with its own optimiser.

    Args:
        images: Batch of real images fed to the discriminator.
        batch_size: Number of noise vectors (generated images) to sample.
        noise_dim: Length of each noise vector. It must equal the
            generator's input size, which ``create_generator`` fixes at
            128. The previous default of 100 did not match and made any
            call relying on the default fail.

    Returns:
        A ``(gen_loss, dis_loss)`` tuple for this step.
    """
    noise = tf.random.normal((batch_size, noise_dim))

    # Two tapes are needed: each loss is differentiated with respect to a
    # different set of variables.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as dis_tape:
        gen_images = generator(noise, training=True)

        real_pred = discriminator(images, training=True)
        fake_pred = discriminator(gen_images, training=True)

        gen_loss = generator_loss(fake_pred)
        dis_loss = discriminator_loss(real_pred, fake_pred)

    # Generator update.
    gen_gradient = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gen_opt_args = zip(gen_gradient, generator.trainable_variables)
    generator_opt.apply_gradients(gen_opt_args)

    # Discriminator update.
    dis_gradient = dis_tape.gradient(dis_loss, discriminator.trainable_variables)
    dis_opt_args = zip(dis_gradient, discriminator.trainable_variables)
    discriminator_opt.apply_gradients(dis_opt_args)

    return gen_loss, dis_loss
训练数据
训练数据来自一个硬币图像数据集,包含 32 个国家的 211 种不同硬币。
import pathlib
import numpy as np
from glob import glob
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
# Glob pattern for every .jpg file two directory levels below the coin
# dataset's data folder in the user's home directory.
_dataset_root = pathlib.Path.home() / 'DeepVision' / 'SeData' / 'coins' / 'data'
file_patten = str(_dataset_root / '*' / '*' / '*.jpg')
# All matching file paths, collected into a NumPy array.
DataSetPaths = np.array(glob(file_patten))
def process_image(image):
    """Rescale pixel values linearly so that 0 maps to -1 and 255 maps to 1.

    Args:
        image: Array (or scalar) of pixel values.

    Returns:
        ``(image - 127.5) / 127.5``, the range produced by the generator's
        tanh output.
    """
    half_range = 127.5
    centred = image - half_range
    return centred / half_range
def resize_image(original_image, size=(112, 112)):
    """Resize an image and return its pixels as a ``uint8`` NumPy array.

    Args:
        original_image: Object exposing ``resize(size_tuple)``; the loading
            loop passes a PIL image.
        size: Target size, forwarded to ``resize`` as a tuple.

    Returns:
        The resized image converted to a NumPy array of dtype ``uint8``.
    """
    target = tuple(size)
    pixels = np.array(original_image.resize(target))
    return pixels.astype(np.uint8)
# Load every image, resize it, keep only 3-channel images and normalise
# them to [-1, 1].
train_data = []
for image_path in tqdm(DataSetPaths, ncols=60):
    # The context manager releases the file handle as soon as the pixels
    # have been copied into the resized array.
    with Image.open(image_path) as source_image:
        image = resize_image(source_image)
    # Keep only H x W x 3 arrays. The explicit ndim test rejects 2-D
    # (single-channel) arrays by rank rather than relying on the image
    # width happening to differ from 3.
    if image.ndim != 3 or image.shape[-1] != 3:
        continue
    # Store as float32: half the memory of the float64 produced by
    # process_image, and the dtype the Keras models compute in.
    train_data.append(process_image(image).astype(np.float32))
train_data = np.array(train_data)
100%|██████████████████| 8101/8101 [00:30<00:00, 264.58it/s]
训练模型
def generate_batch_image(train_data, batch_size):
    """Draw a random batch of distinct samples from ``train_data``.

    Args:
        train_data: Array indexed along its first axis.
        batch_size: Number of samples to draw; sampling is without
            replacement, so it may not exceed ``len(train_data)``.

    Returns:
        ``train_data`` indexed by the randomly chosen positions.
    """
    sample_count = len(train_data)
    picked = np.random.choice(sample_count, size=batch_size, replace=False)
    return train_data[picked]
# Batch size used for both the real images and the sampled noise.
BS = 128
# Per-epoch mean generator / discriminator losses.
Gloss = []
Dloss = []
# 200 epochs of 150 steps; each step trains on a freshly sampled random batch.
for epoch in tqdm(range(200),ncols=60):
    # Per-step losses of the current epoch (loss1: generator, loss2: discriminator).
    loss1 = []
    loss2 = []
    for step in range(150):
        batch_images = generate_batch_image(train_data, batch_size=BS)
        # noise_dim=128 matches the generator's 128-dimensional input.
        gloss, dloss = train_step(batch_images, noise_dim=128, batch_size=BS)
        loss1.append(gloss)
        loss2.append(dloss)
    #print("Gen_loss : %.3f, Dis_loss : %.3f"%(np.mean(loss1), np.mean(loss2)))
    Gloss.append(np.mean(loss1))
    Dloss.append(np.mean(loss2))
100%|███████████████████| 200/200 [1:15:27<00:00, 22.64s/it]
测试生成图像
def generate_and_save_images(model=generator, epoc=0, test_input=None):
    """Generate images from ``test_input`` and show them in a square grid.

    Despite its name, the function only displays the figure; nothing is
    written to disk.

    Args:
        model: Callable invoked as ``model(test_input, training=False)``.
            Defaults to the module-level generator.
        epoc: Unused. Kept so existing callers that pass it keep working.
        test_input: Batch of inputs for ``model``. Required.

    Raises:
        ValueError: If ``test_input`` is ``None``.
    """
    if test_input is None:
        raise ValueError('test_input must be a batch of generator inputs, not None')

    predictions = model(test_input, training=False)
    count = predictions.shape[0]

    # Keep the original 6 x 6 layout for up to 36 images and grow the grid
    # for larger batches, which previously made plt.subplot raise.
    side = max(6, int(np.ceil(np.sqrt(count))))

    plt.figure(figsize=(12, 12))
    for i in range(count):
        plt.subplot(side, side, i + 1)
        # Map the [-1, 1] output back to [0, 255] pixel values.
        image = predictions[i, :, :, :] * 127.5 + 127.5
        image = tf.cast(image, tf.uint8)
        plt.imshow(image, cmap='Greys_r')
        plt.axis('off')
    plt.show()
# 36 noise vectors of length 128 (the generator's input size), one per
# cell of the 6 x 6 preview grid.
test_seed = tf.random.normal((36, 128))
generate_and_save_images(test_input=test_seed)