This notebook is an example of Generative Adversarial Networks (GANs) in Kokoyi. Specifically, we are going to use a simple GAN to generate images of handwritten digits.
Generative Adversarial Networks (GANs) are one of the most popular architectures in the class of generative models. A typical GAN simultaneously trains two models: a generative model $G$ and a discriminative model $D$.
The data pipeline looks like this: the generator $G$ maps a latent noise vector $z$ to a fake image, while the discriminator $D$ receives both real and generated images and outputs the probability that its input is real.
$D$ and $G$ play the following two-player minimax game with value function $V(D, G)$:
$$ \min_G\max_D V(D,G)= \mathbb{E}_{x\sim p_{\text{data}}(x)}[\log D(x)] + \mathbb{E}_{z\sim p_{z}(z)}[\log(1 - D(G(z)))]$$
Following Algorithm 1 in the paper, training is split into two main steps. The first step updates the discriminator $D$ and the second step updates the generator $G$.
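For intuition about this objective: the original paper shows that, for a fixed generator, the optimal discriminator is
$$D^*_G(x) = \frac{p_{\text{data}}(x)}{p_{\text{data}}(x) + p_g(x)},$$
where $p_g$ is the distribution of the generated samples. Substituting $D^*_G$ back into $V$ shows that the global minimum of the game is attained exactly when $p_g = p_{\text{data}}$, i.e. when the generator reproduces the data distribution.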
We train $D$ to maximize the probability of assigning the correct label to both real images and fake images generated by $G$, so it acts as a binary classifier. In other words, we want to maximize $\log D(x) + \log(1-D(G(z)))$. Since $\mathrm{BCELoss}(p, y) = -[y\log p + (1-y)\log(1-p)]$, using target $1$ for real images and target $0$ for fake ones yields exactly the negative of this objective, so maximizing it is the same as minimizing the BCE loss. We can write the loss in Kokoyi using BCELoss:
%kokoyi
\Function {Loss_D} {D, x_{true}, x_{fake}}
loss \gets \BCELoss(D(x_{true}), \{1.0\}^{1}) + \BCELoss(D(x_{fake}), \{0.0\}^{1})\\
\Return loss \\
\EndFunction
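For reference, here is a rough plain-PyTorch equivalent of Loss_D. This is a sketch only, not the Kokoyi-compiled code; it assumes D maps a batch of images to probabilities in (0, 1):
import torch
import torch.nn.functional as F

def loss_d(D, x_true, x_fake):
    p_real = D(x_true)                                                    # probability that real images are real
    p_fake = D(x_fake)                                                    # probability that fakes are real
    loss_real = F.binary_cross_entropy(p_real, torch.ones_like(p_real))   # -log D(x)
    loss_fake = F.binary_cross_entropy(p_fake, torch.zeros_like(p_fake))  # -log(1 - D(G(z)))
    return loss_real + loss_fake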
We train $G$ to generate better fake images to fool $D$, so that it cannot tell real from fake. To do so, we want images generated from the latent code, i.e. $G(z)$, to have a higher probability of being classified as real. Rather than minimizing $\log(1-D(G(z)))$, which saturates early in training when $D$ easily rejects the fakes, we follow the standard practice of maximizing $\log D(G(z))$ instead, i.e. we minimize the BCE loss of $D(G(z))$ against the target $1$ with $D$ fixed. We can write the generator's loss in Kokoyi:
%kokoyi
\Function {Loss_G} {D, G, z}
loss \gets \BCELoss(D(G(z)), \{1.0\}^{1}) \\
\Return loss \\
\EndFunction
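And the matching plain-PyTorch sketch of Loss_G, the non-saturating form that trains $G$ against the target label $1$:
def loss_g(D, G, z):
    p_fake = D(G(z))                                                 # probability assigned to generated images
    return F.binary_cross_entropy(p_fake, torch.ones_like(p_fake))   # -log D(G(z))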
Now we can specify the structure of the discriminator $D$ and the generator $G$. To keep things simple, we use a multilayer perceptron (MLP) as $D$ and a stack of transposed convolution blocks as $G$.
%kokoyi
\sigma(x) \gets \Sigmoid(x) \\
\Module{D}{x; W, b}
L \gets |W| \\
h[0 \leq i \leq L] \gets
\begin{cases}
\Flatten(x) & i = 0 \\
\ReLU (W[i-1] @ h[i-1] + b[i-1]) & i < L \\
W[i-1] @ h[i-1] + b[i-1] & otherwise \\
\end{cases} \\
\Return \sigma(h[L]) \\
\EndModule
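For comparison, the same discriminator written directly in PyTorch. This is a sketch; the layer widths match the dims list [784, 256, 100, 1] used when we instantiate D later:
import torch.nn as nn

mlp_d = nn.Sequential(
    nn.Flatten(),                        # h[0] = Flatten(x)
    nn.Linear(28 * 28, 256), nn.ReLU(),  # hidden layers use ReLU
    nn.Linear(256, 100), nn.ReLU(),
    nn.Linear(100, 1),                   # last layer is linear ...
    nn.Sigmoid(),                        # ... followed by the sigmoid
)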
%kokoyi
\Module {TransposedConvBlock} {x; ConvTranspose2d, BatchNorm2d}
\Return \ReLU(BatchNorm2d(ConvTranspose2d(x))) \\
\EndModule
\Module {G} {z; C, H, W, Linear, TransposedConvBlocks, Conv2d}
\hat{z} \gets Linear(z) \\
L \gets |TransposedConvBlocks| \\
h[0 \leq i \leq L] \gets \begin{cases}
\Reshape(\hat{z}, (C, H, W)) & i = 0 \\
TransposedConvBlocks[i-1](h[i-1]) & otherwise \\
\end{cases} \\
\hat{x} \gets \tanh(Conv2d(h[L])) \\
\Return \hat{x} \\
\EndModule
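Again for comparison, a plain-PyTorch sketch of the same generator, annotated with the intermediate shapes used in the completed definitions below:
plain_g = nn.Sequential(
    nn.Linear(100, 256 * 7 * 7),                                            # z_hat = Linear(z)
    nn.Unflatten(1, (256, 7, 7)),                                           # h[0]: (256, 7, 7)
    nn.ConvTranspose2d(256, 128, 4, 2, 1), nn.BatchNorm2d(128), nn.ReLU(),  # h[1]: (128, 14, 14)
    nn.ConvTranspose2d(128, 64, 4, 2, 1), nn.BatchNorm2d(64), nn.ReLU(),    # h[2]: (64, 28, 28)
    nn.Conv2d(64, 1, 3, stride=1, padding=1),                               # x_hat: (1, 28, 28)
    nn.Tanh(),                                                              # output values in [-1, 1]
)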
You can let Kokoyi set up the initialization for D and G (just copy, paste, and fill in what's needed):
class D(torch.nn.Module):
    def __init__(self):
        """Add your code for parameter initialization here (not necessarily the same names)."""
        super().__init__()
        self.W = None
        self.b = None

    def get_parameters(self):
        """Change the following code to return the parameters as a tuple in the order of (W, b)."""
        return None

    forward = kokoyi.symbol["D"]
class TransposedConvBlock(torch.nn.Module):
    def __init__(self):
        """Add your code for parameter initialization here (not necessarily the same names)."""
        super().__init__()
        self.ConvTranspose2d = None
        self.BatchNorm2d = None

    def get_parameters(self):
        """Change the following code to return the parameters as a tuple in the order of (ConvTranspose2d, BatchNorm2d)."""
        return None

    forward = kokoyi.symbol["TransposedConvBlock"]
class G(torch.nn.Module):
    def __init__(self):
        """Add your code for parameter initialization here (not necessarily the same names)."""
        super().__init__()
        self.C = None
        self.H = None
        self.W = None
        self.Linear = None
        self.TransposedConvBlocks = None
        self.Conv2d = None

    def get_parameters(self):
        """Change the following code to return the parameters as a tuple in the order of (C, H, W, Linear, TransposedConvBlocks, Conv2d)."""
        return None

    forward = kokoyi.symbol["G"]
Here are the completed module definitions.
import kokoyi
import torch
import torch.nn as nn
import torch.nn.functional as F
from kokoyi.nn import ConvTranspose2d, BatchNorm2d, Conv2d, Linear
class D(torch.nn.Module):
def __init__(self, dims):
super().__init__()
        # One weight matrix and one bias vector per layer; dims gives the layer widths.
        # (The values from torch.empty are placeholders; the init calls below overwrite them.)
        self.W = nn.ParameterList([nn.Parameter(torch.empty(dims[i + 1], dims[i])) for i in range(len(dims) - 1)])
        self.b = nn.ParameterList([nn.Parameter(torch.empty(dims[i + 1])) for i in range(len(dims) - 1)])
        for param in self.W:
            nn.init.xavier_uniform_(param)
        for param in self.b:
            nn.init.uniform_(param)
def get_parameters(self):
return self.W, self.b
forward = kokoyi.symbol["D"]
class TransposedConvBlock(torch.nn.Module):
def __init__(self, in_channels, out_channels):
super().__init__()
self.ConvTranspose2d = ConvTranspose2d(in_channels, out_channels, 4, 2, 1)
self.BatchNorm2d = BatchNorm2d(out_channels)
def get_parameters(self):
return self.ConvTranspose2d, self.BatchNorm2d
forward = kokoyi.symbol["TransposedConvBlock"]
class G(torch.nn.Module):
def __init__(self, C, H, W, latent_dim, out_channels):
super().__init__()
self.C = C
self.H = H
self.W = W
self.Linear = Linear(latent_dim, self.C * self.H * self.W)
self.TransposedConvBlocks = torch.nn.ModuleList([ #(256, 7, 7)
TransposedConvBlock(256, 128), #(128, 14, 14)
TransposedConvBlock(128, 64) #(64, 28, 28)
])
self.Conv2d = Conv2d(64, out_channels, 3, stride=1, padding=1)
def get_parameters(self):
return self.C, self.H, self.W, self.Linear, self.TransposedConvBlocks, self.Conv2d
forward = kokoyi.symbol["G"]
Let's first do some setup:
import kokoyi
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils as vutils
import matplotlib.pyplot as plt
from torchvision.utils import save_image
import torchvision
import torchvision.transforms as transforms
The MNIST dataset provided by torchvision is used to train the model. It consists of 28x28 grayscale images of handwritten digits and the corresponding integer labels (from 0 to 9).
batch_size = 32
img_size = 28
mnist = torchvision.datasets.MNIST(
root='data/',
train=True,
download=True,
transform=transforms.Compose(
[transforms.Resize(img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
)
)
dataloader = torch.utils.data.DataLoader(
mnist,
batch_size=batch_size,
shuffle=True
)
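It's worth peeking at one batch to confirm the shapes and value range; after Normalize([0.5], [0.5]) the pixels lie in [-1, 1], which matches the tanh output of the generator:
imgs, labels = next(iter(dataloader))
print(imgs.shape)               # torch.Size([32, 1, 28, 28])
print(imgs.min(), imgs.max())   # approximately -1.0 and 1.0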
import os
os.makedirs("gan/fake", exist_ok=True)
# Use GPU if possible
if torch.cuda.is_available():
device_name = 'cuda:0'
else:
device_name = 'cpu'
print('Using device: ', device_name)
device = torch.device(device_name)
kokoyi.set_rt_device(device)
Finally, we can set the hyper-parameters and start training!
latent_dim = 100
C, H, W = 256, 7, 7
channels = 1
d = D([1 * 28 * 28, 256, 100, 1]).to(device)
g = G(C, H, W, latent_dim, channels).to(device)
optimizer_D = torch.optim.Adam(d.parameters(), lr=0.0005, betas=(0.5, 0.999))
optimizer_G = torch.optim.Adam(g.parameters(), lr=0.0005, betas=(0.5, 0.999))
img_list = []
G_losses = []
D_losses = []
fixed_z = torch.randn(25, latent_dim, device=device)
num_epochs = 10
iters = 0
for epoch in range(num_epochs):
for i, (imgs, _) in enumerate(dataloader):
imgs = imgs.to(device)
batchsize = imgs.size(0)
z = torch.randn(batchsize, latent_dim, device=device)
# Training discriminator
optimizer_D.zero_grad()
fakeimages = g(z, batch_level=[1])
Dloss = kokoyi.symbol['Loss_D'](d, imgs, fakeimages.detach(), batch_level=[0, 1, 1])
Dloss = Dloss.mean()
Dloss.backward()
optimizer_D.step()
# Training generator
optimizer_G.zero_grad()
Gloss = kokoyi.symbol['Loss_G'](d, g, z, batch_level=[0, 0, 1])
Gloss = Gloss.mean()
Gloss.backward()
optimizer_G.step()
if i % 500 == 0:
print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f'
% (epoch, num_epochs, i, len(dataloader),
                     Dloss.item(), Gloss.item()))
# Save Losses for plotting later
G_losses.append(Gloss.item())
D_losses.append(Dloss.item())
with torch.no_grad():
fakeimgs = g(fixed_z, batch_level=[1]).detach().cpu()
img_list.append(vutils.make_grid(fakeimgs.view(25, 1, 28, 28), padding=2, normalize=True))
save_image(fakeimgs.view(25, 1, 28, 28), "gan/fake/%d.png" % (epoch), nrow=5, normalize=True)
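After training, we can plot the recorded losses and look at the last grid of images generated from the fixed latent batch:
# Plot the generator and discriminator losses recorded during training
plt.figure(figsize=(10, 5))
plt.title("Generator and Discriminator Loss During Training")
plt.plot(G_losses, label="G")
plt.plot(D_losses, label="D")
plt.xlabel("recorded steps")
plt.ylabel("loss")
plt.legend()
plt.show()

# Display the most recent image grid saved in img_list
plt.figure(figsize=(5, 5))
plt.axis("off")
plt.imshow(np.transpose(img_list[-1], (1, 2, 0)))
plt.show()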