We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN. Our implementation is available at https://github.com/hojonathanho/diffusion
Source: Denoising Diffusion Probabilistic Models, Ho, Jain & Abbeel (2020-06-19). Paper: https://arxiv.org/abs/2006.11239
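For orientation before the code, in the paper's notation: given a variance schedule $\beta_t$, define $\alpha_t = 1 - \beta_t$ and $\bar{\alpha}_t = \prod_{s=1}^{t} \alpha_s$. The forward process then has the closed-form marginal

$$ q(x_t \mid x_0) = \mathcal{N}\!\left(x_t;\ \sqrt{\bar{\alpha}_t}\, x_0,\ (1 - \bar{\alpha}_t) I\right), $$

so a noisy $x_t$ can be produced in one shot as $x_t = \sqrt{\bar{\alpha}_t}\, x_0 + \sqrt{1 - \bar{\alpha}_t}\, \epsilon$ with $\epsilon \sim \mathcal{N}(0, I)$. The training loop below minimizes the paper's simplified objective

$$ L_{\text{simple}}(\theta) = \mathbb{E}_{t,\, x_0,\, \epsilon}\left[ \left\| \epsilon - \epsilon_\theta\!\left( \sqrt{\bar{\alpha}_t}\, x_0 + \sqrt{1 - \bar{\alpha}_t}\, \epsilon,\ t \right) \right\|^2 \right]. $$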
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from keras.datasets.mnist import load_data
from unet import UNet
# Load MNIST and scale pixel values to [0, 1]
(trainX, trainy), (testX, testy) = load_data()
trainX = np.float32(trainX) / 255.
testX = np.float32(testX) / 255.

def sample_batch(batch_size, device):
    # Draw a random batch and upsample the 28x28 digits to 32x32 for the UNet
    indices = torch.randperm(trainX.shape[0])[:batch_size]
    data = torch.from_numpy(trainX[indices]).unsqueeze(1).to(device)
    return torch.nn.functional.interpolate(data, 32)

class DiffusionModel:
    def __init__(self, T: int, model: nn.Module, device: str):
        self.T = T
        self.function_approximator = model.to(device)
        self.device = device
        # Linear noise schedule from the paper: beta_1 = 1e-4 up to beta_T = 0.02
        self.beta = torch.linspace(1e-4, 0.02, T).to(device)
        self.alpha = 1. - self.beta
        # alpha_bar[t-1] = prod_{s=1..t} alpha_s gives the closed-form q(x_t | x_0)
        self.alpha_bar = torch.cumprod(self.alpha, dim=0)

    def training(self, batch_size, optimizer):
        """
        Algorithm 1 in Denoising Diffusion Probabilistic Models
        """
        x0 = sample_batch(batch_size, self.device)
        # Sample t uniformly from {1, ..., T}; the schedule tensors are
        # 0-indexed, so they are read at t - 1 below
        t = torch.randint(1, self.T + 1, (batch_size,), device=self.device,
                          dtype=torch.long)
        eps = torch.randn_like(x0)
        # Take one gradient descent step on ||eps - eps_theta(x_t, t)||^2
        alpha_bar_t = self.alpha_bar[t - 1].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
        eps_predicted = self.function_approximator(
            torch.sqrt(alpha_bar_t) * x0 + torch.sqrt(1 - alpha_bar_t) * eps, t - 1)
        loss = nn.functional.mse_loss(eps, eps_predicted)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        return loss.item()

    @torch.no_grad()
    def sampling(self, n_samples=1, image_channels=1, img_size=(32, 32),
                 use_tqdm=True):
        """
        Algorithm 2 in Denoising Diffusion Probabilistic Models
        """
        # Start from pure Gaussian noise x_T and denoise it step by step
        x = torch.randn((n_samples, image_channels, img_size[0], img_size[1]),
                        device=self.device)
        progress_bar = tqdm if use_tqdm else lambda x: x
        for t in progress_bar(range(self.T, 0, -1)):
            # No noise is added at the final step (t == 1)
            z = torch.randn_like(x) if t > 1 else torch.zeros_like(x)
            t_batch = torch.ones(n_samples, dtype=torch.long,
                                 device=self.device) * t
            beta_t = self.beta[t_batch - 1].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
            alpha_t = self.alpha[t_batch - 1].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
            alpha_bar_t = self.alpha_bar[t_batch - 1].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
            # Posterior mean of x_{t-1} given x_t and the predicted noise
            mean = 1 / torch.sqrt(alpha_t) * (x - ((1 - alpha_t) / torch.sqrt(
                1 - alpha_bar_t)) * self.function_approximator(x, t_batch - 1))
            sigma = torch.sqrt(beta_t)
            x = mean + sigma * z
        return x

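Each iteration of the loop above is one step of Algorithm 2: with $z \sim \mathcal{N}(0, I)$ (and $z = 0$ at $t = 1$) and $\sigma_t = \sqrt{\beta_t}$,

$$ x_{t-1} = \frac{1}{\sqrt{\alpha_t}} \left( x_t - \frac{1 - \alpha_t}{\sqrt{1 - \bar{\alpha}_t}}\, \epsilon_\theta(x_t, t) \right) + \sigma_t z. $$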
if __name__ == "__main__":
    device = 'cuda'
    batch_size = 64
    model = UNet()
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
    diffusion_model = DiffusionModel(1000, model, device)

    # Training: each iteration is one gradient step on a freshly sampled
    # random batch (40,000 steps total, not epochs)
    for step in tqdm(range(40_000)):
        loss = diffusion_model.training(batch_size, optimizer)

    # Plot a 9x9 grid of samples
    nb_images = 81
    samples = diffusion_model.sampling(n_samples=nb_images, use_tqdm=False)
    plt.figure(figsize=(17, 17))
    for i in range(nb_images):
        plt.subplot(9, 9, 1 + i)
        plt.axis('off')
        plt.imshow(samples[i].squeeze(0).clip(0, 1).data.cpu().numpy(),
                   cmap='gray')
    plt.savefig('Imgs/samples.png')  # assumes the Imgs/ directory exists
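The script above discards the trained weights when it exits. A minimal checkpointing sketch using the standard torch.save / torch.load API (the path ddpm_mnist.pt is an assumption, not part of the original script):

    # Hypothetical addition at the end of the main block (path is an assumption):
    # persist the weights so sampling can be rerun without retraining
    torch.save(model.state_dict(), 'ddpm_mnist.pt')
    # To reload later:
    # model = UNet()
    # model.load_state_dict(torch.load('ddpm_mnist.pt', map_location=device))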
Python implementation of Denoising Diffusion Probabilistic Models in 100 lines (2020-06-19)