3. Latent diffusion

This tutorial demonstrates how to generate images with latent diffusion models.

# !git clone --depth 1 --single-branch https://github.com/NVlabs/edm2
# !pip install accelerate diffusers

import sys
import torch

sys.path.append("edm2")

from torchvision.transforms.functional import to_pil_image

from azula.plugins import eldm
from azula.sample import EABSampler  # DPM-Solver

# Prefer the GPU, but fall back to the CPU so that the `.to(device)` calls
# in the rest of the tutorial do not fail on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed PyTorch's global random number generator; the returned generator
# object is not needed, hence the throwaway name.
_ = torch.manual_seed(0)

3.1. Pre-trained latent diffusion model

# Load the pre-trained (denoiser, autoencoder) pair and move each module to
# the target device while unpacking.
denoiser, autoencoder = (
    module.to(device) for module in eldm.load_model("imagenet_512x512_xxl")
)

Loading from /home/frozet/.cache/azula/hub/https.nvlabs-fi-cdn.nvidia.com.edm2.posthoc-reconstructions.edm2-img512-xxl-0939524-0.150.pkl

def postprocess(x):
    """Clamp every element of the tensor `x` to the closed interval [0, 1].

    Values below 0 become 0, values above 1 become 1, and everything in
    between is returned unchanged. The shape of `x` is preserved.
    """
    # NOTE(review): presumably the decoded images already use a [0, 1] pixel
    # scale and only overshoots need trimming -- confirm against the decoder.
    return x.clamp(min=0, max=1)

# One-hot vector of length 1000 with a single 1 at index 88, used as the
# conditioning label for the sampler call below.
label = torch.nn.functional.one_hot(torch.tensor(88), num_classes=1000)
label = label.to(device)

# 16-step sampler built around the pre-trained denoiser.
sampler = EABSampler(denoiser, steps=16)
sampler = sampler.to(device)

# Ask the sampler for a (1, 4, 64, 64) starting tensor (presumably the
# initial noisy latent -- confirm against the azula docs), then run the
# sampler on it with the label passed through as a keyword argument.
z1 = sampler.init((1, 4, 64, 64))
z0 = sampler(z1, label=label)

# Decode the sampled latent without tracking gradients.
with torch.no_grad():
    x = autoencoder.decode(z0)

# Clamp, drop the singleton dimensions and convert to a PIL image; as the
# last expression of the cell, the image is what gets displayed.
to_pil_image(torch.squeeze(postprocess(x)))

../_images/7a14883883f6993283188e3187ba8d7de362754b2eb729cab586cf4ffb83ee54.png

3.2. Classifier-free guidance

# One-hot vector of length 1000 with a single 1 at index 33.
label = torch.nn.functional.one_hot(torch.tensor(33), num_classes=1000)
label = label.to(device)

# Fresh (1, 4, 64, 64) starting tensor from the existing sampler.
z1 = sampler.init((1, 4, 64, 64))

# `omega` is forwarded as an extra keyword argument alongside the label;
# presumably it is the classifier-free guidance strength -- confirm against
# the azula docs.
z0 = sampler(z1, label=label, omega=1.5)

# Decode the sampled latent without tracking gradients.
with torch.no_grad():
    x = autoencoder.decode(z0)

# Clamp, drop the singleton dimensions and convert to a PIL image; as the
# last expression of the cell, the image is what gets displayed.
to_pil_image(torch.squeeze(postprocess(x)))

../_images/76497a74d30e7f29a0813800f256e86304f88e6e866a79fcd45c64d5008a71cd.png