3. Latent diffusion
This tutorial demonstrates how to generate images with latent diffusion models.
# !git clone --depth 1 --single-branch https://github.com/NVlabs/edm2
# !pip install accelerate diffusers
import sys
import torch
sys.path.append("edm2")
from torchvision.transforms.functional import to_pil_image
from azula.plugins import eldm
from azula.sample import LMSSampler
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# tutorial still executes (much more slowly) instead of failing on the first
# `.to(device)` call.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Seed the global PyTorch RNG so that sampling is reproducible. The returned
# generator is bound to `_`, presumably to keep it out of the notebook output.
_ = torch.manual_seed(0)
3.1. Pre-trained latent diffusion model
# Fetch the pre-trained denoiser together with its autoencoder, then move each
# of them to the compute device.
denoiser, autoencoder = eldm.load_model("imagenet_512x512_xxl")
denoiser = denoiser.to(device)
autoencoder = autoencoder.to(device)
Skipping download as /mnt/home/frozet/.cache/azula/hub/https.nvlabs-fi-cdn.nvidia.com.edm2.posthoc-reconstructions.edm2-img512-xxl-0939524-0.150.pkl already exists.
def postprocess(x):
    """Clamp every element of the tensor ``x`` to the interval [0, 1].

    Returns a tensor with the same shape as ``x`` in which values below 0
    become 0 and values above 1 become 1.
    """
    return torch.clip(x, min=0, max=1)
# Wrap the denoiser in a 32-step LMS sampler and move it to the compute device.
sampler = LMSSampler(denoiser, steps=32)
sampler = sampler.to(device)

# One-hot vector of length 1000 selecting class index 88.
label = torch.nn.functional.one_hot(torch.tensor(88), num_classes=1000)
label = label.to(device)

# Initialise one flattened latent of size 4 * 64 * 64 (presumably pure noise --
# confirm against the sampler's `init`) and run the sampler on it with the label.
z1 = sampler.init((1, 4 * 64 * 64))
z0 = sampler(z1, label=label)

# Un-flatten the result to (N, 4, 64, 64), decode it, and view the output as a
# single 3 x 512 x 512 image.
latent = z0.reshape(-1, 4, 64, 64)
x = autoencoder.decode(latent).reshape(3, 512, 512)

# Final expression of the cell: the clamped image converted to a PIL image.
to_pil_image(postprocess(x))
3.2. Classifier-free guidance
# One-hot vector of length 1000 selecting class index 33.
label = torch.nn.functional.one_hot(torch.tensor(33), num_classes=1000)
label = label.to(device)

# Same sampling call as before, now with `omega=1.5` (presumably the
# classifier-free guidance strength -- confirm against the denoiser's API).
z1 = sampler.init((1, 4 * 64 * 64))
z0 = sampler(z1, label=label, omega=1.5)

# Un-flatten the result to (N, 4, 64, 64), decode it, and view the output as a
# single 3 x 512 x 512 image.
latent = z0.reshape(-1, 4, 64, 64)
x = autoencoder.decode(latent).reshape(3, 512, 512)

# Final expression of the cell: the clamped image converted to a PIL image.
to_pil_image(postprocess(x))