3. Latent diffusion
This tutorial demonstrates how to generate images with latent diffusion models.
# !git clone --depth 1 --single-branch https://github.com/NVlabs/edm2
# !pip install accelerate diffusers
import sys
import torch
sys.path.append("edm2")
from torchvision.transforms.functional import to_pil_image
from azula.plugins import eldm
from azula.sample import EABSampler # DPM-Solver
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# rest of the tutorial can still execute (more slowly) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed PyTorch's global random number generator so that runs are repeatable.
# The returned generator is bound to `_` to keep it out of the cell output.
_ = torch.manual_seed(0)
3.1. Pre-trained latent diffusion model
# Fetch the pre-trained pair: the latent denoiser and the autoencoder whose
# decoder is used later to turn sampled latents into images.
denoiser, autoencoder = eldm.load_model("imagenet_512x512_xxl")

# Move both modules to the compute device, one statement per module.
denoiser = denoiser.to(device)
autoencoder = autoencoder.to(device)
Loading from /home/frozet/.cache/azula/hub/https.nvlabs-fi-cdn.nvidia.com.edm2.posthoc-reconstructions.edm2-img512-xxl-0939524-0.150.pkl
def postprocess(x: torch.Tensor) -> torch.Tensor:
    """Clip every element of ``x`` to the closed interval [0, 1].

    Arguments:
        x: A tensor of arbitrary shape.

    Returns:
        A tensor with the same shape as ``x`` in which values below 0 are
        replaced by 0 and values above 1 are replaced by 1.
    """

    return torch.clip(x, min=0, max=1)
# Wrap the pre-trained denoiser in a 16-step sampler on the compute device.
sampler = EABSampler(denoiser, steps=16).to(device)

# One-hot encoding of class index 88 among 1000 classes, used as conditioning.
label = torch.nn.functional.one_hot(torch.tensor(88), 1000).to(device)

# Draw the initial latent and run the sampler on it.
# NOTE(review): (1, 4, 64, 64) is presumably (batch, channels, height, width)
# in latent space -- confirm against the autoencoder.
z1 = sampler.init((1, 4, 64, 64))
z0 = sampler(z1, label=label)

# Decode the sampled latent; no gradients are needed for decoding.
with torch.no_grad():
    x = autoencoder.decode(z0)

# Cell output: clip to [0, 1], drop singleton dimensions, convert to PIL.
to_pil_image(postprocess(x).squeeze())
3.2. Classifier-free guidance
# One-hot encoding of class index 33 among 1000 classes, used as conditioning.
label = torch.nn.functional.one_hot(torch.tensor(33), 1000).to(device)

# Draw a fresh initial latent and sample again, this time passing `omega`.
# NOTE(review): `omega` is presumably the classifier-free guidance strength
# expected by the denoiser -- confirm against the plugin documentation.
z1 = sampler.init((1, 4, 64, 64))
z0 = sampler(z1, label=label, omega=1.5)

# Decode the sampled latent; no gradients are needed for decoding.
with torch.no_grad():
    x = autoencoder.decode(z0)

# Cell output: clip to [0, 1], drop singleton dimensions, convert to PIL.
to_pil_image(postprocess(x).squeeze())