2. Guidance

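This tutorial demonstrates how to guide a pre-trained diffusion model so that its samples are consistent with a noisy measurement, using several guidance methods (DPS, PGDM, DiffPIR, TMPD and MMPS).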

# !git clone --depth 1 --single-branch https://github.com/openai/guided-diffusion
import io
import requests
import sys
import torch

sys.path.append("guided-diffusion")

from PIL import Image
from torchvision.transforms.functional import to_pil_image, to_tensor
from torchvision.utils import make_grid

from azula.guidance import DiffPIRDenoiser, DPSSampler, MMPSDenoiser, PGDMSampler, TMPDenoiser
from azula.plugins import adm
from azula.sample import DDIMSampler

# Prefer the GPU, but fall back to the CPU so that the tutorial still runs
# (much more slowly) on machines without CUDA instead of failing on the first
# `.to(device)` call.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Fix the global PyTorch seed so that repeated runs draw the same random numbers.
_ = torch.manual_seed(42)
def preprocess(x):
    """Rescale values affinely so that 0 maps to -1 and 1 maps to +1."""
    return x * 2 - 1
def postprocess(x):
    """Undo `2 * x - 1` (so -1 maps to 0 and +1 maps to 1) and clip the result to [0, 1]."""
    rescaled = (x + 1) / 2
    return torch.clip(rescaled, min=0, max=1)

2.1. Pre-trained diffusion model

# Load the pre-trained "imagenet_256x256" denoiser through the azula ADM plugin
# and move it to the compute device.
denoiser = adm.load_model("imagenet_256x256").to(device)
# Disable gradient tracking on the loaded model via requires_grad_(False).
denoiser = denoiser.requires_grad_(False)  # reduce memory overhead
Loading from /home/frozet/.cache/azula/hub/https.openaipublic.blob.core.windows.net.diffusion.jul-2021.256x256_diffusion_uncond.pt
# Unconditional sampling: wrap the denoiser in a 64-step DDIM sampler.
sampler = DDIMSampler(denoiser, steps=64).to(device)

# Initialize the sampler state for shape (4, 3, 256, 256) -- presumably a batch
# of 4 RGB 256x256 noise images; confirm against azula's `init` documentation.
x1 = sampler.init((4, 3, 256, 256))
# Run the sampler from the initial state x1 to obtain the samples x0.
x0 = sampler(x1)

# Rescale/clip the samples with postprocess and display them as a single grid.
to_pil_image(make_grid(postprocess(x0)))
../_images/647fa66c2b23c3b4f0ce41d87f7d3e6c7eeb73727d4b2bcc54dd6baeb1a2da9b.png

2.2. Measurement

# Download the example photograph. The timeout keeps the cell from hanging
# forever on a stalled connection, and raise_for_status() reports HTTP errors
# here instead of letting an error page reach PIL and fail while decoding.
response = requests.get(
    "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg",
    headers={"User-Agent": "Azula"},
    timeout=30,
)
response.raise_for_status()
image = io.BytesIO(response.content)
image = Image.open(image).convert("RGB")
# Keep the top-left square whose side is the shorter image dimension, then
# resize it to 256 x 256 to match the resolution sampled in this tutorial.
image = image.crop((0, 0, min(image.size), min(image.size))).resize((256, 256))
image
../_images/2bc38707721e20eaf6adb9c2ffeaeea27bddefce873f53faeb373f0980f78b38.png
# Convert the PIL image to a tensor, rescale it with preprocess (2 * x - 1) and
# move it to the compute device.
x = preprocess(to_tensor(image)).to(device)


def A(x):
    """Measurement operator: anti-aliased bicubic resize to 16 x 16, then flatten.

    The last three dimensions of the resized tensor are merged into one, so each
    measurement is returned as a flat vector.
    """
    downsampled = torch.nn.functional.interpolate(
        x,
        size=(16, 16),
        mode="bicubic",
        antialias=True,
    )
    return downsampled.flatten(-3)


def A_inv(y):
    """Map a flat measurement back to image space.

    The last dimension is reshaped to (3, 16, 16) and the result is upsampled
    to 256 x 256 with nearest-neighbour interpolation.
    """
    coarse = y.unflatten(-1, (3, 16, 16))
    return torch.nn.functional.interpolate(coarse, size=(256, 256), mode="nearest")


# Standard deviation of the Gaussian noise added to the measurement.
sigma_y = 0.01

# Apply the measurement operator A to the image, with a leading batch dimension...
y = A(x.unsqueeze(0))
# ...and corrupt the result with Gaussian noise scaled by sigma_y.
y = y + sigma_y * torch.randn_like(y)

# Visualize the noisy measurement by mapping it back to image space with A_inv.
to_pil_image(make_grid(postprocess(A_inv(y))))
../_images/fbc79fe545a9ab9227ab5b977e8de3c97446ca96dbed6e8d973e5dacceeaaed1.png

2.3. Diffusion Posterior Sampling (DPS)

# DPS: a sampler conditioned on the measurement y through the operator A.
# Note that it is run with 256 steps, whereas the other samplers in this
# tutorial use 64.
cond_sampler = DPSSampler(denoiser, y=y, A=A, steps=256).to(device)

# Initialize the sampler state for shape (4, 3, 256, 256) and run the guided
# sampler to obtain x0.
x1 = cond_sampler.init((4, 3, 256, 256))
x0 = cond_sampler(x1)

# Rescale/clip the samples with postprocess and display them as a single grid.
to_pil_image(make_grid(postprocess(x0)))
../_images/a696b3627304a8afe4d0c6198790aac5650cc2257a93582fe7c7f754a7cbecce.png

2.4. Pseudo-inverse Guided Diffusion Model (PGDM)

# PGDM: besides the measurement y and the operator A, this sampler also receives
# A_inv, the map from measurements back to image space defined earlier.
# NOTE(review): eta=1.0 is presumably the sampler's stochasticity parameter --
# confirm against the azula documentation.
cond_sampler = PGDMSampler(denoiser, y=y, A=A, A_inv=A_inv, steps=64, eta=1.0).to(device)

# Initialize the sampler state for shape (4, 3, 256, 256) and run the guided
# sampler to obtain x0.
x1 = cond_sampler.init((4, 3, 256, 256))
x0 = cond_sampler(x1)

# Rescale/clip the samples with postprocess and display them as a single grid.
to_pil_image(make_grid(postprocess(x0)))
../_images/de78f650e472dcf4d2d3070bf904e4c010a3516fedb59425494ff7ebb6461cf5.png

2.5. Diffusion Plug-and-Play Image Restoration (DiffPIR)

# DiffPIR: guidance is implemented by wrapping the denoiser rather than the
# sampler. var_y is the measurement noise variance (sigma_y squared).
# NOTE(review): the exact meaning of iterations=1 is defined by azula -- confirm
# against its documentation.
cond_denoiser = DiffPIRDenoiser(denoiser, y=y, A=A, var_y=sigma_y**2, iterations=1)
# The wrapped denoiser is then plugged into a regular 64-step DDIM sampler.
cond_sampler = DDIMSampler(cond_denoiser, steps=64, eta=1.0).to(device)

# Initialize the sampler state for shape (4, 3, 256, 256) and run the guided
# sampler to obtain x0.
x1 = cond_sampler.init((4, 3, 256, 256))
x0 = cond_sampler(x1)

# Rescale/clip the samples with postprocess and display them as a single grid.
to_pil_image(make_grid(postprocess(x0)))
../_images/dacab17932279c75a2d5b9cca1423f01838a260bf5f6e68c4919ba45ae435047.png

2.6. Tweedie Moment Projected Diffusion (TMPD)

# TMPD: wrap the denoiser with the measurement y, the operator A and the
# measurement noise variance (sigma_y squared).
cond_denoiser = TMPDenoiser(denoiser, y=y, A=A, var_y=sigma_y**2)
# The wrapped denoiser is then plugged into a regular 64-step DDIM sampler.
cond_sampler = DDIMSampler(cond_denoiser, steps=64, eta=1.0).to(device)

# Initialize the sampler state for shape (4, 3, 256, 256) and run the guided
# sampler to obtain x0.
x1 = cond_sampler.init((4, 3, 256, 256))
x0 = cond_sampler(x1)

# Rescale/clip the samples with postprocess and display them as a single grid.
to_pil_image(make_grid(postprocess(x0)))
../_images/224cd326cdd3ae5aa3252725798ee23576dd1e88880880328a88ee7cf514ac05.png

2.7. Moment Matching Posterior Sampling (MMPS)

# MMPS: wrap the denoiser with the measurement y, the operator A and the
# measurement noise variance (sigma_y squared).
# NOTE(review): the exact meaning of iterations=3 is defined by azula -- confirm
# against its documentation.
cond_denoiser = MMPSDenoiser(denoiser, y=y, A=A, var_y=sigma_y**2, iterations=3)
# The wrapped denoiser is then plugged into a regular 64-step DDIM sampler.
cond_sampler = DDIMSampler(cond_denoiser, steps=64, eta=1.0).to(device)

# Initialize the sampler state for shape (4, 3, 256, 256) and run the guided
# sampler to obtain x0.
x1 = cond_sampler.init((4, 3, 256, 256))
x0 = cond_sampler(x1)

# Rescale/clip the samples with postprocess and display them as a single grid.
to_pil_image(make_grid(postprocess(x0)))
../_images/9a39e86c38bcb2bae4c181d5af3914592cba6bbb8a8437ddd64dd6a09380cc49.png