# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# You should have received a copy of the license along with this
# work. If not, see http://creativecommons.org/licenses/by-nc-sa/4.0/

"""Generate random images using the given model."""

import os
import re
import warnings
import click
import tqdm
import pickle
import numpy as np
import torch
import PIL.Image
import dnnlib
from torch_utils import distributed as dist

# Silence known-noisy warnings emitted by third-party libraries. The second
# argument of each call is a regular expression that is matched against the
# start of the warning message.
warnings.filterwarnings('ignore', '`resume_download` is deprecated')
warnings.filterwarnings('ignore', 'You are using `torch.load` with `weights_only=False`')
# NOTE(review): the leading "1" presumably mirrors the upstream message text
# verbatim -- confirm against the actual warning before "correcting" it.
warnings.filterwarnings('ignore', '1Torch was not compiled with flash attention')

#----------------------------------------------------------------------------
# egg EMA
class EMA:
    """Running momentum accumulator with optional norm re-scaling.

    The state is updated as ``m <- beta * m + x`` (no ``1 - beta`` factor is
    applied to ``x``). Calling the object updates the state and, by default,
    rescales the accumulated value so that its norm along ``dim=1`` matches
    the norm of the newest input.
    """

    def __init__(self, m=0, beta=0.9, epsilon=1e-8):
        # m: initial state; beta: decay applied to the previous state;
        # epsilon: added to the denominator in `scale` to avoid division by zero.
        self.m, self.beta, self.epsilon = m, beta, epsilon

    def update(self, x):
        """Fold ``x`` into the running state and return the new state."""
        accumulated = self.beta * self.m + x
        self.m = accumulated
        return accumulated

    def scale(self, x1, x2):
        """Return ``x1`` rescaled to carry the ``dim=1`` norm of ``x2``.

        An integer ``x1`` (e.g. the untouched initial state) yields a scalar
        zero tensor instead.
        """
        if not isinstance(x1, int):
            target_norm = torch.norm(x2, dim=1, keepdim=True)
            current_norm = torch.norm(x1, dim=1, keepdim=True)
            return x1 * target_norm / (current_norm + self.epsilon)
        return torch.tensor(0)

    def __call__(self, x, norm_scale=True):
        """Update with ``x``; return the rescaled state, or the raw state if ``norm_scale`` is false."""
        m_hat = self.update(x)
        if norm_scale:
            return self.scale(m_hat, x)
        return m_hat
    
#----------------------------------------------------------------------------
# apg momentum

# self.running_average = self.momentum * self.running_average + update_value
class MomentumBuffer:
    """Accumulates updates as ``running_average <- update + momentum * running_average``."""

    def __init__(self, momentum: float):
        # The state starts as the integer 0 and becomes a tensor after the first update.
        self.running_average = 0
        self.momentum = momentum

    def update(self, update_value: torch.Tensor):
        """Fold ``update_value`` into the buffer in place; returns nothing."""
        self.running_average = update_value + self.momentum * self.running_average
        
# Decompose v0 into parallel and orthogonal components with respect to v1.
def project(
        v0: torch.Tensor, # [B, C, H, W]
        v1: torch.Tensor, # [B, C, H, W]
    ):
    """Split ``v0`` into components parallel and orthogonal to ``v1``.

    The projection is computed per sample over the last three dimensions, in
    float64, and both results are cast back to the original dtype of ``v0``.

    Returns:
        Tuple ``(v0_parallel, v0_orthogonal)``.
    """
    reduce_dims = [-1, -2, -3]
    out_dtype = v0.dtype
    vec = v0.double()
    # Unit-length direction of v1 for each sample.
    direction = torch.nn.functional.normalize(v1.double(), dim=reduce_dims)
    coefficient = (vec * direction).sum(dim=reduce_dims, keepdim=True)
    parallel = coefficient * direction
    orthogonal = vec - parallel
    return parallel.to(out_dtype), orthogonal.to(out_dtype)

# Apply adaptive projected guidance (APG) to pred_cond and return the guided prediction.
def adaptive_projected_guidance(
        pred_cond: torch.Tensor, # [B, C, H, W]
        pred_uncond: torch.Tensor, # [B, C, H, W]
        guidance_scale: float,
        momentum_buffer: "MomentumBuffer" = None,
        eta: float = 0.0, # 1.0
        norm_threshold: float = 2.5, # 0.0
    ):
    """Combine conditional and guiding predictions with projected guidance.

    The difference ``pred_cond - pred_uncond`` is optionally accumulated in
    ``momentum_buffer``, optionally clamped so that its per-sample L2 norm
    does not exceed ``norm_threshold``, and then split into components
    parallel and orthogonal to ``pred_cond``. The guided prediction is
    ``pred_cond + (guidance_scale - 1) * (orthogonal + eta * parallel)``.

    Args:
        pred_cond: Conditional prediction.
        pred_uncond: Guiding prediction, or ``None`` when no guiding
            prediction was computed; in that case ``pred_cond`` is returned
            unchanged and ``momentum_buffer`` is left untouched.
        guidance_scale: Guidance strength; 1 leaves ``pred_cond`` unchanged.
        momentum_buffer: Optional buffer whose running average replaces the
            raw difference.
        eta: Weight of the parallel component.
        norm_threshold: Maximum per-sample norm of the difference; values
            ``<= 0`` disable the clamp.

    Returns:
        The guided prediction.
    """
    # The sampler hands over `None` when guidance is disabled; there is
    # nothing to project in that case.
    if pred_uncond is None:
        return pred_cond

    diff = pred_cond - pred_uncond
    if momentum_buffer is not None:
        momentum_buffer.update(diff)
        diff = momentum_buffer.running_average
    if norm_threshold > 0:
        diff_norm = diff.norm(p=2, dim=[-1, -2, -3], keepdim=True)
        # Only ever shrink: the factor is capped at 1 (a zero norm gives
        # inf, which the cap also turns into 1).
        scale_factor = torch.minimum(torch.ones_like(diff_norm), norm_threshold / diff_norm)
        diff = diff * scale_factor
    diff_parallel, diff_orthogonal = project(diff, pred_cond)
    normalized_update = diff_orthogonal + eta * diff_parallel
    pred_guided = pred_cond + (guidance_scale - 1) * normalized_update
    return pred_guided

#----------------------------------------------------------------------------
# fdg
from kornia.geometry.transform import build_laplacian_pyramid
def project(
    v0: torch.Tensor, # [B, C, H, W]
    v1: torch.Tensor, # [B, C, H, W]
):
    """Return the parts of ``v0`` that are parallel and orthogonal to ``v1``.

    Works per sample across the last three dimensions; arithmetic is done in
    float64 and the outputs are converted back to the input dtype of ``v0``.
    """
    # NOTE(review): a function with the same name and what appears to be the
    # same body is defined earlier in this file; this later definition is the
    # one that stays bound to `project`.
    axes = [-1, -2, -3]
    original_dtype = v0.dtype
    v0_wide = v0.double()
    v1_unit = torch.nn.functional.normalize(v1.double(), dim=axes)
    along = (v0_wide * v1_unit).sum(dim=axes, keepdim=True) * v1_unit
    across = v0_wide - along
    return along.to(original_dtype), across.to(original_dtype)
    
def build_image_from_pyramid(pyramid):
    """Collapse a pyramid back into a single image.

    Starting from the last (coarsest) entry, each step upsamples the running
    image with kornia's ``pyrup`` and adds the next finer level.

    Args:
        pyramid: Sequence of levels ordered from finest to coarsest.

    Returns:
        The reconstructed image; for a single-level pyramid this is that
        level itself.
    """
    img = pyramid[-1]
    if len(pyramid) == 1:
        return img

    # This module only imports `build_laplacian_pyramid` from kornia, so the
    # bare name `kornia` is never bound; import the upsampler explicitly.
    from kornia.geometry.transform import pyrup

    for level in reversed(pyramid[:-1]):
        img = pyrup(img) + level
    return img

# We assume all model predictions are converted to "x_0" prediction. 
def laplacian_guidance(
    pred_cond: torch.Tensor, # [B, C, H, W]
    pred_uncond: torch.Tensor, # [B, C, H, W]
    guidance_scale=[1.0, 1.0], # Guidance scales from high- to low-frequency
    parallel_weights=None, # Optional weights for projection
):
    """Apply a separate guidance scale to each Laplacian-pyramid level.

    Both predictions are decomposed into pyramids with one level per entry
    of ``guidance_scale``. On every level the difference between the
    conditional and guiding prediction is split into components parallel and
    orthogonal to the conditional level, the parallel part is weighted, and
    the level is updated as ``p_cond + (scale - 1) * diff``. The guided
    levels are then collapsed back into one image.

    Args:
        pred_cond: Conditional prediction.
        pred_uncond: Guiding prediction.
        guidance_scale: One scale per pyramid level. The default list is
            only read, never mutated.
        parallel_weights: One weight per level for the parallel component;
            ``None`` means 1.0 for every level.

    Returns:
        The guided prediction, cast to the dtype of ``pred_cond``.

    Raises:
        ValueError: If ``parallel_weights`` and ``guidance_scale`` differ in
            length.
    """
    levels = len(guidance_scale)
    if parallel_weights is None:
        parallel_weights = [1.0] * levels
    elif len(parallel_weights) != levels:
        # zip() below would silently drop pyramid levels otherwise.
        raise ValueError(
            f'parallel_weights has {len(parallel_weights)} entries, '
            f'expected {levels} to match guidance_scale'
        )

    pred_cond_pyramid = build_laplacian_pyramid(pred_cond, levels)
    pred_uncond_pyramid = build_laplacian_pyramid(pred_uncond, levels)

    pred_guided_pyramid = []
    parameters = zip(
        pred_cond_pyramid, pred_uncond_pyramid, guidance_scale, parallel_weights
    )
    for p_cond, p_uncond, scale, par_weight in parameters:
        diff = p_cond - p_uncond
        diff_parallel, diff_orthogonal = project(diff, p_cond)
        diff = par_weight * diff_parallel + diff_orthogonal
        pred_guided_pyramid.append(p_cond + (scale - 1) * diff)

    pred_guided = build_image_from_pyramid(pred_guided_pyramid)
    return pred_guided.to(pred_cond.dtype)

#----------------------------------------------------------------------------
# Configuration presets.

# Base URL that every preset's network pickle path is built from.
model_root = 'https://nvlabs-fi-cdn.nvidia.com/edm2/posthoc-reconstructions'

# Maps a preset name to an EasyDict of options. Every entry sets `net`; the
# "-guid-" and "-autog-" entries additionally set `gnet` and `guidance`.
# The trailing comment on each line records the metric value noted for it.
config_presets = {
    'edm2-img512-xs-fid':              dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xs-2147483-0.135.pkl'),      # fid = 3.53
    'edm2-img512-xs-dino':             dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xs-2147483-0.200.pkl'),      # fd_dinov2 = 103.39
    'edm2-img512-s-fid':               dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-2147483-0.130.pkl'),       # fid = 2.56
    'edm2-img512-s-dino':              dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-2147483-0.190.pkl'),       # fd_dinov2 = 68.64
    'edm2-img512-m-fid':               dnnlib.EasyDict(net=f'{model_root}/edm2-img512-m-2147483-0.100.pkl'),       # fid = 2.25
    'edm2-img512-m-dino':              dnnlib.EasyDict(net=f'{model_root}/edm2-img512-m-2147483-0.155.pkl'),       # fd_dinov2 = 58.44
    'edm2-img512-l-fid':               dnnlib.EasyDict(net=f'{model_root}/edm2-img512-l-1879048-0.085.pkl'),       # fid = 2.06
    'edm2-img512-l-dino':              dnnlib.EasyDict(net=f'{model_root}/edm2-img512-l-1879048-0.155.pkl'),       # fd_dinov2 = 52.25
    'edm2-img512-xl-fid':              dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xl-1342177-0.085.pkl'),      # fid = 1.96
    'edm2-img512-xl-dino':             dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xl-1342177-0.155.pkl'),      # fd_dinov2 = 45.96
    'edm2-img512-xxl-fid':             dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xxl-0939524-0.070.pkl'),     # fid = 1.91
    'edm2-img512-xxl-dino':            dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xxl-0939524-0.150.pkl'),     # fd_dinov2 = 42.84
    'edm2-img64-s-fid':                dnnlib.EasyDict(net=f'{model_root}/edm2-img64-s-1073741-0.075.pkl'),        # fid = 1.58
    'edm2-img64-m-fid':                dnnlib.EasyDict(net=f'{model_root}/edm2-img64-m-2147483-0.060.pkl'),        # fid = 1.43
    'edm2-img64-l-fid':                dnnlib.EasyDict(net=f'{model_root}/edm2-img64-l-1073741-0.040.pkl'),        # fid = 1.33
    'edm2-img64-xl-fid':               dnnlib.EasyDict(net=f'{model_root}/edm2-img64-xl-0671088-0.040.pkl'),       # fid = 1.33
    'edm2-img512-xs-guid-fid':         dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xs-2147483-0.045.pkl',       gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.045.pkl', guidance=1.40), # fid = 2.91
    'edm2-img512-xs-guid-dino':        dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xs-2147483-0.150.pkl',       gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.150.pkl', guidance=1.70), # fd_dinov2 = 79.94
    'edm2-img512-s-guid-fid':          dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-2147483-0.025.pkl',        gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.025.pkl', guidance=1.40), # fid = 2.23
    'edm2-img512-s-guid-dino':         dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-2147483-0.085.pkl',        gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.085.pkl', guidance=1.90), # fd_dinov2 = 52.32
    'edm2-img512-m-guid-fid':          dnnlib.EasyDict(net=f'{model_root}/edm2-img512-m-2147483-0.030.pkl',        gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.030.pkl', guidance=1.20), # fid = 2.01
    'edm2-img512-m-guid-dino':         dnnlib.EasyDict(net=f'{model_root}/edm2-img512-m-2147483-0.015.pkl',        gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.015.pkl', guidance=2.00), # fd_dinov2 = 41.98
    'edm2-img512-l-guid-fid':          dnnlib.EasyDict(net=f'{model_root}/edm2-img512-l-1879048-0.015.pkl',        gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.015.pkl', guidance=1.20), # fid = 1.88
    'edm2-img512-l-guid-dino':         dnnlib.EasyDict(net=f'{model_root}/edm2-img512-l-1879048-0.035.pkl',        gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.035.pkl', guidance=1.70), # fd_dinov2 = 38.20
    'edm2-img512-xl-guid-fid':         dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xl-1342177-0.020.pkl',       gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.020.pkl', guidance=1.20), # fid = 1.85
    'edm2-img512-xl-guid-dino':        dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xl-1342177-0.030.pkl',       gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.030.pkl', guidance=1.70), # fd_dinov2 = 35.67
    'edm2-img512-xxl-guid-fid':        dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xxl-0939524-0.015.pkl',      gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.015.pkl', guidance=1.20), # fid = 1.81
    'edm2-img512-xxl-guid-dino':       dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xxl-0939524-0.015.pkl',      gnet=f'{model_root}/edm2-img512-xs-uncond-2147483-0.015.pkl', guidance=1.70), # fd_dinov2 = 33.09
    'edm2-img512-s-autog-fid':         dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-2147483-0.070.pkl',        gnet=f'{model_root}/edm2-img512-xs-0134217-0.125.pkl',        guidance=2.10), # fid = 1.34
    'edm2-img512-s-autog-dino':        dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-2147483-0.120.pkl',        gnet=f'{model_root}/edm2-img512-xs-0134217-0.165.pkl',        guidance=2.45), # fd_dinov2 = 36.67
    'edm2-img512-xxl-autog-fid':       dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xxl-0939524-0.075.pkl',      gnet=f'{model_root}/edm2-img512-m-0268435-0.155.pkl',         guidance=2.05), # fid = 1.25
    'edm2-img512-xxl-autog-dino':      dnnlib.EasyDict(net=f'{model_root}/edm2-img512-xxl-0939524-0.130.pkl',      gnet=f'{model_root}/edm2-img512-m-0268435-0.205.pkl',         guidance=2.30), # fd_dinov2 = 24.18
    'edm2-img512-s-uncond-autog-fid':  dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-uncond-2147483-0.070.pkl', gnet=f'{model_root}/edm2-img512-xs-uncond-0134217-0.110.pkl', guidance=2.85), # fid = 3.86
    'edm2-img512-s-uncond-autog-dino': dnnlib.EasyDict(net=f'{model_root}/edm2-img512-s-uncond-2147483-0.090.pkl', gnet=f'{model_root}/edm2-img512-xs-uncond-0134217-0.125.pkl', guidance=2.90), # fd_dinov2 = 90.39
    'edm2-img64-s-autog-fid':          dnnlib.EasyDict(net=f'{model_root}/edm2-img64-s-1073741-0.045.pkl',         gnet=f'{model_root}/edm2-img64-xs-0134217-0.110.pkl',         guidance=1.70), # fid = 1.01
    'edm2-img64-s-autog-dino':         dnnlib.EasyDict(net=f'{model_root}/edm2-img64-s-1073741-0.105.pkl',         gnet=f'{model_root}/edm2-img64-xs-0134217-0.175.pkl',         guidance=2.20), # fd_dinov2 = 31.85
}

#----------------------------------------------------------------------------
# EDM sampler from the paper
# "Elucidating the Design Space of Diffusion-Based Generative Models",
# extended to support classifier-free guidance.

def edm_sampler(
    net, noise, labels=None, gnet=None,
    num_steps=32, sigma_min=0.002, sigma_max=80, rho=7, guidance=1,
    S_churn=0, S_min=0, S_max=float('inf'), S_noise=1,
    dtype=torch.float32, randn_like=torch.randn_like,
    **kwargs,
):
    """Sample with an Euler step plus 2nd-order correction and a selectable guidance rule.

    Args:
        net: Main denoiser, called as ``net(x, t, labels)``.
        noise: Initial noise; scaled by the first noise level.
        labels: Optional conditioning passed to both networks.
        gnet: Guiding denoiser; only called when ``guidance != 1``.
        num_steps: Number of noise levels in the schedule.
        sigma_min, sigma_max, rho: Parameters of the noise schedule.
        guidance: Guidance strength; 1 disables guidance.
        S_churn, S_min, S_max, S_noise: Stochasticity settings; noise is
            temporarily increased when ``S_churn > 0`` and the current level
            lies in ``[S_min, S_max]``.
        dtype: Working precision.
        randn_like: Function used to draw noise.
        **kwargs: ``pred_type`` selects the guidance rule and must be one of
            ``vanilla``, ``ag_noise``, ``mom``, ``egg``, ``egg3``, ``apg``,
            ``ig``, ``fdg``, ``cfgpp`` or ``tcfg``. Further keys are read
            depending on the rule: ``beta_1``, ``beta_2``, ``gamma``,
            ``eta``, ``r_scale``, ``sigma_start``, ``sigma_end``, ``w_high``,
            ``w_low``.

    Returns:
        The final sample.

    Raises:
        ValueError: If ``pred_type`` is missing or unknown.
    """
    pred_type = kwargs.get('pred_type')

    # Guided denoiser.
    ################################################################################
    if pred_type == 'vanilla':
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            if guidance == 1:
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            return ref_Dx.lerp(Dx, guidance)
    elif pred_type == 'ag_noise':
        # The main and guiding networks may be evaluated at different inputs / noise levels.
        def denoise(x_cond, t_cond, x_uncond=None, t_uncond=None):
            x_uncond = x_cond if x_uncond is None else x_uncond
            t_uncond = t_cond if t_uncond is None else t_uncond

            Dx = net(x_cond, t_cond, labels).to(dtype)
            if guidance == 1:
                return Dx
            ref_Dx = gnet(x_uncond, t_uncond, labels).to(dtype)
            return ref_Dx.lerp(Dx, guidance)
    elif pred_type == 'mom':
        # Both predictions are replaced by their norm-rescaled running accumulators.
        cond_ema = EMA(beta=kwargs.get('beta_1'))
        uncond_ema = EMA(beta=kwargs.get('beta_2'))
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            Dx = cond_ema(Dx)
            if guidance == 1:
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            ref_Dx = uncond_ema(ref_Dx)
            return ref_Dx.lerp(Dx, guidance)
    elif pred_type == 'egg':
        # The guiding prediction is shifted by gamma times the difference of the current accumulators.
        cond_ema = EMA(beta=kwargs.get('beta_1'))
        uncond_ema = EMA(beta=kwargs.get('beta_2'))
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            Dx_ema = cond_ema(Dx)
            if guidance == 1:
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            ref_Dx_ema = uncond_ema(ref_Dx)
            ref_Dx += kwargs.get('gamma') * (Dx_ema - ref_Dx_ema)
            return ref_Dx.lerp(Dx, guidance)
    elif pred_type == 'egg3':
        # Like 'egg', but uses the raw accumulator states from before the current update.
        cond_ema = EMA(beta=kwargs.get('beta_1'))
        uncond_ema = EMA(beta=kwargs.get('beta_2'))
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            Dx_ema_prev = cond_ema.m
            cond_ema(Dx)
            if guidance == 1:
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            ref_Dx_ema_prev = uncond_ema.m
            uncond_ema(ref_Dx)
            ref_Dx += kwargs.get('gamma') * (Dx_ema_prev - ref_Dx_ema_prev)
            return ref_Dx.lerp(Dx, guidance)
    elif pred_type == 'apg':
        momentum_buffer = MomentumBuffer(momentum=kwargs.get('beta_1', -0.75))
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            if guidance == 1:
                # No guiding prediction exists, so there is nothing to project.
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            return adaptive_projected_guidance(
                Dx, ref_Dx, guidance, momentum_buffer=momentum_buffer,
                eta=kwargs.get('eta', 0.0), norm_threshold=kwargs.get('r_scale', 2.5),
            )
    elif pred_type == 'ig':
        # Guidance is applied only for noise levels in (sigma_start, sigma_end].
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            if guidance == 1 or not (kwargs.get('sigma_start') < t.item() <= kwargs.get('sigma_end')):
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            return ref_Dx.lerp(Dx, guidance)
    elif pred_type == 'fdg':
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            if guidance == 1:
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            # NOTE: kwargs['w_par'] is currently not forwarded as parallel_weights.
            return laplacian_guidance(
                Dx, ref_Dx, guidance_scale=[kwargs.get('w_high'), kwargs.get('w_low')],
            )
    elif pred_type == 'cfgpp':
        # Always returns (guided prediction, guiding prediction) so the
        # sampling loop can unpack it; without guidance both are the plain
        # prediction and the update reduces to the ordinary Euler step.
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            if guidance == 1:
                return Dx, Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            return ref_Dx.lerp(Dx, guidance), ref_Dx
    elif pred_type == 'tcfg':
        def denoise(x, t):
            Dx = net(x, t, labels).to(dtype)
            if guidance == 1:
                return Dx
            ref_Dx = gnet(x, t, labels).to(dtype)
            # --------------------------------------------------------------------------------
            # Stack both predictions per sample, take the SVD of the [2, N]
            # matrix, and rebuild the guiding prediction from its first right
            # singular vector only. Shape comments use an example input of
            # (1,16,128,128).
            all_noise = torch.stack((Dx, ref_Dx), dim=1).to(dtype=torch.float32) # (1,16,128,128) & (1,16,128,128) -> (1,2,16,128,128)
            all_noise = all_noise.reshape(all_noise.size(0), all_noise.size(1), -1) # (1,2,16,128,128) -> (1,2,262144)
            U, S, Vh = torch.linalg.svd(all_noise, full_matrices=False) # (1,2,262144) -> U.shape==(1,2,2), S.shape==(1,2), Vh.shape==(1,2,262144)
            Vh = Vh.to(all_noise.device)
            Vh_modified = Vh.clone().to(all_noise.device)
            Vh_modified[:,1] = 0
            noise_null_flat = ref_Dx.reshape(ref_Dx.size(0), 1, -1).to(dtype=torch.float32) # (1,16,128,128) -> (1,1,262144)
            noise_null_flat = noise_null_flat.to(Vh.device)
            x_Vh = torch.matmul(noise_null_flat, Vh.transpose(-2, -1)) # (1,1,262144) x (1,262144,2) -> (1,1,2)
            x_Vh_V = torch.matmul(x_Vh, Vh_modified) # (1,1,2) x (1,2,262144) -> (1,1,262144)
            ref_Dx = x_Vh_V.reshape(*ref_Dx.shape).to(Dx.dtype).to(Dx.device) # (1,1,262144) -> (1,16,128,128)
            # --------------------------------------------------------------------------------
            return ref_Dx.lerp(Dx, guidance)
    else:
        raise ValueError(f'Unknown pred_type: {kwargs.get("pred_type")}')

    ################################################################################

    # Time step discretization.
    step_indices = torch.arange(num_steps, dtype=dtype, device=noise.device)
    # max(..., 1) keeps a single-step schedule at sigma_max instead of producing 0/0 = NaN.
    t_steps = (sigma_max ** (1 / rho) + step_indices / max(num_steps - 1, 1) * (sigma_min ** (1 / rho) - sigma_max ** (1 / rho))) ** rho
    t_steps = torch.cat([t_steps, torch.zeros_like(t_steps[:1])]) # t_N = 0

    # Main sampling loop.
    x_next = noise.to(dtype) * t_steps[0]
    for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])): # 0, ..., N-1
        x_cur = x_next
        in_noise_range = bool(S_min <= t_cur <= S_max)
        churn = S_churn > 0 and in_noise_range

        # Increase noise temporarily.
        if churn:
            gamma = min(S_churn / num_steps, np.sqrt(2) - 1)
            t_hat = t_cur + gamma * t_cur
            x_hat = x_cur + (t_hat ** 2 - t_cur ** 2).sqrt() * S_noise * randn_like(x_cur)
        else:
            t_hat = t_cur
            x_hat = x_cur

        ################################################################################
        # 'ag_noise': the denoiser inputs get extra noise (variance 0.01 for
        # the main network, 0.04 for the guiding network) while the state
        # itself is left unperturbed. When churn is active or the level is
        # outside [S_min, S_max], both networks simply see (x_hat, t_hat).
        if pred_type == 'ag_noise':
            if in_noise_range and not churn:
                t_hat_cond = (0.01 + t_cur**2).sqrt()
                x_hat_cond = x_cur + (t_hat_cond ** 2 - t_cur ** 2).sqrt() * S_noise * randn_like(x_cur)
                t_hat_uncond = (0.04 + t_cur**2).sqrt()
                x_hat_uncond = x_cur + (t_hat_uncond ** 2 - t_cur ** 2).sqrt() * S_noise * randn_like(x_cur)
            else:
                t_hat_cond = t_hat_uncond = t_hat
                x_hat_cond = x_hat_uncond = x_hat
        ################################################################################

        # Euler step.
        if pred_type == 'cfgpp':
            # Denoise with the guided prediction, re-noise with the guiding one.
            Dx_guid, ref_Dx = denoise(x_hat, t_hat)
            d_cur = (x_hat - Dx_guid) / t_hat
            d_ref = (x_hat - ref_Dx) / t_hat
            x_next = x_hat - t_hat * d_cur + t_next * d_ref
        else:
            if pred_type == 'ag_noise':
                denoised = denoise(x_hat_cond, t_hat_cond, x_hat_uncond, t_hat_uncond)
            else:
                denoised = denoise(x_hat, t_hat)
            d_cur = (x_hat - denoised) / t_hat
            x_next = x_hat + (t_next - t_hat) * d_cur

        # Apply 2nd order correction.
        if i < num_steps - 1:
            if pred_type == 'cfgpp':
                Dx_guid, ref_Dx = denoise(x_next, t_next)
                d_prime_cur = (x_next - Dx_guid) / t_next
                d_prime_ref = (x_next - ref_Dx) / t_next
                x_next = x_hat - t_hat * (0.5 * d_cur + 0.5 * d_prime_cur) + t_next * (0.5 * d_ref + 0.5 * d_prime_ref)
            else:
                d_prime = (x_next - denoise(x_next, t_next)) / t_next
                x_next = x_hat + (t_next - t_hat) * (0.5 * d_cur + 0.5 * d_prime)

    return x_next

#----------------------------------------------------------------------------
# Wrapper for torch.Generator that allows specifying a different random seed
# for each sample in a minibatch.

class StackedRandomGenerator:
    """Bundle of per-sample ``torch.Generator`` objects, one per seed.

    Every draw produces one slice per generator and stacks the slices along
    a new leading dimension, so the first entry of the requested size must
    equal the number of seeds.
    """

    def __init__(self, device, seeds):
        super().__init__()
        seed_modulus = 1 << 32  # seeds are reduced modulo 2**32
        self.generators = []
        for seed in seeds:
            gen = torch.Generator(device)
            gen.manual_seed(int(seed) % seed_modulus)
            self.generators.append(gen)

    def randn(self, size, **kwargs):
        """Draw normal samples of shape ``size``, one slice per generator."""
        assert size[0] == len(self.generators)
        slice_shape = size[1:]
        slices = [torch.randn(slice_shape, generator=gen, **kwargs) for gen in self.generators]
        return torch.stack(slices)

    def randn_like(self, input):
        """Draw normal samples matching the shape, dtype, layout and device of ``input``."""
        return self.randn(
            input.shape,
            dtype=input.dtype,
            layout=input.layout,
            device=input.device,
        )

    def randint(self, *args, size, **kwargs):
        """Draw random integers of shape ``size``; ``*args`` are forwarded to ``torch.randint``."""
        assert size[0] == len(self.generators)
        slice_shape = size[1:]
        slices = [torch.randint(*args, size=slice_shape, generator=gen, **kwargs) for gen in self.generators]
        return torch.stack(slices)

#----------------------------------------------------------------------------
# Generate images for the given seeds in a distributed fashion.
# Returns an iterable that yields
# dnnlib.EasyDict(images, labels, noise, batch_idx, num_batches, indices, seeds)

def generate_images(
    net,                                        # Main network. Path, URL, or torch.nn.Module.
    gnet                = None,                 # Guiding network. None = same as main network.
    encoder             = None,                 # Instance of training.encoders.Encoder. None = load from network pickle.
    outdir              = None,                 # Where to save the output images. None = do not save.
    subdirs             = False,                # Create subdirectory for every 1000 seeds?
    seeds               = range(16, 24),        # List of random seeds.
    class_idx           = None,                 # Class label. None = select randomly.
    max_batch_size      = 32,                   # Maximum batch size for the diffusion model.
    encoder_batch_size  = 4,                    # Maximum batch size for the encoder. None = default.
    verbose             = True,                 # Enable status prints?
    device              = torch.device('cuda'), # Which compute device to use.
    sampler_fn          = edm_sampler,          # Which sampler function to use.
    **sampler_kwargs,                           # Additional arguments for the sampler function.
):
    """Generate images for the given seeds, splitting batches across ranks.

    Networks given as strings are loaded from pickle. The seeds are divided
    into batches and every rank processes its own share.

    When ``outdir`` is set, a batch whose output files all exist already is
    not regenerated. Such a batch is still yielded (with ``images``,
    ``labels`` and ``noise`` left as ``None``, like an empty batch) so that
    every rank reaches the per-batch barrier the same number of times. With
    ``outdir=None`` every non-empty batch is generated.

    Returns:
        An iterable with a length (number of batches of this rank) that
        yields ``dnnlib.EasyDict(images, labels, noise, batch_idx,
        num_batches, indices, seeds)``.
    """
    # Rank 0 goes first.
    if dist.get_rank() != 0:
        torch.distributed.barrier()

    # Load main network.
    if isinstance(net, str):
        if verbose:
            dist.print0(f'Loading main network from {net} ...')
        # SECURITY: unpickling executes arbitrary code -- only load trusted network files.
        with dnnlib.util.open_url(net, verbose=(verbose and dist.get_rank() == 0)) as f:
            data = pickle.load(f)
        net = data['ema'].to(device)
        if encoder is None:
            encoder = data.get('encoder', None)
            if encoder is None:
                encoder = dnnlib.util.construct_class_by_name(class_name='training.encoders.StandardRGBEncoder')
    assert net is not None

    # Load guidance network.
    if isinstance(gnet, str):
        if verbose:
            dist.print0(f'Loading guiding network from {gnet} ...')
        # SECURITY: same caveat as above -- trusted files only.
        with dnnlib.util.open_url(gnet, verbose=(verbose and dist.get_rank() == 0)) as f:
            gnet = pickle.load(f)['ema'].to(device)
    if gnet is None:
        gnet = net

    # Initialize encoder.
    assert encoder is not None
    if verbose:
        dist.print0(f'Setting up {type(encoder).__name__}...')
    encoder.init(device)
    if encoder_batch_size is not None and hasattr(encoder, 'batch_size'):
        encoder.batch_size = encoder_batch_size

    # Other ranks follow.
    if dist.get_rank() == 0:
        torch.distributed.barrier()

    # Divide seeds into batches.
    num_batches = max((len(seeds) - 1) // (max_batch_size * dist.get_world_size()) + 1, 1) * dist.get_world_size()
    rank_batches = np.array_split(np.arange(len(seeds)), num_batches)[dist.get_rank() :: dist.get_world_size()]
    if verbose:
        dist.print0(f'Generating {len(seeds)} images...')

    # Directory that holds the image of one seed (only used when outdir is set).
    def _image_dir(seed):
        return os.path.join(outdir, f'{seed//1000*1000:06d}') if subdirs else outdir

    # Full path of the image file of one seed (only used when outdir is set).
    def _image_path(seed):
        return os.path.join(_image_dir(seed), f'{seed:06d}.png')

    # Return an iterable over the batches.
    class ImageIterable:
        def __len__(self):
            return len(rank_batches)

        def __iter__(self):
            # Loop over batches.
            for batch_idx, indices in enumerate(rank_batches):
                r = dnnlib.EasyDict(images=None, labels=None, noise=None, batch_idx=batch_idx, num_batches=len(rank_batches), indices=indices)
                r.seeds = [seeds[idx] for idx in indices]

                # A batch needs work when it is non-empty and either nothing
                # is saved to disk or at least one of its files is missing.
                needs_generation = len(r.seeds) > 0 and (
                    outdir is None
                    or any(not os.path.exists(_image_path(seed)) for seed in r.seeds)
                )
                if len(r.seeds) > 0 and not needs_generation:
                    print(f"Skipping seeds: {r.seeds}")

                if needs_generation:
                    # Pick noise and labels.
                    rnd = StackedRandomGenerator(device, r.seeds)
                    r.noise = rnd.randn([len(r.seeds), net.img_channels, net.img_resolution, net.img_resolution], device=device)
                    r.labels = None
                    if net.label_dim > 0:
                        r.labels = torch.eye(net.label_dim, device=device)[rnd.randint(net.label_dim, size=[len(r.seeds)], device=device)]
                        if class_idx is not None:
                            r.labels[:, :] = 0
                            r.labels[:, class_idx] = 1

                    # Generate images.
                    latents = dnnlib.util.call_func_by_name(func_name=sampler_fn, net=net, noise=r.noise,
                        labels=r.labels, gnet=gnet, randn_like=rnd.randn_like, **sampler_kwargs)
                    r.images = encoder.decode(latents)

                    # Save images.
                    if outdir is not None:
                        for seed, image in zip(r.seeds, r.images.permute(0, 2, 3, 1).cpu().numpy()):
                            os.makedirs(_image_dir(seed), exist_ok=True)
                            PIL.Image.fromarray(image, 'RGB').save(_image_path(seed))

                # Yield results. Skipped batches pass through here as well:
                # a rank that skipped the barrier would leave the others waiting.
                torch.distributed.barrier() # keep the ranks in sync
                yield r

    return ImageIterable()

#----------------------------------------------------------------------------
# Parse a comma separated list of numbers or ranges and return a list of ints.
# Example: '1,2,5-10' returns [1, 2, 5, 6, 7, 8, 9, 10]

def parse_int_list(s):
    """Turn a comma separated string of numbers and ranges into a list of ints.

    Ranges are written ``A-B`` and include both ends, so ``'1,2,5-10'``
    gives ``[1, 2, 5, 6, 7, 8, 9, 10]``. A list argument is returned as is.
    """
    if isinstance(s, list):
        return s
    range_pattern = re.compile(r'^(\d+)-(\d+)$')
    values = []
    for token in s.split(','):
        found = range_pattern.match(token)
        if found is None:
            # Not a range: the token must be a single integer.
            values.append(int(token))
            continue
        first, last = (int(group) for group in found.groups())
        values += range(first, last + 1)
    return values

#----------------------------------------------------------------------------
# Command line interface.

@click.command()
@click.option('--preset',                   help='Configuration preset', metavar='STR',                             type=str, default=None)
@click.option('--net',                      help='Main network pickle filename', metavar='PATH|URL',                type=str, default=None)
@click.option('--gnet',                     help='Guiding network pickle filename', metavar='PATH|URL',             type=str, default=None)
@click.option('--outdir',                   help='Where to save the output images', metavar='DIR',                  type=str, required=True)
@click.option('--subdirs',                  help='Create subdirectory for every 1000 seeds',                        is_flag=True)
@click.option('--seeds',                    help='List of random seeds (e.g. 1,2,5-10)', metavar='LIST',            type=parse_int_list, default='16-19', show_default=True)
@click.option('--class', 'class_idx',       help='Class label  [default: random]', metavar='INT',                   type=click.IntRange(min=0), default=None)
@click.option('--batch', 'max_batch_size',  help='Maximum batch size', metavar='INT',                               type=click.IntRange(min=1), default=32, show_default=True)

@click.option('--steps', 'num_steps',       help='Number of sampling steps', metavar='INT',                         type=click.IntRange(min=1), default=32, show_default=True)
@click.option('--sigma_min',                help='Lowest noise level', metavar='FLOAT',                             type=click.FloatRange(min=0, min_open=True), default=0.002, show_default=True)
@click.option('--sigma_max',                help='Highest noise level', metavar='FLOAT',                            type=click.FloatRange(min=0, min_open=True), default=80, show_default=True)
@click.option('--rho',                      help='Time step exponent', metavar='FLOAT',                             type=click.FloatRange(min=0, min_open=True), default=7, show_default=True)
@click.option('--guidance',                 help='Guidance strength  [default: 1; no guidance]', metavar='FLOAT',   type=float, default=None)
@click.option('--S_churn', 'S_churn',       help='Stochasticity strength', metavar='FLOAT',                         type=click.FloatRange(min=0), default=0, show_default=True)
@click.option('--S_min', 'S_min',           help='Stoch. min noise level', metavar='FLOAT',                         type=click.FloatRange(min=0), default=0, show_default=True)
@click.option('--S_max', 'S_max',           help='Stoch. max noise level', metavar='FLOAT',                         type=click.FloatRange(min=0), default='inf', show_default=True)
@click.option('--S_noise', 'S_noise',       help='Stoch. noise inflation', metavar='FLOAT',                         type=float, default=1, show_default=True)

################################################################################
@click.option('--pred_type',                help='Model prediction type', metavar='STR',                            type=str, default=None)
# reg
@click.option('--beta_1',                   help='beta for ema 1', metavar='FLOAT',                                 type=float, default=0.9, show_default=True)
@click.option('--beta_2',                   help='beta for ema 2', metavar='FLOAT',                                 type=float, default=0.9, show_default=True)
@click.option('--gamma',                    help='ema scale for pred_type == egg', metavar='FLOAT',                 type=float, default=0.4, show_default=True)
# apg
@click.option('--eta',                      help='parallel scale for pred_type == apg', metavar='FLOAT',            type=float, default=0.0, show_default=True)
@click.option('--r_scale',                  help='scale for pred_type == apg', metavar='FLOAT',                     type=float, default=2.5, show_default=True)
# fdg
@click.option('--w_high',              help='scale for pred_type == apg', metavar='FLOAT',                     type=float, default=3.0, show_default=True)
@click.option('--w_low',                help='scale for pred_type == apg', metavar='FLOAT',                     type=float, default=1.0, show_default=True)
@click.option('--w_par',                help='scale for pred_type == apg', metavar='FLOAT',                     type=float, default=3.0, show_default=True)
# ig
@click.option('--sigma_start',              help='scale for pred_type == apg', metavar='FLOAT',                     type=float, default=0.28, show_default=True)
@click.option('--sigma_end',                help='scale for pred_type == apg', metavar='FLOAT',                     type=float, default=2.9, show_default=True)
################################################################################

def cmdline(preset, **opts):
    """Generate random images using the given model.

    Examples:

    \b
    # Generate a couple of images and save them as out/*.png
    python generate_images.py --preset=edm2-img512-s-guid-dino --outdir=out

    \b
    # Generate 50000 images using 8 GPUs and save them as out/*/*.png
    torchrun --standalone --nproc_per_node=8 generate_images.py \\
        --preset=edm2-img64-s-fid --outdir=out --subdirs --seeds=0-49999
    """
    # NOTE(review): the docstring is kept verbatim on purpose -- it presumably doubles
    # as the click --help text for this command; confirm before rewording it.
    opts = dnnlib.EasyDict(opts)

    # A preset only fills in options that are still None; anything already set
    # (explicitly or through a non-None default) takes precedence over the preset.
    if preset is not None and preset not in config_presets:
        raise click.ClickException(f'Invalid configuration preset "{preset}"')
    if preset is not None:
        for name, preset_value in config_presets[preset].items():
            if opts[name] is None:
                opts[name] = preset_value

    # The main network is mandatory once the preset (if any) has been applied.
    if opts.net is None:
        raise click.ClickException('Please specify either --preset or --net')

    # Guidance of None or 1 means "no guidance": normalize to 1 and drop the guiding
    # network. Any other strength requires a guiding network to be given.
    guidance_is_off = opts.guidance is None or opts.guidance == 1
    if guidance_is_off:
        opts.guidance, opts.gnet = 1, None
    elif opts.gnet is None:
        raise click.ClickException('Please specify --gnet when using guidance')

    ################################################################################
    # Reset to None the EMA-related options that the selected prediction type is
    # listed as not using; every other pred_type keeps all of them as given.
    if opts.pred_type in ('vanilla', 'ag_noise'):
        unused_options = ('beta_1', 'beta_2', 'gamma')
    elif opts.pred_type == 'mom':
        unused_options = ('gamma', 'beta_2')
    else:
        unused_options = ()
    for name in unused_options:
        opts[name] = None
    ################################################################################

    # Generate: drain the iterable returned by generate_images(); the loop body is
    # empty because only the iteration itself matters here. The progress bar is
    # disabled on every rank except rank 0.
    dist.init()
    batches = generate_images(**opts)
    hide_progress = dist.get_rank() != 0
    for _batch in tqdm.tqdm(batches, unit='batch', disable=hide_progress):
        pass

#----------------------------------------------------------------------------

# Script entry point: run the command-line handler only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    cmdline()

#----------------------------------------------------------------------------
