# Copied from https://github.com/huggingface/diffusers/blob/fc6acb6b97e93d58cb22b5fee52d884d77ce84d8/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
# with the following modifications:
# - It uses the patched DDIM helpers from `ddim_with_logprob.py` (`ddim_step_with_mean`,
#   `ddim_prediction_with_logprob`, `get_variance`). As such, it only supports the `ddim` scheduler.
# - It runs a Sequential Monte Carlo (SMC) sampler with a twisted proposal over several particles and returns only
#   the latent of the particle with the largest final log-weight.

from typing import Any, Callable, Dict, List, Optional, Union

import math
import torch
import numpy as np
from smc_utils import compute_ess_from_log_w, normalize_log_weights, resampling_function, normalize_weights, adaptive_tempering

from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import (
    StableDiffusionPipeline,
    rescale_noise_cfg,
)
from .ddim_with_logprob import get_variance, ddim_step_with_mean, ddim_step_with_logprob, ddim_prediction_with_logprob

def _left_broadcast(t, shape):
    """Broadcast `t` to `shape`, aligning its dimensions on the left.

    Trailing singleton dimensions are appended to `t` until it has as many
    dimensions as `shape`, and the result is then broadcast to `shape`.

    Args:
        t: tensor with at most `len(shape)` dimensions.
        shape: target shape.

    Returns:
        A tensor of shape `shape` (a broadcast view of the reshaped input).

    Raises:
        AssertionError: if `t` has more dimensions than `shape`.
    """
    missing_dims = len(shape) - t.ndim
    assert missing_dims >= 0
    padded_shape = tuple(t.shape) + (1,) * missing_dims
    return torch.broadcast_to(t.reshape(padded_shape), shape)

@torch.no_grad()
def pipeline_using_smc(
    self: StableDiffusionPipeline,
    prompt: Optional[Union[str, List[str]]] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
    num_inference_steps: int = 50,
    guidance_scale: float = 7.5,
    negative_prompt: Optional[Union[str, List[str]]] = None,
    # num_images_per_prompt: Optional[int] = 1, # use batch_p instead
    eta: float = 0.0,
    generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
    latents: Optional[torch.FloatTensor] = None,
    prompt_embeds: Optional[torch.FloatTensor] = None,
    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
    output_type: Optional[str] = "pil",
    return_dict: bool = True,
    callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
    callback_steps: int = 1,
    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    guidance_rescale: float = 0.0,
    # TDS parameters
    num_particles: int = 4,
    batch_p: int = 4, # number of particles to run parallely
    resample_strategy: Optional[str] = None,
    ess_threshold: Optional[float] = None,
    tempering: Optional[str] = None,
    tempering_schedule: Optional[Union[float, int, str]] = None,
    tempering_gamma: float = 0.,
    tempering_start: float = 0.,
    # Ex) lambda images: _fn(images, prompts.repeat_interleave(batch_p, dim=0), metadata.repeat_interleave(batch_p, dim=0))
    reward_fn: Optional[Callable[[Union[torch.Tensor, np.ndarray]], torch.Tensor]] = None,
    penalty_type: str = "RKL",
    penalty_coeff: float = 1.,
    f_alpha: float = .5,
    omega: float = 1.,
) -> torch.Tensor:
    r"""
    Sample a latent with a Sequential Monte Carlo (SMC) sampler using a reward-twisted DDIM proposal.

    `num_particles` latents are denoised together. At every step from `start` onwards the particles are
    re-weighted with a reward-based twisting function, optionally resampled, and then moved by a DDIM step whose
    mean is shifted by the gradient of the twisting function. Only the latent of the particle with the largest
    final log-weight is returned.

    NOTE(review): several internal reshapes (e.g. `.view(num_particles, -1)`) and the chunking loops over
    `num_particles` look like they assume a single prompt (`batch_size == 1`) and `num_particles` being a multiple
    of `batch_p` -- confirm against callers.

    Args:
        prompt (`str` or `List[str]`, *optional*):
            The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
            instead.
        height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
            The height in pixels of the generated image.
        width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
            The width in pixels of the generated image.
        num_inference_steps (`int`, *optional*, defaults to 50):
            The number of denoising steps.
        guidance_scale (`float`, *optional*, defaults to 7.5):
            Classifier-free guidance weight. Guidance is enabled when `guidance_scale > 1`.
        negative_prompt (`str` or `List[str]`, *optional*):
            The prompt or prompts not to guide the image generation.
        eta (`float`, *optional*, defaults to 0.0):
            Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. The proposal
            variance used here is `eta**2 * variance`.
        generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
            One or a list of torch generator(s) to make generation deterministic.
        latents (`torch.FloatTensor`, *optional*):
            Pre-generated noisy latents forwarded to `self.prepare_latents`.
        prompt_embeds (`torch.FloatTensor`, *optional*):
            Pre-generated text embeddings. If not provided, they are generated from `prompt`.
        negative_prompt_embeds (`torch.FloatTensor`, *optional*):
            Pre-generated negative text embeddings.
        output_type (`str`, *optional*, defaults to `"pil"`):
            Not used by this function.
        return_dict (`bool`, *optional*, defaults to `True`):
            Not used by this function.
        callback (`Callable`, *optional*):
            Called as `callback(step, timestep, latents)` during the loop, and once more after the final
            weighting step.
        callback_steps (`int`, *optional*, defaults to 1):
            The frequency at which the `callback` function will be called inside the loop.
        cross_attention_kwargs (`dict`, *optional*):
            Passed along to the UNet; its `"scale"` entry, if any, is used as the text-encoder LoRA scale.
        guidance_rescale (`float`, *optional*, defaults to 0.0):
            Guidance rescale factor from https://arxiv.org/pdf/2305.08891.pdf; applied only when `> 0` and
            classifier-free guidance is active.
        num_particles (`int`, defaults to 4):
            Number of SMC particles per prompt.
        batch_p (`int`, defaults to 4):
            Number of particles pushed through the UNet / VAE / `reward_fn` at once.
        resample_strategy (`str`, *optional*):
            Forwarded to `resampling_function`.
        ess_threshold (`float`, *optional*):
            Forwarded to `resampling_function` and `adaptive_tempering`.
        tempering (`str`, *optional*):
            How the scale applied to the twisting function is chosen: `"adaptive"`, `"FreeDoM"`, `"schedule"`;
            any other value uses a scale of 1.
        tempering_schedule (`float`, `int` or `str`, *optional*):
            A number `p` gives `min((tempering_gamma * k) ** p, 1)`; `"exp"` gives
            `min((1 + tempering_gamma) ** k - 1, 1)`; `"adaptive"` reuses the previous scale factor; anything
            else gives 1. Here `k` is the number of steps since `start`.
        tempering_gamma (`float`, defaults to 0.):
            Rate used by the tempering schedules above.
        tempering_start (`float`, defaults to 0.):
            Fraction of the timesteps after which weighting/guidance starts
            (`start = int(len(timesteps) * tempering_start)`).
        reward_fn (`Callable`):
            Maps decoded images (the VAE decoder output) to a tensor of rewards, one per image.
        penalty_type (`str`, defaults to `"RKL"`):
            One of `"RKL"`, `"FKL"`, `"JSD"`, `"alpha"`; selects the twisting (look-forward) function.
        penalty_coeff (`float`, defaults to 1.):
            Coefficient used inside the twisting functions.
        f_alpha (`float`, defaults to .5):
            Alpha of the alpha divergence; must lie in (0, 1) when `penalty_type == "alpha"`.
        omega (`float`, defaults to 1.):
            Not used by this function.

    Returns:
        `torch.Tensor`: the latent of the particle with the largest final log-weight, with a leading batch
        dimension of size 1.

    Raises:
        NotImplementedError: if `penalty_type` is not one of the supported values.
    """
    # 0. Default height and width to unet
    height = height or self.unet.config.sample_size * self.vae_scale_factor
    width = width or self.unet.config.sample_size * self.vae_scale_factor

    # 1. Check inputs. Raise error if not correct
    self.check_inputs(
        prompt,
        height,
        width,
        callback_steps,
        negative_prompt,
        prompt_embeds,
        negative_prompt_embeds,
    )

    # 2. Define call parameters
    if prompt is not None and isinstance(prompt, str):
        batch_size = 1
    elif prompt is not None and isinstance(prompt, list):
        batch_size = len(prompt)
    else:
        batch_size = prompt_embeds.shape[0]
    print("Batch size:", batch_size)

    device = self._execution_device
    # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
    # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
    # corresponds to doing no classifier free guidance.
    do_classifier_free_guidance = guidance_scale > 1.0
    print("Classifier guidance: ", do_classifier_free_guidance)

    # 3. Encode input prompt
    text_encoder_lora_scale = (
        cross_attention_kwargs.get("scale", None)
        if cross_attention_kwargs is not None
        else None
    )
    prompt_embeds = self._encode_prompt(
        prompt,
        device,
        batch_size*batch_p, # num_images_per_prompt
        do_classifier_free_guidance,
        negative_prompt,
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        lora_scale=text_encoder_lora_scale,
    )

    # 4. Prepare timesteps
    self.scheduler.set_timesteps(num_inference_steps, device=device)
    timesteps = self.scheduler.timesteps

    # 5. Prepare latent variables
    num_channels_latents = self.unet.config.in_channels
    prop_latents = self.prepare_latents(
        batch_size*num_particles, # num_images_per_prompt
        num_channels_latents,
        height,
        width,
        prompt_embeds.dtype,
        device,
        generator,
        latents,
    ) # latents sampled from proposal p(x_T)

    # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
    extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

    # 7. Denoising loop using Sequential Monte Carlo with Twisted Proposal
    num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order

    # Define helper function for predicting noise in SMC sampling
    def _pred_noise(latents, t):
        """Return the (guided) noise prediction and the classifier-free guidance term for `latents` at `t`."""
        # expand the latents if we are doing classifier free guidance
        latent_model_input = (
            torch.cat([latents] * 2) if do_classifier_free_guidance else latents
        )
        latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

        # predict the noise residual
        noise_pred = self.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            cross_attention_kwargs=cross_attention_kwargs,
            return_dict=False,
        )[0]

        # perform guidance
        if do_classifier_free_guidance:
            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
            noise_pred = noise_pred_uncond + guidance_scale * (
                noise_pred_text - noise_pred_uncond
            )

        if do_classifier_free_guidance and guidance_rescale > 0.0:
            # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
            noise_pred = rescale_noise_cfg(
                noise_pred, noise_pred_text, guidance_rescale=guidance_rescale
            )

        # Difference between the guided and the unconditional prediction (zero without CFG).
        guidance = noise_pred - noise_pred_uncond if do_classifier_free_guidance else torch.zeros_like(noise_pred, device=noise_pred.device)

        return noise_pred, guidance

    print("Safety Checker: ", self.safety_checker)
    def _decode(latents):
        """Decode latents with the VAE and return the raw decoder output."""
        image_un = self.vae.decode(
            latents / self.vae.config.scaling_factor
        ).sample

        return image_un

    # Set guidance for depending on f divergence penalty type.
    # `lookforward_fn(r, reward_lambda)` is the log twisting function of the reward `r`;
    # `guidance_fn` is the matching chain-rule factor applied to a reward gradient.
    penalty_coeff = torch.tensor(penalty_coeff, device=device).to(torch.float32)
    reward_lambda = 0.

    if penalty_type == "RKL":
        min_gap = 0.
        lookforward_fn = lambda r, reward_lambda: r / penalty_coeff
        guidance_fn = lambda r, r_guidance, reward_lambda: r_guidance / penalty_coeff

    elif penalty_type == "FKL":
        min_gap = 0.
        lookforward_fn = lambda r, reward_lambda: torch.log(penalty_coeff) - torch.log(reward_lambda - r)
        guidance_fn = lambda r, r_guidance, reward_lambda: r_guidance / (reward_lambda - r)

    elif penalty_type == "JSD":
        min_gap = - penalty_coeff * math.log(2)
        lookforward_fn = lambda r, reward_lambda: (r - reward_lambda) / penalty_coeff - torch.log(2 - torch.exp((r - reward_lambda) / penalty_coeff))
        guidance_fn = lambda r, r_guidance, reward_lambda: r_guidance * (1 + torch.exp((r - reward_lambda)/penalty_coeff) / (2 - torch.exp((r - reward_lambda)/penalty_coeff))) / penalty_coeff

    elif penalty_type == "alpha":
        assert f_alpha > 0. and f_alpha < 1., "alpha of alpha divergence should be in (0, 1)"
        f_alpha = torch.tensor(f_alpha, device=device).to(torch.float32)

        min_gap = - penalty_coeff / f_alpha
        lookforward_fn = lambda r, reward_lambda: - torch.log(1 + (reward_lambda - r) * f_alpha / penalty_coeff) / f_alpha
        guidance_fn = lambda r, r_guidance, reward_lambda: r_guidance / (penalty_coeff + f_alpha*(reward_lambda - r))

    else:
        raise NotImplementedError("Invalid penalty type")

    def adaptively_set_lambda(rewards, scale_factor, log_Z, log_w, log_twist_func_prev, log_prob_diffusion, log_prob_proposal):
        """Fit `reward_lambda` with Adam so that the updated log normalizer is driven towards 0.

        The value is kept at or above `max(rewards) + min_gap + penalty_coeff`; if the optimisation
        produces NaN, that lower bound is returned instead.
        """
        sorted_rewards = torch.sort(rewards)[0]
        max_reward = sorted_rewards[-1].item()
        min_lambda = max_reward + min_gap + penalty_coeff

        print("max reward: ", max_reward)
        print("min gap: ", min_gap)

        # Start slightly above the lower bound.
        reward_lambda = torch.tensor(min_lambda + 0.01, device=device).requires_grad_(True)
        optimizer = torch.optim.Adam([reward_lambda], lr=1e-3)

        # The enclosing function runs under `torch.no_grad()`, so gradients are re-enabled locally.
        with torch.enable_grad():
            for _ in range(500):
                prev_reward_lambda = reward_lambda.clone()

                optimizer.zero_grad()
                log_twist_func = lookforward_fn(rewards, reward_lambda)
                update_log_w = log_w + scale_factor*log_twist_func - log_twist_func_prev - log_prob_proposal + log_prob_diffusion
                loss = (log_Z + torch.logsumexp(update_log_w, dim=-1)).pow(2).mean() # target: Z=1 <=> log_Z=0
                loss.backward()
                optimizer.step()

                # Project back onto the feasible region and stop once the iterate no longer moves.
                reward_lambda.data = torch.clamp(reward_lambda.data, min=min_lambda)
                if (prev_reward_lambda - reward_lambda).abs().max() < 1e-8:
                    break

        if torch.isnan(reward_lambda):
            print("reward lambda nan")
            reward_lambda = min_lambda

        return reward_lambda.detach()

    # TDS 0) Intialize variables for TDS
    noise_pred = torch.zeros_like(prop_latents, device=device)
    guidance = torch.zeros_like(prop_latents, device=device)
    approx_guidance = torch.zeros_like(prop_latents, device=device)
    reward_guidance = torch.zeros_like(prop_latents, device=device)
    pred_original_sample = torch.zeros_like(prop_latents, device=device)
    scale_factor = torch.zeros(batch_size, device=device)
    min_scale_next = torch.zeros(batch_size, device=device)
    rewards = torch.zeros(prop_latents.shape[0], device=device)
    log_twist_func = torch.zeros(prop_latents.shape[0], device=device)
    log_twist_func_prev = torch.zeros(prop_latents.shape[0], device=device)
    log_Z = torch.zeros(batch_size, device=device)
    log_w = torch.zeros(prop_latents.shape[0], device=device)
    log_prob_diffusion = torch.zeros(prop_latents.shape[0], device=device)
    log_prob_proposal = torch.zeros(prop_latents.shape[0], device=device)
    resample_fn = resampling_function(resample_strategy=resample_strategy, ess_threshold=ess_threshold)
    # Deactivate clipping as in https://arxiv.org/abs/2302.11552?
    # Note: Original implememtation of TDS from https://github.com/blt2114/twisted_diffusion_sampler use clipping
    # In TDS implementation, clipping is done for sampling(proposal), but no clipping for pred_x0 when calculating twist function!
    # TODO Check if clipping helps

    # Diagnostics collected during sampling; none of them is returned.
    all_log_w = []
    all_resample_indices = []
    ess_trace = []
    scale_factor_trace = []
    rewards_trace = []
    manifold_deviation_trace = torch.tensor([], device=device)
    log_prob_diffusion_trace = torch.tensor([], device=device)

    # First loop index at which weighting and reward guidance are active.
    start = int(len(timesteps)*tempering_start)

    # The helpers below read `i`, `t`, `latents` and `reward_lambda` from the enclosing scope at call
    # time and write their results in place into the pre-allocated buffers above.
    def _calc_rewards():
        """Fill `rewards`, `pred_original_sample`, `noise_pred` and `guidance` for the current latents."""
        if (i >= start):
            for idx in range(math.ceil(num_particles / batch_p)):
                tmp_latents = latents[batch_p*idx : batch_p*(idx+1)].to(torch.float32)

                # TDS 1) Noise prediction and predicted x_0
                tmp_noise_pred, tmp_guidance = _pred_noise(tmp_latents, t)

                tmp_pred_original_sample, _ = ddim_prediction_with_logprob(
                    self.scheduler, tmp_noise_pred, t, tmp_latents, **extra_step_kwargs
                )

                # Calculate rewards
                print(tmp_pred_original_sample.shape)
                tmp_rewards = reward_fn(_decode(tmp_pred_original_sample)).to(torch.float32)

                pred_original_sample[batch_p*idx : batch_p*(idx+1)] = tmp_pred_original_sample
                rewards[batch_p*idx : batch_p*(idx+1)] = tmp_rewards

                noise_pred[batch_p*idx : batch_p*(idx+1)] = tmp_noise_pred
                guidance[batch_p*idx : batch_p*(idx+1)] = tmp_guidance

    def _calc_guidance():
        """Compute noise predictions and, once guidance is active, the twisting function and its gradient."""
        if (i >= max(start, 1)):

            with torch.enable_grad():
                for idx in range(math.ceil(num_particles / batch_p)):
                    tmp_latents = latents[batch_p*idx : batch_p*(idx+1)].detach().to(torch.float32).requires_grad_(True)
                    # TDS 1) Noise prediction and predicted x_0
                    tmp_noise_pred, tmp_guidance = _pred_noise(tmp_latents, t)

                    tmp_pred_original_sample, _ = ddim_prediction_with_logprob(
                        self.scheduler, tmp_noise_pred, t, tmp_latents, **extra_step_kwargs
                    )

                    # Calculate rewards
                    tmp_rewards = reward_fn(_decode(tmp_pred_original_sample)).to(torch.float32)
                    tmp_log_twist_func = lookforward_fn(tmp_rewards, reward_lambda).to(torch.float32)

                    # Calculate approximate guidance noise for maximizing reward
                    tmp_approx_guidance = torch.autograd.grad(outputs=tmp_log_twist_func, inputs=tmp_latents, grad_outputs=torch.ones_like(tmp_log_twist_func))[0].detach()

                    pred_original_sample[batch_p*idx : batch_p*(idx+1)] = tmp_pred_original_sample.detach().clone()
                    rewards[batch_p*idx : batch_p*(idx+1)] = tmp_rewards.detach().clone()

                    noise_pred[batch_p*idx : batch_p*(idx+1)] = tmp_noise_pred.detach().clone()
                    guidance[batch_p*idx : batch_p*(idx+1)] = tmp_guidance.detach().clone()

                    log_twist_func[batch_p*idx : batch_p*(idx+1)] = tmp_log_twist_func.detach().clone()
                    approx_guidance[batch_p*idx : batch_p*(idx+1)] = tmp_approx_guidance.clone()

            print(torch.isnan(approx_guidance).any())
            # Replace NaNs so a single bad particle does not poison the weights or the proposal.
            log_twist_func[:] = torch.nan_to_num(log_twist_func)
            approx_guidance[:] = torch.nan_to_num(approx_guidance)

        else:
            # Guidance not active yet: only the noise prediction is needed, no gradients.
            for idx in range(math.ceil(num_particles / batch_p)):
                with torch.no_grad():
                    tmp_latents = latents[batch_p*idx : batch_p*(idx+1)].detach()
                    tmp_noise_pred, tmp_guidance = _pred_noise(tmp_latents, t)

                noise_pred[batch_p*idx : batch_p*(idx+1)] = tmp_noise_pred.detach().clone()
                guidance[batch_p*idx : batch_p*(idx+1)] = tmp_guidance.detach().clone()

        print(rewards)
        print(log_twist_func)

    with self.progress_bar(total=num_inference_steps) as progress_bar:
        for i, t in enumerate(timesteps):
            print(t)
            prev_timestep = (
                t - self.scheduler.config.num_train_timesteps // self.scheduler.num_inference_steps
            )
            # to prevent OOB on gather
            prev_timestep = torch.clamp(prev_timestep, 0, self.scheduler.config.num_train_timesteps - 1)

            prop_latents = prop_latents.detach()
            latents = prop_latents.clone()
            log_twist_func_prev = log_twist_func.clone() # Used to calculate weight later

            # Non-RKL penalties need fresh rewards before `reward_lambda` can be fitted below.
            if penalty_type != "RKL" and i > 0:
                _calc_rewards()

            with torch.no_grad():
                if (i >= start):
                    # TDS 4) Calculate weights for samples from proposal distribution

                    print(tempering)
                    if isinstance(tempering_schedule, (float, int)):
                        min_scale = torch.tensor([min((tempering_gamma * (i - start))**tempering_schedule, 1.)]*batch_size, device=device)
                        # Same power schedule evaluated one step ahead (the exponent was previously missing here).
                        min_scale_next = torch.tensor([min((tempering_gamma * (i + 1 - start))**tempering_schedule, 1.)]*batch_size, device=device)
                    elif tempering_schedule == "exp":
                        min_scale = torch.tensor([min((1 + tempering_gamma) ** (i - start) - 1, 1.)]*batch_size, device=device)
                        min_scale_next = torch.tensor([min((1 + tempering_gamma) ** (i + 1 - start) - 1, 1.)]*batch_size, device=device)
                    elif tempering_schedule == "adaptive":
                        min_scale = scale_factor.clone()
                    else:
                        min_scale = torch.tensor([1.]*batch_size, device=device)
                        min_scale_next = torch.tensor([1.]*batch_size, device=device)

                    if tempering == "adaptive" and i > 0 and (min_scale < 1.).any():
                        scale_factor = adaptive_tempering(log_w.view(-1, num_particles), log_prob_diffusion.view(-1, num_particles), log_twist_func.view(-1, num_particles), log_prob_proposal.view(-1, num_particles), log_twist_func_prev.view(-1, num_particles), min_scale=min_scale, ess_threshold=ess_threshold)
                        min_scale_next = scale_factor.clone()
                    elif tempering == "adaptive" and i == 0:
                        pass
                    elif tempering == "FreeDoM":
                        # Ratio of the RMS of the CFG guidance to the RMS of the reward guidance.
                        scale_factor = (guidance ** 2).mean().sqrt() / (approx_guidance ** 2).mean().sqrt()
                        scale_factor = torch.tensor([scale_factor]*batch_size, device=device)
                        min_scale_next = scale_factor.clone()
                    elif tempering == "schedule":
                        scale_factor = min_scale
                    else:
                        scale_factor = torch.ones(batch_size, device=device)
                    print(scale_factor)
                    scale_factor_trace.append(scale_factor.cpu())

                    if penalty_type != "RKL" and i > 0:
                        reward_lambda = adaptively_set_lambda(rewards, scale_factor.repeat_interleave(num_particles, dim=0), log_Z, log_w, log_twist_func_prev, log_prob_diffusion, log_prob_proposal)
                        print("reward_lambda: ", reward_lambda)
                    _calc_guidance()
                    rewards_trace.append(rewards.view(-1, num_particles).max(dim=1)[0].cpu())

                    print((noise_pred**2).mean().sqrt())
                    print((guidance ** 2).mean().sqrt())
                    print(approx_guidance.device)
                    print(self.scheduler.alphas_cumprod.device)
                    print(self.scheduler.alphas_cumprod.gather(0, t))
                    print((1-self.scheduler.alphas_cumprod.gather(0, t))*(approx_guidance ** 2).mean().sqrt())

                    # Temper both the twisting function and its gradient with the per-prompt scale factor.
                    log_twist_func *= scale_factor.repeat_interleave(num_particles, dim=0)
                    approx_guidance *= scale_factor.repeat_interleave(num_particles, dim=0).view([-1] + [1]*(approx_guidance.dim()-1))

                    print((1-self.scheduler.alphas_cumprod.gather(0, t))*(approx_guidance ** 2).mean().sqrt())

                    incremental_log_w = log_prob_diffusion + log_twist_func - log_prob_proposal - log_twist_func_prev
                    print(log_prob_diffusion - log_prob_proposal)
                    print(log_twist_func - log_twist_func_prev)
                    log_w += incremental_log_w.detach()
                    log_Z += torch.logsumexp(log_w, dim=-1)
                    print("log Z: ", log_Z)
                    print(incremental_log_w)
                    ess = [compute_ess_from_log_w(log_w_prompt).item() for log_w_prompt in log_w.view(-1, num_particles)]
                    print(ess)

                    all_log_w.append(log_w)
                    ess_trace.append(torch.tensor(ess).cpu())

                    # TDS 5) resample latents and corresponding variables
                    resample_indices, is_resampled, log_w = resample_fn(log_w.view(-1, num_particles))
                    log_w = log_w.view(-1)
                    print(resample_indices)
                    all_resample_indices.append(resample_indices)
                    # Note: log_w is updated to 0 for batches with is_resampled==True in resample_fn

                    # Each tensor is viewed as (prompt, particle, ...), gathered with the per-prompt
                    # resampling indices, and flattened back to (prompt * particle, ...).
                    latents = latents.detach().view(-1, num_particles, *latents.shape[1:])[torch.arange(latents.size(0)//num_particles).unsqueeze(1), resample_indices].view(-1, *latents.shape[1:])
                    noise_pred = noise_pred.view(-1, num_particles, *noise_pred.shape[1:])[torch.arange(noise_pred.size(0)//num_particles).unsqueeze(1), resample_indices].view(-1, *noise_pred.shape[1:])
                    pred_original_sample = pred_original_sample.view(-1, num_particles, *pred_original_sample.shape[1:])[torch.arange(pred_original_sample.size(0)//num_particles).unsqueeze(1), resample_indices].view(-1, *pred_original_sample.shape[1:])
                    manifold_deviation_trace = manifold_deviation_trace.view(-1, num_particles, *manifold_deviation_trace.shape[1:])[torch.arange(manifold_deviation_trace.size(0)//num_particles).unsqueeze(1), resample_indices].view(-1, *manifold_deviation_trace.shape[1:])
                    log_prob_diffusion_trace = log_prob_diffusion_trace.view(-1, num_particles, *log_prob_diffusion_trace.shape[1:])[torch.arange(log_prob_diffusion_trace.size(0)//num_particles).unsqueeze(1), resample_indices].view(-1, *log_prob_diffusion_trace.shape[1:])

                # TDS 6) Sample from proposal distribution

                prev_sample, prev_sample_mean = ddim_step_with_mean(
                    self.scheduler, noise_pred, t, latents, **extra_step_kwargs
                )

                variance = get_variance(self.scheduler, t, prev_timestep)
                print(eta * variance.sqrt())
                variance = eta**2 * _left_broadcast(variance, prev_sample.shape).to(device)
                std_dev_t = variance.sqrt()

                # Twisted proposal: the DDIM sample shifted by variance * grad(log twist).
                prop_latents = prev_sample + variance * approx_guidance
                manifold_deviation_trace = torch.cat([manifold_deviation_trace, ((variance * approx_guidance * (-noise_pred)).view(num_particles, -1).sum(dim=1).abs() / (noise_pred**2).view(num_particles, -1).sum(dim=1).sqrt()).unsqueeze(1)], dim=1)

                print(((variance * approx_guidance) ** 2).sqrt().mean())
                # Gaussian log-densities of the new sample under the unguided diffusion step and under the proposal.
                log_prob_diffusion = -0.5 * (prop_latents - prev_sample_mean).pow(2) / variance - torch.log(std_dev_t) - torch.log(torch.sqrt(2 * torch.as_tensor(math.pi)))
                log_prob_diffusion = log_prob_diffusion.sum(dim=tuple(range(1, log_prob_diffusion.ndim)))
                log_prob_proposal = -0.5 * (prop_latents - prev_sample_mean - variance * approx_guidance).pow(2) / variance - torch.log(std_dev_t) - torch.log(torch.sqrt(2 * torch.as_tensor(math.pi)))
                log_prob_proposal = log_prob_proposal.sum(dim=tuple(range(1, log_prob_proposal.ndim)))
                # NaNs (e.g. from zero variance) are mapped to values that make the affected weight very small.
                log_prob_diffusion[:] = torch.nan_to_num(log_prob_diffusion, nan=-1e6)
                log_prob_proposal[:] = torch.nan_to_num(log_prob_proposal, nan=1e6)

                log_prob_diffusion_trace = torch.cat([log_prob_diffusion_trace, (log_prob_diffusion_trace.transpose(0, 1)[-1] + log_prob_diffusion).unsqueeze(1)], dim=1) if i > 0 else log_prob_diffusion.unsqueeze(1)

                # call the callback, if provided
                if i > num_warmup_steps and i % self.scheduler.order == 0:
                    progress_bar.update()
                    if callback is not None and i % callback_steps == 0:
                        callback(i, t, latents)

        # TDS 7) Weights for Final samples
        latents = prop_latents.detach()
        log_twist_func_prev = log_twist_func.clone()
        image = []
        for idx in range(math.ceil(num_particles / batch_p)):
            tmp_latents = latents[batch_p*idx : batch_p*(idx+1)]
            tmp_image = _decode(tmp_latents).detach()
            image.append(tmp_image)
            tmp_rewards = reward_fn(tmp_image).detach().to(torch.float32)
            rewards[batch_p*idx : batch_p*(idx+1)] = tmp_rewards
        scale_factor = min_scale_next
        if penalty_type != "RKL":
            reward_lambda = adaptively_set_lambda(rewards, scale_factor.repeat_interleave(num_particles, dim=0), log_Z, log_w, log_twist_func_prev, log_prob_diffusion, log_prob_proposal)
            print(reward_lambda)
        log_twist_func[:] = lookforward_fn(rewards, reward_lambda)

        scale_factor_trace.append(min_scale_next.cpu())
        rewards_trace.append(rewards.view(-1, num_particles).max(dim=1)[0].cpu())

        log_w += log_prob_diffusion + log_twist_func - log_prob_proposal - log_twist_func_prev
        log_Z += torch.logsumexp(log_w, dim=-1)
        print("log Z: ", log_Z)
        normalized_w = normalize_weights(log_w.view(-1, num_particles), dim=-1).view(-1)
        # `.item()` keeps this consistent with the in-loop ESS computation (plain Python floats).
        ess = [compute_ess_from_log_w(log_w_prompt).item() for log_w_prompt in log_w.view(-1, num_particles)]
        image = torch.cat(image, dim=0)

        all_log_w.append(log_w)
        ess_trace.append(torch.tensor(ess).cpu())

        progress_bar.update()
        if callback is not None:
            # NOTE(review): the first argument is the whole `timesteps` tensor rather than a step index --
            # confirm this is what callbacks expect.
            callback(timesteps, 0, latents)

    image = image[torch.argmax(log_w)].unsqueeze(0) # return only image with maximum weight
    latent = latents[torch.argmax(log_w)].unsqueeze(0)

    # Offload last model to CPU
    if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
        self.final_offload_hook.offload()

    return latent
