from typing import List
from jaxtyping import Float
import os
import torch
from torch import nn
from einops import rearrange, repeat, reduce
import torch.nn.functional as F
from torch import Tensor
import nerfacc
import numpy as np
from src.utils.project import ray_sample


class VolumeNERF(nn.Module):
    """Render views from a 3D feature volume that is conditioned on images.

    Pipeline implemented by :meth:`forward`:

    1. Input images are rescaled with ``x * 2 - 1`` and, when an
       ``image_transformer`` is configured, encoded into features.
    2. :meth:`query_triplane` lets ``query_transformer`` update the volume
       stored in ``feature_space`` using those features (optionally followed
       by ``resnet``).
    3. :meth:`render` samples points along camera rays with a nerfacc
       occupancy-grid estimator, evaluates the implicit and rendering
       networks at those points and composites the result with
       :meth:`volume_rendering`.

    Args:
        feature_space: Object exposing ``volume``, ``volume_size`` and
            ``sample_features(xyzs, volume)``.
        resolution: Used both as the occupancy-grid resolution and as the
            side length (height and width) of rendered images.
        N_samples: Number of steps the diagonal of the ``[-1, 1]^3`` box is
            divided into to obtain the ray-marching step size.
        image_transformer: Optional module called as
            ``image_transformer(images, time_embeddings)``.
        query_transformer: Module called as
            ``query_transformer(volume, images, time_embeddings)``; its
            result must expose a ``.sample`` attribute.
        resnet: Optional module called as ``resnet(volume, time_embeddings)``.
        implicit_network: Object exposing
            ``get_outputs(xyzs, sampled_features)`` returning a 3-tuple that
            is used here as ``(sdf, feature_vectors, grads)``.
        rendering_network: Module called as
            ``rendering_network(dirs, feature_vectors, normals)``.
        density_network: Module converting the implicit network's first
            output into a density.
        perturb: Stored on the instance; not read by the methods of this
            class.
        density_scale: Stored on the instance; not read by the methods of
            this class.
        bg_color: Background composited behind the rendered color. Supported
            values are ``"white"`` and ``"black"``.
    """

    def __init__(
        self,
        feature_space,
        resolution,
        N_samples,
        image_transformer = None,
        query_transformer = None,
        resnet = None,
        implicit_network=None,
        rendering_network=None,
        density_network=None,
        perturb = False,
        density_scale = 1.,
        bg_color = "white"
    ):
        super().__init__()

        self.feature_space = feature_space
        self.resolution = resolution
        self.N_samples = N_samples
        self.image_transformer = image_transformer
        self.query_transformer = query_transformer
        self.resnet = resnet

        self.implicit_network = implicit_network
        self.rendering_network = rendering_network
        self.density_network = density_network
        self.perturb = perturb
        self.density_scale = density_scale
        self.bg_color = bg_color

        # Scene bounds as (x_min, y_min, z_min, x_max, y_max, z_max).
        self.aabb_box = torch.as_tensor([-1, -1, -1, 1, 1, 1], dtype=torch.float32)
        self.estimator = nerfacc.OccGridEstimator(
            resolution=resolution,
            roi_aabb=self.aabb_box,
            levels=1
        )
        # Flag every grid cell as occupied; the grid is never updated by this
        # class, so it is not used to skip space.
        self.estimator.occs.fill_(True)
        self.estimator.binaries.fill_(True)
        # The diagonal of the [-1, 1]^3 box has length 2 * sqrt(3); split it
        # into N_samples equally sized marching steps.
        self.render_step_size = np.sqrt(3) * 2 / self.N_samples
        # TODO: more efficient sampling -> sample according to the density
        # of the previous stage.

    def set_use_memory_efficient_attention_xformers(
        self, valid: bool, attention_op=None
    ) -> None:
        """Forward the xformers attention setting to every descendant module.

        Every descendant that defines
        ``set_use_memory_efficient_attention_xformers`` is called with
        ``(valid, attention_op)``. The traversal starts at the direct
        children rather than at ``self`` so this method does not call itself.
        """
        def fn_recursive_set_mem_eff(module: torch.nn.Module):
            if hasattr(module, "set_use_memory_efficient_attention_xformers"):
                module.set_use_memory_efficient_attention_xformers(valid, attention_op)

            for child in module.children():
                fn_recursive_set_mem_eff(child)

        for module in self.children():
            fn_recursive_set_mem_eff(module)

    def query_triplane(self,
        images: Float[Tensor, "B F C H W"],
        time_embeddings: Float[Tensor, "B F D"] = None,
    ):
        """Update the stored feature volume from image features.

        Despite the name, this operates on the dense ``d h w c`` volume held
        by ``feature_space``.

        Args:
            images: Image features, ``B F C H W``.
            time_embeddings: Passed through to ``query_transformer`` and
                ``resnet``.

        Returns:
            The updated volume laid out as ``b d h w c``.
        """
        batch_size = images.shape[0]
        volume_size = self.feature_space.volume_size

        # Flatten both the volume and the images into token sequences.
        volume = repeat(self.feature_space.volume, 'd h w c -> b (d h w) c', b=batch_size)
        images = rearrange(images, 'b f c h w -> b (f h w) c')
        volume = self.query_transformer(volume, images, time_embeddings).sample

        # Back to a channels-first grid for the optional 3D network.
        volume = rearrange(
            volume,
            'b (d h w) c -> b c d h w',
            d=volume_size, h=volume_size, w=volume_size,
        )
        # Explicit None check: module truthiness depends on __len__ for
        # container modules and is not a reliable "is configured" test.
        if self.resnet is not None:
            volume = self.resnet(volume, time_embeddings)
        return rearrange(volume, 'b c d h w -> b d h w c')

    def _background_like(self, color: Tensor) -> Tensor:
        """Return a background tensor shaped like ``color`` for ``self.bg_color``.

        Raises:
            ValueError: If ``self.bg_color`` is neither ``"white"`` nor
                ``"black"``.
        """
        if self.bg_color == "white":
            return torch.ones_like(color)
        if self.bg_color == "black":
            return torch.zeros_like(color)
        raise ValueError(
            f"Unsupported bg_color {self.bg_color!r}; expected 'white' or 'black'."
        )

    def volume_rendering(self,
        sdf,
        normals,
        rgbs,
        t_starts,
        t_ends,
        t_positions,
        ray_indices,
        n_rays
        ):
        """Composite per-sample quantities into per-ray outputs.

        Args:
            sdf: Per-sample input of ``density_network``.
            normals: Per-sample normals.
            rgbs: Per-sample colors.
            t_starts: Per-sample interval starts with a trailing unit axis.
            t_ends: Per-sample interval ends with a trailing unit axis.
            t_positions: Per-sample distances accumulated into the depth.
            ray_indices: Index of the ray each sample belongs to.
            n_rays: Total number of rays.

        Returns:
            Dict with per-ray ``"rgb"``, ``"depth"``, ``"mask"`` (accumulated
            weight) and ``"normal"`` (L2-normalized).

        Raises:
            ValueError: If ``self.bg_color`` is not supported.
        """
        density = self.density_network(sdf)
        # Transmittance and alpha (the other two return values) are not needed.
        weights, _, _ = nerfacc.render_weight_from_density(
            t_starts[..., 0],
            t_ends[..., 0],
            density[..., 0],
            ray_indices=ray_indices,
            n_rays=n_rays,
        )

        def accumulate(values):
            return nerfacc.accumulate_along_rays(
                weights, values=values, ray_indices=ray_indices, n_rays=n_rays
            )

        acc = accumulate(None)  # values=None accumulates the weights themselves
        depth = accumulate(t_positions)
        color = accumulate(rgbs)
        normals = F.normalize(accumulate(normals), p=2, dim=-1)

        # Fill whatever the accumulated weight leaves uncovered with the
        # background color.
        color = color + (1.0 - acc) * self._background_like(color)
        return {
            "rgb": color,
            "depth": depth,
            "mask": acc,
            "normal": normals
        }

    def render(self,
        target_cameras: Float[Tensor, "B F 32"],
        time_embeddings: Float[Tensor, "B F D"] = None,
        volume: Float[Tensor, "B C D H W"] = None,
    ):
        """Render the feature volume from the given cameras.

        Args:
            target_cameras: ``B F 32``; the first 16 values of the last axis
                are read as a flattened 4x4 intrinsics matrix and the last 16
                as a flattened 4x4 camera-to-world matrix.
            time_embeddings: Currently unused (see the TODOs below).
            volume: Feature volume handed to ``feature_space.sample_features``.

        Returns:
            Dict with ``"rgb"``, ``"depth"``, ``"mask"`` and ``"normal"``
            laid out as ``b f c h w`` (``h = w = self.resolution``) plus
            ``"sdf_grad"``, the per-sample gradients returned by the implicit
            network.
        """
        n_frames = target_cameras.shape[1]
        intrinsics = target_cameras[..., :16].reshape(-1, 4, 4)
        c2w = target_cameras[..., 16:].reshape(-1, 4, 4)
        # NOTE(review): ray_sample presumably yields one ray per pixel of a
        # resolution x resolution image; the final rearrange relies on that.
        ray_origins, ray_dirs = ray_sample(c2w, intrinsics[:, :3, :3], self.resolution)
        ray_origins_flattened = ray_origins.reshape(-1, 3)
        ray_dirs_flattened = ray_dirs.reshape(-1, 3)
        n_rays = ray_origins_flattened.shape[0]

        # Sample placement does not need gradients.
        with torch.no_grad():
            ray_indices, t_starts, t_ends = self.estimator.sampling(
                ray_origins_flattened, ray_dirs_flattened,
                render_step_size=self.render_step_size,
            )

        t_starts, t_ends = t_starts[..., None], t_ends[..., None]
        t_origins = ray_origins_flattened[ray_indices]
        t_dirs = ray_dirs_flattened[ray_indices]
        t_positions = (t_starts + t_ends) / 2.0  # interval midpoints

        xyzs = t_origins + t_dirs * t_positions  # [N, 3]

        sampled_features = self.feature_space.sample_features(xyzs, volume)
        # TODO: add timesteps support
        sdf, feature_vectors, grads = self.implicit_network.get_outputs(xyzs, sampled_features)

        # The epsilon keeps the division finite for zero gradients.
        normals = grads / (grads.norm(2, -1, keepdim=True) + 1e-6)

        # TODO: add timesteps support
        rgb = self.rendering_network(t_dirs, feature_vectors, normals)
        per_ray = self.volume_rendering(
            sdf, normals, rgb, t_starts, t_ends, t_positions, ray_indices, n_rays
        )

        outputs = {
            key: rearrange(
                value,
                "(b f h w) c -> b f c h w",
                h=self.resolution, w=self.resolution, f=n_frames,
            )
            for key, value in per_ray.items()
        }
        outputs["sdf_grad"] = grads
        return outputs

    def forward(self,
        images: Float[Tensor, "B F C H W"],
        target_cameras: Float[Tensor, "B F 32"],
        time_embeddings: Float[Tensor, "B F D"] = None
    ):
        """Encode the images, build the feature volume and render it.

        Args:
            images: ``B F C H W``; rescaled with ``x * 2 - 1`` before use.
            target_cameras: ``B F 32`` cameras, see :meth:`render`.
            time_embeddings: Optional embeddings handed to the sub-networks.

        Returns:
            The output dict of :meth:`render`.
        """
        images = images * 2 - 1
        if self.image_transformer is not None:
            features = self.image_transformer(images, time_embeddings)
        else:
            # Without an image encoder the rescaled images themselves are
            # used as the features that condition the volume.
            features = images
        volume = self.query_triplane(features, time_embeddings)
        volume = rearrange(volume, 'b d h w c -> b c d h w')
        # output: {"rgb": xx, "depth": xx, "mask": xx, "normal": xx, "sdf_grad": xx}
        return self.render(target_cameras, time_embeddings, volume)
        