from typing import Optional

import torch
import torch.nn as nn
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers import ModelMixin
from torch import Tensor
from timm.models.vision_transformer import Attention, LayerScale, DropPath, Mlp

from .point_cloud_model import PointCloudModel


class PointCloudModelBlock(nn.Module):
    """Residual block built from a point-cloud branch, optional attention, and an MLP.

    Each branch is pre-normalized with ``norm_layer`` and its output is passed
    through an optional ``LayerScale`` and an optional ``DropPath`` before being
    added back onto the running activation.
    """

    def __init__(
        self,
        *,
        # Point cloud model
        dim: int,
        model_type: str = "pvcnn",
        dropout: float = 0.1,
        width_multiplier: int = 1,
        voxel_resolution_multiplier: int = 1,
        # Transformer model
        num_heads=6,
        mlp_ratio=4.0,
        qkv_bias=False,
        drop=0.0,
        attn_drop=0.0,
        init_values=None,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        use_attn=False,
    ):
        """Build the sub-modules of the block.

        Args:
            dim: Feature width. Used as ``in_channels``, ``out_channels`` and
                ``embed_dim`` of the point cloud model, and as the width of
                the norm, attention and MLP layers.
            model_type: Forwarded to ``PointCloudModel``.
            dropout: Forwarded to ``PointCloudModel``.
            width_multiplier: Forwarded to ``PointCloudModel``.
            voxel_resolution_multiplier: Forwarded to ``PointCloudModel``.
            num_heads: Number of heads given to ``Attention``.
            mlp_ratio: The MLP hidden width is ``int(dim * mlp_ratio)``.
            qkv_bias: Forwarded to ``Attention``.
            drop: Used as ``proj_drop`` of ``Attention`` and ``drop`` of ``Mlp``.
            attn_drop: Forwarded to ``Attention``.
            init_values: When truthy, every branch gets a ``LayerScale``
                created with this value; otherwise an ``nn.Identity``.
            drop_path: When greater than zero, every branch gets a
                ``DropPath`` with this rate; otherwise an ``nn.Identity``.
            act_layer: Activation class handed to ``Mlp``.
            norm_layer: Callable building a normalization layer from ``dim``.
            use_attn: Whether the attention branch is created and applied.
        """
        super().__init__()

        def build_layer_scale() -> nn.Module:
            # Truthiness test: both None and 0 disable layer scaling.
            if init_values:
                return LayerScale(dim, init_values=init_values)
            return nn.Identity()

        def build_drop_path() -> nn.Module:
            if drop_path > 0.0:
                return DropPath(drop_path)
            return nn.Identity()

        # NOTE: sub-modules are created in a fixed order so that parameter
        # registration order stays stable.

        # --- Point cloud branch ---
        self.norm0 = norm_layer(dim)
        self.point_cloud_model = PointCloudModel(
            model_type=model_type,
            in_channels=dim,
            out_channels=dim,
            embed_dim=dim,
            dropout=dropout,
            width_multiplier=width_multiplier,
            voxel_resolution_multiplier=voxel_resolution_multiplier,
        )
        self.ls0 = build_layer_scale()
        self.drop_path0 = build_drop_path()

        # --- Attention branch (only present when requested) ---
        self.use_attn = use_attn
        if self.use_attn:
            self.norm1 = norm_layer(dim)
            self.attn = Attention(
                dim,
                num_heads=num_heads,
                qkv_bias=qkv_bias,
                attn_drop=attn_drop,
                proj_drop=drop,
            )
            self.ls1 = build_layer_scale()
            self.drop_path1 = build_drop_path()

        # --- MLP branch ---
        self.norm2 = norm_layer(dim)
        mlp_hidden = int(dim * mlp_ratio)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden,
            act_layer=act_layer,
            drop=drop,
        )
        self.ls2 = build_layer_scale()
        self.drop_path2 = build_drop_path()

    def apply_point_cloud_model(self, x: Tensor, t: Optional[Tensor] = None) -> Tensor:
        """Run the point cloud model on ``x``.

        When ``t`` is omitted, a zero tensor of dtype ``torch.long`` with
        ``len(x)`` entries is created on ``x``'s device and used instead.
        NOTE(review): ``t`` is presumably a per-sample timestep expected by
        ``PointCloudModel`` -- confirm against that class.
        """
        if t is None:
            t = torch.zeros(len(x), device=x.device, dtype=torch.long)
        return self.point_cloud_model(x, t)

    def forward(self, x: Tensor):
        """Apply the residual branches in order and return the result."""
        # Point cloud branch (always called without an explicit ``t``).
        pc_out = self.apply_point_cloud_model(self.norm0(x))
        x = x + self.drop_path0(self.ls0(pc_out))

        # Optional attention branch.
        if self.use_attn:
            attn_out = self.attn(self.norm1(x))
            x = x + self.drop_path1(self.ls1(attn_out))

        # MLP branch.
        mlp_out = self.mlp(self.norm2(x))
        x = x + self.drop_path2(self.ls2(mlp_out))
        return x


class PointCloudTransformerModel(ModelMixin, ConfigMixin):
    """Stack of ``PointCloudModelBlock`` layers between two linear projections."""

    @register_to_config
    def __init__(
        self,
        num_layers: int,
        in_channels: int = 3,
        out_channels: int = 3,
        embed_dim: int = 64,
        **kwargs
    ):
        """Build the projections and the block stack.

        Args:
            num_layers: Number of ``PointCloudModelBlock`` layers to stack.
            in_channels: Feature size of the input consumed by the input
                projection.
            out_channels: Feature size produced by the output projection.
            embed_dim: Width used inside the blocks (passed to each as ``dim``).
            **kwargs: Forwarded unchanged to every ``PointCloudModelBlock``.
        """
        super().__init__()
        self.num_layers = num_layers

        self.input_projection = nn.Linear(in_channels, embed_dim)

        # Every layer is built with the same configuration.
        layers = [
            PointCloudModelBlock(dim=embed_dim, **kwargs)
            for _ in range(self.num_layers)
        ]
        self.blocks = nn.Sequential(*layers)

        # NOTE(review): this norm is registered (and therefore part of the
        # state dict) but forward() never applies it -- confirm whether that
        # is intentional before changing it.
        self.norm = nn.LayerNorm(embed_dim)
        self.output_projection = nn.Linear(embed_dim, out_channels)

    def forward(self, inputs: Tensor) -> Tensor:
        """Map inputs of shape (B, N, in_channels) to outputs of shape
        (B, N, out_channels)."""
        # input projection -> block stack -> output projection
        return self.output_projection(self.blocks(self.input_projection(inputs)))
