from functools import partial
import torch
import torch.nn as nn
from opencood.utils.sparse_utils import spconv, replace_feature
try: # spconv1
    from spconv import SparseSequential, SubMConv3d, SparseConv3d, SparseInverseConv3d, SparseConvTensor
except: # spconv2
    from spconv.pytorch import  SparseSequential, SubMConv3d, SparseConv3d, SparseInverseConv3d, SparseConvTensor

def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0,
                   conv_type='subm', norm_fn=None):
    """Assemble a sparse ``conv -> norm -> ReLU`` stack.

    Args:
        in_channels: Input channel count of the convolution.
        out_channels: Output channel count of the convolution; also handed
            to ``norm_fn``.
        kernel_size: Kernel size forwarded to the convolution.
        indice_key: Forwarded to the convolution as ``indice_key``.
        stride: Forwarded only when ``conv_type == 'spconv'``.
        padding: Forwarded only when ``conv_type == 'spconv'``.
        conv_type: ``'subm'`` builds a ``SubMConv3d``, ``'spconv'`` a
            ``SparseConv3d`` and ``'inverseconv'`` a ``SparseInverseConv3d``.
        norm_fn: Factory called as ``norm_fn(out_channels)`` to create the
            normalisation layer. The ``None`` default is not callable, so a
            real factory has to be supplied.

    Returns:
        A ``SparseSequential`` wrapping the convolution, the normalisation
        layer and an ``nn.ReLU``.

    Raises:
        NotImplementedError: If ``conv_type`` is none of the values above.
    """
    # Every supported convolution is bias-free and shares the same indice key.
    shared = dict(bias=False, indice_key=indice_key)

    if conv_type == 'spconv':
        layer = SparseConv3d(in_channels, out_channels, kernel_size,
                             stride=stride, padding=padding, **shared)
    elif conv_type == 'inverseconv':
        layer = SparseInverseConv3d(in_channels, out_channels, kernel_size, **shared)
    elif conv_type == 'subm':
        layer = SubMConv3d(in_channels, out_channels, kernel_size, **shared)
    else:
        raise NotImplementedError

    return SparseSequential(layer, norm_fn(out_channels), nn.ReLU())

class SparseBasicBlock(spconv.SparseModule):
    """Residual block made of two 3x3x3 submanifold convolutions.

    Each convolution is followed by a normalisation layer created through
    ``norm_fn``; the block input's features (or the output of ``downsample``)
    are added before the final ReLU.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, indice_key=None, norm_fn=None):
        """Create the block's layers.

        Args:
            inplanes: Input channel count of the first convolution.
            planes: Output channel count of both convolutions.
            stride: Stride passed to the first convolution and stored on
                the module.
            downsample: Optional callable applied to the block input to
                produce the residual term.
            indice_key: ``indice_key`` shared by both convolutions.
            norm_fn: Factory called as ``norm_fn(planes)``; the ``None``
                default is not callable, so a factory must be supplied.
        """
        super().__init__()
        # Settings common to both submanifold convolutions.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False, indice_key=indice_key)

        self.conv1 = spconv.SubMConv3d(inplanes, planes, stride=stride, **conv_kwargs)
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU()
        self.conv2 = spconv.SubMConv3d(planes, planes, stride=1, **conv_kwargs)
        self.bn2 = norm_fn(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the residual block on sparse tensor ``x`` and return the result."""
        residual = x.features

        feature_rank = x.features.dim()
        assert feature_rank == 2, 'x.features.dim()=%d' % feature_rank

        # First conv -> norm -> ReLU.
        y = self.conv1(x)
        normed = self.bn1(y.features)
        y = replace_feature(y, normed)
        activated = self.relu(y.features)
        y = replace_feature(y, activated)

        # Second conv -> norm (activation happens after the skip connection).
        y = self.conv2(y)
        normed = self.bn2(y.features)
        y = replace_feature(y, normed)

        if self.downsample is not None:
            # NOTE(review): the result is added directly to a feature matrix
            # below, so this presumably has to return a plain tensor rather
            # than a sparse tensor -- confirm against callers.
            residual = self.downsample(x)

        summed = y.features + residual
        y = replace_feature(y, summed)
        y = replace_feature(y, self.relu(y.features))

        return y

class VoxelBackBone8x(nn.Module):
    """Sparse 3D convolutional voxel backbone with three stride-2 stages.

    The network runs an input submanifold convolution followed by four
    stages (``conv1`` .. ``conv4``) whose outputs are exposed at strides
    1, 2, 4 and 8.  Unless disabled through ``model_cfg['use_conv_out']``,
    a final ``conv_out`` layer with stride 2 on the first spatial axis
    produces the tensor stored as ``encoded_spconv_tensor``.
    """

    def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
        """Build the backbone layers.

        Args:
            model_cfg: Mapping with optional keys ``use_conv_out`` (a falsy
                value skips the ``conv_out`` layer) and ``num_features_out``
                (output channels of ``conv_out``; 128 when absent).
            input_channels: Channel count of the incoming voxel features.
            grid_size: Voxel grid size; reversed to obtain the sparse
                spatial shape.  Array-likes supporting element-wise ``+``
                as well as plain lists/tuples are accepted.
            **kwargs: Accepted and ignored.
        """
        super().__init__()
        self.model_cfg = model_cfg

        # conv_out is used by default; skipped only when the config
        # explicitly disables it (e.g. when a decoder follows).
        conv_out_disabled = 'use_conv_out' in model_cfg and not model_cfg['use_conv_out']
        self._use_conv_out = not conv_out_disabled

        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)

        self.sparse_shape = self._sparse_shape_from_grid(grid_size)

        self.conv_input = SparseSequential(
            SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
            norm_fn(16),
            nn.ReLU(),
        )
        block = post_act_block

        # Stride 1.
        self.conv1 = SparseSequential(
            block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'),
        )

        # Stride 2, e.g. [1600, 1408, 41] -> [800, 704, 21].
        self.conv2 = SparseSequential(
            block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
        )

        # Stride 4, e.g. [800, 704, 21] -> [400, 352, 11].
        self.conv3 = SparseSequential(
            block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
        )

        # Stride 8, e.g. [400, 352, 11] -> [200, 176, 5]; no padding on the
        # first spatial axis.
        self.conv4 = SparseSequential(
            block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
        )

        if self._use_conv_out:
            if 'num_features_out' in self.model_cfg:
                self.num_point_features = self.model_cfg['num_features_out']
            else:
                self.num_point_features = 128
            # Unpadded (3, 1, 1) kernel with stride (2, 1, 1),
            # e.g. [200, 176, 5] -> [200, 176, 2].
            self.conv_out = SparseSequential(
                SparseConv3d(64, self.num_point_features, (3, 1, 1), stride=(2, 1, 1), padding=0,
                             bias=False, indice_key='spconv_down2'),
                norm_fn(self.num_point_features),
                nn.ReLU(),
            )

        # Output channels of each intermediate stage.
        self.backbone_channels = {
            'x_conv1': 16,
            'x_conv2': 32,
            'x_conv3': 64,
            'x_conv4': 64
        }

    @staticmethod
    def _sparse_shape_from_grid(grid_size):
        """Return ``grid_size`` reversed, with one extra cell on the first axis.

        Array-like inputs keep the original element-wise ``+ [1, 0, 0]``
        behaviour.  Plain lists/tuples are handled explicitly, because for
        a list ``+`` would concatenate (yielding a 6-element shape) and for
        a tuple it would raise.

        NOTE(review): the reversal presumably maps an (x, y, z) grid size to
        the [z, y, x] voxel coordinate order used in ``forward`` -- confirm
        with the caller.
        """
        reversed_grid = grid_size[::-1]
        if isinstance(reversed_grid, (list, tuple)):
            shape = list(reversed_grid)
            shape[0] += 1
            return shape
        return reversed_grid + [1, 0, 0]

    def forward(self, batch_dict):
        """Encode voxel features into multi-scale sparse tensors.

        Args:
            batch_dict:
                batch_size: int
                voxel_features: (num_voxels, C)
                voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
        Returns:
            batch_dict, updated in place with:
                encoded_spconv_tensor: sparse tensor (only when conv_out is used)
                encoded_spconv_tensor_stride: 8 (only when conv_out is used)
                multi_scale_3d_features: dict of the x_conv1..x_conv4 sparse tensors
                multi_scale_3d_strides: dict of the matching strides (1, 2, 4, 8)
        """
        voxel_features = batch_dict['voxel_features']
        voxel_coords = batch_dict['voxel_coords']
        batch_size = batch_dict['batch_size']

        input_sp_tensor = SparseConvTensor(
            features=voxel_features,
            indices=voxel_coords.int(),
            spatial_shape=self.sparse_shape,
            batch_size=batch_size
        )

        x = self.conv_input(input_sp_tensor)

        x_conv1 = self.conv1(x)
        x_conv2 = self.conv2(x_conv1)
        x_conv3 = self.conv3(x_conv2)
        x_conv4 = self.conv4(x_conv3)

        if self._use_conv_out:
            # For the detection head, e.g. [200, 176, 5] -> [200, 176, 2].
            out = self.conv_out(x_conv4)
            batch_dict.update({
                'encoded_spconv_tensor': out,
                'encoded_spconv_tensor_stride': 8
            })

        batch_dict.update({
            'multi_scale_3d_features': {
                'x_conv1': x_conv1,
                'x_conv2': x_conv2,
                'x_conv3': x_conv3,
                'x_conv4': x_conv4,
            },
            'multi_scale_3d_strides': {
                'x_conv1': 1,
                'x_conv2': 2,
                'x_conv3': 4,
                'x_conv4': 8,
            },
        })

        return batch_dict

class VoxelDecoder(nn.Module):
    """Top-down sparse decoder over the backbone's multi-scale features.

    For every level, from the deepest to the shallowest, the lateral
    feature ``x_conv{level}`` is refined by a ``SparseBasicBlock``,
    concatenated with the feature coming from the level below, merged, and
    then passed through the level's upsample layer.
    """

    def __init__(self, decoder_channels):
        """Build the per-level lateral, merge and upsample layers.

        Args:
            decoder_channels: Sequence of ``(in_channels, out_channels)``
                pairs, one per level.  The first entry is assigned to the
                highest level (``x_conv{len(decoder_channels)}``) and the
                last entry to level 1.
        """
        super().__init__()

        self.decoder_channels = decoder_channels
        self.block_num = len(self.decoder_channels)

        lateral_layers = []
        merge_layers = []
        upsample_layer = []

        block = post_act_block
        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)
        for i, block_channels in enumerate(self.decoder_channels):
            level = self.block_num - i
            in_channels, out_channels = block_channels[0], block_channels[1]

            lateral_layers.append(
                SparseBasicBlock(
                    in_channels,
                    in_channels,
                    indice_key=f'subm{level}',
                    norm_fn=norm_fn
                )
            )
            # Input is the concatenation of bottom and lateral features,
            # hence twice the channel count.
            merge_layers.append(
                block(in_channels * 2, in_channels, 3,
                      norm_fn=norm_fn, indice_key=f'subm{level}')
            )
            if level != 1:
                upsample_layer.append(
                    block(in_channels, out_channels, 3,
                          norm_fn=norm_fn, indice_key=f'spconv{level}',
                          conv_type='inverseconv')
                )
            else:
                # use submanifold conv instead of inverse conv in the last block
                upsample_layer.append(
                    block(in_channels, out_channels, 3,
                          norm_fn=norm_fn, indice_key=f'subm{level}')
                )

        # Reverse so that index ``level - 1`` addresses the layers of ``level``.
        self.lateral_layers = nn.ModuleList(lateral_layers[::-1])
        self.merge_layers = nn.ModuleList(merge_layers[::-1])
        self.upsample_layer = nn.ModuleList(upsample_layer[::-1])

    @staticmethod
    def reduce_channel(x, out_channels):
        """reduce channel for element-wise addition.

        The ``C1`` input channels are split into ``out_channels`` consecutive
        groups and each group is summed.

        Args:
            x (:obj:`SparseConvTensor`): Sparse tensor, ``x.features``
                are in shape (N, C1).
            out_channels (int): The number of channel after reduction.
                Must divide ``C1``.

        Returns:
            :obj:`SparseConvTensor`: Channel reduced feature.
        """
        features = x.features
        n, in_channels = features.shape
        assert (in_channels % out_channels == 0) and (in_channels >= out_channels), \
            'cannot reduce %d channels to %d' % (in_channels, out_channels)

        # Use the module-level helper (as SparseBasicBlock does) rather than
        # the tensor method, so feature replacement goes through one code path.
        return replace_feature(x, features.view(n, out_channels, -1).sum(dim=2))

    def forward(self, batch_dict):
        """Decode ``batch_dict['multi_scale_3d_features']`` top-down.

        Args:
            batch_dict: Dict holding ``multi_scale_3d_features`` with keys
                ``x_conv1`` .. ``x_conv{block_num}``.

        Returns:
            The same dict, with ``voxel_features`` set to the sparse tensor
            produced by the last (level 1) upsample layer.
        """
        multi_scale = batch_dict['multi_scale_3d_features']
        x_bottom = multi_scale[f'x_conv{self.block_num}']

        for level in range(self.block_num, 0, -1):
            layer_idx = level - 1
            # sparse conv on lateral feature (use res)
            x_lateral = multi_scale[f'x_conv{level}']
            x = self.lateral_layers[layer_idx](x_lateral)

            # merge lateral feature and bottom feature
            x = replace_feature(x, torch.cat((x_bottom.features, x.features), dim=1))
            x_merge = self.merge_layers[layer_idx](x)
            x = self.reduce_channel(x, x_merge.features.shape[1])
            x = replace_feature(x, x_merge.features + x.features)

            # upsample feature; the result is the bottom input of the next level
            x_bottom = self.upsample_layer[layer_idx](x)

        batch_dict.update({
            'voxel_features': x_bottom
        })
        return batch_dict