# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import torch
from collections import OrderedDict
from torch import nn

from .structure import SparseConvTensor


def is_spconv_module(module):
    spconv_modules = (SparseModule, )
    return isinstance(module, spconv_modules)


def is_sparse_conv(module):
    from .conv import SparseConvolution
    return isinstance(module, SparseConvolution)


def _mean_update(vals, m_vals, t):
    outputs = []
    if not isinstance(vals, list):
        vals = [vals]
    if not isinstance(m_vals, list):
        m_vals = [m_vals]
    for val, m_val in zip(vals, m_vals):
        output = t / float(t + 1) * m_val + 1 / float(t + 1) * val
        outputs.append(output)
    if len(outputs) == 1:
        outputs = outputs[0]
    return outputs


class SparseModule(nn.Module):
    """place holder, All module subclass from this will take sptensor in
    SparseSequential."""
    pass


class SparseSequential(SparseModule):
    r"""A sequential container.
    Modules will be added to it in the order they are passed in the
    constructor.
    Alternatively, an ordered dict of modules can also be passed in.

    To make it easier to understand, given is a small example::

        # Example of using Sequential
        model = SparseSequential(
                  SparseConv2d(1,20,5),
                  nn.ReLU(),
                  SparseConv2d(20,64,5),
                  nn.ReLU()
                )

        # Example of using Sequential with OrderedDict
        model = SparseSequential(OrderedDict([
                  ('conv1', SparseConv2d(1,20,5)),
                  ('relu1', nn.ReLU()),
                  ('conv2', SparseConv2d(20,64,5)),
                  ('relu2', nn.ReLU())
                ]))

        # Example of using Sequential with kwargs(python 3.6+)
        model = SparseSequential(
                  conv1=SparseConv2d(1,20,5),
                  relu1=nn.ReLU(),
                  conv2=SparseConv2d(20,64,5),
                  relu2=nn.ReLU()
                )
    """

    def __init__(self, *args, **kwargs):
        super(SparseSequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            for key, module in args[0].items():
                self.add_module(key, module)
        else:
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)
        for name, module in kwargs.items():
            if sys.version_info < (3, 6):
                raise ValueError('kwargs only supported in py36+')
            if name in self._modules:
                raise ValueError('name exists.')
            self.add_module(name, module)
        self._sparity_dict = {}

    def __getitem__(self, idx):
        if not (-len(self) <= idx < len(self)):
            raise IndexError('index {} is out of range'.format(idx))
        if idx < 0:
            idx += len(self)
        it = iter(self._modules.values())
        for i in range(idx):
            next(it)
        return next(it)

    def __len__(self):
        return len(self._modules)

    @property
    def sparity_dict(self):
        return self._sparity_dict

    def add(self, module, name=None):
        if name is None:
            name = str(len(self._modules))
            if name in self._modules:
                raise KeyError('name exists')
        self.add_module(name, module)

    def forward(self, input):
        for k, module in self._modules.items():
            if is_spconv_module(module):  # use SpConvTensor as input
                assert isinstance(input, SparseConvTensor)
                self._sparity_dict[k] = input.sparity
                input = module(input)
            else:
                if isinstance(input, SparseConvTensor):
                    if input.indices.shape[0] != 0:
                        input.features = module(input.features)
                else:
                    input = module(input)
        return input

    def fused(self):
        """don't use this.

        no effect.
        """
        from .conv import SparseConvolution
        mods = [v for k, v in self._modules.items()]
        fused_mods = []
        idx = 0
        while idx < len(mods):
            if is_sparse_conv(mods[idx]):
                if idx < len(mods) - 1 and isinstance(mods[idx + 1],
                                                      nn.BatchNorm1d):
                    new_module = SparseConvolution(
                        ndim=mods[idx].ndim,
                        in_channels=mods[idx].in_channels,
                        out_channels=mods[idx].out_channels,
                        kernel_size=mods[idx].kernel_size,
                        stride=mods[idx].stride,
                        padding=mods[idx].padding,
                        dilation=mods[idx].dilation,
                        groups=mods[idx].groups,
                        bias=True,
                        subm=mods[idx].subm,
                        output_padding=mods[idx].output_padding,
                        transposed=mods[idx].transposed,
                        inverse=mods[idx].inverse,
                        indice_key=mods[idx].indice_key,
                        fused_bn=True,
                    )
                    new_module.load_state_dict(mods[idx].state_dict(), False)
                    new_module.to(mods[idx].weight.device)
                    conv = new_module
                    bn = mods[idx + 1]
                    conv.bias.data.zero_()
                    conv.weight.data[:] = conv.weight.data * bn.weight.data / (
                        torch.sqrt(bn.running_var) + bn.eps)
                    conv.bias.data[:] = (
                        conv.bias.data - bn.running_mean) * bn.weight.data / (
                            torch.sqrt(bn.running_var) + bn.eps) + bn.bias.data
                    fused_mods.append(conv)
                    idx += 2
                else:
                    fused_mods.append(mods[idx])
                    idx += 1
            else:
                fused_mods.append(mods[idx])
                idx += 1
        return SparseSequential(*fused_mods)


class ToDense(SparseModule):
    """convert SparseConvTensor to NCHW dense tensor."""

    def forward(self, x: SparseConvTensor):
        return x.dense()


class RemoveGrid(SparseModule):
    """remove pre-allocated grid buffer."""

    def forward(self, x: SparseConvTensor):
        x.grid = None
        return x
