import torch
import torch.nn as nn
from codec_evaluation.codecs.YuE.RepCodec.repcodec.layers.conv_layer import Conv1d, ConvTranspose1d
from codec_evaluation.codecs.YuE.RepCodec.repcodec.modules.residual_unit import ResidualUnit

class DecoderBlock(nn.Module):
    """Decoder block: one leading convolution followed by residual units.

    The leading layer depends on ``stride``:

    * ``stride == 1`` -> a plain ``Conv1d`` with a fixed kernel size of 3.
    * otherwise       -> a ``ConvTranspose1d`` with kernel size ``2 * stride``
      and the given stride (presumably up-sampling the time axis by
      ``stride``; the exact output length depends on the project's
      ``ConvTranspose1d`` padding -- TODO confirm).

    Args:
        in_channels: Channel count of the block input.
        out_channels: Channel count produced by the leading convolution and
            kept by every residual unit.
        stride: Stride of the leading convolution; also selects which kind
            of convolution is built (see above).
        dilations: One ``ResidualUnit`` is created per entry, in order, using
            that entry as its dilation.
        unit_kernel_size: Kernel size handed to every ``ResidualUnit``.
        bias: Bias flag of the leading convolution only; it is not forwarded
            to the residual units.
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            stride: int,
            dilations=(1, 1),
            unit_kernel_size: int = 3,
            bias: bool = True
    ):
        super().__init__()

        if stride == 1:
            self.conv = Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                # Kernel is pinned to 3 when stride=1 so the shape stays
                # unchanged (relies on the project Conv1d's padding).
                kernel_size=3,
                stride=stride,
                bias=bias,
            )
        else:
            self.conv = ConvTranspose1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(2 * stride),
                stride=stride,
                bias=bias,
            )

        # Residual units are created after the leading conv and in the order
        # of `dilations`, so parameter names (res_units.0, res_units.1, ...)
        # stay stable for checkpoints.
        self.res_units = torch.nn.ModuleList([
            ResidualUnit(out_channels, out_channels,
                         kernel_size=unit_kernel_size,
                         dilation=dilation)
            for dilation in dilations
        ])
        # Kept as a public attribute for code that reads it.
        self.num_res = len(self.res_units)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the leading convolution, then each residual unit in order.

        Args:
            x: Input tensor (presumably ``(batch, in_channels, time)`` --
                TODO confirm against the project Conv1d).

        Returns:
            The tensor produced by the last residual unit (or by the leading
            convolution when ``dilations`` was empty).
        """
        x = self.conv(x)
        for unit in self.res_units:
            x = unit(x)
        return x

class Decoder(nn.Module):
    """Convolutional decoder: input conv -> ``DecoderBlock`` stack -> output conv.

    Channel widths are expressed as multiples of ``decode_channels``: block
    ``i`` takes ``int(decode_channels * channel_ratios[i])`` channels and
    emits the input width of block ``i + 1``. The last block always emits
    exactly ``decode_channels`` channels, which is what the output
    convolution consumes.

    Args:
        code_dim: Channel count of the incoming code tensor.
        output_channels: Channel count produced by the final convolution.
        decode_channels: Base channel width scaled by ``channel_ratios``.
        channel_ratios: Per-block input width multipliers; must have the same
            length as ``strides`` and be non-empty.
        strides: Per-block stride passed to each ``DecoderBlock``.
        kernel_size: Kernel size of the first and last convolutions.
        bias: Bias flag forwarded to every ``DecoderBlock``; the first and
            last convolutions are always built with ``bias=False``.
        block_dilations: Dilations forwarded to every ``DecoderBlock``.
        unit_kernel_size: Residual-unit kernel size forwarded to every
            ``DecoderBlock``.

    Raises:
        ValueError: If ``channel_ratios`` and ``strides`` differ in length or
            are empty.
    """

    def __init__(
            self,
            code_dim: int,
            output_channels: int,
            decode_channels: int,
            channel_ratios=(1, 1),
            strides=(1, 1),
            kernel_size: int = 3,
            bias: bool = True,
            block_dilations=(1, 1),
            unit_kernel_size: int = 3,
    ):
        super().__init__()
        # Explicit raise instead of `assert`: asserts vanish under `python -O`,
        # and a length mismatch would then silently mis-wire channel widths.
        if len(channel_ratios) != len(strides):
            raise ValueError(
                "channel_ratios and strides must have the same length, got "
                f"{len(channel_ratios)} and {len(strides)}"
            )
        if len(strides) == 0:
            raise ValueError("channel_ratios and strides must not be empty")

        # widths[i] is the input width of block i and widths[i + 1] its output
        # width; the trailing entry pins the last block to decode_channels.
        widths = [int(decode_channels * ratio) for ratio in channel_ratios]
        widths.append(decode_channels)

        self.conv1 = Conv1d(
            in_channels=code_dim,
            out_channels=widths[0],
            kernel_size=kernel_size,
            stride=1,
            bias=False
        )

        self.conv_blocks = torch.nn.ModuleList([
            DecoderBlock(
                block_in, block_out, stride,
                dilations=block_dilations, unit_kernel_size=unit_kernel_size,
                bias=bias
            )
            for block_in, block_out, stride in zip(widths, widths[1:], strides)
        ])
        # Kept as a public attribute for code that reads it.
        self.num_blocks = len(self.conv_blocks)
        self.conv2 = Conv1d(decode_channels, output_channels, kernel_size, 1, bias=False)

    def forward(self, z: torch.Tensor) -> torch.Tensor:
        """Decode ``z`` through conv1, every decoder block in order, then conv2.

        Args:
            z: Code tensor (presumably ``(batch, code_dim, time)`` -- TODO
                confirm against the project Conv1d).

        Returns:
            The output of the final convolution.
        """
        x = self.conv1(z)
        for block in self.conv_blocks:
            x = block(x)
        return self.conv2(x)
