# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from typing import Dict, List, Optional, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F

from gluonts.core.component import validated
from gluonts.torch.distributions import (
    DistributionOutput,
    StudentTOutput,
)
from gluonts.torch.modules.scaler import MeanScaler, NOPScaler
from gluonts.torch.modules.feature import FeatureEmbedder
from gluonts.torch.modules.loss import DistributionLoss, NegativeLogLikelihood
from gluonts.torch.util import (
    lagged_sequence_values,
    repeat_along_dim,
    unsqueeze_expand,
)
from gluonts.itertools import prod

from .PatchTST_backbone import PatchTST_backbone
from .PatchTST_layers import series_decomp


class ProbabilisticPatchTSTModel(nn.Module):
    """
    Module implementing the ProbabilisticPatchTST model.

    A univariate target window is scaled, passed through a
    ``PatchTST_backbone`` and projected onto the parameters of
    ``distr_output``.

    Parameters
    ----------
    context_length
        Number of past time points fed to the model; forwarded to the
        backbone as ``context_window``.
    prediction_length
        Number of time points to predict; forwarded to the backbone as
        ``target_window``.
    patch_len
        Patch length, forwarded to the backbone.
    stride
        Patch stride, forwarded to the backbone.
    n_block
        Number of backbone layers (``n_layers``).
    hidden_size
        Backbone model dimension (``d_model``); also the input size of the
        distribution-argument projection.
    n_head
        Number of attention heads (``n_heads``) of the backbone.
    d_ff
        Feed-forward dimension (``d_ff``) of the backbone.
    dropout_rate
        Dropout rate, used for both ``dropout`` and ``fc_dropout`` of the
        backbone.
    distr_output
        Type of distribution to be output by the model at each time step.
        Its ``event_shape`` must be ``()``.
    scaling
        Whether to apply mean scaling to the observations (target). When
        false, a ``NOPScaler`` is used instead of a ``MeanScaler``.
    """

    @validated()
    def __init__(
        self,
        context_length: int,
        prediction_length: int,
        patch_len: int = 8,
        stride: int = 4,
        n_block: int = 2,
        hidden_size: int = 128,
        n_head: int = 4,
        d_ff: int = 256,
        dropout_rate: float = 0.1,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
    ) -> None:
        super().__init__()

        # Only univariate (scalar-event) output distributions are supported.
        assert distr_output.event_shape == ()

        self.context_length = context_length
        self.prediction_length = prediction_length

        # Single input channel: ``forward`` feeds one target series per item.
        self.backbone = PatchTST_backbone(
            c_in=1,
            context_window=context_length,
            target_window=prediction_length,
            patch_len=patch_len,
            stride=stride,
            n_layers=n_block,
            d_model=hidden_size,
            n_heads=n_head,
            d_ff=d_ff,
            dropout=dropout_rate,
            fc_dropout=dropout_rate,
            padding_patch="end",
        )

        # The scale is computed along the time axis and kept as a
        # broadcastable singleton axis.
        if scaling:
            self.scaler = MeanScaler(dim=-1, keepdim=True)
        else:
            self.scaler = NOPScaler(dim=-1, keepdim=True)

        self.distr_output = distr_output
        self.args_proj = distr_output.get_args_proj(hidden_size)

    @property
    def _past_length(self) -> int:
        """Number of past time steps expected by ``forward``."""
        return self.context_length

    def input_shapes(self, batch_size=1) -> Dict[str, Tuple[int, ...]]:
        """Return the expected shape of every ``forward`` input, by name."""
        return {
            "past_target": (batch_size, self._past_length),
            "past_observed_values": (batch_size, self._past_length),
        }

    def input_types(self) -> Dict[str, torch.dtype]:
        """Return the expected dtype of every ``forward`` input, by name."""
        return {
            "past_target": torch.float,
            "past_observed_values": torch.float,
        }

    def forward(
        self,
        past_target: torch.Tensor,
        past_observed_values: torch.Tensor,
    ) -> Tuple[Tuple[torch.Tensor, ...], torch.Tensor, torch.Tensor]:
        """
        Invokes the model on input data and returns the parameters of the
        predictive distribution together with the location and scale that
        were used to normalize the target.

        Parameters
        ----------
        past_target
            Tensor of past target values,
            shape: ``(batch_size, past_length)``.
        past_observed_values
            Tensor of observed values indicators,
            shape: ``(batch_size, past_length)``.

        Returns
        -------
        Tuple
            ``(distr_args, loc, scale)`` where ``distr_args`` is the output
            of the distribution-argument projection, ``loc`` is a tensor of
            zeros shaped like ``scale``, and ``scale`` is the scale
            computed by the scaler along the time axis (kept as a singleton
            axis).
        """

        _, scale = self.scaler(past_target, past_observed_values)

        # Normalize and insert the singleton channel axis the backbone
        # expects: (batch_size, 1, past_length).
        scaled_target = (past_target / scale).unsqueeze(-2)

        # Remove only the singleton channel axis. A bare ``squeeze()`` would
        # also drop the batch axis when batch_size == 1 and the time axis
        # when prediction_length == 1.
        # NOTE(review): assumes the backbone keeps the channel axis at
        # position 1, mirroring its input layout -- confirm against
        # PatchTST_backbone.
        output = self.backbone(scaled_target).squeeze(1)
        distr_args = self.args_proj(output)
        return distr_args, torch.zeros_like(scale), scale
