import os, sys
from contextlib import contextmanager
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F
import cv2
from copy import deepcopy

from core.components.base_model import BaseModel
from core.modules.losses import L2Loss, BCELoss, CELoss, BCEWithLogitsLoss
from core.modules.subnetworks import Encoder, Decoder, Predictor
from core.modules.recurrent_modules import RecurrentPredictor
from core.utils.general_utils import AttrDict, ParamDict, batch_apply
from core.utils.pytorch_utils import pad_seq, make_one_hot, ar2ten, ten2ar
from core.modules.layers import LayerBuilderParams
from core.utils.vis_utils import make_gif_strip, make_image_seq_strip, make_image_strip
from core.components.checkpointer import CheckpointHandler

class MultiSegmentationModel(BaseModel):
    """Single encoder with two decoder heads: image reconstruction and segmentation.

    The first frame of the input is encoded once. The resulting embedding is
    decoded by ``decoder`` into an image and by ``seg_decoder`` into
    ``n_class`` per-pixel score maps. Either head can be cut off from the
    encoder gradient via ``detach_reconstruction`` / ``detach_seg_mask``.

    NOTE(review): several hyperparameters read by this class (``task_names``,
    ``batch_size``, ``device``, ``n_frames``, ``agent_config``) are not among
    the defaults declared here; they presumably come from the parent defaults
    or from ``params`` -- confirm against ``BaseModel``.
    """

    def __init__(self, params, logger):
        """Set up hyperparameters and build the sub-networks.

        :param params: hyperparameter overrides applied on top of the defaults
        :param logger: logger object handed to the base model
        """
        super().__init__(logger)
        self._hp = self._default_hparams()
        self._hp.overwrite(params)
        self._hp.builder = LayerBuilderParams(self._hp.use_convs, self._hp.normalization)
        # entries are either plain strings or callables whose result exposes `.name`
        self._task_names = [task_name if isinstance(task_name, str) else task_name().name
                            for task_name in self._hp.task_names]

        self.build_network()

    @contextmanager
    def val_mode(self):
        """No-op context manager: nothing is changed on entry or on exit."""
        yield

    def _default_hparams(self):
        """Return the parent's default hyperparameters extended with this model's defaults."""
        default_dict = ParamDict({
            'use_skips': False,
            'skips_stride': 2,
            'add_weighted_pixel_copy': False,  # if True, adds pixel copying stream for decoder
            'pixel_shift_decoder': False,
            'use_convs': True,
            'detach_reconstruction': True,     # if True, reconstruction decoder gets a detached embedding
            'detach_action_head': False,
            'n_cond_frames': 1,
            'detach_seg_mask': True,           # if True, segmentation decoder gets a detached embedding
            'seg_dec_activation': None,        # last activation of the segmentation decoder
            'n_class': 1,                      # number of output channels of the segmentation decoder
            'normalization': 'none',
            'action_space_type': 'discrete',
            'use_seg_mask': False,
            'use_action_mask': False,
            'mask_sz': 300
        })

        # Network size
        default_dict.update({
            'img_sz': 32,
            'input_nc': 3,
            'ngf': 8,
            'nz_enc': 32,
            'nz_mid': 32,
            'n_processing_layers': 3,
            'n_pixel_sources': 1,
        })

        # Loss weights
        default_dict.update({
            'img_mse_weight': 1.,
            'reward_weights': 1.,
        })

        # Pretrained-encoder loading and segmentation visualization
        default_dict.update({
            'model_checkpoint': None,   # checkpoint directory; if set, encoder weights are loaded from it
            'model_epoch': 'latest',
            'color_map': None,          # maps str(class index) -> color used when logging masks
            'agent_class': None         # class instantiated to recover the pretrained encoder
        })

        # add new params to parent params
        parent_params = super()._default_hparams()
        parent_params.overwrite(default_dict)
        return parent_params

    def build_network(self):
        """Create encoder, image decoder and segmentation decoder; optionally load encoder weights."""
        self.encoder = Encoder(self._hp)
        self.decoder = Decoder(self._hp)

        # the segmentation decoder shares all settings except the number of
        # output channels and the final activation
        seg_decoder_hp = deepcopy(self._hp)
        seg_decoder_hp.input_nc = self._hp.n_class
        seg_decoder_hp.dec_last_activation = self._hp.seg_dec_activation
        self.seg_decoder = Decoder(seg_decoder_hp)

        if self._hp.model_checkpoint:
            self._load_checkpoint()

    def forward(self, inputs):
        """
        forward pass at training time

        :param inputs: object with an ``images`` tensor; only index 0 along dim 1 is encoded
        :return: AttrDict with ``pred`` / ``rec_input`` (the embedding), ``output_imgs``
                 and ``output_seg`` (decoder outputs with a singleton dim inserted at dim 1)
        """
        output = AttrDict()

        # encode inputs
        enc = self.encoder(inputs.images[:, 0])
        output.update({'pred': enc, 'rec_input': enc})

        # optionally stop decoder gradients from reaching the encoder
        rec_input = output.rec_input.detach() if self._hp.detach_reconstruction else output.rec_input
        output.output_imgs = self.decoder(rec_input).images.unsqueeze(1)

        seg_input = output.pred.detach() if self._hp.detach_seg_mask else output.pred
        output.output_seg = self.seg_decoder(seg_input).images.unsqueeze(1)

        return output

    def loss(self, model_output, inputs):
        """Compute the reconstruction and segmentation losses.

        :param model_output: output of ``forward`` (uses ``output_imgs`` and ``output_seg``)
        :param inputs: object with ``images`` and ``target_seg`` tensors
        :return: AttrDict with ``seq_img_mse``, ``seq_seg_entropy`` and ``total``
        """
        losses = AttrDict()

        # image reconstruction loss
        losses.seq_img_mse = L2Loss(self._hp.img_mse_weight)(model_output.output_imgs,
                                                             inputs.images[:, :1])

        # Flatten the spatial dims for the cross-entropy loss. The batch dimension is
        # read from the tensor instead of the configured batch size, so a smaller
        # (e.g. final) batch cannot fail or be silently mis-shaped by the reshape.
        batch = model_output.output_seg.shape[0]
        seg_scores = model_output.output_seg.reshape(batch, self._hp.n_class, -1)
        seg_target = inputs.target_seg[:, :1].reshape(batch, -1).type(torch.long)
        losses.seq_seg_entropy = CELoss()(seg_scores, seg_target)

        losses.total = self._compute_total_loss(losses)
        return losses

    def log_outputs(self, model_output, inputs, losses, step, log_images, phase):
        """Log losses and, if requested, reconstruction and segmentation image strips.

        :param model_output: output of ``forward``
        :param inputs: object with ``images`` and ``target_seg`` tensors
        :param losses: output of ``loss``
        :param step: step value forwarded to the logger
        :param log_images: if truthy, image strips are logged in addition to the losses
        :param phase: phase tag forwarded to the logger
        """
        super()._log_losses(losses, step, log_images, phase)
        if log_images:
            # number of trailing channels kept for visualization
            n_vis_channels = int(self._hp.input_nc // self._hp.n_frames)
            input_images = inputs.images[:, 0, -n_vis_channels:]

            # log predicted images
            img_strip = make_image_strip([input_images,
                                          model_output.output_imgs[:, 0, -n_vis_channels:]])
            self._logger.log_images(img_strip[None], 'generation', step, phase)

            # per-pixel predicted class = argmax over the class channel
            output_labels = torch.argmax(model_output.output_seg.squeeze(1), dim=1)
            target_labels = inputs.target_seg[:, :1].squeeze(1)
            b, _, nc, h, w = model_output.output_seg.shape
            pred_seg = torch.zeros((b, h, w, 3), device=self._hp.device)
            gt_seg = torch.zeros((b, h, w, 3), device=self._hp.device)
            for c in range(nc):
                # color lookup is identical for prediction and ground truth: build it once
                color = ar2ten(np.array(self._hp.color_map[str(c)]),
                               device=self._hp.device).type(torch.float32)
                pred_seg[output_labels == c] = color
                gt_seg[target_labels == c] = color

            if input_images.shape[1] == 1:
                # replicate a single-channel image to three channels for the strip
                input_images = input_images.repeat((1, 3, 1, 1))
            # rescale inputs to match the color-map images
            # (presumably inputs are in [-1, 1] and colors in [0, 255] -- TODO confirm)
            mask_strip = make_image_strip([(input_images + 1) * 255 / 2,
                                           gt_seg.permute((0, 3, 1, 2)), pred_seg.permute((0, 3, 1, 2))])
            self._logger.log_images(mask_strip[None], 'segmentation', step, phase)
        # attention mask (logged on every call, independent of log_images)
        self._log_attention_mask(inputs, step, phase)

    def forward_encoder(self, inputs):
        """Run only the encoder on ``inputs`` and return its output."""
        return self.encoder(inputs)

    @property
    def resolution(self):
        """Image size hyperparameter (``img_sz``)."""
        return self._hp.img_sz

    def _load_checkpoint(self):
        """Loads weights for a given model from the given checkpoint directory.

        An agent is instantiated from ``agent_class(agent_config)``, the checkpoint is
        loaded into it, and the weights of ``agent.policy.encoder`` are copied into
        this model's encoder.
        """
        checkpoint = self._hp.model_checkpoint
        epoch = self._hp.model_epoch
        # checkpoints live in a 'weights' directory; accept either that directory or its parent
        if os.path.basename(checkpoint) == 'weights':
            checkpoint_dir = checkpoint
        else:
            checkpoint_dir = os.path.join(checkpoint, 'weights')
        checkpoint_path = CheckpointHandler.get_resume_ckpt_file(epoch, checkpoint_dir)
        agent = self._hp.agent_class(self._hp.agent_config)
        CheckpointHandler.load_weights(checkpoint_path, model=agent)
        pretrained_encoder = agent.policy.encoder
        # The second positional argument of load_state_dict is `strict`, not a device,
        # so the device must not be passed here. The default strict=True is used, and
        # load_state_dict copies values into the existing parameters in place.
        self.encoder.load_state_dict(pretrained_encoder.state_dict())

