
from methods.base import BaseAdaptation
from methods import register_method
from utils.utils import inputs_to_device
from layers import DEPTH_METRIC_NAMES, DEPTH_METRIC_NAMES_LOCAL, DEPTH_METRIC_NAMES_UNSUP
import torch
from layers import disp_to_depth, compute_depth_errors_adadepth, compute_depth_errors
from networks import get_supervised_models, get_self_supervised_models
from utils.svdp_augs import SVDPMultiScaleFlipAug, Resize, RandomFlip
from utils.losses import DepthLoss
from layers import update_ema_variables

import torch.nn.functional as F
import copy


@register_method(name='consistency')
class Consistency(BaseAdaptation):
    """Adaptation method driven by a student/teacher consistency loss.

    The trainable ("student") encoder and depth networks are optimised so that
    their prediction on the input image matches the average prediction of a
    frozen EMA ("teacher") copy over several rescaled / flipped views of the
    same image. After every optimisation step the EMA copy is refreshed via
    ``update_ema_variables`` and used to produce the reported depth.
    """

    # Decay factor handed to ``update_ema_variables`` after every step.
    EMA_DECAY = 0.999

    def __init__(self, opt, **kwargs):
        """Build the student/EMA models, the optimizer, the augmentations and the loss.

        Args:
            opt: Options object. This class reads ``model_type``,
                ``learning_rate``, ``width``, ``height``, ``patch_size`` (here)
                and ``device``, ``mean``, ``std`` (in ``process_batch``).
            **kwargs: Forwarded unchanged to ``BaseAdaptation.__init__``.

        Raises:
            NotImplementedError: If ``opt.model_type`` is not ``"supervised"``.
        """
        super().__init__(opt, **kwargs)
        if self.opt.model_type != "supervised":
            # TODO: loss calculation is different for self-supervised models
            # (they would be built with get_self_supervised_models).
            raise NotImplementedError("Self-supervised models not implemented yet")
        self.models = get_supervised_models(self.opt)

        # The student stays in eval mode even though its weights are optimised.
        for m in self.models.values():
            m.eval()

        # create ema models
        # ema models for regularisation of global scale
        self.models_ema = copy.deepcopy(self.models)
        for model in self.models_ema.values():
            model.eval()
            for param in model.parameters():
                # EMA weights are never updated by the optimizer.
                param.requires_grad = False
                param.detach_()

        # Only the encoder and the depth decoder are optimised.
        parameters_to_train = [
            *self.models["encoder"].parameters(),
            *self.models["depth"].parameters(),
        ]
        self.optimizer = torch.optim.Adam(parameters_to_train, self.opt.learning_rate)

        self.svdp_augs = SVDPMultiScaleFlipAug(
            img_scale=(self.opt.width, self.opt.height),
            img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
            flip=False,
            patch_size=self.opt.patch_size,
            transforms=[
                Resize(keep_ratio=True),
                RandomFlip(),
                # No normalisation transform here: process_batch normalises the
                # image with opt.mean / opt.std before it reaches the
                # augmentation pipeline.
            ])

        self.loss = DepthLoss(loss='mse')

    def _ensemble_teacher_depth(self, input_img, pred_depth):
        """Average the EMA models' predictions over the augmented views.

        Args:
            input_img: Normalised input image that is fed to ``self.svdp_augs``.
            pred_depth: Student prediction; only its shape/dtype/device are
                used, as the template every teacher prediction is resized to.

        Returns:
            Tensor shaped like ``pred_depth`` holding the mean teacher
            prediction. It is computed under ``torch.no_grad()`` and therefore
            carries no gradient.

        Raises:
            ValueError: If a view is marked as flipped with a direction other
                than ``'horizontal'`` or ``'vertical'``.
        """
        aug_x = self.svdp_augs({'img': input_img})
        views = zip(aug_x['img'], aug_x['flip'], aug_x['flip_direction'])

        with torch.no_grad():
            aug_depth = torch.zeros_like(pred_depth)
            for x, flip, flip_direction in views:
                feats = self.models_ema["encoder"](x)
                cur_pred = self.models_ema["depth"](feats)

                if flip:
                    # Undo the augmentation flip so predictions align with the
                    # un-augmented image. Assumes (N, C, H, W) layout, which
                    # the bilinear resize below relies on as well: dim 3 is
                    # width, dim 2 is height.
                    if flip_direction == 'horizontal':
                        cur_pred = torch.flip(cur_pred, [3])
                    elif flip_direction == 'vertical':
                        cur_pred = torch.flip(cur_pred, [2])
                    else:
                        # Explicit error instead of ``assert`` so the check
                        # survives ``python -O``.
                        raise ValueError(
                            f"Unsupported flip direction: {flip_direction!r}")

                # Bring every view back to the student's output resolution.
                cur_pred = F.interpolate(
                    cur_pred, pred_depth.shape[2:], mode="bilinear", align_corners=False)
                aug_depth = aug_depth + cur_pred

            # average prediction
            return aug_depth / len(aug_x['img'])

    def process_batch(self, inputs):
        """Run one adaptation step on a batch and evaluate the EMA models.

        Args:
            inputs: Batch mapping. Reads ``inputs["color_uncrop", 0, 0]`` (the
                image) and ``inputs['depth_gt_uncrop']`` (ground truth used for
                the metrics only). It is first passed to ``inputs_to_device``
                with ``opt.device``.

        Returns:
            Tuple ``(outputs, metrics, losses)`` where ``outputs['depth']`` is
            the EMA models' prediction on the input image, ``metrics['error']``
            is a list of NumPy values from ``compute_depth_errors_adadepth``
            (median scaling enabled) and ``losses`` is an empty dict.
        """
        inputs_to_device(inputs, self.opt.device)

        input_img = (inputs["color_uncrop", 0, 0] - self.opt.mean) / self.opt.std

        # Student prediction (keeps gradients).
        features = self.models["encoder"](input_img)
        pred_depth = self.models["depth"](features)

        # Teacher target: gradient-free average over the augmented views.
        aug_depth = self._ensemble_teacher_depth(input_img, pred_depth)

        loss = self.loss(pred_depth, aug_depth).mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # NOTE(review): the (model, model_ema) argument order is kept exactly
        # as before; confirm it matches update_ema_variables' signature.
        for model, model_ema in zip(self.models.values(), self.models_ema.values()):
            update_ema_variables(model, model_ema, self.EMA_DECAY)

        # The reported depth comes from the freshly updated EMA models.
        with torch.no_grad():
            features = self.models_ema["encoder"](input_img)
            depth_out = self.models_ema["depth"](features)

        error = [
            term.detach().cpu().numpy()
            for term in compute_depth_errors_adadepth(
                self.opt, inputs['depth_gt_uncrop'], depth_out, median_scaling=True)
        ]

        outputs = {'depth': depth_out}
        losses = {}
        metrics = {
            'error': error,
        }

        return outputs, metrics, losses
