# Copyright 2022-present, Lorenzo Bonicelli, Pietro Buzzega, Matteo Boschini, Angelo Porrello, Simone Calderara.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch

from models.utils.continual_model import ContinualModel
from utils.args import add_rehearsal_args, ArgumentParser, add_management_args, add_experiment_args, \
    add_aux_dataset_args
from utils.buffer import Buffer

def get_parser() -> ArgumentParser:
    """Build the command-line parser for this model.

    The shared management, experiment, rehearsal and auxiliary-dataset
    argument groups are registered first, followed by three float-valued
    penalty-weight options (``--alpha``, ``--beta``, ``--lamb``), each
    defaulting to 0.

    Returns:
        The fully configured ``ArgumentParser``.
    """
    parser = ArgumentParser(description='Continual learning via  ER')

    # Shared argument groups, registered in the same order as before.
    shared_groups = (
        add_management_args,
        add_experiment_args,
        add_rehearsal_args,
        add_aux_dataset_args,
    )
    for register_group in shared_groups:
        register_group(parser)

    # The three penalty weights share type, help text and default value.
    for flag in ('--alpha', '--beta', '--lamb'):
        parser.add_argument(flag, type=float,
                            help='Penalty weight.', default=0)

    return parser

class ErACE(ContinualModel):
    """Rehearsal-based continual model registered under the name ``er_ace``.

    Every call to :meth:`observe` performs one optimisation step on the
    incoming batch. Once at least one task has been completed
    (``current_task > 0``), the logits of the incoming batch are masked so
    that only selected classes contribute to the loss, and an additional
    loss computed on samples drawn from the replay buffer is added.
    """
    NAME = 'er_ace'
    COMPATIBILITY = ['class-il', 'task-il']

    def __init__(self, backbone, loss, args, transform):
        """Create the replay buffer and the per-run bookkeeping state.

        All four arguments are forwarded unchanged to the ``ContinualModel``
        constructor.
        """
        super().__init__(backbone, loss, args, transform)
        self.buffer = Buffer(self.args.buffer_size, self.device)
        # Distinct class labels encountered so far across all observed batches.
        self.seen_so_far = torch.tensor([]).long().to(self.device)
        # Number of tasks completed so far; incremented in end_task().
        self.current_task = 0

    def begin_task(self, dataset):
        """Load the initial checkpoint and reset the classifier before the first task.

        Nothing is done for later tasks. ``dataset`` is not used here.
        """
        if self.current_task == 0:
            self.load_initial_checkpoint()
            self.reset_classifier()

    def end_task(self, dataset):
        """Advance the task counter and optionally swap in the backup buffer.

        ``dataset`` is not used here.
        """
        self.current_task += 1
        # update buffer
        if self.args.update_buffer_at_task_end:
            # NOTE(review): `buffer_backup` is not created anywhere in this
            # class; presumably it is provided by the base class or set
            # externally -- confirm before enabling this option.
            print(
                f"At task {self.current_task} end before update: buffer is {len(self.buffer)}, buffer_backup is {len(self.buffer_backup)}")
            self.buffer = self.buffer_backup

    def observe(self, inputs: torch.Tensor, labels: torch.Tensor, not_aug_inputs: torch.Tensor, epoch=None, task_id_nominal=None, teacher=None, noise=None):
        """Run one training step on a batch and store the batch in the buffer.

        Args:
            inputs: batch fed to the network.
            labels: targets for ``inputs``.
            not_aug_inputs: examples stored in the replay buffer together
                with ``labels``.
            epoch: unused in this method.
            task_id_nominal: unused in this method.
            teacher: unused in this method.
            noise: forwarded as ``is_noise`` to ``Buffer.add_data``
                (``None`` is passed through as-is).

        Returns:
            A 5-tuple ``(loss_value, 0, {}, 0, 0)`` where ``loss_value`` is
            the Python float of the total loss. The remaining entries are
            constant placeholders; presumably they satisfy a reporting
            format expected by the caller -- TODO confirm.
        """
        present = labels.unique()
        self.seen_so_far = torch.cat([self.seen_so_far, present]).unique()

        logits = self.net(inputs)

        # 1 marks a class column that stays active in the loss; start with
        # only the classes that occur in the current batch.
        mask = torch.zeros_like(logits)
        mask[:, present] = 1

        self.opt.zero_grad()

        highest_seen = self.seen_so_far.max()
        if highest_seen < (self.dataset.N_CLASSES - 1):
            # Also keep every column from the highest seen class index
            # onwards, i.e. the classes that have not been encountered yet.
            mask[:, highest_seen:] = 1

        if self.current_task > 0:
            # Push masked-out logits to the most negative representable
            # value so they effectively vanish from the loss.
            logits = logits.masked_fill(mask == 0, torch.finfo(logits.dtype).min)

        loss = self.loss(logits, labels)

        if self.current_task > 0:
            # sample from buffer
            # NOTE(review): `self.setting` is not defined in this class;
            # presumably supplied by the base class -- confirm.
            buf_inputs, buf_labels = self.buffer.get_data_old(
                self.setting.minibatch_size, transform=self.transform)
            # Replay loss is computed on unmasked logits.
            loss = loss + self.loss(self.net(buf_inputs), buf_labels)

        loss.backward()
        self.opt.step()

        self.buffer.add_data(examples=not_aug_inputs,
                             labels=labels,
                             is_noise=noise)

        return loss.item(), 0, {}, 0, 0
