# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

"""Multi-view test a video classification model."""

import numpy as np
import os
import pickle
import torch
from fvcore.common.file_io import PathManager
import cv2
from einops import rearrange, reduce, repeat
import scipy.io

import timesformer.utils.checkpoint as cu
import timesformer.utils.distributed as du
import timesformer.utils.logging as logging
import timesformer.utils.misc as misc
import timesformer.visualization.tensorboard_vis as tb
from timesformer.datasets import loader
from timesformer.models import build_model
from timesformer.utils.meters import TestMeter
from timm.utils import accuracy

import sys
sys.path.append('/home/ny525072/VideoMamba/videomamba/video_sm/')
from kinetics_dataloader import KineticsDL, CustomBatchSampler, multiple_samples_collate
from COIN_loader import COINDL
from smth_loader import SmthSmthDL
from ucf_dataloader import UCFDL
from breakfast_loader import BkfstDL
from hmdb_loader import HMDBDL
from tqdm import tqdm
from breakfast_loader import BkfstDL
from COIN_loader import COINDL


# Module-level logger, created through the project's logging utilities
# (timesformer.utils.logging) and keyed by this module's name.
logger = logging.get_logger(__name__)

def _extract_features(model, data_loader):
    """Collect per-sample features and labels from every batch of a loader.

    Each batch is expected to unpack as ``(frames, labels)``. Frames are moved
    to the GPU and passed through ``model.model.forward_features``; the
    per-sample outputs are detached and moved to the CPU.

    Args:
        model: wrapper object exposing ``model.model.forward_features``.
        data_loader: iterable yielding ``(frames, labels)`` batches.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is the per-sample
        outputs stacked along a new leading dimension and ``labels`` is a
        list of Python scalars, both in loader order.

    Raises:
        ValueError: if the loader yields no samples.
    """
    features = []
    labels_out = []
    for frames, labels in tqdm(data_loader):
        feat = model.model.forward_features(frames.cuda())
        # Labels are only compared on the host, so they never need to visit
        # the GPU; `.item()` turns each 0-d tensor into a Python scalar.
        for feature, lbl in zip(feat, labels):
            features.append(feature.detach().cpu())
            labels_out.append(lbl.item())
    if not features:
        raise ValueError("data loader yielded no samples")
    return torch.stack(features), labels_out


def custom_retrieval(model, train_loader, val_loader):
    """Evaluate top-1 nearest-neighbour retrieval accuracy.

    Features of every training sample form the gallery. Each validation
    sample is matched to the gallery entry with the highest cosine similarity
    and counted as correct when their labels agree.

    Args:
        model: wrapper object exposing ``model.model.forward_features``; it is
            moved to the GPU, put in eval mode, and its ``stflex`` and
            ``flexible`` attributes are set to False.
        train_loader: iterable yielding ``(frames, labels)`` gallery batches.
        val_loader: iterable yielding ``(frames, labels)`` query batches.

    Returns:
        float: fraction of validation samples whose nearest gallery sample
        has the same label.

    Raises:
        ValueError: if either loader yields no samples.
    """
    model.cuda()
    model.eval()
    model.stflex = False
    model.flexible = False

    # Inference only: skip autograd bookkeeping so activations are not kept
    # alive for a backward pass that never happens.
    with torch.no_grad():
        train_features, train_labels = _extract_features(model, train_loader)
        test_features, test_labels = _extract_features(model, val_loader)

    print(train_features.shape, test_features.shape)

    correct = 0
    for probe, probe_label in zip(tqdm(test_features), test_labels):
        similarity = torch.nn.functional.cosine_similarity(
            probe.unsqueeze(0), train_features, dim=1
        )
        # Only the best match is needed; argmax also works when the gallery
        # holds a single sample (topk with k=2 would raise there).
        nearest = similarity.flatten().argmax().item()
        if probe_label == train_labels[nearest]:
            correct += 1

    # Divide by the number of queries, not by the last loop index.
    total = len(test_labels)
    retrieval_accuracy = correct / total
    print(correct, total)
    print(f'Test Accuracy: {retrieval_accuracy}')
    return retrieval_accuracy


def custom_test(data_loader, model, maxk=5):
    """Compute top-1 classification accuracy of ``model`` over a loader.

    The model is put in eval mode and its ``stflex`` and ``flexible``
    attributes are set to False. The running accuracy is printed after every
    batch and the final accuracy once the loader is exhausted.

    Args:
        data_loader: iterable of batches whose first two items are the video
            tensor and the target tensor.
        model: callable mapping a video batch to per-class scores with the
            class dimension at ``dim=1``.
        maxk (int): currently unused (only top-1 is evaluated); kept so
            existing callers keep working.

    Returns:
        float: fraction of correctly classified samples, or 0.0 when the
        loader yields no samples.
    """
    model.eval()
    model.stflex = False
    model.flexible = False

    correct = 0
    # Count the samples actually seen instead of assuming a batch size of 8;
    # the batch size is configurable and the last batch may be short.
    total = 0

    with torch.no_grad():
        for batch in tqdm(data_loader):
            videos = batch[0].to('cuda', non_blocking=True)
            target = batch[1].to('cuda', non_blocking=True)

            preds = model(videos).argmax(dim=1)
            correct += (preds == target).sum().item()
            total += target.numel()
            if total:
                print(correct / total)

    final_accuracy = correct / total if total else 0.0
    print('final: ', final_accuracy)
    return final_accuracy

@torch.no_grad()
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video along
    its temporal axis. For each clip, it takes 3 crops to cover the spatial
    dimension, followed by averaging the softmax scores across all Nx3 views to
    form a video-level prediction. All video predictions are compared to
    ground-truth labels and the final testing performance is logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader yielding
            (inputs, labels, video_idx, meta) batches.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in the project's
            config/defaults.py.
        writer (TensorboardWriter object, optional): TensorboardWriter object
            to writer Tensorboard log.
    Returns:
        test_meter: the meter passed in, after `finalize_metrics()` was called.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, video_idx, meta) in enumerate(tqdm(test_loader)):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Transfer the data to the current GPU device.
            labels = labels.cuda()
            video_idx = video_idx.cuda()
            # NOTE(review): the transfer of `meta` to the GPU is disabled
            # below; confirm this is intended before using the detection path.
            # for key, val in meta.items():
            #     if isinstance(val, (list,)):
            #         for i in range(len(val)):
            #             val[i] = val[i].cuda(non_blocking=True)
            #     else:
            #         meta[key] = val.cuda(non_blocking=True)
        test_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            preds = preds.detach().cpu() if cfg.NUM_GPUS else preds.detach()
            ori_boxes = (
                ori_boxes.detach().cpu() if cfg.NUM_GPUS else ori_boxes.detach()
            )
            # metadata = (
            #     metadata.detach().cpu() if cfg.NUM_GPUS else metadata.detach()
            # )

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes), dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx]
                )
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                labels = labels.cpu()
                video_idx = video_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach(), labels.detach(), video_idx.detach()
            )
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if not cfg.DETECTION.ENABLE:
        all_preds = test_meter.video_preds.clone().detach()
        all_labels = test_meter.video_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()
        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(cfg.OUTPUT_DIR, cfg.TEST.SAVE_RESULTS_PATH)

            with PathManager.open(save_path, "wb") as f:
                # Save predictions together with labels; the labels used to be
                # written twice, so the predictions were never stored.
                pickle.dump([all_preds, all_labels], f)

            logger.info(
                "Successfully saved prediction results to {}".format(save_path)
            )

    test_meter.finalize_metrics()
    return test_meter


def _select_dataset_class(dataset):
    """Return the dataset class registered under the name ``dataset``.

    The classes are looked up lazily, branch by branch, so a class that is
    not importable only fails when that dataset is actually requested.

    Raises:
        ValueError: if ``dataset`` is not one of the known names.
    """
    if dataset == 'breakfast':
        return BkfstDL  # 10 classes
    if dataset == 'coin':
        return COINDL  # 180 classes
    if dataset == 'ucf':
        return UCFDL  # 101 classes
    if dataset == 'SSV2':
        return SmthSmthDL  # 174 classes
    if dataset == 'hmdb':
        return HMDBDL  # 51 classes
    if dataset == 'ntu':
        # NOTE(review): NTU120DL is not imported in the visible part of this
        # file; selecting 'ntu' raises NameError until the import is added.
        return NTU120DL  # 120 classes
    if dataset == 'diving':
        # NOTE(review): DivingDL is not imported in the visible part of this
        # file; selecting 'diving' raises NameError until the import is added.
        return DivingDL  # 48 classes
    raise ValueError(
        "Unknown dataset {!r}; expected one of 'breakfast', 'coin', 'ucf', "
        "'SSV2', 'hmdb', 'ntu', 'diving'".format(dataset)
    )


def test(cfg, dataset='ucf'):
    """
    Run nearest-neighbour retrieval evaluation with the pretrained video model.

    The model is built with 16 frames at a 224 crop size and the test
    checkpoint is loaded; the datasets are then created with the frame count
    and crop size that were originally requested in ``cfg``.
    Args:
        cfg (CfgNode): configs. Details can be found in the project's
            config/defaults.py. ``cfg.DATA.NUM_FRAMES``,
            ``cfg.DATA.TRAIN_CROP_SIZE``, ``cfg.TRAIN.STFLEX`` and
            ``cfg.TEST.BATCH_SIZE`` may be modified in place.
        dataset (str): name of the dataset to evaluate on: 'breakfast',
            'coin', 'ucf', 'SSV2', 'hmdb', 'ntu' or 'diving'.
    Returns:
        float: retrieval accuracy as returned by ``custom_retrieval``.
    Raises:
        ValueError: if ``dataset`` is not a known dataset name.
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Remember the requested evaluation setting, then force the model to be
    # built with 16 frames at 224 (presumably the setting the checkpoint was
    # trained with -- TODO confirm).
    num_frames = cfg.DATA.NUM_FRAMES
    resolution = cfg.DATA.TRAIN_CROP_SIZE
    cfg.DATA.NUM_FRAMES = 16
    cfg.DATA.TRAIN_CROP_SIZE = 224

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    # NOTE: no temporal position-embedding interpolation is applied here when
    # num_frames != 16; an earlier, disabled version did this manually.
    if resolution != 224:
        cfg.TRAIN.STFLEX = True

    # Long clips or large crops are evaluated one sample at a time.
    if num_frames >= 32 or resolution > 224:
        cfg.TEST.BATCH_SIZE = 1

    print(num_frames, resolution, dataset)

    # Create the gallery (train) and query (test) datasets at the requested
    # frame count and resolution.
    dataset_cls = _select_dataset_class(dataset)
    train_dataset = dataset_cls(
        'train', num_frames=num_frames, resolution=resolution
    )
    test_dataset = dataset_cls(
        'test', num_frames=num_frames, resolution=resolution
    )

    total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {total_params}")

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        num_workers=8,
        batch_size=cfg.TEST.BATCH_SIZE,
        shuffle=False,
        collate_fn=multiple_samples_collate,
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        num_workers=8,
        batch_size=cfg.TEST.BATCH_SIZE,
        shuffle=False,
        collate_fn=multiple_samples_collate,
    )

    logger.info("Testing model for {} iterations".format(len(test_loader)))

    # NOTE: the multi-view evaluation path (TestMeter + perform_test, with an
    # optional Tensorboard writer) and custom_test are not used here; only
    # retrieval accuracy is computed.
    retrieval_accuracy = custom_retrieval(model, train_loader, test_loader)
    print(resolution, num_frames, cfg.TEST.CHECKPOINT_FILE_PATH, dataset)
    return retrieval_accuracy