import json
import os.path
import numpy as np
from torch.utils.data import Dataset
from typing import List
from collections import OrderedDict
from abc import ABC, abstractmethod

from DataHandling.DataUtils import collect_models_results
from Utils import logger
from Utils.utils import get_model_id_from_file_name


class FileBasedDataset(Dataset, ABC):
    """
    Abstract base class for pytorch datasets whose samples are identified by file paths and whose labels
    (optionally) come from results files.

    The class only keeps the paths, the model id derived from each path and - when results locations are
    given - an array with one result value per file. Reading the input files themselves is left to the
    subclasses, which must implement ``__getitem__`` and ``create_dataset``.
    """

    def __init__(self, files_paths: List[str], results_files_locations: List[str], result_metric_name: str):
        """
        The base for my pytorch datasets. This class should make it easier for getting the labels of a dataset from the
        results files. It also does some basic work needed for implementing a pytorch dataset
        :param files_paths: List of full paths for input data files - This class doesn't open them !!
        :param results_files_locations: list of folder that contain all the relevant results files. When empty,
                                        no results are loaded and ``models_results`` is None
        :param result_metric_name: name of metric to use from the result file
        """
        super().__init__()
        # Keep a private copy so later changes to the caller's list don't affect the dataset
        self._files_paths = list(files_paths)
        self._dataset_size = len(self._files_paths)
        self._result_metric_name = result_metric_name
        self._models_ids = [get_model_id_from_file_name(curr_file) for curr_file in self._files_paths]

        # Defined up-front so both attributes exist regardless of which path is taken below.
        # NOTE(review): _results_ids is never populated by this class - confirm whether subclasses use it
        self._results_ids = None
        self._y_true = None

        if not results_files_locations:
            logger().warning('FileBasedDataset', 'Not using results file')
            return

        _, ordered_results = collect_models_results(self._files_paths, results_files_locations, result_metric_name)
        if len(self._files_paths) != len(ordered_results):
            # ordered_results is indexed by model id (see the lookup below), so the missing entries have to be
            # searched by model id and not by file path
            missing = [model_id for model_id in self._models_ids if model_id not in ordered_results]
            # NOTE(review): presumably logger().error raises the given exception type - confirm in Utils.logger
            logger().error('FileBasedDataset', ValueError,
                           f"Couldn't find results for all requested files\n\n"
                           f"Missing models results: {missing},\t{results_files_locations},\n"
                           f"results found = {list(ordered_results.keys())}\nrequested files: {files_paths}")

        # One result per requested file, in the same order as the files
        self._y_true = np.array([ordered_results[curr_id] for curr_id in self._models_ids])

    def __len__(self) -> int:
        """
        :return: number of files the dataset was created with
        """
        return self._dataset_size

    def get_data_by_model_id(self, model_id: str):
        """
        Placeholder for fetching a sample by its model id. The base implementation does nothing and returns None
        :param model_id: id of the requested model
        :return: None
        """
        pass

    @abstractmethod
    def __getitem__(self, index):
        """
        Returns the sample at the given index - must be implemented by the subclass
        :param index: index of the requested sample
        :return:
        """
        pass

    @property
    def models_results(self):
        """
        :return: a copy of the results array (one entry per file, same order as the files), or None when the
                 dataset was created without results files
        """
        if self._y_true is None:
            return None
        return self._y_true.copy()

    @property
    def models_ids(self) -> List[str]:
        """
        :return: a copy of the models ids list (one id per file, same order as the files)
        """
        return self._models_ids.copy()

    @property
    def files(self) -> List[str]:
        """
        :return: the dataset's files paths list. This is the internal list and not a copy - don't modify it
        """
        return self._files_paths

    @staticmethod
    @abstractmethod
    def create_dataset(*args, **kwargs):
        """
        Creates a FileBasedDataset
        :param args: placeholder
        :param kwargs: placeholder
        :return:
        """
        pass
