import os

from datasets import DatasetDict, load_dataset, load_from_disk


class BaseDataset:
    """Base class for dataset wrappers backed by the Hugging Face hub.

    Subclasses configure the class attributes below and implement the
    ``load_dataset_for_*`` hooks. ``download`` has a default implementation
    that is used once ``path``, ``raw_path`` and ``subsets`` are set.
    """

    # NOTE(review): dataset_type, name, test_set, eval_method, metric and keys
    # are not read anywhere in this class; presumably subclasses or callers
    # consume them -- confirm against their usage.
    dataset_type = None
    path = None  # the hugging face path
    name = None
    # Subset names; each is passed to ``load_dataset`` as its second argument.
    # Kept as a list for compatibility: subclasses should rebind it rather
    # than mutate it in place, since the list object is shared class-wide.
    subsets = []
    test_set = None
    eval_method = None
    metric = None
    keys = []  # shared class-level list as well; rebind in subclasses
    raw_path = None  # local directory under which each subset is stored

    def __init__(self, *args, **kwargs):
        """Create the wrapper with no data loaded; all arguments are ignored."""
        self.dataset = None

    def download(self):
        """Populate ``self.dataset`` with one entry per name in ``subsets``.

        For each subset, the copy stored at ``<raw_path>/<subset>`` is loaded
        if that path exists; otherwise the subset's ``"train"`` split is
        fetched with ``load_dataset`` and saved to that path. The result is
        stored in ``self.dataset`` as a ``DatasetDict`` keyed by subset name.

        Raises:
            NotImplementedError: if ``path``, ``raw_path`` or ``subsets`` is
                not configured, in which case the subclass must provide its
                own ``download``.
        """
        # This class previously defined ``download`` twice; the later stub
        # shadowed the working implementation. The guard below keeps the
        # stub's behaviour for unconfigured classes.
        if self.path is None or self.raw_path is None or not self.subsets:
            raise NotImplementedError(
                f"download not implemented for {self.__class__.__name__}"
            )

        data_subsets = {}

        for s in self.subsets:
            output_path = os.path.join(self.raw_path, s)
            if os.path.exists(output_path):
                print("path exists" + s)
                data_subsets[s] = load_from_disk(output_path)
                continue

            # SECURITY: trust_remote_code=True lets the dataset repository run
            # its own loading script locally; only use with trusted ``path``s.
            data_subsets[s] = load_dataset(
                self.path, s, keep_in_memory=True, trust_remote_code=True
            )["train"]
            data_subsets[s].save_to_disk(output_path)

        self.dataset = DatasetDict(data_subsets)

    def load_dataset_for_train(self):
        """Hook for loading training data; raises unless overridden."""
        raise NotImplementedError(
            f"load_dataset_for_train not implemented for {self.__class__.__name__}"
        )

    def load_dataset_for_eval(self):
        """Hook for loading evaluation data; raises unless overridden."""
        raise NotImplementedError(
            f"load_dataset_for_eval not implemented for {self.__class__.__name__}"
        )

    @staticmethod
    def format_prompt(prompt):
        """Hook for formatting ``prompt``; raises unless overridden."""
        raise NotImplementedError("format_prompt not implemented")
