import json
import datasets
from tqdm.auto import tqdm

def data_aug(data, aug=False):
    """Optionally replace every space in ``data`` with a filler character.

    When ``aug`` is truthy and ``data`` contains a space, the first character
    of "·~*`" that does not already occur in ``data`` is substituted for all
    spaces. In every other case (augmentation disabled, no space present, or
    all candidate characters already used) ``data`` is returned unchanged.
    """
    if not aug or " " not in data:
        return data

    # Pick the first candidate that cannot collide with existing content.
    filler = next((ch for ch in "·~*`" if ch not in data), None)
    if filler is None:
        return data
    return data.replace(" ", filler)


def parse(dataset_file):
    """Parse a loaded dataset.

    No transformation is applied at present: the argument is handed back
    exactly as it was received.
    """
    parsed = dataset_file
    return parsed

def load_dataset(task, ft=None):
    """Load one of the ViTC JSON dataset files and pass it through ``parse``.

    Args:
        task: Dataset selector used when ``ft`` is None; one of ``"s"``,
            ``"l"`` or ``"mnist"``. Ignored when ``ft`` is given.
        ft: Optional selector for the test split used for fine-tuning
            validation; one of ``"s"``, ``"s-sample"`` or ``"s-font"``.

    Returns:
        The result of ``parse`` applied to the decoded JSON content.

    Raises:
        ValueError: If the selector in effect (``task`` when ``ft`` is None,
            otherwise ``ft``) is not one of the supported values.
    """
    benchmark_files = {
        "s": "data/ViTC/vitc-s.json",
        "l": "data/ViTC/vitc-l.json",
        "mnist": "data/ViTC/vitc-mnisttest.json",
    }
    # Test splits for fine-tuning validation.
    finetune_test_files = {
        "s": "data/ViTC/vitc-s-test.json",
        "s-sample": "data/ViTC/vitc-s-test_unseensamples.json",
        "s-font": "data/ViTC/vitc-s-test_unseenfonts.json",
    }

    # TypeError is caught as well so that unhashable selectors are still
    # reported as ValueError rather than leaking a lookup error.
    if ft is None:
        try:
            file_path = benchmark_files[task]
        except (KeyError, TypeError):
            raise ValueError("task should be s, l or mnist") from None
    else:
        try:
            file_path = finetune_test_files[ft]
        except (KeyError, TypeError):
            raise ValueError(
                "ft should be s, s-sample or s-font for fine-tuning validation"
            ) from None

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(file_path, "r", encoding="utf-8") as f:
        dataset = json.load(f)

    return parse(dataset)