import datasets
from functools import partial

from src.config import train_data_splits
from src.utils import get_hf_dataset_name
from src.upload_to_huggingface.upload_to_huggingface_test_set import add_image_to_example


def _upload_split(data_dir: str, split_name: str, repository_name: str) -> None:
    """Load one JSONL split, attach images, and push it to the Hub.

    Args:
        data_dir: Directory passed to ``datasets.load_dataset``; the file
            ``<split_name>.jsonl`` is looked up relative to it.
        split_name: Name of the split. It is used as the JSONL file stem,
            as the split key when loading, as the ``split`` argument given
            to ``add_image_to_example``, and as the split name on the Hub.
        repository_name: Target Hub dataset repository.
    """
    dataset = datasets.load_dataset(
        data_dir,
        data_files={split_name: f"{split_name}.jsonl"},
        split=split_name,
    )

    # Add images to the dataset. add_image_to_example is shared with the
    # test-set upload script and is told which split it is processing.
    # NOTE(review): presumably it resolves image files per split -- confirm.
    dataset = dataset.map(partial(add_image_to_example, split=split_name))

    # Pushed with private=True, exactly as before.
    dataset.push_to_hub(repository_name, split=split_name, private=True)


if __name__ == "__main__":
    repository_name = get_hf_dataset_name("train")

    # The "first 100" subset lives directly under ./dataset/train.
    first_100_split = "train_all_first_100"
    _upload_split("./dataset/train", first_100_split, repository_name)

    # Each configured split has a "<split>_50" file under ./dataset/train/splits.
    for split in train_data_splits:
        _upload_split("./dataset/train/splits", f"{split}_50", repository_name)
