import datasets
import PIL.Image
from functools import partial

from src.path import test_dataset_dir, train_dataset_dir
from src.config import visonlyqa_real_splits, visonlyqa_synthetic_splits
from src.utils import get_hf_dataset_name


def add_image_to_example(example, split="test"):
    """Return a copy of ``example`` with its image attached.

    The file named by ``example["image_path"]`` is opened with
    ``PIL.Image.open`` relative to ``test_dataset_dir`` when ``split`` is
    ``"test"``, and relative to ``train_dataset_dir`` for any other value.

    Args:
        example: Mapping holding at least an ``"image_path"`` entry.
        split: ``"test"`` selects the test directory; every other value
            selects the training directory.

    Returns:
        A new dict with ``"decoded_image"`` as its first key, followed by
        the entries of ``example``. If ``example`` already carries a
        ``"decoded_image"`` entry, that existing value takes precedence.
    """
    if split == "test":
        base_dir = test_dataset_dir
    else:
        base_dir = train_dataset_dir

    # NOTE(review): Image.open is lazy, so the underlying file presumably
    # stays open until the pixel data is read -- confirm this is acceptable
    # when mapping over large datasets.
    opened_image = PIL.Image.open(base_dir / example["image_path"])

    # Entries of ``example`` come second so they override on a key clash.
    return {"decoded_image": opened_image, **example}


def main():
    """Load each local evaluation split, attach its images, and push it to the Hub.

    For the "synthetic" and "real" split lists (in that order), every split
    is read from ``./dataset/test/<kind>/<split_name>.jsonl`` (relative to
    the current working directory), mapped through ``add_image_to_example``
    with ``split="test"``, and pushed as a private split to the repository
    named by ``get_hf_dataset_name(f"eval_{kind}")``.
    """
    # Insertion order matters: synthetic splits are processed before real ones.
    splits_by_kind = {
        "synthetic": visonlyqa_synthetic_splits,
        "real": visonlyqa_real_splits,
    }
    # The mapper is the same for every split, so build it once.
    attach_image = partial(add_image_to_example, split="test")

    for real_synthetic, splits_list in splits_by_kind.items():
        repository_name = get_hf_dataset_name(f"eval_{real_synthetic}")

        for split_name in splits_list:
            print(split_name)

            dataset = datasets.load_dataset(
                f"./dataset/test/{real_synthetic}",
                data_files={split_name: f"{split_name}.jsonl"},
                split=split_name,
            )

            # add images to dataset
            dataset = dataset.map(attach_image)
            dataset.push_to_hub(repository_name, split=split_name, private=True)


if __name__ == "__main__":
    main()
