import json
import random
import os


# Fixed seed so the sampled subsets are identical from run to run.
random.seed(0)

# Benchmarks to subsample; each must have data/<name>_<split>_0-shot.json.
DATASETS = ["boolq", "piqa", "siqa", "hellaswag", "winogrande", "arce", "arcc", "obqa"]

# Number of examples drawn per split. Insertion order (train, then test) is
# the processing order, which determines the sequence of random draws.
SAMPLE_SIZES = {'train': 1000, 'test': 300}

OUTPUT_DIR = 'data/cl'
DATASET_INFO_PATH = f'{OUTPUT_DIR}/dataset_info.json'


def _read_json(path):
    """Return the parsed contents of the UTF-8 JSON file at *path*."""
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def _write_json(obj, path):
    """Write *obj* to *path* as indented, non-ASCII-escaped UTF-8 JSON."""
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened with an explicit UTF-8 encoding instead of the platform default.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)


# The output directory may not exist on a fresh checkout.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Start from the existing registry (if any) so entries written by earlier
# runs are preserved; entries for the datasets below are overwritten.
if os.path.exists(DATASET_INFO_PATH):
    dataset_info = _read_json(DATASET_INFO_PATH)
else:
    dataset_info = {}

for dataset in DATASETS:
    for split, sample_size in SAMPLE_SIZES.items():
        data = _read_json(f'data/{dataset}_{split}_0-shot.json')

        # Sampling without replacement; random.sample raises ValueError if
        # the source file holds fewer than sample_size records.
        data = random.sample(data, sample_size)
        _write_json(data, f'{OUTPUT_DIR}/{dataset}_{split}.json')

        # Register the subset. The column mapping presumably names fields
        # present in each record (instruction/input/output) -- confirm
        # against the source data and the consumer of dataset_info.json.
        dataset_info[f'{dataset}_{split}'] = {
            'file_name': f'{dataset}_{split}.json',
            "columns": {
                'prompt': 'instruction',
                'query': 'input',
                'response': 'output',
            },
        }

        # Persist after every split so the registry on disk stays in sync
        # with the subset files already written if a later dataset fails.
        _write_json(dataset_info, DATASET_INFO_PATH)

