import os
from datasets import load_dataset, DatasetDict

def upload_to_hf(username: str, dataset_name: str) -> None:
    """Push the matching ``.jsonl`` files of the current directory to the Hub.

    Every file in the current working directory whose name starts with one of
    the prefixes listed below and ends with ``.jsonl`` is loaded with
    ``load_dataset('json', ...)`` and stored as its own split in a
    ``DatasetDict``, which is then pushed to ``{username}/{dataset_name}``.

    Args:
        username: Namespace part of the target repository id.
        dataset_name: Name part of the target repository id.

    Raises:
        FileNotFoundError: If no file in the current directory matches any
            prefix, so there is nothing to upload.
    """
    prefixes = [
        'whp_corpus_mmlu_', 'corpus_mmlu_', 'fwf_corpus_mmlu_',
        # 'mmlu_'  (currently disabled)
    ]
    suffix = '.jsonl'

    # List the directory once (it does not change per prefix) and sort it:
    # os.listdir() returns entries in arbitrary order, which would otherwise
    # make the order of the splits differ from run to run.
    filenames = sorted(os.listdir('.'))

    dataset_dict = {}
    for prefix in prefixes:
        for file in filenames:
            if not (file.startswith(prefix) and file.endswith(suffix)):
                continue

            # Strip only the leading prefix and the trailing extension.
            # str.replace() would also remove any further occurrence of those
            # substrings from the middle of the filename.
            stem = file[len(prefix):-len(suffix)]

            # NOTE: the prefix already ends with '_', so split names contain a
            # double underscore (e.g. 'corpus_mmlu__x'). Kept as is so that
            # the generated split names stay the same as before.
            split = f"{prefix}_{stem.replace(' ', '_')}"

            dataset_dict[split] = load_dataset(
                'json', data_files=file, split='train'
            )

    # Fail early with a clear message instead of pushing an empty DatasetDict
    # (typically the script was started from the wrong directory).
    if not dataset_dict:
        raise FileNotFoundError(
            f"no '{suffix}' files starting with any of {prefixes} "
            f"found in {os.getcwd()!r}"
        )

    combined_dataset = DatasetDict(dataset_dict)
    combined_dataset.push_to_hub(f'{username}/{dataset_name}')

if __name__ == '__main__':
    # Script entry point: ask for the repository owner first, then the
    # dataset name (tuple elements are evaluated left to right), and upload.
    username, dataset_name = (
        input("Enter your Hugging Face username: "),
        input("Enter the dataset name: "),
    )
    upload_to_hf(username=username, dataset_name=dataset_name)
