from datasets import load_dataset
import os

# Export script: load the BeaverTails splits listed below and write each one
# to its own CSV file under ``output_dir``.

# Split name -> data file handed to ``load_dataset`` via ``data_files``.
# The keys become the split names used to index ``dataset`` further down.
# NOTE(review): the paths are presumably relative to the root of the
# "PKU-Alignment/BeaverTails" dataset repository -- confirm against the repo.
splits = {
    '330k_train': 'round0/330k/train.jsonl.xz',
    '330k_test': 'round0/330k/test.jsonl.xz',
    '30k_train': 'round0/30k/train.jsonl.gz',
    '30k_test': 'round0/30k/test.jsonl.gz',
}

# Load every split in one call; the result is indexed by split name below.
dataset = load_dataset("PKU-Alignment/BeaverTails", data_files=splits)

# Destination directory for the CSV files; created if it does not exist.
output_dir = "data/BeaverTails/BeaverTails_output"
os.makedirs(output_dir, exist_ok=True)

# Only the split names are needed here, so iterate the dict's keys directly
# instead of unpacking (and discarding) the source file paths.
for split_name in splits:
    data = dataset[split_name]

    # Convert the split to a pandas DataFrame so it can be written with
    # ``DataFrame.to_csv``.
    df = data.to_pandas()

    # One CSV per split, named after the split; the DataFrame index is not
    # written as a column.
    file_name = split_name + ".csv"
    df.to_csv(os.path.join(output_dir, file_name), index=False)

    print(f"Saved {split_name} as {file_name}")
