import os

import datasets

from utils import load_single_dataset, save_dataset

# Split the EurusPRM Stage-1 data into train/validation parquet files.
#
# Steps: load the "train" split, replace the nested "response" column with flat
# "prompt" / "output" text columns, shuffle, hold out the first VALID_SIZE rows
# for validation, and write both splits under OUTPUT_DIR.
VALID_SIZE = 13000  # number of shuffled rows held out for validation
SHUFFLE_SEED = 42  # fixed so that re-running the script reproduces the same split
# Expand "~" explicitly instead of relying on the parquet writer to do it.
OUTPUT_DIR = os.path.expanduser("~/datasets/PRIME-RL-EurusPRM-Stage1-Data-forverl")

eurus_dataset: datasets.Dataset = load_single_dataset("~/datasets/PRIME-RL-EurusPRM-Stage1-Data", dataset_split="train")

# Each "response" entry is read as a sequence whose first two items are mappings
# with a "content" key: item 0 supplies the prompt text and item 1 the output
# text (presumably a user/assistant message pair -- confirm against the dataset
# card). The column is walked once to build both new columns.
prompts = []
outputs = []
for turns in eurus_dataset["response"]:
    prompts.append(turns[0]['content'])
    outputs.append(turns[1]['content'])

eurus_dataset = eurus_dataset.add_column("prompt", prompts)
eurus_dataset = eurus_dataset.add_column("output", outputs)
eurus_dataset = eurus_dataset.remove_columns("response")
eurus_dataset = eurus_dataset.shuffle(seed=SHUFFLE_SEED)

# Fail early with a clear message rather than an index error from select()
# (too few rows) or a silently empty training split (exactly VALID_SIZE rows).
if len(eurus_dataset) <= VALID_SIZE:
    raise ValueError(
        f"dataset has {len(eurus_dataset)} rows; need more than {VALID_SIZE} "
        "to carve out a validation split and still leave training data"
    )

valid_ds = eurus_dataset.select(range(VALID_SIZE))
train_ds = eurus_dataset.select(range(VALID_SIZE, len(eurus_dataset)))

# The output directory is created here so the writes do not depend on it
# already existing.
os.makedirs(OUTPUT_DIR, exist_ok=True)
train_ds.to_parquet(os.path.join(OUTPUT_DIR, "train.parquet"))
valid_ds.to_parquet(os.path.join(OUTPUT_DIR, "valid.parquet"))
