
import jsonlines
import pandas as pd
from datasets import DatasetDict, Dataset


def return_res(file):
    """Load a JSON Lines file and drop the ``'metadata'`` key from each record.

    Args:
        file: Path of the JSON Lines file; passed straight to
            ``jsonlines.open``.

    Returns:
        A list holding every record read from the file, in file order, with
        the ``'metadata'`` key removed. Records are assumed to be JSON
        objects (dicts) -- TODO confirm against the input files.
    """
    # Open the reader as a context manager so the underlying file handle is
    # closed as soon as all records have been read, even if parsing fails.
    with jsonlines.open(file) as reader:
        data_res = list(reader)
    for item in data_res:
        # The None default makes a record without 'metadata' a no-op rather
        # than a KeyError that aborts the whole load.
        item.pop('metadata', None)
    return data_res


# Read the two training files; return_res removes the 'metadata' key from
# every record it loads.
non_cls = return_res('Task_1/openr1_math_non_cls_training.jsonl')
cls = return_res('Task_1/openr1_math_cls_training.jsonl')

# Single combined list: all records from the first file, then the second.
training_data = [*non_cls, *cls]

# Build the 'train' split from a DataFrame of the combined records and wrap
# it in a DatasetDict.
train_split = Dataset.from_pandas(pd.DataFrame(training_data))
new_data = DatasetDict({'train': train_split})

# Upload the dataset to the Hugging Face Hub repository named below.
new_data.push_to_hub("anonyneurips25/CRITIC-math-sft")
