import os
import sys
# Directory part of this script's path; the output file location is built from it.
pwd_path = os.path.split(__file__)[0]
print(f"pwd_path: {pwd_path}")
# import jsonlines
import json

from datasets import load_dataset

def get_dataset(num_proc: int = 64):
    """Load the MATH-500 test split and reduce it to problem/solution pairs.

    Args:
        num_proc: Number of worker processes handed to ``Dataset.map``.
            Defaults to 64, the value that was previously hard-coded; pass a
            smaller number on machines with fewer cores.

    Returns:
        The mapped dataset. Each row carries two keys: ``"problem"`` (copied
        from the source row) and ``"solution"`` (derived from the source
        row's ``"answer"`` field).
    """
    def process_example_func(example):
        question = example["problem"]
        # Keep only the text after the last "####" marker when one is present
        # (presumably a GSM8K-style final-answer delimiter -- TODO confirm).
        # Answers without the marker pass through with whitespace stripped.
        final_answer = str(example["answer"]).split("####")[-1].strip()
        return {
            "problem": question,
            "solution": final_answer,
        }

    dataset = load_dataset("HuggingFaceH4/MATH-500", split="test")
    print(dataset.column_names)
    # Drop all original columns; the keys returned by the mapper are what the
    # resulting rows are built from.
    dataset = dataset.map(
        process_example_func,
        num_proc=num_proc,
        batched=False,
        remove_columns=dataset.column_names,
    )
    return dataset


# Build the dataset and dump it as JSON Lines (one JSON object per line).
dataset = get_dataset()
all_data = list(dataset)
fp = os.path.join(pwd_path, "../dataset/math.jsonl")
# The target directory may not exist yet (e.g. on a fresh checkout); create it
# so that opening the file for writing does not raise FileNotFoundError.
os.makedirs(os.path.dirname(fp), exist_ok=True)
# jsonlines.open(fp, mode="w").write_all(all_data)
with open(fp, mode="w", encoding="utf-8") as f:
    for item in all_data:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        f.write(json.dumps(item, ensure_ascii=False) + "\n")
        