import os
# Point Hugging Face downloads at the hf-mirror.com endpoint. This assignment
# happens before `datasets` is imported further down in the file.
# NOTE(review): presumably the hub client reads HF_ENDPOINT when it is
# imported, which would make this ordering significant — confirm before moving.
os.environ["HF_ENDPOINT"]="https://hf-mirror.com"
# Blank out the lowercase proxy variables for this process.
# NOTE(review): presumably so requests go direct instead of through a
# configured proxy — confirm.
os.environ["http_proxy"]=""
os.environ["https_proxy"]=""
import sys
# Directory part of this script's path; used below to build the output path.
pwd_path = os.path.dirname(__file__)
print(f"pwd_path: {pwd_path}")
import jsonlines
import json

from datasets import load_dataset

def get_dataset():
    """Load the Minerva-math test split and remap it to problem/solution rows.

    The ``"test"`` split of ``zwhe99/simplerl-minerva-math`` is loaded, its
    column names are printed, and every example is mapped to a dict holding
    the original ``"problem"`` value and the original ``"answer"`` value
    stored under the key ``"solution"``. All source column names are passed
    as ``remove_columns`` to the ``map`` call.

    Returns:
        The dataset object returned by ``map``.
    """

    def _to_problem_solution(row):
        # The answer is passed through unchanged, only under a new key.
        return {"problem": row["problem"], "solution": row["answer"]}

    raw = load_dataset("zwhe99/simplerl-minerva-math", split="test")
    source_columns = raw.column_names
    print(source_columns)

    return raw.map(
        _to_problem_solution,
        num_proc=64,
        batched=False,
        remove_columns=source_columns,
    )
# dataset = load_dataset("zwhe99/simplerl-OlympiadBench", split='test')

# Build the processed dataset and materialise its rows as a list so they can
# be written out in one call.
dataset = get_dataset()
all_data = list(dataset)

# Output path is resolved relative to this script's directory.
fp = os.path.join(pwd_path, "../dataset/minerva.jsonl")
# Create the output directory if it is missing so that opening the file for
# writing does not fail with FileNotFoundError.
os.makedirs(os.path.dirname(fp), exist_ok=True)

# Use the writer as a context manager so the file is flushed and closed
# deterministically rather than whenever the writer object is collected.
with jsonlines.open(fp, mode="w") as writer:
    writer.write_all(all_data)
# with open(fp, mode="w", encoding="utf-8") as f:
#     for item in all_data:
#         f.write(json.dumps(item, ensure_ascii=False) + "\n")
        