import os
import sys
# Directory part of this file's path; the output location further down is
# built relative to it.
pwd_path = os.path.split(__file__)[0]
print(f"pwd_path: {pwd_path}")
import jsonlines


from datasets import load_dataset
def get_dataset(split="test", num_proc=64):
    """Load ``openai/gsm8k`` (config ``main``) as problem/solution records.

    Every example is rewritten to a dict with exactly two keys:
    ``"problem"`` (the original ``question`` field) and ``"solution"`` (the
    text following the last ``"####"`` marker of the ``answer`` field, with
    surrounding whitespace stripped). All original columns are removed.

    Args:
        split: Dataset split passed to ``load_dataset``. Defaults to
            ``"test"``, the split this function originally hard-coded.
        num_proc: Number of worker processes passed to ``Dataset.map``.
            Defaults to ``64`` (the original hard-coded value); pass a
            smaller value, or ``None`` to map in the current process, on
            machines where starting that many workers is not worthwhile.

    Returns:
        The mapped dataset containing only the ``problem`` and ``solution``
        columns.
    """
    def process_example_func(example):
        """Convert one raw example into its problem/solution record."""
        # rpartition yields the text after the LAST "####"; when the marker
        # is absent the whole answer string is kept, same as split()[-1].
        _, _, final_answer = str(example["answer"]).rpartition("####")
        return {
            "problem": example["question"],
            "solution": final_answer.strip(),
        }

    dataset = load_dataset("openai/gsm8k", "main", split=split)
    print(dataset.column_names)
    dataset = dataset.map(
        process_example_func,
        num_proc=num_proc,
        batched=False,
        # Drop every source column so only the two new keys remain.
        remove_columns=dataset.column_names,
    )
    return dataset


# Run the export only when this file is executed as a script.
# get_dataset() maps the dataset with worker processes; on platforms whose
# process start method is "spawn", workers re-import the main module, and an
# unguarded top level would try to run the whole export again in each worker.
if __name__ == "__main__":
    dataset = get_dataset()
    all_data = list(dataset)
    fp = os.path.join(pwd_path, "../dataset/gsm8k.jsonl")
    # The target directory may not exist yet (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(fp), exist_ok=True)
    # The context manager closes the file, flushing buffered lines, even if
    # writing one of the records raises.
    with jsonlines.open(fp, mode="w") as writer:
        writer.write_all(all_data)