import json

def extract_hash_answer(text: str) -> str | None:
    if "####" not in text:
        return None
    return text.split("####")[1].strip().replace(",", "").replace("$", "")

# Input dataset, read as one JSON object per line.
dataset_path = "datasets/gsm_symbolic_p2_test.jsonl"
# NOTE: identical to dataset_path, so the dataset is rewritten in place.
output_path = "datasets/gsm_symbolic_p2_test.jsonl"

# Load every record before the output file is opened: opening output_path in
# "w" mode truncates the very file being read here.
# The encoding is explicit so the result does not depend on the platform's
# default locale, and blank lines are skipped because json.loads would raise
# on them.
with open(dataset_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Add two derived fields to each record:
#   'prompt'       - a fixed instruction line followed by the 'question' text
#   'final_answer' - the value after the "####" marker in 'answer'
#                    (None when the marker is absent)
for item in data:
    item['prompt'] = "Solve the following math problem step by step:\n" + item['question']
    item['final_answer'] = extract_hash_answer(item['answer'])


# Write the augmented records back, one JSON object per line.
with open(output_path, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(item) + "\n" for item in data)