import os
import json
import re
from math_verify import parse, verify

def extract_boxed_answer(text):
    r"""Return the contents of the first well-formed ``\boxed{...}`` in *text*.

    The braces are matched by depth, so nested groups such as
    ``\boxed{\frac{1}{2}}`` are returned whole (``\frac{1}{2}``) instead of
    being cut at the first ``}``. Content may span several lines.

    Args:
        text: Text to search, e.g. a model response.

    Returns:
        The string between the outer braces of the first balanced
        ``\boxed{...}``, or ``None`` if no balanced occurrence exists.
    """
    marker = "\\boxed{"
    length = len(text)
    start = text.find(marker)
    while start != -1:
        content_start = start + len(marker)
        depth = 1
        pos = content_start
        while pos < length:
            char = text[pos]
            if char == "\\":
                # Skip the escaped character so that \{ and \} (literal
                # braces in LaTeX) do not affect the nesting depth.
                pos += 2
                continue
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return text[content_start:pos]
            pos += 1
        # This occurrence was never closed; try the next one, if any.
        start = text.find(marker, content_start)
    return None



def check_math_answer(results_dir="./results/vllm_infer"):
    """Score each ``math500*.jsonl`` file in *results_dir* and print its accuracy.

    Every non-blank line of a matching file is read as a JSON object with
    the keys ``"gold_answer"`` and ``"model_response"``. The boxed answer is
    extracted from the response, parsed with ``math_verify.parse`` and
    compared to the parsed gold answer with ``math_verify.verify``.
    Responses without a boxed answer count towards the total but are never
    counted as correct.

    One summary line per file is printed, in sorted filename order:
    ``<filename>: <correct> / <total> = <accuracy>``.

    Args:
        results_dir: Directory containing the result files. Defaults to the
            location the script originally hard-coded.

    Raises:
        FileNotFoundError: If *results_dir* does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"gold_answer"`` or ``"model_response"``.
    """
    # Sort so the report order is stable; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(results_dir)):
        if not (filename.startswith("math500") and filename.endswith(".jsonl")):
            continue

        filepath = os.path.join(results_dir, filename)
        correct = 0
        total = 0

        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(filepath, "r", encoding="utf-8") as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue
                data = json.loads(line)
                gold = parse(data["gold_answer"])
                answer_str = extract_boxed_answer(data["model_response"])
                total += 1
                if answer_str is None:
                    # No boxed answer: counted in the total, scored as wrong.
                    continue
                # Wrap in $...$ so the extracted text is parsed as LaTeX math.
                answer = parse(f"${answer_str}$")
                if verify(gold, answer):
                    correct += 1

        acc = correct / total if total > 0 else 0
        print(f"{filename}: {correct} / {total} = {acc:.2%}")

# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    check_math_answer()
