from collections import defaultdict

from tqdm import tqdm

from verl.utils.reward_score.ttrl.auto_extract import auto_extract
from verl.utils.reward_score.ttrl.qwen.qwen_eval import qwen_reward_fn, qwen_reward_fn_gpqa, simplerl_reward_fn


def compute_score(data_source, model_response, gt_answer):
    """Score a model response against a ground-truth answer.

    The verifier is picked by ``data_source``: ``"math"``,
    ``"simplerl_math"`` and ``"gpqa"`` are the supported keys. The chosen
    verifier is called with ``(model_response, gt_answer)`` and its result
    is reported under both ``"score"`` and ``"acc"``.

    Args:
        data_source: Task key used to select the verifier and passed on
            to ``auto_extract``.
        model_response: The model output to be judged.
        gt_answer: The reference answer; echoed back as ``"extracted_gt"``.

    Returns:
        A dict with the keys ``"score"``, ``"acc"``, ``"extracted_gt"``
        and ``"pred"`` (the value returned by ``auto_extract``).

    Raises:
        AssertionError: If ``data_source`` is not a supported key
            (only while assertions are enabled).
    """
    verifiers = {
        "math": qwen_reward_fn,
        "simplerl_math": simplerl_reward_fn,
        "gpqa": qwen_reward_fn_gpqa,
    }
    assert data_source in verifiers, f"{data_source} not in {list(verifiers)}"

    # Run the verifier before extraction, matching the original call order.
    outcome = verifiers[data_source](model_response, gt_answer)
    prediction = auto_extract(data_source, model_response)

    return {
        "score": outcome,
        "acc": outcome,
        "extracted_gt": gt_answer,
        "pred": prediction,
    }