import sys
import os
from gta_policy_eval_utils import run_eval
from typing import Any, Dict, List

import re
def format_reward(response: str) -> float:
    """Return 1.0 when *response* contains a ``<think>...</think>`` span, else 0.0.

    The opening tag must appear before a closing tag somewhere in the text;
    the content between them may be empty and may span multiple lines.
    """
    # DOTALL lets "." match newlines so a multi-line think block still counts.
    has_think_block = (
        re.search(r"<think>.*</think>", response, flags=re.DOTALL) is not None
    )
    return float(has_think_block)

def compute_score(reward_inputs: List[Dict[str, Any]]) -> List[Dict[str, float]]:
    """Score a batch of responses, returning one score dict per input.

    Each element of *reward_inputs* is read via its ``"response"`` and
    ``"ground_truth"`` keys. The pair is passed to ``run_eval`` and the
    response is additionally checked with ``format_reward``.

    Each returned dict has the keys:
        overall: ``0.1 * format + 0.9 * overall_tool_calling_score``.
        overall_tool_calling_score: ``run_eval``'s ``'overall_score'``.
        tool_matching_acc: ``run_eval``'s ``'tool_name_match'``.
        argument_score: ``run_eval``'s ``'arguments_eval'['overall_score']``.
        format: the ``format_reward`` result (0.0 or 1.0).

    Raises:
        ValueError: if *reward_inputs* is not a list.
    """
    if not isinstance(reward_inputs, list):
        # Message text is kept verbatim; it is part of the runtime behavior.
        raise ValueError("Please use `reward_type=batch` for math reward function.")

    def _score_one(sample: Dict[str, Any]) -> Dict[str, float]:
        # Build the score dict for a single response / ground-truth pair.
        response = sample["response"]
        reference = sample["ground_truth"]
        evaluation = run_eval(response, reference)

        fmt = format_reward(response)
        tool_calling = evaluation['overall_score']
        # Format compliance carries 10% of the final score, tool calling 90%.
        blended = 0.1 * fmt + 0.9 * tool_calling

        return {
            "overall": blended,
            "overall_tool_calling_score": tool_calling,
            "tool_matching_acc": evaluation['tool_name_match'],
            "argument_score": evaluation['arguments_eval']['overall_score'],
            "format": fmt,
        }

    return [_score_one(sample) for sample in reward_inputs]
