"""
Answer Comparison Utilities for IMProofBench

This module provides shared functions for comparing model answers against
correct answers. Used by both the agentic (Inspect-based) and non-agentic
evaluators to ensure consistent evaluation logic.

Key Features:
- Case-insensitive comparison
- Boolean equivalence handling (yes/no/true/false/1/0)
- Whitespace and formatting normalization
- Mathematical notation tolerance (removes $, normalizes brackets)
"""


# Tokens that are treated as equivalent spellings of a boolean answer.
_TRUE_TOKENS = frozenset({'true', 'yes', '1', 't', 'y'})
_FALSE_TOKENS = frozenset({'false', 'no', '0', 'f', 'n'})

# Single-pass character mapping: drop spaces and dollar signs, turn
# parentheses into square brackets.
_FORMATTING_TABLE = str.maketrans({' ': None, '$': None, '(': '[', ')': ']'})


def normalize_answer(text: str) -> str:
    """
    Normalize an answer string for comparison.

    Applies the following normalizations, in order:
    1. Convert to lowercase
    2. Remove formatting characters (spaces, dollar signs)
    3. Normalize brackets: replace () with []
    4. Strip any remaining leading/trailing whitespace
    5. Handle boolean equivalences (yes/no/true/false/1/0/y/n/t/f)

    Formatting is removed *before* the boolean check so that a boolean-like
    answer wrapped in math delimiters (e.g. "$1$") normalizes to the same
    value as the bare answer ("1").

    Args:
        text: The answer text to normalize. Non-string values are converted
            with ``str()``; ``None`` is treated as an empty answer.

    Returns:
        Normalized answer string ('true' / 'false' for boolean-like answers,
        '' for ``None`` or blank input).

    Examples:
        >>> normalize_answer("Yes")
        'true'
        >>> normalize_answer("NO")
        'false'
        >>> normalize_answer("  123,456  ")
        '123,456'
        >>> normalize_answer("$42$")
        '42'
        >>> normalize_answer("(1, 2)")
        '[1,2]'
        >>> normalize_answer("$1$")
        'true'
        >>> normalize_answer(0)
        'false'
    """
    # Only None means "no answer"; a falsy value such as the integer 0 is a
    # real answer and must normalize the same way as the string "0".
    if text is None:
        return ""

    cleaned = str(text).lower().translate(_FORMATTING_TABLE).strip()

    # Handle boolean equivalences on the cleaned text.
    if cleaned in _TRUE_TOKENS:
        return 'true'
    if cleaned in _FALSE_TOKENS:
        return 'false'

    return cleaned


def check_answer_correctness(model_answer: str, correct_answer: str) -> bool:
    """
    Decide whether a submitted answer agrees with the reference answer.

    Both values are passed through ``normalize_answer`` and the resulting
    strings are compared for exact equality, so differences that the
    normalizer removes (case, boolean spellings, formatting) do not count
    as mismatches.

    Args:
        model_answer: The model's submitted answer
        correct_answer: The correct answer from the database

    Returns:
        True if the two normalized strings are equal, False otherwise

    Examples:
        >>> check_answer_correctness("Yes", "yes")
        True
        >>> check_answer_correctness("YES", "true")
        True
        >>> check_answer_correctness("(1, 2)", "[1,2]")
        True
        >>> check_answer_correctness("No", "yes")
        False
    """
    return normalize_answer(model_answer) == normalize_answer(correct_answer)
