"""Code processing utilities."""
import re
import difflib
import logging

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


def parse_code_result(output_string: str, code_language_types: list[str]) -> str | None:
    """Extract code block from markdown code block.
    
    Args:
        output_string: String containing markdown code block
        code_language_types: List of language types to check for (e.g., ['python', 'cpp'])
        
    Returns:
        Extracted code block or None if not found
    """
    trimmed = output_string.strip()
    
    # Extracting the first occurrence of content between backticks
    code_match = re.search(r"```(.*?)```", trimmed, re.DOTALL)
    
    if code_match:
        # Strip leading and trailing whitespace from the extracted code
        code_block = code_match.group(1).strip()
        
        # depends on code_language_type: cpp, python, etc.
        # sometimes the block of code is ```cpp ... ``` instead of ``` ... ```
        # in this case strip the cpp out
        for code_type in code_language_types:
            if code_block.startswith(code_type):
                code_block = code_block[len(code_type):].strip()
        return code_block
    return None


def extract_first_code(output_string: str, code_language_types: list[str]) -> tuple[str | None, str | None]:
    """Extract first code block from model output and the content before it.
    
    Args:
        output_string: The input string containing the code block
        code_language_types: List of code language types to check for (e.g., ['python', 'cpp'])
        
    Returns:
        tuple: (content_before_code, code_block)
            - content_before_code: Content before the code block (None if no code block found)
            - code_block: The extracted code (None if no code block found)
    """
    trimmed = output_string.strip()
    lang_set = {t.lower() for t in code_language_types}
    pattern = r"""
        ```                            # Starting ```
        (?P<lang>[a-zA-Z0-9_+\-]+)?    # Optional language name (e.g., python / cpp)
        \s*                            # Whitespace after language name
        \n?                            # Optional newline
        (?P<code>.*?)                  # Code content (non-greedy)
        (?:```|$)                      # Until next ``` or string end
    """
    code_match = re.search(pattern, trimmed, re.DOTALL | re.VERBOSE)
    if not code_match:
        return None, None
    
    lang = code_match.group("lang")
    code_block = code_match.group("code").strip()
    if lang and lang_set and lang.lower() not in lang_set:
        return None, None
    content_before = trimmed[:code_match.start()].strip()
    
    return content_before, code_block


def compute_code_diff(code_before: str, code_after: str) -> str:
    """Generate a unified diff text, similar to git diff.

    Both inputs are split into lines without their line endings, and the
    diff lines are joined with a single newline. Splitting this way keeps
    the ``---``/``+++``/``@@`` control lines on their own lines and stops a
    final line that lacks a trailing newline from running into the next diff
    line. As a consequence, a difference that consists only of a trailing
    newline is not reported.

    Args:
        code_before: Code before changes
        code_after: Code after changes

    Returns:
        Unified diff string with no trailing newline, or an empty string
        when the two inputs have the same lines.
    """
    diff_lines = difflib.unified_diff(
        code_before.splitlines(),
        code_after.splitlines(),
        fromfile="before",
        tofile="after",
        # Lines carry no terminator here; the join below supplies them.
        lineterm="",
    )
    return "\n".join(diff_lines)


def parser_error_api(info: str) -> list[str]:
    """Collect the quoted names from "error: no matching ..." messages.

    Each occurrence of ``error: no matching <anything> '<name>'`` in the
    text contributes its single-quoted ``<name>`` to the result, in order
    of appearance.

    Args:
        info: Error message string

    Returns:
        List of quoted names found; empty when ``info`` is empty/None or
        contains no such message.
    """
    if not info:
        return []

    hits = re.finditer(r"error: no matching .*? \'(.+?)\'", info)
    return [hit.group(1) for hit in hits]

