"""Utility functions for parsing LLM API responses."""

import json
import math
import re
from typing import Any, Union


def _strip_code_fences(text: str) -> str:
    """Return the lines enclosed by Markdown fences when *text* opens with one.

    Text that does not start with a fence is returned untouched. When several
    fenced sections are present their contents are concatenated, and a fence
    that is never closed keeps everything that follows it.
    """
    if not text.startswith('```'):
        return text

    kept_lines = []
    inside_fence = False
    for line in text.split('\n'):
        if line.startswith('```'):
            # Fence lines (with or without a language tag) are dropped.
            inside_fence = not inside_fence
        elif inside_fence:
            kept_lines.append(line)
    return '\n'.join(kept_lines)


def _skip_json_whitespace(text: str, pos: int) -> int:
    """Return the index of the first non-whitespace character at or after *pos*."""
    while pos < len(text) and text[pos] in ' \t\n\r':
        pos += 1
    return pos


def _salvage_truncated_array(text: str, decoder: json.JSONDecoder) -> list:
    """Collect the complete leading elements of an array whose tail is missing.

    *text* must start with ``[``. Elements are decoded one by one; decoding
    stops at the first element that is incomplete or malformed. Returns an
    empty list when not even the first element is complete.
    """
    items = []
    pos = 1  # step over the opening '['
    while True:
        pos = _skip_json_whitespace(text, pos)
        try:
            value, pos = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            break

        pos = _skip_json_whitespace(text, pos)
        followed_by_comma = pos < len(text) and text[pos] == ','
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if is_number and not followed_by_comma:
            # Numbers are not self-delimiting: one that runs into the cut-off
            # point may have lost digits, so it is not trusted.
            break

        items.append(value)
        if not followed_by_comma:
            break
        pos += 1  # step over the comma
    return items


def _find_embedded_container(text: str, decoder: json.JSONDecoder) -> Union[dict, list, None]:
    """Locate a JSON object or array embedded in *text*.

    Every ``{`` / ``[`` is tried in order of appearance as the start of a JSON
    value; text covered by a successfully decoded value is skipped, so only
    top-level values are considered and an object is never torn out of the
    array that contains it. The first top-level object wins; if there is none,
    the first top-level array is returned. When *text* itself is an array that
    lost its closing bracket, its complete leading elements are returned.

    Returns ``None`` when nothing could be decoded.
    """
    opener = re.compile(r'[{\[]')
    first_array = None
    pos = 0
    while True:
        match = opener.search(text, pos)
        if match is None:
            break
        start = match.start()
        try:
            value, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            if start == 0 and text[0] == '[' and not text.endswith(']'):
                # The whole response is an array that was cut off.
                salvaged = _salvage_truncated_array(text, decoder)
                if salvaged:
                    return salvaged
            pos = start + 1
            continue

        if isinstance(value, dict):
            return value
        if first_array is None:
            first_array = value
        pos = end
    return first_array


def _harvest_long_response(text: str) -> Union[list, None]:
    """Pull solution-like quoted strings out of a very long, unparseable response.

    NOTE: the keyword lists are specific to one problem domain; they are kept
    as they were so existing callers see the same results.

    Returns at most 50 de-duplicated strings, or ``None`` if nothing matched.
    """
    filter_keywords = (
        'bridge', 'system', 'protocol', 'dynamic', 'batch', 'allocation',
        'scheduling', 'traffic', 'vehicle', 'direction', 'switch', 'queue',
    )
    substantial = [
        quoted
        for quoted in re.findall(r'"([^"]*)"', text)
        if 50 < len(quoted) < 1000
        and any(keyword in quoted.lower() for keyword in filter_keywords)
    ]
    if substantial:
        # dict.fromkeys removes duplicates while preserving order.
        return list(dict.fromkeys(substantial))[:50]

    # More aggressive pass: any quoted span mentioning one of these keywords.
    pattern_keywords = (
        'bridge', 'system', 'protocol', 'dynamic', 'batch', 'allocation',
        'scheduling', 'traffic', 'vehicle',
    )
    harvested = []
    for keyword in pattern_keywords:
        harvested.extend(
            re.findall(f'"([^"]*{keyword}[^"]*)"', text, re.IGNORECASE)
        )
    plausible = [s for s in dict.fromkeys(harvested) if 20 < len(s) < 500]
    return plausible[:50] or None


def extract_json_from_response(response: str) -> Any:
    """Extract JSON content from LLM response, handling various formats.

    Strategies are tried in this order, returning on the first success:

    1. Strip Markdown code fences when the response opens with one.
    2. Decode a JSON object or array embedded anywhere in the text. A
       top-level object is preferred over a separate top-level array, but an
       array of objects is returned whole. An array that was cut off before
       its closing bracket yields its complete leading elements.
    3. Parse the whole text as JSON (covers bare scalars such as ``42``).
    4. Parse the content of a double-quoted span as JSON.
    5. Parse the content following an inline ```` ```json ```` marker.
    6. For responses longer than 10000 characters, harvest quoted strings
       that look like solutions (domain-specific keyword heuristic).

    Args:
        response: Raw response from LLM API

    Returns:
        Parsed JSON value (usually a dict or list)

    Raises:
        json.JSONDecodeError: If the response cannot be parsed as valid JSON
    """
    text = _strip_code_fences(response.strip()).strip()
    decoder = json.JSONDecoder()

    embedded = _find_embedded_container(text, decoder)
    if embedded is not None:
        return embedded

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Content between double quotes that happens to be JSON. Containers were
    # already handled by the scan above, so in practice this yields scalars.
    for quoted in re.findall(r'"([^"]*)"', text):
        try:
            return json.loads(quoted)
        except json.JSONDecodeError:
            continue

    # Extra prose around an inline ```json section.
    marker = re.search(r'```json', text, re.IGNORECASE)
    if marker is not None:
        closing = text.find('```', marker.end())
        if closing != -1:
            try:
                return json.loads(text[marker.end():closing].strip())
            except json.JSONDecodeError:
                pass

    if len(text) > 10000:
        harvested = _harvest_long_response(text)
        if harvested:
            return harvested

    raise json.JSONDecodeError(f"Could not extract valid JSON from response: {response[:200]}...", response, 0)


def extract_numeric_from_response(response: str) -> Union[int, float]:
    """Extract numeric value from LLM response.

    The whole (stripped) response is tried first; otherwise the first number
    found in the text is used. Recognised forms include an optional sign,
    a leading-dot fraction (``.5``) and an exponent (``1.5e3``).

    Args:
        response: Raw response from LLM API

    Returns:
        Extracted numeric value (always a float)

    Raises:
        ValueError: If no numeric value can be extracted
    """
    cleaned_response = response.strip()

    # Fast path: the response is nothing but a number.
    try:
        return float(cleaned_response)
    except ValueError:
        pass

    # Otherwise take the first number embedded in the text. The exponent part
    # requires digits, so a trailing "e" from ordinary prose is not consumed.
    number = re.search(
        r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?',
        cleaned_response,
    )
    if number is None:
        raise ValueError(f"No numeric value found in response: {response}")
    return float(number.group())


def extract_score_and_reasoning_from_response(response: str) -> tuple[float, str]:
    """Extract score and reasoning from LLM JSON response.

    Args:
        response: Raw response from LLM API containing JSON with 'score' and 'reasoning' fields

    Returns:
        Tuple of (score, reasoning), with the score converted to a finite
        float and the reasoning converted to a stripped string

    Raises:
        ValueError: If the response cannot be parsed or doesn't contain required fields
    """
    # Validation failures keep this prefix so messages stay the same as when
    # they were re-wrapped by a blanket ``except Exception``.
    failure_prefix = "Failed to extract score and reasoning: "

    # Only the parsing step is guarded; everything after it raises ValueError
    # directly instead of being caught and re-raised.
    try:
        parsed_json = extract_json_from_response(response)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON response: {e}") from e
    except Exception as e:
        # e.g. a non-string response; callers only expect ValueError.
        raise ValueError(f"{failure_prefix}{e}") from e

    if not isinstance(parsed_json, dict):
        raise ValueError(f"{failure_prefix}Response is not a JSON object")

    score = parsed_json.get("score")
    reasoning = parsed_json.get("reasoning")
    if score is None or reasoning is None:
        raise ValueError(f"{failure_prefix}Response missing 'score' or 'reasoning' field")

    invalid_score = f"{failure_prefix}Score '{score}' is not a valid number"
    if isinstance(score, bool):
        # float(True) == 1.0 would silently turn a JSON boolean into a score.
        raise ValueError(invalid_score)
    try:
        score_float = float(score)
    except (ValueError, TypeError, OverflowError) as e:
        raise ValueError(invalid_score) from e
    if not math.isfinite(score_float):
        # json accepts NaN/Infinity literals and float() accepts "nan"/"inf";
        # neither is a usable score.
        raise ValueError(invalid_score)

    # Ensure reasoning is a string
    if not isinstance(reasoning, str):
        reasoning = str(reasoning)

    return score_float, reasoning.strip()
