"""
Modern LinkerGeneratorAgent using the unified BaseAgent system
Generates detailed link descriptions from object captions
"""

import os
import re
import json
import ast
import logging
from typing import Dict, Any, List
from agents.base_agent import BaseAgent
from utils.output_parser import OutputFormatError


class LinkerOutput:
    """Utility class for parsing linker JSON output from LLM responses.

    The accepted payload is an object of the form
    ``{"hierarchy": {"structure": ...}, ...}``. Anything that cannot be
    parsed into that shape is rejected with ``OutputFormatError``.
    """

    # A quoted string literal (double- or single-quoted, with backslash
    # escapes). Used so the clean-up regexes below can skip over string
    # contents instead of rewriting them.
    _STRING_PATTERN = (
        r'"(?:\\.|[^"\\])*"'
        r"|'(?:\\.|[^'\\])*'"
    )
    # Group 1 is set only for a ``//`` line comment found outside a string.
    _COMMENT_RE = re.compile(_STRING_PATTERN + r'|(//[^\n]*)')
    # Group 1 is set only for a comma (outside a string) that is directly
    # followed by a closing brace/bracket.
    _TRAILING_COMMA_RE = re.compile(_STRING_PATTERN + r'|(,)(?=\s*[}\]])')

    @classmethod
    def extract_from_response(cls, text: str) -> Dict[str, Any]:
        """
        Extract and parse JSON object from LLM output (hierarchical format only)

        Parsing is attempted in order of strictness: the extracted text as-is,
        then with ``//`` comments and trailing commas removed, then as a
        Python literal via ``ast.literal_eval``.

        Args:
            text: Raw LLM response text

        Returns:
            Parsed JSON object as dictionary containing hierarchical structure

        Raises:
            OutputFormatError: If JSON cannot be extracted or parsed, or if the
                parsed value lacks the ``hierarchy`` / ``structure`` fields
        """
        if not isinstance(text, str):
            raise OutputFormatError(
                f"Expected LLM output as a string, got {type(text).__name__}"
            )

        # Clean up markdown formatting
        text = text.strip()
        if text.startswith('```json'):
            text = text[7:]
        if text.startswith('```'):
            text = text[3:]
        if text.endswith('```'):
            text = text[:-3]

        # Find JSON object in the text (starts with { and ends with })
        match = re.search(r'\{.*\}', text, re.DOTALL)
        if not match:
            raise OutputFormatError("No JSON object found in LLM output. Expected hierarchical format starting with '{'")

        result = cls._parse_object(match.group(0), text)
        cls._validate_structure(result)
        return result

    @classmethod
    def _strip_comments_and_trailing_commas(cls, json_str: str) -> str:
        """Remove ``//`` comments and trailing commas, leaving string literals untouched."""

        def drop_group_one(m):
            # String literals match without group 1 and are kept verbatim.
            return '' if m.group(1) is not None else m.group(0)

        # Comments go first so that a comma followed by a comment and then a
        # closing bracket is still recognised as a trailing comma.
        without_comments = cls._COMMENT_RE.sub(drop_group_one, json_str)
        return cls._TRAILING_COMMA_RE.sub(drop_group_one, without_comments)

    @classmethod
    def _parse_object(cls, json_str: str, original_text: str) -> Any:
        """Parse ``json_str`` with progressively more lenient strategies."""
        decoder = json.JSONDecoder()

        # 1. Untouched text. raw_decode parses the first complete JSON value
        #    and ignores whatever follows it, so trailing prose that happens
        #    to contain braces does not break parsing. Valid JSON is never
        #    run through the clean-up step, so string values containing
        #    "//" (URLs) are preserved.
        try:
            return decoder.raw_decode(json_str)[0]
        except json.JSONDecodeError:
            pass

        # 2. Same text with comments / trailing commas removed.
        cleaned = cls._strip_comments_and_trailing_commas(json_str)
        try:
            return decoder.raw_decode(cleaned)[0]
        except json.JSONDecodeError as json_error:
            # 3. Fallback to ast.literal_eval for Python-like syntax
            try:
                return ast.literal_eval(cleaned)
            except (ValueError, SyntaxError, TypeError,
                    MemoryError, RecursionError) as ast_error:
                raise OutputFormatError(
                    f"Failed to parse JSON: {json_error}\n"
                    f"Also failed with ast.literal_eval: {ast_error}\n"
                    f"Original content: {original_text[:500]}..."
                ) from ast_error

    @staticmethod
    def _validate_structure(result: Any) -> None:
        """Raise ``OutputFormatError`` unless ``result`` is ``{'hierarchy': {'structure': ...}}``-shaped."""
        if not isinstance(result, dict):
            raise OutputFormatError("Expected JSON object, got array or other type")
        if 'hierarchy' not in result:
            raise OutputFormatError("Missing 'hierarchy' field in JSON output")
        hierarchy = result['hierarchy']
        # A non-dict hierarchy (None, string, list, number) cannot hold a
        # 'structure' field; reject it explicitly instead of letting the
        # membership test raise TypeError or do a substring match.
        if not isinstance(hierarchy, dict):
            raise OutputFormatError("Invalid 'hierarchy' field: expected a JSON object")
        if 'structure' not in hierarchy:
            raise OutputFormatError("Missing 'structure' field in hierarchy")


class LinkerGeneratorAgent(BaseAgent):
    """
    Agent for generating detailed link descriptions from object captions.

    Uses the unified BaseAgent system for consistent LLM interaction,
    error handling, and metrics collection.
    """

    def __init__(self, config_manager):
        """
        Initialize the LinkerGeneratorAgent

        Args:
            config_manager: Configuration manager instance, forwarded to
                BaseAgent together with the agent name 'linker_generator'
        """
        super().__init__(config_manager, 'linker_generator')
        self.logger = logging.getLogger(self.__class__.__name__)

    def _load_system_prompt(self) -> str:
        """Load system prompt for link generation"""
        # Imported lazily, at call time, from the existing prompt module
        from prompt.linker import system_prompt
        return system_prompt

    def _format_user_prompt(self, input_data: Dict[str, Any]) -> str:
        """
        Format user prompt for link generation

        Args:
            input_data: Dictionary containing 'caption' key; a missing key
                is treated as an empty caption

        Returns:
            Formatted user prompt
        """
        caption = input_data.get('caption', '')
        return (
            "Please analyze the following object description and generate a "
            f"hierarchical JSON structure as specified. Description: {caption}"
        )

    def parse_response(self, response: str) -> Dict[str, Any]:
        """
        Parse LLM response into structured hierarchical data

        Args:
            response: Raw LLM response

        Returns:
            Dictionary containing hierarchical structure

        Raises:
            OutputFormatError: If response format is invalid
        """
        try:
            result = LinkerOutput.extract_from_response(response)
        except OutputFormatError as e:
            self.logger.error(f"Failed to parse linker response: {e}")
            raise

        # Log structure statistics. This is best-effort: an oddly shaped
        # structure must not turn a successful parse into a failure.
        structure = self._hierarchy_structure(result)
        if structure is not None:
            self.logger.info(
                "Generated hierarchy with %d top-level links, %d total components",
                len(structure),
                self._count_components(structure),
            )

        return result

    @staticmethod
    def _hierarchy_structure(result: Any):
        """Return ``result['hierarchy']['structure']`` if it is a list, else ``None``."""
        hierarchy = result.get('hierarchy') if isinstance(result, dict) else None
        structure = hierarchy.get('structure') if isinstance(hierarchy, dict) else None
        return structure if isinstance(structure, list) else None

    def _count_components(self, structure: List[Dict], level: int = 0) -> int:
        """
        Recursively count all components in the hierarchy

        Every entry of ``structure`` counts as one component. Entries that are
        dictionaries with a non-empty list under 'children' contribute their
        descendants as well; any other shape is counted as a leaf rather than
        raising, because the data originates from LLM output.

        Args:
            structure: List of component entries
            level: Current nesting depth; passed along the recursion but not
                used in the count

        Returns:
            Total number of components, or 0 if ``structure`` is not a list
        """
        if not isinstance(structure, list):
            return 0
        count = len(structure)
        for item in structure:
            if not isinstance(item, dict):
                continue
            children = item.get('children')
            if isinstance(children, list) and children:
                count += self._count_components(children, level + 1)
        return count

    def _prepare_input_data(self, caption: str, **kwargs) -> Dict[str, Any]:
        """
        Prepare input data from method arguments

        Args:
            caption: Object description caption
            **kwargs: Additional arguments (accepted but not used here)

        Returns:
            Dictionary of input data for the agent
        """
        return {'caption': caption}

    def save_output(self, result: Dict[str, Any], output_folder: str, metrics: Dict[str, Any] = None):
        """
        Save generated hierarchical structure to configs folder

        Writes ``<output_folder>/configs/links_hierarchy.json``, creating the
        ``configs`` directory if needed.

        Args:
            result: Generated hierarchical data
            output_folder: Directory to save output
            metrics: Generation metrics (optional); accepted but not used by
                this method
        """
        # Create configs folder
        configs_folder = os.path.join(output_folder, 'configs')
        os.makedirs(configs_folder, exist_ok=True)

        # Save the hierarchical JSON output to configs
        output_path = os.path.join(configs_folder, 'links_hierarchy.json')
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        # The file is already written at this point, so the summary log
        # below must never raise on an unexpected structure shape.
        structure = self._hierarchy_structure(result)
        if structure is not None:
            total = self._count_components(structure)
            self.logger.info(
                "Saved hierarchical structure with %d components to %s",
                total,
                output_path,
            )
        else:
            self.logger.info("Saved hierarchical structure to %s", output_path)
