"""
SceneExtractor system output generator.
"""

import json
import sys
import re
from pathlib import Path
from typing import Any, Dict, List, Optional

# Append the project root to sys.path so the absolute ``src.*`` imports below
# can be resolved. The root is computed as five ``.parent`` hops from this
# file's path, i.e. four directories above the directory containing it.
# NOTE(review): this depends on the file's depth in the tree — moving the file
# requires updating the number of ``.parent`` hops.
project_root = Path(__file__).parent.parent.parent.parent.parent
sys.path.append(str(project_root))

from src.llm.agent.scene_extraction import SceneExtractionResult
from src.llm.agent.agent_prompt import scene_extraction_prompt
from src.llm.llms import get_llm
from .base_generator import BaseSystemGenerator


class SceneExtractorGenerator(BaseSystemGenerator):
    """Generator for SceneExtractor system outputs."""
    
    def __init__(self, config):
        """Set up the LLM and the scene-extraction chain for this generator.

        Args:
            config: Generator configuration. ``config.driveguard_model_id`` is
                used as the model id when truthy; otherwise ``config.model_id``.
        """
        super().__init__(config)

        # Resolve which model to load, then keep both the id and the client.
        selected_model = config.driveguard_model_id or config.model_id
        self.llm = get_llm(selected_model)
        self.model_id = selected_model

        # The capability probe must run before the chain is assembled, because
        # the chain builder reads ``uses_structured_output``.
        self.uses_structured_output = self._test_structured_output()
        self.scene_extractor = self._create_scene_extractor()
    
    def _test_structured_output(self) -> bool:
        """Test if the model supports structured output."""
        try:
            # Check if this is a gateway model that likely doesn't support json_schema
            if self.model_id.startswith("gateway:"):
                print(f"Model {self.model_id} detected as gateway model, using text parsing")
                return False
            
            # Also check for other models known to have issues with structured output
            problematic_patterns = ["together.ai", "huggingface", "replicate"]
            if any(pattern in self.model_id.lower() for pattern in problematic_patterns):
                print(f"Model {self.model_id} detected as potentially incompatible with structured output, using text parsing")
                return False
            
            # For other models, try to create a structured output chain
            test_extractor = self.llm.with_structured_output(SceneExtractionResult)
            return True
        except Exception as e:
            print(f"Model {self.model_id} failed structured output test: {e}, using text parsing")
            return False
    
    def _create_scene_extractor(self):
        """Assemble the prompt-to-LLM chain used for scene extraction.

        When ``uses_structured_output`` is set, the chain pipes the prompt into
        the LLM wrapped with ``SceneExtractionResult`` structured output. If
        building that chain raises, the flag is cleared and the plain chain is
        returned instead. Both variants are wrapped with ``with_retry()``.
        """
        if self.uses_structured_output:
            try:
                return scene_extraction_prompt | self.llm.with_structured_output(SceneExtractionResult).with_retry()
            except Exception:
                # Construction failed: remember that responses must be parsed
                # as text, then fall through to the plain chain below.
                self.uses_structured_output = False

        return scene_extraction_prompt | self.llm.with_retry()
        
    def get_component_name(self) -> str:
        """Return the component name."""
        return "scene"
    
    def get_ground_truth_list(self, video_filter: Optional[List[str]] = None) -> List[Path]:
        """Get list of ground truth files to process instead of video files."""
        ground_truth_dir = self.config.ground_truth_dir
        all_ground_truth = list(ground_truth_dir.glob("*.json"))
        all_ground_truth.sort()  # Ensure consistent ordering
        
        if video_filter:
            # Filter by video IDs (e.g., ["0000", "0001"])
            filtered_files = []
            for gt_file in all_ground_truth:
                video_id = gt_file.stem.split('_')[0]  # Extract ID from filename
                if video_id in video_filter:
                    filtered_files.append(gt_file)
            return filtered_files
        
        return all_ground_truth
    
    def get_ground_truth_path(self, video_path: Path) -> Path:
        """Get ground truth annotation file path for a video."""
        video_id = video_path.stem
        ground_truth_dir = self.config.project_root / "data" / "evaluation" / "ground_truth"
        ground_truth_path = ground_truth_dir / f"{video_id}.json"
        return ground_truth_path
    
    def _parse_scenes_from_text(self, text_response: str) -> list[str]:
        """Parse scenes from text response when structured output is not available."""
        scenes = []
        
        # Try to find JSON in the response first
        json_match = re.search(r'\{[^}]*"scenes"[^}]*\}', text_response, re.DOTALL)
        if json_match:
            try:
                parsed = json.loads(json_match.group())
                if 'scenes' in parsed and isinstance(parsed['scenes'], list):
                    return parsed['scenes']
            except json.JSONDecodeError:
                pass
        
        # Fallback: Look for numbered lists, bullet points, or line-separated scenes
        # Look for patterns like "1. scene", "- scene", or just lines with scenes
        lines = text_response.split('\n')
        
        for line in lines:
            line = line.strip()
            if not line:
                continue
            
            # Remove numbering and bullet points
            scene_text = re.sub(r'^[\d\-\*\.]+\s*', '', line)
            scene_text = scene_text.strip('"\'')
            
            # Skip very short lines or lines that look like headers
            if len(scene_text) > 20 and not any(keyword in scene_text.lower() 
                                              for keyword in ['scene', 'extract', 'annotation']):
                scenes.append(scene_text)
        
        # If we still don't have scenes, try to split by periods or other delimiters
        if not scenes:
            # Look for the main content and try to split it
            main_content = re.sub(r'(Here are the|The scenes are|Scenes:|Scene \d+:)', '', text_response, flags=re.IGNORECASE)
            potential_scenes = [s.strip() for s in re.split(r'[.]\s*(?=[A-Z])', main_content) if len(s.strip()) > 20]
            scenes.extend(potential_scenes[:5])  # Max 5 scenes
        
        return scenes[:5]  # Limit to 5 scenes maximum
    
    def generate_output_from_ground_truth(self, ground_truth_path: Path) -> list[str]:
        """
        Generate scene extraction output from ground truth annotation file.
        
        Args:
            ground_truth_path: Path to the ground truth JSON file
            
        Returns:
            List of extracted scenes
        """
        try:
            with open(ground_truth_path, 'r') as f:
                ground_truth_data = json.load(f)
            
            # Extract annotation text
            if 'ground_truth' in ground_truth_data and 'annotation' in ground_truth_data['ground_truth']:
                annotation_text = ground_truth_data['ground_truth']['annotation']
            else:
                raise ValueError(f"Invalid ground truth format in {ground_truth_path}")
            
            # Use the initialized scene_extractor to extract scenes with runtime fallback
            try:
                result = self.scene_extractor.invoke({'annotation': annotation_text})
                
                if self.uses_structured_output:
                    # Structured output should return a dict with 'scenes' key
                    scenes = result['scenes'] if isinstance(result, dict) else result
                else:
                    # Unstructured output needs parsing
                    if hasattr(result, 'content'):
                        # LangChain message object - extract content
                        scenes = self._parse_scenes_from_text(result.content)
                    elif isinstance(result, str):
                        scenes = self._parse_scenes_from_text(result)
                    elif isinstance(result, dict) and 'scenes' in result:
                        scenes = result['scenes']
                    else:
                        # Try to parse the result as text
                        text_content = str(result)
                        if hasattr(result, 'content'):
                            text_content = result.content
                        scenes = self._parse_scenes_from_text(text_content)
                        
            except Exception as structured_error:
                # If structured output fails at runtime, fall back to text parsing
                if self.uses_structured_output and ("tool" in str(structured_error).lower() or "json_schema" in str(structured_error).lower()):
                    print(f"Warning: Structured output failed for model {self.model_id}, falling back to text parsing")
                    print(f"Error details: {structured_error}")
                    
                    # Create a text-only extractor and retry
                    fallback_extractor = scene_extraction_prompt | self.llm.with_retry()
                    result = fallback_extractor.invoke({'annotation': annotation_text})
                    
                    # Parse as text
                    if hasattr(result, 'content'):
                        scenes = self._parse_scenes_from_text(result.content)
                    elif isinstance(result, str):
                        scenes = self._parse_scenes_from_text(result)
                    else:
                        text_content = str(result)
                        scenes = self._parse_scenes_from_text(text_content)
                else:
                    # Re-raise if it's not a structured output issue
                    raise structured_error
            
            return scenes
            
        except Exception as e:
            raise Exception(f"Failed to generate scene extraction: {e}")
    
    def generate_output(self, video_path: Path) -> list[str]:
        """
        Generate scene extraction output from ground truth annotation.
        
        Args:
            video_path: Path to the video file (used to find corresponding ground truth)
            
        Returns:
            List of extracted scenes
        """
        # Find corresponding ground truth file
        ground_truth_path = self.get_ground_truth_path(video_path)
        return self.generate_output_from_ground_truth(ground_truth_path)
    
    def create_output_metadata(
        self, 
        video_path: Path, 
        content: Any,
        generation_time: float,
        additional_metadata: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Create output metadata with SceneExtractor-specific info.

        Args:
            video_path: Path passed through to the base implementation.
            content: Generated content; its length is recorded as the scene
                count when it is a list, otherwise the count is 0.
            generation_time: Passed through to the base implementation.
            additional_metadata: Optional extra entries. They are merged over
                the scene defaults, so a caller-supplied key wins on conflict.

        Returns:
            Whatever the base class ``create_output_metadata`` returns for the
            merged metadata.
        """
        scene_metadata: Dict[str, Any] = {
            "scene_count": len(content) if isinstance(content, list) else 0,
            "model_type": "text",
            "prompt_type": "scene_extraction",
            "source": "ground_truth_annotation",
        }

        if additional_metadata:
            scene_metadata.update(additional_metadata)

        return super().create_output_metadata(
            video_path,
            content,
            generation_time,
            scene_metadata,
        )
    
    def process_videos(
        self, 
        video_filter: Optional[List[str]] = None,
        progress_callback: Optional[callable] = None
    ) -> Dict[str, Any]:
        """
        Process multiple ground truth files for scene extraction.
        Override the base method to work with ground truth files instead of video files.
        """
        # Get list of ground truth files to process
        ground_truth_files = self.get_ground_truth_list(video_filter)
        
        # For each ground truth file, we need to create a corresponding "video path"
        # for the output file naming system to work correctly
        videos_to_process = []
        for gt_file in ground_truth_files:
            # Create a fake video path based on the ground truth filename
            video_name = gt_file.stem + ".mp4"  # Convert 0000_something.json -> 0000_something.mp4
            fake_video_path = self.config.dashcam_videos_dir / video_name
            
            # Check if we should process this file
            if self.config.should_process_video(fake_video_path):
                videos_to_process.append((fake_video_path, gt_file))
        
        self.stats["total_videos"] = len(ground_truth_files)
        self.stats["skipped"] = len(ground_truth_files) - len(videos_to_process)
        self.stats["start_time"] = self._get_current_time()
        
        print(f"Found {len(ground_truth_files)} total ground truth files")
        print(f"Processing {len(videos_to_process)} files")
        print(f"Skipping {self.stats['skipped']} existing files")
        print(f"Component: {self.get_component_name()}")
        print(f"Model: {self.config.model_id}")
        print()
        
        # Process each ground truth file
        for i, (video_path, gt_file) in enumerate(videos_to_process, 1):
            print(f"[{i}/{len(videos_to_process)}] Processing {gt_file.name}...")
            
            success = self.process_ground_truth_file(video_path, gt_file)
            
            if success:
                self.stats["processed"] += 1
            else:
                self.stats["failed"] += 1
            
            # Call progress callback if provided
            if progress_callback:
                progress_callback(i, len(videos_to_process), success)
        
        self.stats["end_time"] = self._get_current_time()
        
        # Print final statistics
        self.print_summary()
        
        return self.stats
    
    def process_ground_truth_file(self, video_path: Path, ground_truth_path: Path) -> bool:
        """
        Process a single ground truth file and save output.
        
        Args:
            video_path: Fake video path for output file naming
            ground_truth_path: Path to the ground truth file
            
        Returns:
            True if successful, False otherwise
        """
        import time
        
        try:
            start_time = time.time()
            
            # Generate the output from ground truth file
            content = self.generate_output_from_ground_truth(ground_truth_path)
            
            generation_time = time.time() - start_time
            
            # Create standardized output using the fake video path for file naming
            output_data = self.create_output_metadata(
                video_path, 
                content, 
                generation_time
            )
            
            # Save to file
            if self.save_output(video_path, output_data):
                print(f"✓ Completed {ground_truth_path.name} in {generation_time:.2f}s")
                return True
            else:
                print(f"✗ Failed to save output for {ground_truth_path.name}")
                return False
                
        except Exception as e:
            print(f"✗ Error processing {ground_truth_path.name}: {e}")
            return False
    
    def _get_current_time(self) -> str:
        """Get current time as ISO string."""
        from datetime import datetime
        return datetime.now().isoformat()