#!/usr/bin/env python3
"""Script to extract scenes from ground truth annotations using SceneExtractor."""

import json
import sys
from pathlib import Path
from typing import Dict, List, Any

# Add the project root to the import path. The root is taken to be the parent
# of the directory containing this script; it is appended to the end of
# sys.path (so it has the lowest lookup priority) before the `src.*` imports
# below are executed.
root = Path(__file__).parent.parent
sys.path.append(str(root))

from src.llm.agent.scene_extraction import SceneExtractor
from src.utils.log import logger


def extract_scenes_from_file(ground_truth_file: Path, scene_extractor: SceneExtractor) -> bool:
    """Extract scenes from the annotation in a ground truth file.

    The file is parsed as JSON and ``ground_truth.annotation`` is passed to
    ``scene_extractor.extract``. A non-empty result replaces
    ``ground_truth.scenes`` and the file is rewritten in place.

    The file is left untouched when:
      * the annotation is missing, empty, or still the
        ``MANUAL_ANNOTATION_REQUIRED`` placeholder;
      * the existing scenes contain no ``"Example:"`` template marker and
        none of them is empty or the placeholder (treated as manually
        populated);
      * the extractor returns nothing;
      * any exception is raised (it is logged and swallowed).

    Args:
        ground_truth_file (Path): Path to the ground truth JSON file.
        scene_extractor (SceneExtractor): Initialized scene extractor.

    Returns:
        bool: True if file was updated, False if skipped or error occurred.
    """
    try:
        # Read existing file
        with open(ground_truth_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # A JSON ``null`` for "ground_truth" is treated the same as a missing key.
        ground_truth = data.get('ground_truth') or {}

        # Check if annotation exists and is not placeholder
        annotation = ground_truth.get('annotation', '')

        if not annotation or annotation == "MANUAL_ANNOTATION_REQUIRED":
            logger.warning(f"No annotation found or still placeholder in {ground_truth_file}")
            return False

        # A JSON ``null`` for "scenes" is treated as "no scenes yet".
        existing_scenes = ground_truth.get('scenes') or []

        # Template examples indicate the scenes have not been manually edited.
        has_template_examples = any("Example:" in scene for scene in existing_scenes)

        # Skip if scenes are already populated with real content
        if existing_scenes and not has_template_examples:
            # Any empty or placeholder entry means the list is not finished yet.
            has_real_scenes = all(
                scene not in ["MANUAL_ANNOTATION_REQUIRED", ""]
                for scene in existing_scenes
            )
            if has_real_scenes:
                logger.info(f"Skipping {ground_truth_file.name} - scenes already manually populated")
                return False

        logger.info(f"Extracting scenes from annotation in {ground_truth_file.name}")
        logger.debug(f"Annotation length: {len(annotation)} characters")

        # Extract scenes using SceneExtractor
        extracted_scenes = scene_extractor.extract(annotation)

        if not extracted_scenes:
            logger.warning(f"No scenes extracted from {ground_truth_file.name}")
            return False

        # Update scenes in the data (``ground_truth`` is the dict held by ``data``
        # here, because a non-empty annotation was read from it above).
        ground_truth['scenes'] = extracted_scenes

        # Serialize BEFORE opening the file for writing: opening with 'w'
        # truncates it, so a serialization error afterwards would destroy the
        # existing ground truth content.
        serialized = json.dumps(data, indent=2, ensure_ascii=False)

        # Write updated file
        with open(ground_truth_file, 'w', encoding='utf-8') as f:
            f.write(serialized)

        logger.info(f"✅ Extracted {len(extracted_scenes)} scenes for {ground_truth_file.name}")
        for i, scene in enumerate(extracted_scenes, 1):
            # str() keeps the preview from raising on non-string scenes, which
            # would otherwise report a failure after the file was written.
            logger.info(f"   Scene {i}: {str(scene)[:80]}...")

        return True

    except Exception as e:
        logger.error(f"❌ Failed to extract scenes from {ground_truth_file}: {e}")
        return False


def extract_scenes_from_all_files(ground_truth_dir: Path) -> None:
    """Extract scenes from annotations in all ground truth files.

    Every ``*.json`` file directly inside ``ground_truth_dir`` is passed to
    ``extract_scenes_from_file`` with one shared ``SceneExtractor``; a summary
    of the outcome is logged at the end.

    Note: ``extract_scenes_from_file`` catches its own exceptions and returns
    False, so per-file errors are counted under "Skipped". "Failed" only
    counts exceptions that escape that call.

    Args:
        ground_truth_dir (Path): Directory containing ground truth JSON files.
    """
    # is_dir() is False for a missing path as well as for a regular file.
    if not ground_truth_dir.is_dir():
        logger.error(f"Ground truth directory not found: {ground_truth_dir}")
        return

    # Find all JSON files; sorted so the processing order is deterministic
    # (glob order is otherwise unspecified).
    json_files = sorted(ground_truth_dir.glob("*.json"))

    if not json_files:
        logger.warning(f"No JSON files found in {ground_truth_dir}")
        return

    logger.info(f"Found {len(json_files)} ground truth files")
    logger.info("Initializing SceneExtractor...")

    # Initialize scene extractor once and reuse it for every file
    try:
        scene_extractor = SceneExtractor()
        logger.info("✅ SceneExtractor initialized successfully")
    except Exception as e:
        logger.error(f"❌ Failed to initialize SceneExtractor: {e}")
        logger.error("Make sure your API keys and settings are properly configured")
        return

    logger.info("=" * 50)

    updated_count = 0
    skipped_count = 0
    failed_count = 0

    for json_file in json_files:
        logger.info(f"Processing: {json_file.name}")

        try:
            if extract_scenes_from_file(json_file, scene_extractor):
                updated_count += 1
            else:
                skipped_count += 1
        except Exception as e:
            logger.error(f"Failed to process {json_file.name}: {e}")
            failed_count += 1

        logger.info("-" * 30)

    # Summary
    logger.info("=" * 50)
    logger.info("SUMMARY:")
    logger.info(f"✅ Updated: {updated_count} files")
    logger.info(f"⏭️ Skipped: {skipped_count} files")
    logger.info(f"❌ Failed: {failed_count} files")
    logger.info(f"📁 Total: {len(json_files)} files")

    if updated_count > 0:
        # A real newline separates the summary from the follow-up hints (the
        # previous literal was an escaped backslash and logged "\n" verbatim).
        logger.info("\n📝 Next steps:")
        logger.info("1. Review the extracted scenes in your IDE")
        logger.info("2. Edit scenes manually if needed for better accuracy")
        logger.info("3. Run populate_scenes.py to fill violations/accidents")
        logger.info("4. Complete manual annotation of violations and accidents")


def process_single_file(file_path: str) -> None:
    """Run scene extraction on exactly one ground truth file.

    The path must exist and carry a ``.json`` suffix; otherwise an error is
    logged and nothing else happens. A ``SceneExtractor`` is created for this
    single run and the outcome is logged.

    Args:
        file_path (str): Path to the specific JSON file to process.
    """
    target = Path(file_path)

    # Guard clauses: bail out early on unusable input.
    if not target.exists():
        logger.error(f"File not found: {target}")
        return

    if target.suffix != '.json':
        logger.error(f"File must be a JSON file: {target}")
        return

    separator = "=" * 50

    logger.info(f"Processing single file: {target.name}")
    logger.info("Initializing SceneExtractor...")

    try:
        extractor = SceneExtractor()
        logger.info("✅ SceneExtractor initialized successfully")
    except Exception as e:
        logger.error(f"❌ Failed to initialize SceneExtractor: {e}")
        return

    logger.info(separator)
    updated = extract_scenes_from_file(target, extractor)
    logger.info(separator)

    # A False result covers both "skipped" and "failed"; the helper does not
    # distinguish them.
    outcome = (
        "✅ Scene extraction completed successfully"
        if updated
        else "⏭️ Scene extraction skipped or failed"
    )
    logger.info(outcome)


def main():
    """Command-line entry point for scene extraction.

    With a command-line argument, only that file is processed (single file
    mode). Without one, every JSON file in ``data/evaluation/ground_truth``
    under the project root is processed.
    """
    banner = "=" * 60

    print(banner)
    print("EXTRACT SCENES FROM GROUND TRUTH ANNOTATIONS")
    print(banner)

    # Default location of the ground truth files, relative to the project root
    gt_dir = root / "data" / "evaluation" / "ground_truth"

    # The first positional argument, if any, selects single file mode;
    # further arguments are ignored.
    cli_args = sys.argv[1:]
    if cli_args:
        target = cli_args[0]
        print(f"🎯 Single file mode: {target}")
        print()
        process_single_file(target)
        return

    # Batch mode needs the directory to be present
    if not gt_dir.exists():
        print(f"❌ Ground truth directory not found: {gt_dir}")
        print("Please ensure the data/evaluation/ground_truth directory exists")
        return

    print(f"📁 Processing files in: {gt_dir}")
    print("🔄 This will use the SceneExtractor agent to automatically extract scenes")
    print("💡 Requires API access (OpenAI/Groq/etc.) configured in your settings")
    print()

    extract_scenes_from_all_files(gt_dir)

    print()
    print(banner)
    print("SCENE EXTRACTION COMPLETE")
    print(banner)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()