#!/usr/bin/env python3
"""Script to populate violations and accidents sections with all scenes from ground truth files."""

import json
import sys
from pathlib import Path
from typing import Dict, List, Any, Optional

# Put the directory one level above this script's folder on sys.path so the
# `src.*` imports below resolve when the script is run directly.
# `root` is also used by main() to locate the data/evaluation directories.
root = Path(__file__).parent.parent
sys.path.append(str(root))

from src.llm.llms import get_llm
from src.utils.settings import settings
from src.utils.log import logger


def create_violation_analysis_prompt(scene: str, system_violations: List[Dict], system_annotation: str) -> str:
    """Build the LLM prompt that asks whether one scene contains a traffic violation.

    Args:
        scene (str): Description of the scene being judged.
        system_violations (List[Dict]): Violations already reported by the system; each
            entry is read through its 'scene', 'violation' and 'reason' keys (missing
            keys render as empty text).
        system_annotation (str): Annotation text inserted as surrounding context.

    Returns:
        str: The complete prompt, ending with the expected JSON answer format.
    """
    detected_block = ""
    if system_violations:
        # One bullet per system finding, each terminated by its own newline.
        bullets = [
            f"- {item.get('scene', '')}: {item.get('violation', '')} - {item.get('reason', '')}\n"
            for item in system_violations
        ]
        detected_block = "System detected violations:\n" + "".join(bullets)

    return f"""You are an expert traffic safety analyst evaluating violations in dashcam footage.

Scene to analyze: {scene}

System annotation context:
{system_annotation}

{detected_block}

For this specific scene, determine:
1. Is there a traffic violation? (found/not_found)
2. If found, provide a specific, factual reason

Rules for analysis:
- Only mark "found" if there's a clear, observable traffic law violation
- Be specific about what rule was violated (e.g., "Failed to yield", "Exceeded speed limit", "Improper lane change")
- Consider context from the full system annotation
- If system detected violations in related scenes, consider if they apply here

Respond in JSON format:
{{"violation": "found" or "not_found", "reason": "specific violation description or 'No traffic rule violation found'"}}"""


def create_accident_analysis_prompt(scene: str, system_accidents: List[Dict], system_annotation: str) -> str:
    """Build the LLM prompt that asks whether one scene carries accident risk.

    Args:
        scene (str): Description of the scene being judged.
        system_accidents (List[Dict]): Accident risks already reported by the system; each
            entry is read through its 'scene', 'accident' and 'consequence' keys (missing
            keys render as empty text).
        system_annotation (str): Annotation text inserted as surrounding context.

    Returns:
        str: The complete prompt, ending with the expected JSON answer format.
    """
    detected_block = ""
    if system_accidents:
        # One bullet per system finding, each terminated by its own newline.
        bullets = [
            f"- {item.get('scene', '')}: {item.get('accident', '')} - {item.get('consequence', '')}\n"
            for item in system_accidents
        ]
        detected_block = "System detected accident risks:\n" + "".join(bullets)

    return f"""You are an expert traffic safety analyst evaluating accident risks in dashcam footage.

Scene to analyze: {scene}

System annotation context:
{system_annotation}

{detected_block}

For this specific scene, determine:
1. Is there accident risk or potential? (found/not_found)
2. If found, describe the potential consequence

Rules for analysis:
- Only mark "found" if there's genuine collision risk or near-miss potential
- Describe realistic consequences (e.g., "Rear-end collision", "Side-impact crash", "Loss of vehicle control")
- Consider context from the full system annotation
- If system detected risks in related scenes, consider if they apply here

Respond in JSON format:
{{"accident": "found" or "not_found", "consequence": "potential accident description or 'No possible accident is found'"}}"""


def create_direct_analysis_prompt(scene: str, ground_truth_annotation: str) -> str:
    """Build the combined violation + accident prompt used when no system output exists.

    Args:
        scene (str): Description of the scene being judged.
        ground_truth_annotation (str): Annotation text inserted as surrounding context.

    Returns:
        str: A single prompt requesting one JSON object with a "violation" section and
            an "accident" section.
    """
    # Only the header depends on the inputs; the remaining segments are static text.
    header = f"""You are an expert traffic safety analyst evaluating a driving scene from dashcam footage.

Scene to analyze: {scene}

Full context annotation:
{ground_truth_annotation}
"""
    instructions = """
Analyze this specific scene for:
1. Traffic violations (if any)
2. Accident risks or potential (if any)

Rules for analysis:
- Only mark violations as "found" if there's a clear, observable traffic law violation
- Only mark accidents as "found" if there's genuine collision risk or near-miss potential
- Be specific about violations (e.g., "Failed to yield", "Improper lane change", "Following too closely")
- Be realistic about accident consequences (e.g., "Rear-end collision", "Side-impact crash")
- If no violation or accident risk exists, clearly state so
"""
    answer_format = """
Respond in JSON format:
{
  "violation": {
    "violation": "found" or "not_found",
    "reason": "specific violation description or 'No traffic rule violation found'"
  },
  "accident": {
    "accident": "found" or "not_found", 
    "consequence": "potential accident description or 'No possible accident is found'"
  }
}"""
    return header + instructions + answer_format


def _parse_llm_json(content: Any) -> Optional[Dict]:
    """Best-effort extraction of a JSON object from raw LLM reply text.

    Tries the stripped text as-is first, then the span between the first '{' and the
    last '}' so replies wrapped in Markdown fences or surrounding prose still parse.

    Returns:
        Optional[Dict]: The parsed object, or None when no JSON object can be recovered
            (including when ``content`` is not a string).
    """
    if not isinstance(content, str):
        return None

    text = content.strip()
    candidates = [text]
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def _is_valid_section(section: Any, status_key: str, detail_key: str) -> bool:
    """Return True when ``section`` is a dict carrying both keys the caller reads."""
    return isinstance(section, dict) and status_key in section and detail_key in section


def analyze_scene_with_llm(scene: str, system_violations: List[Dict], system_accidents: List[Dict], 
                          system_annotation: str, ground_truth_annotation: str = "", 
                          model_id: Optional[str] = None) -> Dict:
    """Analyze a scene for violations and accidents using LLM.

    When any system output is supplied, two separate prompts (violation, accident) are
    sent with that output as context. Otherwise a single combined prompt built from the
    ground truth annotation is tried first; if its reply is not a well-formed result,
    the two separate prompts are sent with the ground truth annotation as context.

    Args:
        scene (str): Scene to analyze.
        system_violations (List[Dict]): System-detected violations.
        system_accidents (List[Dict]): System-detected accidents.
        system_annotation (str): Full system annotation.
        ground_truth_annotation (str): Ground truth annotation, used as context only
            when no system output is available.
        model_id (str, optional): LLM model to use; defaults to settings.app.llm['main'].

    Returns:
        Dict: ``{"violation": {"violation": ..., "reason": ...},
        "accident": {"accident": ..., "consequence": ...}}``. A section whose reply
        cannot be parsed into that shape is replaced by a "found/not_found" /
        "Analysis failed" entry; if the analysis raises, both sections are the
        template placeholders.
    """
    # Get LLM (errors here propagate to the caller, as before)
    llm = get_llm(model_id) if model_id else get_llm(settings.app.llm['main'])

    try:
        context = system_annotation
        has_system_context = bool(system_annotation or system_violations or system_accidents)

        if not has_system_context:
            # No system output at all: try one combined request first.
            direct_prompt = create_direct_analysis_prompt(scene, ground_truth_annotation)
            combined = _parse_llm_json(llm.invoke(direct_prompt).content)
            if (combined is not None
                    and _is_valid_section(combined.get('violation'), 'violation', 'reason')
                    and _is_valid_section(combined.get('accident'), 'accident', 'consequence')):
                return combined

            # The combined reply was unusable: fall back to the two separate prompts,
            # using the ground truth annotation as context.
            system_violations, system_accidents = [], []
            context = ground_truth_annotation

        # Analyze violations
        violation_prompt = create_violation_analysis_prompt(scene, system_violations, context)
        violation_data = _parse_llm_json(llm.invoke(violation_prompt).content)
        if not _is_valid_section(violation_data, 'violation', 'reason'):
            violation_data = {"violation": "found/not_found", "reason": "Analysis failed"}

        # Analyze accidents
        accident_prompt = create_accident_analysis_prompt(scene, system_accidents, context)
        accident_data = _parse_llm_json(llm.invoke(accident_prompt).content)
        if not _is_valid_section(accident_data, 'accident', 'consequence'):
            accident_data = {"accident": "found/not_found", "consequence": "Analysis failed"}

        return {
            "violation": violation_data,
            "accident": accident_data
        }

    except Exception as e:
        # Best-effort boundary: any failure while prompting degrades to placeholders.
        logger.error(f"LLM analysis failed: {e}")
        return {
            "violation": {"violation": "found/not_found", "reason": "Specific violation description"},
            "accident": {"accident": "found/not_found", "consequence": "Potential accident description"}
        }


# Values that mark an entry as an untouched template. "Analysis failed" is what
# analyze_scene_with_llm emits for an unparseable reply, so it must not be mistaken
# for manual input (otherwise failed files would never be retried).
_PLACEHOLDER_STATUSES = ('found/not_found', '')
_PLACEHOLDER_REASONS = ('Specific violation description', 'Analysis failed', '')
_PLACEHOLDER_CONSEQUENCES = ('Potential accident description', 'Analysis failed', '')


def _has_real_entries(entries: Any, status_key: str, detail_key: str, placeholder_details: tuple) -> bool:
    """Return True if any entry holds a non-placeholder status or detail text."""
    return any(
        entry.get(status_key, '') not in _PLACEHOLDER_STATUSES
        or entry.get(detail_key, '') not in placeholder_details
        for entry in entries or []
    )


def _placeholder_entries(scene: Any) -> tuple:
    """Return the (violation, accident) template entries for one scene."""
    return (
        {"scene": scene, "violation": "found/not_found", "reason": "Specific violation description"},
        {"scene": scene, "accident": "found/not_found", "consequence": "Potential accident description"},
    )


def _read_system_data(ground_truth_file: Path, system_output_dir: Optional[Path]) -> Optional[Any]:
    """Load the system output JSON named after the ground truth file, or None if unavailable."""
    if not system_output_dir:
        return None

    system_output_file = system_output_dir / f"{ground_truth_file.stem}.json"
    if not system_output_file.exists():
        logger.info(f"No system output found for {ground_truth_file.name}")
        return None

    try:
        with open(system_output_file, 'r', encoding='utf-8') as f:
            system_data = json.load(f)
    except (OSError, ValueError) as e:
        # Unreadable or malformed system output only costs us the extra context.
        logger.warning(f"Failed to read system output {system_output_file}: {e}")
        return None

    logger.info(f"Found system output for {ground_truth_file.name}")
    return system_data


def populate_scenes_in_file(ground_truth_file: Path, system_output_dir: Optional[Path] = None, 
                           model_id: Optional[str] = None) -> bool:
    """Populate violations and accidents sections with all scenes from a ground truth file.

    The file is skipped when it has no scenes, when any scene still contains the
    "Example:" template marker, or when both sections already hold non-placeholder
    content. A section that already holds real content is never overwritten; only the
    other one is filled. Entries left over from a failed analysis ("Analysis failed")
    count as placeholders, so such files are re-analyzed on the next run.

    Args:
        ground_truth_file (Path): Path to the ground truth JSON file (rewritten in place).
        system_output_dir (Path, optional): Directory containing system outputs; the file
            with the same stem as ``ground_truth_file`` is used as LLM context.
        model_id (str, optional): LLM model to use for analysis.

    Returns:
        bool: True if file was updated, False if skipped or error occurred.
    """
    try:
        # Read existing file
        with open(ground_truth_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        ground_truth = data.get('ground_truth', {})
        scenes = ground_truth.get('scenes', [])

        # Cheap skip checks first, before any further I/O or LLM calls.
        if not scenes:
            logger.warning(f"No scenes found in {ground_truth_file}")
            return False

        if any("Example:" in scene for scene in scenes):
            logger.info(f"Skipping {ground_truth_file.name} - scenes still contain template examples")
            logger.info("Please run extract_scenes_from_annotation.py first or manually edit scenes")
            return False

        # Detect existing real content so manual input is never overridden.
        violations_populated = _has_real_entries(
            ground_truth.get('violations', []), 'violation', 'reason', _PLACEHOLDER_REASONS
        )
        accidents_populated = _has_real_entries(
            ground_truth.get('accidents', []), 'accident', 'consequence', _PLACEHOLDER_CONSEQUENCES
        )

        if violations_populated and accidents_populated:
            logger.info(f"Skipping {ground_truth_file.name} - violations and accidents already manually populated")
            return False

        if violations_populated:
            logger.info(f"Violations already populated in {ground_truth_file.name} - will only update accidents")

        if accidents_populated:
            logger.info(f"Accidents already populated in {ground_truth_file.name} - will only update violations")

        # Optional system output, used as extra context for the LLM.
        system_data = _read_system_data(ground_truth_file, system_output_dir)
        system_outputs = system_data.get('system_outputs') if isinstance(system_data, dict) else None
        if not isinstance(system_outputs, dict):
            # Unexpected shape: proceed as if no system analysis were available.
            system_outputs = {}
        system_annotation = system_outputs.get('annotation', '') or ''
        system_violations = system_outputs.get('violations', []) or []
        system_accidents = system_outputs.get('accidents', []) or []

        # Get ground truth annotation for LLM analysis
        ground_truth_annotation = ground_truth.get('annotation', '')
        effective_model_id = model_id or settings.app.llm['main']

        if system_data is not None:
            logger.info(f"Using LLM analysis with system output data for {ground_truth_file.name}")
        elif ground_truth_annotation:
            logger.info(f"Using LLM analysis with ground truth annotation for {ground_truth_file.name}")
        else:
            logger.info(f"Using basic LLM analysis for {ground_truth_file.name}")

        logger.info(f"Model: {effective_model_id}")

        # Create violation and accident entries for each scene
        violations_list = []
        accidents_list = []

        for scene in scenes:
            try:
                analysis = analyze_scene_with_llm(
                    scene, system_violations, system_accidents, system_annotation,
                    ground_truth_annotation, effective_model_id
                )
                # Build both entries before committing either, so a malformed result
                # falls back to placeholders for the scene as a whole.
                entries = (
                    {
                        "scene": scene,
                        "violation": analysis['violation']['violation'],
                        "reason": analysis['violation']['reason']
                    },
                    {
                        "scene": scene,
                        "accident": analysis['accident']['accident'],
                        "consequence": analysis['accident']['consequence']
                    },
                )
            except Exception as e:
                logger.warning(f"LLM analysis failed for scene '{scene}': {e}")
                entries = _placeholder_entries(scene)

            violation_entry, accident_entry = entries
            violations_list.append(violation_entry)
            accidents_list.append(accident_entry)

        # Update the data structure (ground_truth is the dict stored in data)
        if not violations_populated:
            ground_truth['violations'] = violations_list
            logger.info(f"Populated {len(scenes)} violation entries with LLM analysis")

        if not accidents_populated:
            ground_truth['accidents'] = accidents_list
            logger.info(f"Populated {len(scenes)} accident entries with LLM analysis")

        # Write updated file
        with open(ground_truth_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        logger.info(f"✅ Updated {ground_truth_file.name}")
        return True

    except Exception as e:
        # Per-file boundary: one bad file must not stop a batch run.
        logger.error(f"❌ Failed to process {ground_truth_file}: {e}")
        return False


def populate_all_ground_truth_files(ground_truth_dir: Path, system_output_dir: Optional[Path] = None, 
                                   model_id: Optional[str] = None) -> None:
    """Populate all ground truth files in a directory.

    Every ``*.json`` file directly inside ``ground_truth_dir`` is passed to
    populate_scenes_in_file, in sorted path order, and a summary is logged. Files for
    which that call returns False (skipped or failed) are counted as skipped.

    Args:
        ground_truth_dir (Path): Directory containing ground truth JSON files.
        system_output_dir (Path, optional): Directory containing system outputs.
        model_id (str, optional): LLM model to use for analysis.
    """
    if not ground_truth_dir.exists():
        logger.error(f"Ground truth directory not found: {ground_truth_dir}")
        return

    # Find all JSON files; sorted because glob order is not guaranteed, which keeps
    # processing order and logs reproducible between runs.
    json_files = sorted(ground_truth_dir.glob("*.json"))

    if not json_files:
        logger.warning(f"No JSON files found in {ground_truth_dir}")
        return

    logger.info(f"Found {len(json_files)} ground truth files")
    logger.info("=" * 50)

    updated_count = 0
    skipped_count = 0

    for json_file in json_files:
        logger.info(f"Processing: {json_file.name}")

        if populate_scenes_in_file(json_file, system_output_dir, model_id):
            updated_count += 1
        else:
            skipped_count += 1

        logger.info("-" * 30)

    # Summary
    logger.info("=" * 50)
    logger.info("SUMMARY:")
    logger.info(f"✅ Updated: {updated_count} files")
    logger.info(f"⏭️ Skipped: {skipped_count} files")
    logger.info(f"📁 Total: {len(json_files)} files")

    if updated_count > 0:
        # A real newline (previously an escaped backslash-n was logged literally).
        logger.info("\n📝 Next steps:")
        logger.info("1. Review the populated files in your IDE")
        logger.info("2. Review LLM-generated analysis and refine as needed")
        logger.info("3. Adjust any 'found/not_found' values or descriptions if needed")
        logger.info("4. Run the RAGAS evaluation when annotations are complete")


def main():
    """Main function to populate ground truth files with scenes.

    Recognized command line arguments (in any position):
        --model=MODEL_ID   Override the default LLM model; everything after the first
                           '=' is taken as the model id. The last occurrence wins.
        --help, -h         Print usage and exit without processing any file.
    Other arguments are ignored.
    """

    print("=" * 60)
    print("POPULATE SCENES IN GROUND TRUTH FILES WITH SMART ANALYSIS")
    print("=" * 60)

    # Parse command line arguments
    model_id = None
    show_help = False

    for arg in sys.argv[1:]:
        if arg in ('--help', '-h'):
            show_help = True
        elif arg.startswith('--model='):
            # partition keeps any further '=' characters inside the model id
            model_id = arg.partition('=')[2] or None

    if show_help:
        # Help takes priority so that asking for usage never starts an LLM run.
        print("Usage: python 2_4_populate_scenes.py [--model=model_id]")
        print("Options:")
        print("  --model=MODEL_ID    Override default LLM model for analysis")
        print("")
        print("Features:")
        print("  ✅ Uses LLM analysis by default (from settings.app.llm['main'])")
        print("  ✅ Automatically uses system outputs when available for enhanced context")
        print("  ✅ Falls back to ground truth annotation when system outputs unavailable")
        print("  ✅ Generates intelligent violation/accident analysis for each scene")
        print("")
        print("Examples:")
        print("  python 2_4_populate_scenes.py")
        print("  python 2_4_populate_scenes.py --model=\"openai:gpt-4o\"")
        print("  python 2_4_populate_scenes.py --model=\"groq:llama-3.3-70b-versatile\"")
        return

    # Configuration
    ground_truth_dir = root / "data" / "evaluation" / "ground_truth"
    default_system_output_dir = root / "data" / "evaluation" / "system_outputs"

    # Use system outputs if available (default behavior)
    has_system_outputs = default_system_output_dir.exists()
    system_output_dir = default_system_output_dir if has_system_outputs else None

    # Check if directory exists
    if not ground_truth_dir.exists():
        print(f"❌ Ground truth directory not found: {ground_truth_dir}")
        print("Please ensure the data/evaluation/ground_truth directory exists")
        return

    print(f"📁 Processing files in: {ground_truth_dir}")
    if has_system_outputs:
        print(f"🔍 System outputs available: {system_output_dir}")
    else:
        print("📝 No system outputs found - will use ground truth annotations")

    # Show which model will be used
    effective_model = model_id or settings.app.llm['main']
    print(f"🤖 Using LLM model: {effective_model}")

    if model_id:
        print("   (Overridden from command line)")
    else:
        print("   (From settings.app.llm['main'])")

    if has_system_outputs:
        print("🧠 Will use intelligent analysis with system output context")
    else:
        print("🧠 Will use intelligent analysis with ground truth context")
    print()

    # Process all files
    populate_all_ground_truth_files(ground_truth_dir, system_output_dir, model_id)

    print()
    print("=" * 60)
    print("SCENE POPULATION COMPLETE")
    print("=" * 60)


if __name__ == "__main__":
    main()