"""Script to prepare evaluation data for DriveGuard workflow assessment."""

import sys
import json
import argparse
from pathlib import Path
from typing import Optional, Dict

# Add the project root to sys.path so that the `src.` and `evaluation.`
# imports below can be resolved. `root` is the parent of the directory that
# contains this file; main() also uses it to locate the data directories.
root = Path(__file__).parent.parent
sys.path.append(str(root))

from src.llm.workflow.evaluate_driving_video import DrivingVideoEvaluator
from evaluation.ragas_evaluation_setup import DriveGuardEvaluationDataset, DriveGuardEvaluationSample
from src.utils.log import logger


def create_ground_truth_templates(video_directory: Path, output_directory: Path):
    """Create ground truth templates for all videos in a directory.

    One template is written per video through
    ``DriveGuardEvaluationDataset.save_template``, using the file stem as the
    video ID. The templates are meant to be annotated by hand afterwards.

    Args:
        video_directory (Path): Directory containing video files. Only its
            direct children are considered (no recursion).
        output_directory (Path): Directory to save ground truth templates.
    """
    logger.info(f"Creating ground truth templates from videos in: {video_directory}")

    # Match extensions case-insensitively (e.g. ``.MP4``), ignore directories
    # that happen to carry a video-like name, and sort so that runs are
    # reproducible regardless of filesystem listing order.
    video_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
    video_files = sorted(
        path for path in video_directory.glob("*")
        if path.is_file() and path.suffix.lower() in video_extensions
    )

    if not video_files:
        logger.warning(f"No video files found in {video_directory}")
        return

    # Create evaluation dataset
    dataset = DriveGuardEvaluationDataset(output_directory)

    # Create templates for each video
    for video_path in video_files:
        # NOTE: videos that share a stem but differ in extension map to the
        # same video ID.
        video_id = video_path.stem
        logger.info(f"Creating template for: {video_path.name}")

        template_path = dataset.save_template(video_id, str(video_path))
        logger.info(f"Template saved: {template_path}")

    logger.info(f"Created {len(video_files)} ground truth templates")
    logger.info(f"Please manually annotate the templates in: {output_directory}")


def _build_system_output(video_id: str, video_path: Path, results: Dict,
                         experiment_name: Optional[str]) -> Dict:
    """Reshape raw evaluator results into the record saved for one video."""
    return {
        "video_id": video_id,
        "video_path": str(video_path),
        "timestamp": results.get('timestamp'),
        "experiment_name": experiment_name,
        "models_used": results.get('models_used', {}),
        "component_timings": results.get('component_timings', {}),
        "performance_metrics": results.get('performance_metrics', {}),
        "system_outputs": {
            "annotation": results['annotation'],
            "scenes": results['scenes'],
            "violations": [
                {
                    "scene": item['scene'],
                    "violation": item['analysis']['violation'],
                    "reason": item['analysis']['reason'],
                }
                for item in results['rule_analysis']
            ],
            "accidents": [
                {
                    "scene": item['scene'],
                    "accident": item['analysis']['accident'],
                    "consequence": item['analysis']['consequence'],
                }
                for item in results['accident_analysis']
            ],
            "assessment": results['assessment'],
        },
        "evaluation_time": results['evaluation_time'],
        "summary": results['summary'],
    }


def _write_json_atomically(path: Path, payload: Dict) -> None:
    """Write ``payload`` as JSON to ``path`` via a temporary sibling file.

    The final file only appears once it is completely written, so an
    interrupted run cannot leave a truncated output behind.
    """
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
        tmp_path.replace(path)
    finally:
        # Only still present when writing or renaming failed.
        if tmp_path.exists():
            tmp_path.unlink()


def generate_system_outputs(video_directory: Path, output_directory: Path,
                            model_id: Optional[str] = None,
                            model_overrides: Optional[Dict[str, str]] = None,
                            experiment_name: Optional[str] = None):
    """Generate system outputs for evaluation videos.

    Each video is run through ``DrivingVideoEvaluator.evaluate_with_details``
    and the result is saved as ``<video stem>.json`` in ``output_directory``.
    Videos whose output file already exists are skipped. If processing a video
    raises, an error record (``"status": "failed"``) is written in place of
    the output and processing continues with the next video.

    Args:
        video_directory (Path): Directory containing video files. Only its
            direct children are considered (no recursion).
        output_directory (Path): Directory to save system outputs.
        model_id (str): Model ID to use for video annotation.
        model_overrides (dict): Override models for specific components.
        experiment_name (str): Name of the experiment for tracking.
    """
    logger.info(f"Generating system outputs for videos in: {video_directory}")

    # Match extensions case-insensitively (e.g. ``.MP4``), ignore directories
    # that happen to carry a video-like name, and sort so that runs are
    # reproducible regardless of filesystem listing order.
    video_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
    video_files = sorted(
        path for path in video_directory.glob("*")
        if path.is_file() and path.suffix.lower() in video_extensions
    )

    if not video_files:
        logger.warning(f"No video files found in {video_directory}")
        return

    # Create output directory
    output_directory.mkdir(parents=True, exist_ok=True)

    # Initialize evaluator with model overrides
    evaluator = DrivingVideoEvaluator(model_id=model_id, model_overrides=model_overrides)

    # Log experiment information
    if experiment_name:
        logger.info(f"Running experiment: {experiment_name}")
    if model_overrides:
        logger.info(f"Model overrides: {model_overrides}")

    processed = skipped = failed = 0

    # Process each video
    for video_path in video_files:
        video_id = video_path.stem
        output_file = output_directory / f"{video_id}.json"

        # NOTE: error records written by an earlier run also count as
        # existing output; delete them to have the video retried.
        if output_file.exists():
            logger.info(f"Skipping existing output: {output_file}")
            skipped += 1
            continue

        logger.info(f"Processing video: {video_path.name}")

        # Keep the try body limited to producing and saving the output, so
        # that a failure in the follow-up logging cannot replace a good
        # output file with an error record.
        try:
            results = evaluator.evaluate_with_details(video_path)
            system_output = _build_system_output(
                video_id, video_path, results, experiment_name
            )
            _write_json_atomically(output_file, system_output)
        except Exception as e:
            failed += 1
            logger.error(f"Failed to process {video_path}: {e}")

            # Save error information
            error_output = {
                "video_id": video_id,
                "video_path": str(video_path),
                "error": str(e),
                "status": "failed"
            }
            _write_json_atomically(output_file, error_output)
        else:
            processed += 1
            logger.info(f"System output saved: {output_file}")

            assessment = results.get('assessment')
            if isinstance(assessment, dict) and 'safety_score' in assessment:
                logger.info(f"Safety Score: {assessment['safety_score']}/10")
            else:
                logger.warning(f"No safety score in assessment for: {video_path.name}")

    logger.info(f"System output generation completed for {len(video_files)} videos")
    logger.info(f"Processed: {processed}, skipped: {skipped}, failed: {failed}")


def create_evaluation_sample_from_files(ground_truth_file: Path, system_output_file: Path) -> DriveGuardEvaluationSample:
    """Pair one ground truth file with one system output file as a sample.

    The video ID and path are taken from the ground truth file. The five
    sections (annotation, scenes, violations, accidents, assessment) are read
    from the ``ground_truth`` object of the ground truth file and from the
    ``system_outputs`` object of the system output file.

    Args:
        ground_truth_file (Path): Path to ground truth JSON file.
        system_output_file (Path): Path to system output JSON file.

    Returns:
        DriveGuardEvaluationSample: Complete evaluation sample.

    Raises:
        KeyError: If either file lacks one of the expected keys.
    """
    sections = ('annotation', 'scenes', 'violations', 'accidents', 'assessment')

    with open(ground_truth_file, 'r', encoding='utf-8') as gt_handle:
        truth_record = json.load(gt_handle)

    with open(system_output_file, 'r', encoding='utf-8') as sys_handle:
        output_record = json.load(sys_handle)

    expected = truth_record['ground_truth']
    produced = output_record['system_outputs']

    # Collect the constructor arguments: identity first, then the ground
    # truth sections, then the matching system sections.
    sample_fields = {
        'video_id': truth_record['video_id'],
        'video_path': truth_record['video_path'],
    }
    for section in sections:
        sample_fields[f'ground_truth_{section}'] = expected[section]
    for section in sections:
        sample_fields[f'system_{section}'] = produced[section]

    return DriveGuardEvaluationSample(**sample_fields)


def load_evaluation_dataset(ground_truth_dir: Path, system_output_dir: Path) -> DriveGuardEvaluationDataset:
    """Load complete evaluation dataset from ground truth and system output directories.

    Every ``*.json`` file in ``ground_truth_dir`` is paired with the file of
    the same name in ``system_output_dir``. A pair is skipped (with a log
    message) when the system output is missing or is an error record, when the
    ground truth still contains the ``MANUAL_ANNOTATION_REQUIRED``
    placeholder, or when loading the pair raises.

    Args:
        ground_truth_dir (Path): Directory containing ground truth files.
        system_output_dir (Path): Directory containing system output files.

    Returns:
        DriveGuardEvaluationDataset: Complete evaluation dataset.
    """
    dataset = DriveGuardEvaluationDataset(ground_truth_dir)

    # Sorted so that samples are added in a reproducible order.
    for gt_file in sorted(ground_truth_dir.glob("*.json")):
        # The video ID is the filename without its final suffix. Using the
        # stem (rather than removing every ".json" substring) keeps IDs that
        # themselves contain ".json" intact.
        video_id = gt_file.stem
        sys_file = system_output_dir / f"{video_id}.json"

        if not sys_file.exists():
            logger.warning(f"No system output found for: {gt_file}")
            continue

        try:
            # Check if ground truth is properly annotated
            with open(gt_file, 'r', encoding='utf-8') as f:
                gt_data = json.load(f)

            if gt_data['ground_truth']['annotation'] == "MANUAL_ANNOTATION_REQUIRED":
                logger.warning(f"Ground truth not completed: {gt_file}")
                continue

            # A record with status "failed" has no "system_outputs" section,
            # so skip it explicitly instead of failing later with a bare
            # KeyError.
            with open(sys_file, 'r', encoding='utf-8') as f:
                sys_data = json.load(f)

            if sys_data.get('status') == "failed":
                logger.warning(f"System output is an error record, skipping: {sys_file}")
                continue

            # Create evaluation sample
            sample = create_evaluation_sample_from_files(gt_file, sys_file)
            dataset.add_sample(sample)

            logger.info(f"Added evaluation sample: {video_id}")

        except Exception as e:
            logger.error(f"Failed to load sample {video_id}: {e}")

    logger.info(f"Loaded {len(dataset.samples)} evaluation samples")
    return dataset


def parse_arguments(argv: Optional[list] = None):
    """Parse command line arguments.

    Args:
        argv (list, optional): Argument strings to parse. When None (the
            default), argparse reads them from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``model``,
            ``experiment``, ``model_overrides`` (raw JSON string, not yet
            decoded), ``skip_generation`` and ``skip_templates``.
    """
    parser = argparse.ArgumentParser(description="DriveGuard Evaluation Data Preparation")
    parser.add_argument("--model", type=str, help="Override model for video annotation component")
    parser.add_argument("--experiment", type=str, help="Experiment name for tracking")
    parser.add_argument("--model-overrides", type=str, help="JSON string of component model overrides")
    parser.add_argument("--skip-generation", action="store_true", help="Skip system output generation")
    parser.add_argument("--skip-templates", action="store_true", help="Skip ground truth template creation")

    return parser.parse_args(argv)


def main():
    """Run the evaluation data preparation workflow from the command line.

    Steps:
        1. Create ground truth templates (unless ``--skip-templates``).
        2. Generate system outputs (unless ``--skip-generation``).
        3. Print instructions for manual annotation.
        4. Print the next steps.

    Input videos are read from ``data/dashcam`` under the project root;
    templates and outputs are written below ``data/evaluation``. If
    ``--model-overrides`` is not valid JSON, or is not a JSON object, an
    error is logged and the function returns without doing any work.
    """
    args = parse_arguments()

    # Parse model overrides if provided
    model_overrides = None
    if args.model_overrides:
        try:
            model_overrides = json.loads(args.model_overrides)
        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON for model overrides: {e}")
            return

        # Valid JSON is not necessarily an object; reject lists, strings and
        # numbers here instead of handing them on as if they were a mapping.
        if model_overrides is not None and not isinstance(model_overrides, dict):
            logger.error(
                f"Model overrides must be a JSON object, got {type(model_overrides).__name__}"
            )
            return

    # Configuration
    video_dir = root / "data" / "dashcam"
    ground_truth_dir = root / "data" / "evaluation" / "ground_truth"
    system_output_dir = root / "data" / "evaluation" / "system_outputs"

    print("="*60)
    print("DRIVEGUARD EVALUATION DATA PREPARATION")
    print("="*60)

    # Display configuration
    if args.experiment:
        print(f"Experiment: {args.experiment}")
    if args.model:
        print(f"Video annotation model override: {args.model}")
    if model_overrides:
        print(f"Component model overrides: {model_overrides}")

    # Step 1: Create ground truth templates
    if not args.skip_templates:
        print("\n1. Creating Ground Truth Templates")
        print("-" * 40)
        if video_dir.exists():
            create_ground_truth_templates(video_dir, ground_truth_dir)
        else:
            print(f"Video directory not found: {video_dir}")
            print("Please add video files to the data/dashcam directory")
    else:
        print("\n1. Skipping Ground Truth Template Creation")

    # Step 2: Generate system outputs
    if not args.skip_generation:
        print("\n2. Generating System Outputs")
        print("-" * 40)
        if video_dir.exists():
            generate_system_outputs(
                video_dir,
                system_output_dir,
                model_id=args.model,
                model_overrides=model_overrides,
                experiment_name=args.experiment
            )
        else:
            print(f"Video directory not found: {video_dir}")
    else:
        print("\n2. Skipping System Output Generation")

    # Step 3: Instructions for manual annotation
    print("\n3. Manual Annotation Instructions")
    print("-" * 40)
    print("To complete the evaluation setup:")
    print(f"1. Navigate to: {ground_truth_dir}")
    print("2. Open each *.json file")
    print("3. Replace 'MANUAL_ANNOTATION_REQUIRED' with expert annotations")
    print("4. Fill in all ground truth fields based on expert analysis")
    print("5. Run the RAGAS evaluation script")

    # Step 4: Show next steps
    print("\n4. Next Steps")
    print("-" * 40)
    print("After completing manual annotations:")
    print("• Run: python evaluation/run_ragas_evaluation.py")
    print("• View results in: data/evaluation/report/evaluation_report.md")

    print("\n" + "="*60)
    print("EVALUATION SETUP COMPLETE")
    print("="*60)


# Run the preparation workflow only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()