"""
Main CLI interface for system output generation.

Usage:
    uv run python -m evaluation.make_dataset.s5_generate_system_output --component annotation --model gpt-4o-2024-11-20
    uv run python -m evaluation.make_dataset.s5_generate_system_output --status
    uv run python -m evaluation.make_dataset.s5_generate_system_output --component annotation --model gpt-4o-2024-11-20 --videos "0001,0002,0003"
"""

import argparse
import sys
from pathlib import Path
from typing import List, Optional

# Add project root to path for imports.
# The root is taken to be the fourth ancestor directory of this file
# (one ``.parent`` per directory level).
project_root = Path(__file__).parent.parent.parent.parent
# Appended rather than inserted at the front, so entries already on sys.path
# keep precedence when imports are resolved.
sys.path.append(str(project_root))

from .config import create_default_config, AVAILABLE_COMPONENTS
from .utils.file_manager import FileManager
from .utils.model_tracker import ModelTracker
from .generators.video_annotator import VideoAnnotatorGenerator


def create_generator(component: str, config):
    """Instantiate the generator that handles ``component``.

    The annotation generator is already imported at module level. Every other
    generator class is imported only when its component is requested.

    Args:
        component: Component name, one of ``"annotation"``, ``"scene"``,
            ``"violation"``, ``"accident"`` or ``"assessment"``.
        config: Configuration object handed to the generator's constructor.

    Returns:
        A new generator instance built from ``config``.

    Raises:
        ValueError: If no generator exists for ``component``.
    """
    # Fast path: no lazy import needed for the annotation component.
    if component == "annotation":
        return VideoAnnotatorGenerator(config)

    # Resolve the class first, then construct it in a single place.
    if component == "scene":
        from .generators.scene_extractor import SceneExtractorGenerator as generator_cls
    elif component == "violation":
        from .generators.violation_checker import ViolationChecker as generator_cls
    elif component == "accident":
        from .generators.accident_checker import AccidentChecker as generator_cls
    elif component == "assessment":
        from .generators.assessment_generator import AssessmentGenerator as generator_cls
    else:
        raise ValueError(f"Generator not implemented for component: {component}")

    return generator_cls(config)


def parse_video_filter(video_str: Optional[str]) -> Optional[List[str]]:
    """Parse a comma-separated video filter into zero-padded video IDs.

    Each entry is stripped of surrounding whitespace, parsed as an integer and
    re-rendered with at least four digits (``"7"`` becomes ``"0007"``).
    Entries that are not integers, or that are negative, are reported on
    stdout and skipped.

    Args:
        video_str: Raw filter such as ``"1,2,3"``. ``None`` or an empty
            string means that no filter was requested.

    Returns:
        The formatted IDs in input order, or ``None`` when no filter was
        given or none of the entries was valid.
    """
    if not video_str:
        return None

    formatted_videos = []
    for video in (part.strip() for part in video_str.split(',')):
        try:
            # Round-trip through int to make sure the entry is a number.
            video_num = int(video)
        except ValueError:
            video_num = None

        # A negative number would be rendered as e.g. "-001", which is not a
        # 4-digit ID and can never match a video, so reject it as well.
        if video_num is None or video_num < 0:
            print(f"Warning: Invalid video ID '{video}', skipping")
            continue

        formatted_videos.append(f"{video_num:04d}")

    return formatted_videos if formatted_videos else None


def _read_model_list(filename: str) -> List[str]:
    """Read model IDs from ``evaluation/models/<filename>`` under the project root.

    Lines are stripped of surrounding whitespace. Blank lines and lines
    starting with ``#`` are ignored.

    Args:
        filename: Name of the list file inside ``evaluation/models``.

    Returns:
        The model IDs in file order.

    Raises:
        FileNotFoundError: If the list file does not exist.
        ValueError: If the file contains no model IDs.
    """
    model_file = project_root / "evaluation" / "models" / filename

    if not model_file.exists():
        raise FileNotFoundError(f"Model list file not found: {model_file}")

    # Explicit encoding: without it open() falls back to the platform locale,
    # so the same file could decode differently from one machine to another.
    with open(model_file, 'r', encoding='utf-8') as f:
        stripped_lines = (line.strip() for line in f)
        # Skip empty lines and comments
        models = [entry for entry in stripped_lines if entry and not entry.startswith('#')]

    if not models:
        raise ValueError(f"No models found in {filename}")

    return models


def read_annotation_models() -> List[str]:
    """Read model IDs from evaluation/models/annotation.txt.

    Returns:
        The model IDs in file order, without blank lines and ``#`` comments.

    Raises:
        FileNotFoundError: If annotation.txt does not exist.
        ValueError: If annotation.txt lists no models.
    """
    return _read_model_list("annotation.txt")


def read_text_models() -> List[str]:
    """Read model IDs from evaluation/models/text.txt.

    Returns:
        The model IDs in file order, without blank lines and ``#`` comments.

    Raises:
        FileNotFoundError: If text.txt does not exist.
        ValueError: If text.txt lists no models.
    """
    return _read_model_list("text.txt")


def print_status(file_manager: FileManager):
    """Print an overview of every component and model that has outputs.

    The data comes from ``file_manager.get_status_summary()``. For each
    component the model count is printed, followed by one entry per model
    with its output count and output directory.

    Args:
        file_manager: Object providing ``get_status_summary()``.
    """
    summary = file_manager.get_status_summary()
    banner = "=" * 60

    print(banner)
    print("SYSTEM OUTPUT STATUS")
    print(banner)

    # Nothing generated yet: say so and stop.
    if summary["total_components"] == 0:
        print("No system outputs found.")
        return

    for component_name, details in summary["components"].items():
        print(f"\n📁 Component: {component_name}")
        model_total = details['total_models']
        print(f"   Total models: {model_total}")

        for model_name, model_details in details["models"].items():
            output_count = model_details['output_count']
            print(f"   └── {model_name}: {output_count} outputs")
            output_dir = model_details['output_dir']
            print(f"       📂 {output_dir}")


def process_with_model(args, model_id: str, video_filter: Optional[List[str]], project_root: Path) -> int:
    """Run one generation pass for ``args.component`` with a single model.

    Steps: validate the model ID, build the configuration, create the
    generator, open a tracking run, process the videos and close the run.

    Args:
        args: Parsed CLI arguments. Reads ``component``, ``all``, ``fps``,
            ``overwrite`` and, when present, ``project_root``.
        model_id: Model to run. It must be listed in ``SUPPORTED_MODEL_IDS``.
        video_filter: Video IDs to restrict the run to, or ``None`` for no
            restriction.
        project_root: Fallback project root, used when ``args.project_root``
            is not set.

    Returns:
        ``0`` when processing finished. ``1`` when the model is unsupported,
        the generator could not be created, the user interrupted the run or
        processing raised an exception.
    """
    # Validate model ID
    from .config import SUPPORTED_MODEL_IDS
    if model_id not in SUPPORTED_MODEL_IDS:
        print(f"Error: Unsupported model '{model_id}'")
        print(f"Available models: {', '.join(SUPPORTED_MODEL_IDS)}")
        return 1

    # An explicit --project-root must win over the caller's fallback. Path
    # objects are always truthy, so the previous
    # ``project_root or args.project_root`` never looked at the CLI value.
    effective_root = getattr(args, "project_root", None) or project_root

    # Create configuration
    config = create_default_config(args.component, model_id, effective_root, args.all)
    config.fps = args.fps
    config.overwrite_existing = args.overwrite

    print("\nConfiguration:")
    print(f"  Component: {config.component}")
    print(f"  DriveGuard Model: {config.driveguard_model_id}")
    print(f"  API Model: {config.model_id}")
    print(f"  FPS: {config.fps}")
    print(f"  Output directory: {config.model_output_dir}")
    print(f"  Overwrite existing: {config.overwrite_existing}")
    print(f"  Process all videos: {config.process_all_videos}")

    # Create generator
    try:
        generator = create_generator(args.component, config)
    except ValueError as e:
        print(f"Error: {e}")
        return 1

    # Initialize model tracker
    tracking_dir = config.project_root / "data" / "evaluation" / "tracking"
    model_tracker = ModelTracker(tracking_dir)

    # Work out how many videos this run will touch. The count is only used
    # as the planned total for the tracking run.
    videos_to_process = config.get_video_list(video_filter)
    videos_needing_processing = [
        video for video in videos_to_process
        if config.should_process_video(video)
    ]

    # Start tracking
    run_id = model_tracker.start_run(
        config.driveguard_model_id or config.model_id,
        config.component,
        len(videos_needing_processing),
        {
            "fps": config.fps,
            "overwrite_existing": config.overwrite_existing,
            "video_filter": video_filter
        }
    )

    print(f"\nStarted tracking run: {run_id}")

    # Process videos
    try:
        def progress_callback(current: int, total: int, success: bool):
            """Forward the generator's running counters to the tracker.

            The callback arguments are not used. Progress is read from
            ``generator.stats`` instead.
            """
            stats = generator.stats
            model_tracker.update_run_progress(
                run_id,
                stats["processed"] + stats["failed"],
                stats["processed"],
                stats["failed"],
                0.0  # We don't have cumulative time here
            )

        final_stats = generator.process_videos(video_filter, progress_callback)

        # Finish tracking
        model_tracker.finish_run(run_id, final_stats)

        print(f"\nRun {run_id} completed successfully!")
        return 0

    except KeyboardInterrupt:
        print("\nGeneration interrupted by user")
        model_tracker.add_run_error(run_id, {
            "error_type": "user_interrupt",
            "error_message": "Generation interrupted by user"
        })
        return 1

    except Exception as e:
        # Top-level boundary for one model: record the failure on the run
        # and report it through the return code instead of crashing.
        print(f"\nError during generation: {e}")
        model_tracker.add_run_error(run_id, {
            "error_type": "generation_error",
            "error_message": str(e)
        })
        return 1


def print_detailed_status(file_manager: FileManager, component: str, model_id: str):
    """Print a detailed report for one component/model pair.

    The report shows the output count, the output directory, the names of the
    last five entries returned by ``list_output_files`` and the validation
    summary, including up to three validation errors.

    Args:
        file_manager: Object used to locate, count, list and validate outputs.
        component: Component name.
        model_id: Model identifier.
    """
    label = f"{component}/{model_id}"

    # Without a model directory there is nothing to report.
    if not file_manager.get_model_dir(component, model_id).exists():
        print(f"No outputs found for {label}")
        return

    total_outputs = file_manager.count_outputs(component, model_id)
    files = file_manager.list_output_files(component, model_id)

    print(f"\n📊 Detailed Status: {label}")
    print(f"Total outputs: {total_outputs}")
    print(f"Output directory: {file_manager.get_model_dir(component, model_id)}")

    if files:
        print("\nRecent outputs:")
        # Only the tail of the listing is shown.
        for path in files[-5:]:
            print(f"  • {path.name}")

    # Validate outputs
    report = file_manager.validate_outputs(component, model_id)
    print("\nValidation:")
    print(f"  Valid files: {report['valid_files']}")
    print(f"  Invalid files: {report['invalid_files']}")

    problems = report['errors']
    if problems:
        print(f"  Errors: {len(problems)}")
        # Cap the listing at three entries to keep the report short.
        for filename, error in problems[:3]:
            print(f"    • {filename}: {error}")


# Components that can run without --model. Each maps to the model-list file
# it reads and the task description printed before the run.
_MULTI_MODEL_COMPONENTS = {
    "annotation": ("annotation.txt", "annotation"),
    "scene": ("text.txt", "scene extraction"),
    "violation": ("text.txt", "violation checking"),
    "accident": ("text.txt", "accident risk assessment"),
    "assessment": ("text.txt", "comprehensive driving assessment"),
}


def _run_all_models(args, video_filter: Optional[List[str]], root: Path) -> int:
    """Run ``args.component`` once for every model in its model-list file.

    Args:
        args: Parsed CLI arguments. ``args.component`` must be a key of
            ``_MULTI_MODEL_COMPONENTS``.
        video_filter: Video IDs to restrict each run to, or ``None``.
        root: Project root handed to ``process_with_model``.

    Returns:
        ``0`` when every model succeeded. ``1`` when the model list could not
        be read or at least one model failed.
    """
    list_name, task = _MULTI_MODEL_COMPONENTS[args.component]
    read_models = read_annotation_models if list_name == "annotation.txt" else read_text_models

    # Only the read is guarded here, so the "model list" messages cannot be
    # printed for failures that happen later during processing.
    try:
        models = read_models()
    except FileNotFoundError as e:
        print(f"Error: {e}")
        print(f"\nMake sure the {list_name} file exists at: evaluation/models/{list_name}")
        return 1
    except (OSError, ValueError) as e:
        print(f"Error reading model list: {e}")
        return 1

    print(f"\nRunning {task} for {len(models)} models from {list_name}:")
    for i, model in enumerate(models, 1):
        print(f"  {i}. {model}")

    separator = "=" * 60

    # Track results as (model_id, exit_code) pairs.
    results = []

    # Process each model
    for i, model_id in enumerate(models, 1):
        print(f"\n{separator}")
        print(f"Processing model {i}/{len(models)}: {model_id}")
        print(separator)

        try:
            result = process_with_model(args, model_id, video_filter, root)
        except Exception as e:
            # One model blowing up should not discard the remaining models.
            # Record it as failed and keep going.
            print(f"Error processing model {model_id}: {e}")
            result = 1
        results.append((model_id, result))

        # Add some spacing between models
        if i < len(models):
            print("\n" + separator + "\n")

    # Summary report
    print(f"\n{separator}")
    print("MULTI-MODEL PROCESSING COMPLETE")
    print(separator)

    failed_models = [model_id for model_id, result in results if result != 0]
    failed = len(failed_models)
    successful = len(results) - failed

    print(f"Total models: {len(models)}")
    print(f"Successful: {successful}")
    print(f"Failed: {failed}")

    if failed_models:
        print("\nFailed models:")
        for model_id in failed_models:
            print(f"  - {model_id}")

    return 0 if failed == 0 else 1


def main() -> int:
    """Main CLI function.

    Parses the command line and then does one of the following:

    * ``--status``: prints the overall status, or the detailed status when
      both ``--component`` and ``--model`` are given.
    * ``--component`` with ``--model``: runs that component with one model.
    * ``--component`` without ``--model``: runs the component for every model
      listed in its model-list file.

    Returns:
        Process exit code: ``0`` on success, ``1`` on failure. Argument
        errors exit through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description="Generate system outputs for DriveGuard evaluation",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate annotations for all models in annotation.txt (annotation component only)
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component annotation
  
  # Generate scene extraction for all models in text.txt (scene component only)
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component scene
  
  # Generate violation checking for all models in text.txt (violation component only)
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component violation
  
  # Generate annotations for a single model
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component annotation --model "openai:gpt-4o"
  
  # Generate scene extraction for a single model
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component scene --model "openai:gpt-4o"
  
  # Generate violation checking for a single model
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component violation --model "openai:gpt-4o"
  
  # Generate annotations for ALL videos
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component annotation --model "openai:gpt-4o" --all
  
  # Generate for specific videos only  
  uv run python -m evaluation.make_dataset.s5_generate_system_output --component violation --videos "1,2,3"
  
  # Check overall status
  uv run python -m evaluation.make_dataset.s5_generate_system_output --status
  
  # Check specific component status
  uv run python -m evaluation.make_dataset.s5_generate_system_output --status --component violation --model "openai:gpt-4o"
  
Available components: annotation, scene, violation, accident, assessment
Available models: See evaluation/models/annotation.txt (annotation) or evaluation/models/text.txt (scene, violation, accident, assessment)

Note: By default, only videos with ground truth annotations will be processed.
      Use --all to process all videos in the dashcam directory.
      For annotation component, omit --model to process all models in evaluation/models/annotation.txt
      For scene/violation/accident/assessment components, omit --model to process all models in evaluation/models/text.txt
        """
    )

    parser.add_argument(
        "--component",
        choices=AVAILABLE_COMPONENTS,
        help="Component to generate outputs for"
    )

    parser.add_argument(
        "--model",
        help="Model ID to use for generation"
    )

    parser.add_argument(
        "--videos",
        help="Comma-separated list of video IDs to process (e.g., '1,2,3')"
    )

    parser.add_argument(
        "--status",
        action="store_true",
        help="Show current status of system outputs"
    )

    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Overwrite existing outputs"
    )

    parser.add_argument(
        "--fps",
        type=int,
        default=2,
        help="Frames per second for video processing (default: 2)"
    )

    parser.add_argument(
        "--project-root",
        type=Path,
        help="Project root directory (auto-detected if not specified)"
    )

    parser.add_argument(
        "--all",
        action="store_true",
        help="Process all videos instead of only those with ground truth"
    )

    args = parser.parse_args()

    # Initialize file manager. The component passed here is only used to
    # obtain a config whose output_base_dir feeds the file manager.
    config = create_default_config("annotation", project_root=args.project_root)
    file_manager = FileManager(config.output_base_dir)

    # Handle status request
    if args.status:
        if args.component and args.model:
            print_detailed_status(file_manager, args.component, args.model)
        else:
            print_status(file_manager)
        return 0

    # Validate required arguments for generation
    if not args.component:
        parser.error("--component is required for generation")

    # Parse video filter
    video_filter = parse_video_filter(args.videos)

    # A filter that was given but produced no usable ID must not silently
    # widen the run to the default video set.
    if args.videos and not video_filter:
        parser.error("--videos did not contain any valid video IDs")

    if video_filter:
        print(f"Will process {len(video_filter)} specific videos: {', '.join(video_filter)}")
    elif args.all:
        print("Will process all available videos")
    else:
        print("Will process only videos with ground truth")

    # Honour --project-root for generation as well. Fall back to the
    # auto-detected module-level root only when the flag is absent.
    effective_root = args.project_root or project_root

    # Single model processing
    if args.model:
        return process_with_model(args, args.model, video_filter, effective_root)

    # No --model: only components with a model-list file can continue.
    if args.component not in _MULTI_MODEL_COMPONENTS:
        parser.error("--model is required for generation")

    return _run_all_models(args, video_filter, effective_root)


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())