"""Main CLI interface for DriveGuard component evaluation."""

import argparse
import sys
from pathlib import Path
from typing import Dict, List, Any, Optional

# Make the directory three levels above this file importable. It is appended
# to sys.path, so entries already on the path take precedence over it.
# NOTE(review): presumably that directory is the project root — confirm
# against the repository layout.
root = Path(__file__).parent.parent.parent
sys.path.append(str(root))

from .config import ModelConfigLoader, EvaluationConfig
from .utils import EvaluationStateManager, DataLoader, EvaluationCache
from .evaluator import ComponentEvaluator
from .metrics import TimingMetrics
from .reporting import ReportGenerator


def setup_cli_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the component-evaluation CLI.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--component`` (restricted to
        the five component names) plus boolean switches for evaluation
        behaviour, information commands and output options.
    """
    # Shown verbatim below the option list: RawDescriptionHelpFormatter keeps
    # the line breaks and indentation of this text exactly as written.
    usage_examples = """
Examples:
  # Evaluate annotation component (resume by default)
  uv run python -m evaluation.component_eval --component annotation
  
  # Force re-evaluation of all models
  uv run python -m evaluation.component_eval --component scene --overwrite
  
  # Evaluate all components
  uv run python -m evaluation.component_eval --all-components
  
  # Check evaluation status
  uv run python -m evaluation.component_eval --status
  
  # Show model configuration
  uv run python -m evaluation.component_eval --show-models
        """

    cli = argparse.ArgumentParser(
        description="DriveGuard Component Evaluation System",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # The only option that takes a value: which single component to evaluate.
    cli.add_argument(
        '--component',
        choices=['annotation', 'scene', 'violation', 'accident', 'assessment'],
        help='Evaluate specific component',
    )

    # Every remaining option is an on/off switch. They are registered in the
    # order they should appear in --help: component selection, evaluation
    # behaviour (resume is the default), information commands, output options.
    switches = (
        ('--all-components', 'Evaluate all components'),
        ('--overwrite', 'Force re-evaluation of all models (ignore cached results)'),
        ('--status', 'Show evaluation progress status'),
        ('--show-models', 'Display current model configuration'),
        ('--validate-models', 'Validate model configuration files'),
        ('--export-csv', 'Export results to CSV after evaluation'),
        ('--report', 'Generate detailed evaluation report'),
        ('--timing-analysis', 'Show detailed timing analysis across all models'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)

    return cli


def show_evaluation_status():
    """Print the evaluation progress table and a suggested next command.

    Reads the progress summary from ``EvaluationStateManager`` and prints one
    row per component followed by a totals row. While evaluations remain, the
    component with the lowest completion percentage is suggested as the next
    one to run; otherwise the follow-up commands are listed. Any exception is
    reported on stdout instead of being raised.
    """
    try:
        summary = EvaluationStateManager().get_progress_summary()

        print("\n" + "="*60)
        print("DRIVEGUARD COMPONENT EVALUATION STATUS")
        print("="*60)

        if not summary:
            print("❌ No evaluation data found.")
            return

        divider = "-" * 70

        # Summary table: header, one row per component, then the totals row.
        print(f"\n{'Component':<20} {'Total Models':<12} {'Completed':<10} {'Remaining':<10} {'Progress':<10}")
        print(divider)

        grand_total = 0
        grand_done = 0

        for name, entry in summary.items():
            model_count = entry['total_models']
            done = len(entry['completed_models'])
            pending = len(entry['pending_models'])
            percent = entry['completion_percentage']

            grand_total += model_count
            grand_done += done

            if pending == 0:
                marker = "✅"
            else:
                marker = "⏳"
            print(f"{name:<20} {model_count:<12} {done:<10} {pending:<10} {percent:>6.1f}% {marker}")

        if grand_total > 0:
            overall = grand_done / grand_total * 100
        else:
            overall = 0
        print(divider)
        print(f"{'TOTAL':<20} {grand_total:<12} {grand_done:<10} {grand_total-grand_done:<10} {overall:>6.1f}%")

        print(f"\nOverall Progress: {grand_done}/{grand_total} evaluations complete ({overall:.1f}%)")

        if grand_done < grand_total:
            # min() returns the first entry with the lowest percentage, so on
            # ties the earliest component in the summary is the one suggested.
            laggard_name, laggard = min(
                summary.items(),
                key=lambda item: item[1]['completion_percentage'],
            )

            if laggard_name and laggard['pending_models']:
                print(f"\nNext recommended action:")
                print(f"  uv run python -m evaluation.component_eval --component {laggard_name}")
        else:
            print("\n🎉 All evaluations complete!")
            print("\nNext steps:")
            for follow_up in ("--timing-analysis", "--report", "--export-csv"):
                print("  uv run python -m evaluation.component_eval " + follow_up)

    except Exception as e:
        print(f"❌ Failed to show status: {e}")


def display_model_configuration():
    """Print the configured models per component and how the lists overlap.

    Output consists of a per-component count table, the numbered annotation
    and text model lists, and a short overlap analysis (models common to both
    lists and models found only in the text list). Any exception is reported
    on stdout instead of being raised.
    """
    try:
        print("\n" + "="*60)
        print("CURRENT MODEL CONFIGURATION")
        print("="*60)

        per_component = ModelConfigLoader.get_all_models()

        # Count table, one row per component.
        print(f"\n{'Component':<20} {'Model Count':<12} {'Source File'}")
        print("-" * 60)

        for name, model_list in per_component.items():
            # Only the annotation component is shown against annotation.txt;
            # every other component is shown against text.txt.
            if name == 'annotation':
                source_file = 'annotation.txt'
            else:
                source_file = 'text.txt'
            print(f"{name:<20} {len(model_list):<12} evaluation/models/{source_file}")

        # Numbered listings of both model lists.
        print(f"\n{'ANNOTATION MODELS'} (from annotation.txt):")
        annotation_models = per_component['annotation']
        for position, model_name in enumerate(annotation_models, start=1):
            print(f"  {position:2d}. {model_name}")

        print(f"\n{'TEXT MODELS'} (from text.txt):")
        # The 'scene' entry stands in for the text model list here.
        text_models = per_component['scene']
        for position, model_name in enumerate(text_models, start=1):
            print(f"  {position:2d}. {model_name}")

        # Overlap between the two lists.
        annotation_names = set(annotation_models)
        text_names = set(text_models)
        shared = annotation_names & text_names
        text_exclusive = text_names - annotation_names

        print(f"\nModel Analysis:")
        print(f"  Common models: {len(shared)}")
        print(f"  Text-only models: {len(text_exclusive)}")

        if text_exclusive:
            print(f"\n  Text-only models:")
            for model_name in sorted(text_exclusive):
                print(f"    - {model_name}")

    except Exception as e:
        print(f"❌ Failed to display model configuration: {e}")


def validate_model_files():
    """Validate the model lists and print a short findings summary.

    Calls ``ModelConfigLoader.validate_model_files()`` for the annotation and
    text model lists, prints how many models each contains, and flags every
    model name that has no ``:`` in it (i.e. no platform prefix). At most five
    findings are printed in full; the rest are summarised as a count. Any
    exception is reported on stdout instead of being raised.
    """
    try:
        print("\n" + "="*60)
        print("MODEL CONFIGURATION VALIDATION")
        print("="*60)

        annotation_models, text_models = ModelConfigLoader.validate_model_files()

        print(f"\nValidation Results:")
        print(f"✅ Annotation models file: {len(annotation_models)} models")
        print(f"✅ Text models file: {len(text_models)} models")

        # A model name without ':' carries no platform prefix.
        findings = [
            f"Model '{name}' missing platform prefix (e.g., 'openai:' or 'groq:')"
            for name in annotation_models + text_models
            if ':' not in name
        ]

        if not findings:
            print(f"\n✅ No issues found. Model configurations look good!")
            return

        print(f"\n⚠️  Potential Issues Found:")
        # Print the first five findings in full, then only a count of the rest.
        for finding in findings[:5]:
            print(f"  - {finding}")
        if len(findings) > 5:
            print(f"  - ... and {len(findings) - 5} more issues")

    except Exception as e:
        print(f"❌ Model validation failed: {e}")


def show_timing_analysis():
    """Display timing analysis across all components and models.

    For every component/model pair, the generation time is extracted from the
    system output of each ground-truth video. The function then prints:

    * the average time and video count per model,
    * the fastest and slowest model per component with their speed ratio,
    * a cross-component comparison table,
    * the globally fastest and slowest component/model pair.

    Any exception is reported on stdout instead of being raised.
    """
    try:
        timing_metrics = TimingMetrics()
        data_loader = DataLoader()

        print("\n" + "="*80)
        print("DRIVEGUARD TIMING ANALYSIS")
        print("="*80)

        # Get all components
        model_config = ModelConfigLoader()
        components = ['annotation', 'scene', 'violation', 'accident', 'assessment']

        mean_time_by_key = {}     # "component/model" -> mean generation time
        component_summaries = {}  # component -> compare_model_timing() result

        # The ground-truth listing takes no component or model argument, so it
        # is loaded on first use and reused instead of being re-read for every
        # model. list() makes it safe to iterate repeatedly even if the loader
        # hands back a one-shot iterator.
        ground_truth_files = None

        for component in components:
            print(f"\n📊 {component.upper()} COMPONENT")
            print("-" * 50)

            models = model_config.get_component_models(component)
            times_by_model = {}  # model -> list of generation times

            for model in models:
                if ground_truth_files is None:
                    ground_truth_files = list(data_loader.load_ground_truth_files())

                # Collect one generation time per video that has usable output.
                model_times = []
                for video_id, _ in ground_truth_files:
                    sys_data = data_loader.load_system_output(component, model, video_id)
                    if not sys_data:
                        continue
                    gen_time = timing_metrics.extract_generation_time(sys_data)
                    if gen_time is not None:
                        model_times.append(gen_time)

                # Models without any timing data are left out of all tables.
                if not model_times:
                    continue

                stats = timing_metrics.calculate_timing_stats(model_times)
                times_by_model[model] = model_times
                mean_time_by_key[f"{component}/{model}"] = stats['mean_time']

                print(f"  {model:<30} {timing_metrics.format_time(stats['mean_time']):>8} avg  {stats['count']:>3} videos")

            # Component timing comparison
            if times_by_model:
                comparison = timing_metrics.compare_model_timing(times_by_model)
                component_summaries[component] = comparison

                if comparison.get('fastest_model') and comparison.get('slowest_model'):
                    fastest = comparison['fastest_model']
                    slowest = comparison['slowest_model']
                    print(f"\n  🚀 Fastest: {fastest['model']} ({timing_metrics.format_time(fastest['mean_time'])})")
                    print(f"  🐌 Slowest: {slowest['model']} ({timing_metrics.format_time(slowest['mean_time'])})")
                    print(f"  📈 Speed ratio: {comparison['speed_ratio']:.1f}x")

        # Cross-component analysis
        print("\n" + "="*80)
        print("CROSS-COMPONENT TIMING COMPARISON")
        print("="*80)

        if component_summaries:
            print(f"\n{'Component':<20} {'Fastest Model':<25} {'Avg Time':<12} {'Slowest Model':<25} {'Avg Time':<12}")
            print("-" * 95)

            for component, summary in component_summaries.items():
                if summary.get('fastest_model') and summary.get('slowest_model'):
                    fastest = summary['fastest_model']
                    slowest = summary['slowest_model']

                    fastest_time = timing_metrics.format_time(fastest['mean_time'])
                    slowest_time = timing_metrics.format_time(slowest['mean_time'])

                    print(f"{component:<20} {fastest['model']:<25} {fastest_time:<12} {slowest['model']:<25} {slowest_time:<12}")

        # Overall fastest/slowest across all components, using the means
        # already computed in the per-model loop above.
        if mean_time_by_key:
            fastest_key, fastest_mean = min(mean_time_by_key.items(), key=lambda item: item[1])
            slowest_key, slowest_mean = max(mean_time_by_key.items(), key=lambda item: item[1])

            print("\n🏆 GLOBAL PERFORMANCE LEADERS")
            print(f"🚀 Overall fastest: {fastest_key} ({timing_metrics.format_time(fastest_mean)})")
            print(f"🐌 Overall slowest: {slowest_key} ({timing_metrics.format_time(slowest_mean)})")
            # A zero mean would raise ZeroDivisionError and abort the report
            # through the broad handler below, so the ratio is only computed
            # for a positive denominator.
            if fastest_mean > 0:
                print(f"📈 Global speed ratio: {slowest_mean / fastest_mean:.1f}x")
            else:
                print("📈 Global speed ratio: n/a (fastest mean time is not positive)")

        print()

    except Exception as e:
        print(f"❌ Error showing timing analysis: {e}")


def generate_evaluation_reports(all_results: Dict[str, Dict[str, Any]], evaluator: Optional[ComponentEvaluator] = None):
    """Write an evaluation report for each component that has results.

    For every component in ``EvaluationConfig.COMPONENTS`` the results of the
    current run are used when they contain at least one truthy entry;
    otherwise the results are loaded through the evaluator. Components that
    still have nothing usable are skipped with a message.

    Args:
        all_results: Dictionary mapping component names to their evaluation results
        evaluator: Optional ComponentEvaluator instance to reuse. If None, creates a new one.
    """
    def has_usable_results(candidate):
        # Usable means: a non-empty mapping with at least one truthy value.
        return bool(candidate) and any(candidate.values())

    report_generator = ReportGenerator(
        results_dir=EvaluationConfig.RESULTS_DIR,
        reports_dir=EvaluationConfig.REPORTS_DIR
    )

    # Fall back to a fresh evaluator only when the caller did not supply one.
    if evaluator is None:
        evaluator = ComponentEvaluator()

    reported = []

    for component in EvaluationConfig.COMPONENTS:
        results = all_results.get(component)

        # Nothing usable from the current run: try the stored results.
        if not has_usable_results(results):
            print(f"  📂 Loading cached results for {component}...")
            results = evaluator.load_evaluation_results(component)

        if not has_usable_results(results):
            print(f"    ⏭️ Skipping {component} - no evaluation results available")
            continue

        print(f"  📝 Generating report for {component}...")
        report_file = report_generator.save_component_report(component, results)
        print(f"    ✅ Report saved: {report_file}")
        reported.append(component)

    if not reported:
        print(f"\n⚠️  No reports generated - no evaluation results found")
        print(f"💡 Run evaluations first: uv run python -m evaluation.component_eval --component <component>")
        return

    print(f"\n📋 Generated reports for {len(reported)} components:")
    for component in reported:
        report_path = EvaluationConfig.REPORTS_DIR / f"{component}_evaluation_report.md"
        print(f"  • {component}: {report_path}")

def _mean_metric_total(metric_entries) -> float:
    """Return the mean, over dict entries, of the sum of each entry's numeric values."""
    # Non-dict entries and non-numeric values are ignored, and the divisor is
    # the number of entries that actually contributed, so neither can skew the
    # average or raise TypeError.
    totals = [
        sum(value for value in entry.values() if isinstance(value, (int, float)))
        for entry in (metric_entries or [])
        if isinstance(entry, dict)
    ]
    return sum(totals) / len(totals) if totals else 0


def _mean_numeric_score(score_entries) -> float:
    """Return the mean of every numeric value found across the dict entries."""
    scores = []
    for entry in (score_entries or []):
        if isinstance(entry, dict):
            scores.extend(value for value in entry.values() if isinstance(value, (int, float)))
    return sum(scores) / len(scores) if scores else 0


def export_results_to_csv(all_results: Dict[str, Dict[str, Any]]):
    """Export evaluation results to CSV format.

    One timestamped CSV file per component is written to
    ``EvaluationConfig.EXPORTS_DIR``, with one row per model that has a truthy
    result: model name, number of video results, averaged traditional metrics
    and averaged LLM judge scores. Components without results in
    ``all_results`` are loaded through a ``ComponentEvaluator``; components
    that still have nothing usable are skipped with a message.

    Args:
        all_results: Dictionary mapping component names to their evaluation results
    """
    import csv
    from datetime import datetime

    evaluator = ComponentEvaluator()
    # One timestamp for the whole export so all files of a run share a suffix.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Export each component to separate CSV
    components_exported = []

    for component in EvaluationConfig.COMPONENTS:
        results = all_results.get(component)

        # If no results from current run, try to load from cache
        if not results or not any(results.values()):
            print(f"  📂 Loading cached results for {component}...")
            results = evaluator.load_evaluation_results(component)

        if not results or not any(results.values()):
            print(f"    ⏭️ Skipping {component} - no evaluation results available")
            continue

        csv_file = EvaluationConfig.EXPORTS_DIR / f"{component}_results_{timestamp}.csv"
        print(f"  📊 Exporting {component} to CSV...")

        # newline='' lets the csv module control line endings itself.
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['model', 'video_count', 'traditional_metrics', 'llm_judge_scores'])

            for model, result in results.items():
                # Models with no result are left out of the file.
                if not result:
                    continue

                video_count = len(result.get('video_results', {}))
                avg_trad = _mean_metric_total(result.get('traditional_metrics', []))
                avg_llm = _mean_numeric_score(result.get('llm_judge_scores', []))

                writer.writerow([model, video_count, f"{avg_trad:.3f}", f"{avg_llm:.1f}"])

        print(f"    ✅ CSV exported: {csv_file}")
        components_exported.append(component)

    if components_exported:
        print(f"\n📊 Exported {len(components_exported)} components to CSV")
    else:
        print(f"\n⚠️  No CSV files exported - no evaluation results found")


def main():
    """Main entry point for component evaluation.

    Dispatch order:

    1. Information commands (``--status``, ``--show-models``,
       ``--validate-models``, ``--timing-analysis``): the first one present
       runs and the function returns.
    2. Standalone ``--report`` and/or ``--export-csv`` (no component
       selected): each requested action runs against stored results.
    3. Evaluation of ``--component`` or ``--all-components``, followed by the
       optional CSV export and report generation.

    Failures are reported on stdout; nothing is raised to the caller.
    """
    parser = setup_cli_parser()
    args = parser.parse_args()

    # Ensure directories exist
    EvaluationConfig.ensure_directories()

    # Information commands (no evaluation)
    if args.status:
        show_evaluation_status()
        return

    if args.show_models:
        display_model_configuration()
        return

    if args.validate_models:
        validate_model_files()
        return

    if args.timing_analysis:
        show_timing_analysis()
        return

    evaluation_requested = bool(args.component or args.all_components)

    # Standalone report/export commands. Both are handled before returning so
    # that "--report --export-csv" performs both actions instead of silently
    # dropping the export.
    if not evaluation_requested and (args.report or args.export_csv):
        if args.report:
            print("📝 Generating reports for all available results...")
            try:
                # Create evaluator for standalone report generation
                evaluator = ComponentEvaluator()
                generate_evaluation_reports({}, evaluator)  # Empty dict means load from cache
                print("✅ Report generation complete")
            except Exception as e:
                print(f"❌ Report generation failed: {e}")

        if args.export_csv:
            print("📊 Exporting all available results to CSV...")
            try:
                export_results_to_csv({})  # Empty dict means load from cache
                print("✅ CSV export complete")
            except Exception as e:
                print(f"❌ CSV export failed: {e}")
        return

    # Check that at least one evaluation option is specified
    if not evaluation_requested:
        print("❌ Please specify --component, --all-components, or an information command.")
        parser.print_help()
        return

    try:
        # Initialize evaluator
        print("\n" + "="*60)
        print("DRIVEGUARD COMPONENT EVALUATION")
        print("="*60)

        evaluator = ComponentEvaluator()

        # Determine components to evaluate (--all-components wins over --component)
        if args.all_components:
            components = EvaluationConfig.COMPONENTS
        else:
            components = [args.component]

        # Run evaluations; a failing component is reported and the loop moves
        # on to the next one.
        all_results = {}
        for component in components:
            print(f"\n🔍 Evaluating {component} component...")

            try:
                results = evaluator.evaluate_component(component, overwrite=args.overwrite)
                all_results[component] = results

                if results:
                    success_count = sum(1 for r in results.values() if r is not None)
                    total_count = len(results)
                    print(f"✅ {component}: {success_count}/{total_count} models evaluated successfully")
                else:
                    print(f"❌ {component}: No results obtained")

            except Exception as e:
                print(f"❌ Failed to evaluate {component}: {e}")

        # Post-evaluation actions run only if some component produced results.
        if all_results and any(all_results.values()):
            print("\n🎉 Evaluation completed!")

            if args.export_csv:
                print("📊 Exporting results to CSV...")
                try:
                    export_results_to_csv(all_results)
                    print("✅ Results exported to CSV")
                except Exception as e:
                    print(f"❌ CSV export failed: {e}")

            if args.report:
                print("📝 Generating evaluation reports...")
                try:
                    generate_evaluation_reports(all_results, evaluator)
                    print("✅ Reports generated successfully")
                except Exception as e:
                    print(f"❌ Report generation failed: {e}")

            # Only suggest the follow-up commands that were not already run.
            print("\nNext steps:")
            print("  uv run python -m evaluation.component_eval --status")
            print("  uv run python -m evaluation.component_eval --timing-analysis")
            if not args.report:
                print("  uv run python -m evaluation.component_eval --report")
            if not args.export_csv:
                print("  uv run python -m evaluation.component_eval --export-csv")
        else:
            print("\n❌ No successful evaluations completed.")

    except KeyboardInterrupt:
        print("\n\n⚠️  Evaluation interrupted by user.")
        print("💡 Run the same command again to resume from where you left off.")
    except Exception as e:
        print(f"\n❌ Evaluation failed: {e}")
        print("💡 Check your configuration and try again.")

# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()