"""
Django Management Command to Run a Single Evaluation in Isolated Process

This command runs a single evaluation by queue ID in a completely isolated Python process.
This avoids Inspect AI's "Multiple concurrent calls to eval_async are not allowed" limitation
by ensuring each evaluation runs in its own process space.

Usage:
    python manage.py run_single_evaluation --queue-id 123
"""

import asyncio
import sys
import os
import traceback
from pathlib import Path
from django.core.management.base import BaseCommand, CommandError
from django.conf import settings
from django.utils import timezone
from django.db import transaction

# Load environment variables from a .env file, if one is present.
# This runs at import time, ahead of the model_evaluation imports that follow
# in this module.
from dotenv import load_dotenv
# Four directory levels above this file; presumably the project root for a
# <root>/<app>/management/commands/ layout -- verify if this file is moved.
env_path = Path(__file__).parent.parent.parent.parent / '.env'
if env_path.exists():
    # When no .env file exists, load_dotenv is simply not called.
    load_dotenv(dotenv_path=env_path)

from model_evaluation.models import EvaluationQueue, ModelAnswer, ExecutionTracker
from model_evaluation.queue_manager import queue_manager
from model_evaluation.evaluator import Evaluator
from model_evaluation.db_config import DatabaseFrameworkConfig


class Command(BaseCommand):
    help = 'Run a single evaluation in isolated process by queue ID'

    def add_arguments(self, parser):
        """Add command line arguments."""
        parser.add_argument(
            '--queue-id',
            type=int,
            required=True,
            help='Queue ID of evaluation to run'
        )
        parser.add_argument(
            '--verbose',
            action='store_true',
            help='Verbose logging output'
        )

    def handle(self, *args, **options):
        """Handle the command execution."""
        queue_id = options['queue_id']
        verbose = options.get('verbose', False)
        
        if verbose:
            import logging
            logging.basicConfig(level=logging.INFO)
        
        try:
            self.stdout.write(f"🚀 Starting isolated evaluation for queue ID {queue_id}")
            
            # API keys are passed by parent process via environment variables
            
            self.run_single_evaluation(queue_id)
            self.stdout.write(f"✅ Completed evaluation for queue ID {queue_id}")
            
        except Exception as e:
            self.stderr.write(f"❌ Evaluation failed for queue ID {queue_id}: {str(e)}")
            if verbose:
                self.stderr.write(f"Traceback: {traceback.format_exc()}")
            sys.exit(1)  # Exit with error code for parent process

    def run_single_evaluation(self, queue_id: int):
        """Run a complete evaluation lifecycle for the given queue ID."""
        # Step 1: Get and validate queue item
        queue_item = self.get_queue_item(queue_id)

        # Step 2: Verify execution tracker exists (should be created by queue manager)
        if not self.verify_execution_tracker_exists(queue_item):
            raise CommandError(f"Execution tracker not found for queue {queue_id} - queue manager should have created it")

        # Track whether we've already released the tracker (complete_evaluation does this)
        tracker_released = False

        try:
            # Step 3: Check if evaluation is still needed (may have been completed by another process)
            if not self.is_evaluation_still_needed(queue_item):
                self.stdout.write(f"Evaluation already completed by another process, skipping")
                # Mark queue item as completed since answer already exists
                # Note: complete_evaluation also releases the tracker
                asyncio.run(queue_manager.complete_evaluation(queue_item, True, None))
                tracker_released = True
                self.stdout.write(f"🏁 Queue item marked as completed (answer already existed)")
                return

            # Step 4: Run the actual evaluation
            result = self.run_evaluation(queue_item)

            # Step 5: Save results and complete queue item (also releases tracker)
            self.save_results_and_complete(queue_item, result)
            tracker_released = True

        finally:
            # Step 6: Release execution tracker if not already released
            if not tracker_released:
                self.release_execution_tracker(queue_item)

    def get_queue_item(self, queue_id: int) -> EvaluationQueue:
        """Load the queue entry and check that it is ready to be processed.

        The attempt's model (with company and tier) and question are fetched
        in the same query via ``select_related``.

        Args:
            queue_id: Primary key of the queue entry.

        Returns:
            The queue entry, whose status is ``'running'``.

        Raises:
            CommandError: If the entry does not exist or is not in the
                ``'running'`` state.
        """
        related_paths = (
            'attempt__model__company',
            'attempt__model__tier',
            'attempt__question',
        )

        try:
            item = EvaluationQueue.objects.select_related(*related_paths).get(id=queue_id)
        except EvaluationQueue.DoesNotExist:
            raise CommandError(f"Queue item {queue_id} not found")

        if item.status != 'running':
            raise CommandError(f"Queue item {queue_id} is not in running state: {item.status}")

        self.stdout.write(f"📋 Processing: Q{item.attempt.question.id} with {item.attempt.model.model_name}")
        return item

    def verify_execution_tracker_exists(self, queue_item: EvaluationQueue) -> bool:
        """Report whether an execution tracker is registered for the queue item.

        The tracker is expected to have been created by the queue manager;
        this method only looks it up and logs what it finds.

        Returns:
            True when a tracker exists, False when none is found.
        """
        try:
            tracker = ExecutionTracker.objects.get(queue=queue_item)
        except ExecutionTracker.DoesNotExist:
            self.stdout.write(f"❌ Execution tracker not found for queue {queue_item.id}")
            return False
        else:
            self.stdout.write(f"🔒 Verified execution tracker exists for queue {queue_item.id} "
                              f"(Company: {tracker.company.company_name}, Started: {tracker.started_at})")
            return True

    def is_evaluation_still_needed(self, queue_item: EvaluationQueue) -> bool:
        """Check whether this queue item still needs an evaluation run.

        The queue item is reloaded from the database first, then the stored
        answers are queried for one matching the item's attempt, question
        and model.

        Args:
            queue_item: The queue entry about to be evaluated.

        Returns:
            False when a matching ``ModelAnswer`` already exists (for example
            because another process produced it), True otherwise.
        """
        # Pick up any changes made to the row since it was loaded.
        queue_item.refresh_from_db()

        attempt = queue_item.attempt
        # Only presence matters, so ask the database for existence instead of
        # loading (and ordering) a full row with .first().
        answer_exists = ModelAnswer.objects.filter(
            attempt=attempt,
            question_id=attempt.question.id,
            model=attempt.model
        ).exists()

        if answer_exists:
            self.stdout.write("📝 Answer already exists, evaluation not needed")
            return False

        return True

    def run_evaluation(self, queue_item: EvaluationQueue) -> dict:
        """
        Run the actual evaluation.

        Routes to appropriate evaluator based on framework_type:
        - 'inspect': Evaluator with Inspect AI framework
        - 'non-agentic': Direct API calls without agentic scaffolding
        """
        model = queue_item.attempt.model
        framework_type = model.framework_type or 'inspect'

        self.stdout.write(f"🧠 Running {framework_type} evaluation...")

        try:
            if framework_type == 'non-agentic':
                # Non-agentic evaluation: direct API calls
                result = self.run_non_agentic_evaluation(queue_item)
            else:
                # Inspect framework evaluation (default)
                result = self.run_inspect_evaluation(queue_item)

            # Check if evaluation actually succeeded
            if result.get('success', False):
                self.stdout.write(f"✅ Evaluation completed successfully")
            else:
                error_msg = result.get('error', 'Unknown error')
                if 'time limit' in error_msg.lower() or 'timeout' in error_msg.lower():
                    self.stdout.write(f"⏱️ Evaluation timed out")
                else:
                    self.stdout.write(f"⚠️ Evaluation completed with errors: {error_msg[:100]}")

            return result

        except Exception as e:
            self.stdout.write(f"❌ Evaluation failed: {str(e)}")
            return {
                'success': False,
                'error': str(e),
                'question_id': queue_item.attempt.question.id,
                'model_key': self.get_model_key(model),
                'attempt_number': queue_item.attempt.attempt_number,
            }

    def run_inspect_evaluation(self, queue_item: EvaluationQueue) -> dict:
        """Evaluate the queue item's attempt through the Inspect AI ``Evaluator``.

        Returns:
            Whatever ``Evaluator.evaluate`` resolves to for this attempt.
        """
        evaluator = Evaluator()
        attempt = queue_item.attempt
        key = self.get_model_key(attempt.model)

        # Intended to be the sole eval_async invocation in this process.
        pending = evaluator.evaluate(
            question_id=attempt.question.id,
            model_key=key,
            attempt_number=attempt.attempt_number,
            model_attempt_id=attempt.id,
        )
        return asyncio.run(pending)

    def run_non_agentic_evaluation(self, queue_item: EvaluationQueue) -> dict:
        """Evaluate the queue item's attempt via the non-agentic evaluator.

        The evaluator module is imported lazily, only when this path is taken.

        Returns:
            Whatever the non-agentic evaluator coroutine resolves to.
        """
        from model_evaluation.non_agentic_evaluator import (
            run_non_agentic_evaluation as evaluate_directly,
        )

        pending = evaluate_directly(
            model_attempt_id=queue_item.attempt.id,
            queue_id=queue_item.id,
        )
        return asyncio.run(pending)

    def get_model_key(self, model) -> str:
        """Get the model key for the evaluator based on the model ID."""
        # Use model ID as the unique key (model_name is no longer unique with CLI variants)
        return str(model.id)

    def save_results_and_complete(self, queue_item: EvaluationQueue, result: dict):
        """Persist the evaluation result and close out the queue item.

        Results from the non-agentic path are treated as already stored by the
        non-agentic evaluator. Every other framework type is evaluated through
        Inspect (see ``run_evaluation``), so its result is written here via
        ``EvaluationExecutor._save_evaluation_result``.

        The queue item is then completed through the queue manager as a
        success or failure according to ``result['success']``.

        Args:
            queue_item: The queue entry that was evaluated.
            result: Result dict produced by the evaluation.

        This method does not raise for errors it handles: if saving or
        completing fails, it tries to mark the queue item as failed. If that
        also fails, it releases the execution tracker directly so it is not
        left behind.
        """
        try:
            model = queue_item.attempt.model
            framework_type = model.framework_type or 'inspect'

            # Mirror the routing in run_evaluation: only 'non-agentic' skips
            # the Inspect path, so only 'non-agentic' skips the Inspect save.
            if framework_type == 'non-agentic':
                # Non-agentic: results already saved by non_agentic_evaluator
                self.stdout.write("💾 Non-agentic results already saved to database")
            else:
                # Inspect framework: use executor's result saving logic
                from model_evaluation.evaluation_executor import EvaluationExecutor
                executor = EvaluationExecutor()
                executor._save_evaluation_result(queue_item, result)
                self.stdout.write("💾 Inspect results saved to database")

            # Mark queue item as completed
            success = result.get('success', False)
            error_message = result.get('error') if not success else None

            # Use queue manager to properly complete the work
            asyncio.run(queue_manager.complete_evaluation(queue_item, success, error_message))

            self.stdout.write(f"🏁 Queue item marked as {'completed' if success else 'failed'}")

        except Exception as e:
            self.stderr.write(f"❌ Error saving results: {str(e)}")
            # Try to mark as failed at least
            try:
                asyncio.run(queue_manager.complete_evaluation(queue_item, False, f"Result saving error: {str(e)}"))
            except Exception as complete_error:
                self.stderr.write(f"❌ Could not mark queue item as failed: {str(complete_error)}")
                # Completion never ran to the end, so nothing has released the
                # tracker. This method returns normally, so the caller will
                # assume the release happened; do it here as a last resort.
                self.release_execution_tracker(queue_item)

    def release_execution_tracker(self, queue_item: EvaluationQueue):
        """Delete the execution tracker rows belonging to *queue_item*.

        This is the manual counterpart to the release normally performed by
        the queue manager's ``complete_evaluation``. It is best-effort: any
        error is reported on stderr and not re-raised.
        """
        try:
            with transaction.atomic():
                # delete() returns (total_deleted, per_model_counts).
                removed, _per_model = ExecutionTracker.objects.filter(queue=queue_item).delete()

            message = (
                f"🔓 Released execution tracker for queue {queue_item.id}"
                if removed > 0
                else f"⚠️  No tracker found to release for queue {queue_item.id}"
            )
            self.stdout.write(message)

        except Exception as e:
            self.stderr.write(f"❌ Error releasing execution tracker: {str(e)}")
    
