from django.db import models
from django.conf import settings
from participants.models import Participant
from questions.models import Question, Subquestion
import json


class Company(models.Model):
    """
    Provider organisation whose AI models are evaluated.

    Typical rows are providers such as OpenAI, Anthropic, Google or Grok that
    expose their models through an API. ``company_name`` is expected to match
    the provider prefix used by the Inspect framework.
    """
    id = models.AutoField(primary_key=True)
    company_name = models.TextField(blank=True, null=True)
    # Name of the environment variable that holds the API key.
    api_key = models.TextField(blank=True, null=True)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'companies'
        verbose_name_plural = "Companies"

    def __str__(self):
        """Return the company name, or an id-based placeholder when unnamed."""
        if self.company_name:
            return self.company_name
        return f"Company {self.id}"


class ModelTier(models.Model):
    """
    Tier used to group models by evaluation priority and resource needs.

    Tier 1: Priority models requiring immediate grading
    Tier 2: Optional comparison models
    Tier 3: Research interest models
    Tier 4: Second attempts and lower priority models
    Tier 5: Retired/inactive models
    """
    id = models.AutoField(primary_key=True)
    # Expected range is 1-5; the range constraint lives in the database.
    tier_number = models.IntegerField()
    tier_name = models.TextField()
    description = models.TextField(blank=True, null=True)
    requires_main_grading = models.BooleanField(default=True)
    appears_on_leaderboard = models.BooleanField(default=True)
    is_active = models.BooleanField(default=True)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'model_tiers'
        ordering = ['tier_number']

    def __str__(self):
        """Return a label of the form ``Tier <number>: <name>``."""
        number = self.tier_number
        name = self.tier_name
        return f"Tier {number}: {name}"


class Model(models.Model):
    """
    Specific AI models being evaluated in the benchmark.

    Each model belongs to a company and is assigned to a tier for evaluation prioritization.
    Supports multiple evaluation frameworks:
    - inspect: Inspect Framework with agentic tools (Docker sandbox, tools, etc.)
    - cli: Command-line interface framework (future feature)
    - non-agentic: Direct API calls without agentic scaffolding (for ablation testing)
    """
    FRAMEWORK_CHOICES = [
        ('inspect', 'Inspect Framework'),
        ('cli', 'CLI Framework'),
        ('non-agentic', 'Non-Agentic Direct API'),
    ]

    id = models.AutoField(primary_key=True)
    company = models.ForeignKey(Company, on_delete=models.CASCADE, db_column='company_id')
    model_name = models.TextField(blank=True, null=True)
    display_name = models.TextField(blank=True, null=True)  # Human-readable name for UI
    instructions = models.TextField(blank=True, null=True)  # Special prompting instructions
    tier = models.ForeignKey(ModelTier, on_delete=models.CASCADE, db_column='tier_id')
    framework_type = models.CharField(max_length=20, choices=FRAMEWORK_CHOICES, blank=True, null=True)
    is_active = models.BooleanField(default=True)
    release_date = models.DateField(blank=True, null=True)  # Date when model was released

    class Meta:
        managed = False
        db_table = 'models'
        ordering = ['tier__tier_number', 'company__company_name', 'model_name']

    def _company_name(self):
        """
        Return the related company's name, or None if it cannot be resolved.

        Accessing ``self.company`` on an instance whose foreign key is unset
        (or points at a missing row) raises a ``Company.DoesNotExist``
        subclass rather than returning None, so a plain truthiness check on
        ``self.company`` is not a usable guard.
        """
        try:
            return self.company.company_name
        except Company.DoesNotExist:
            return None

    def __str__(self):
        """
        Return the best available human-readable label.

        Preference order: ``display_name``, then "<company> <model_name>"
        (with "Unknown Company" when the company or its name is missing),
        then an id-based placeholder when there is no model name.
        """
        if self.display_name:
            return self.display_name
        if not self.model_name:
            # Decide this first so the company relation is not loaded needlessly.
            return f"Model {self.id}"
        company_name = self._company_name() or "Unknown Company"
        return f"{company_name} {self.model_name}"

    def get_model_args(self):
        """Return this model's ``model_args`` rows as a ``{arg_name: decoded value}`` dict."""
        return {arg.arg_name: arg.get_value() for arg in self.model_args.all()}

    def get_reasoning_args(self):
        """Return this model's ``reasoning_args`` rows as a ``{arg_name: decoded value}`` dict."""
        return {arg.arg_name: arg.get_value() for arg in self.reasoning_args.all()}

    def get_full_model_name(self):
        """
        Return the model name with its provider prefix for the Inspect framework.

        The result is "<company_name>/<model_name>". When the company or its
        name is unavailable, the bare ``model_name`` is returned instead of a
        malformed "None/<model_name>". Returns "" when there is no model name.
        """
        if not self.model_name:
            return ""
        # Company names are expected to match provider prefixes exactly.
        provider = self._company_name()
        if provider:
            return f"{provider}/{self.model_name}"
        return self.model_name


class ModelAttempt(models.Model):
    """
    One evaluation attempt of a model on a question.

    Up to 2 attempts are supported per (model, question) pair;
    ``attempt_number`` separates first attempts (Tier 1 grading) from
    second attempts (Tier 4 grading).
    """
    id = models.AutoField(primary_key=True)
    model = models.ForeignKey(Model, on_delete=models.CASCADE, db_column='model_id')
    question = models.ForeignKey(Question, on_delete=models.CASCADE, db_column='question_id')
    # Populated automatically by the database.
    time = models.DateTimeField()
    # 1 or 2; enforced by database constraints.
    attempt_number = models.IntegerField(default=1)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'model_attempts'
        ordering = ['-time']

    def __str__(self):
        """Return ``<model> - <question title> (Attempt <n>)``."""
        model_label = str(self.model)
        question_title = self.question.title
        return f"{model_label} - {question_title} (Attempt {self.attempt_number})"


class ModelAnswer(models.Model):
    """
    A model's response to a main question.

    Holds the full response text and, for CLI sessions, an optional terminal
    log hash. When an evaluation fails, the error message is stored in
    ``answer``. Also carries the grading-release flag used by the workflow.
    """
    id = models.AutoField(primary_key=True)
    attempt = models.ForeignKey(ModelAttempt, on_delete=models.CASCADE, db_column='attempt_id')
    question = models.ForeignKey(Question, on_delete=models.CASCADE, db_column='question_id')
    model = models.ForeignKey(Model, on_delete=models.CASCADE, db_column='model_id')
    # Full text response, or the error message for a failed evaluation.
    answer = models.TextField(blank=True, null=True)
    # SHA-256 hash used to retrieve the terminal log.
    terminal_log_hash = models.TextField(blank=True, null=True)
    # Whether the answer has been released to the question author for grading.
    released_for_grading = models.BooleanField(default=False)
    # Whether the model exhausted its token limit.
    ran_out_of_tokens = models.BooleanField(default=False)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'model_answers'
        ordering = ['-attempt__time']

    def __str__(self):
        """Return ``<model> answer to <question title>``."""
        model_label = str(self.model)
        question_title = self.question.title
        return f"{model_label} answer to {question_title}"


class ModelSubquestionAnswer(models.Model):
    r"""
    Model's responses to automatically-gradable subquestions.

    Includes automatic evaluation results for leaderboard calculations,
    with admin override capability for handling edge cases.

    For non-agentic evaluations:
    - full_response: Complete model output before answer extraction
    - answer: Extracted answer from \boxed{} or fallback to full_response
    - parsing_failure: Indicates if \boxed{} extraction failed
    """
    # NOTE: the docstring above is a raw string on purpose. In a normal string
    # literal "\b" is the backspace escape, which would corrupt "\boxed" in
    # __doc__.
    id = models.AutoField(primary_key=True)
    attempt = models.ForeignKey(ModelAttempt, on_delete=models.CASCADE, db_column='attempt_id')
    subquestion = models.ForeignKey(Subquestion, on_delete=models.CASCADE, db_column='subquestion_id')
    model = models.ForeignKey(Model, on_delete=models.CASCADE, db_column='model_id')
    answer = models.TextField(blank=True, null=True)  # Model's extracted answer
    full_response = models.TextField(blank=True, null=True)  # Complete model output (non-agentic only)
    parsing_failure = models.BooleanField(default=False)  # True if \boxed{} extraction failed
    is_correct = models.IntegerField(blank=True, null=True)  # Result of automatic evaluation: 1=correct, None=not graded
    admin_override = models.IntegerField(blank=True, null=True)  # Admin override: 0=False, 1=True, None=not set
    ran_out_of_tokens = models.BooleanField(default=False)  # Whether model exhausted token limit

    class Meta:
        managed = False
        db_table = 'model_subquestion_answers'
        ordering = ['subquestion__subquestion_order']

    def __str__(self):
        """Return ``<model> - <subquestion>``."""
        return f"{self.model} - {self.subquestion}"

    @property
    def effective_correctness(self):
        """
        Return the correctness value that should be used downstream.

        ``admin_override`` wins whenever it is set (including 0); otherwise
        the automatic ``is_correct`` result is returned, which may be None.
        """
        if self.admin_override is not None:
            return self.admin_override
        return self.is_correct

    def get_correctness_display(self):
        """
        Return a human-readable correctness label.

        An admin override, when set, takes precedence and is flagged in the
        label. Only the value 1 counts as correct; any other non-None value
        is reported as incorrect. Returns "Not graded" when neither
        ``admin_override`` nor ``is_correct`` is set.
        """
        if self.admin_override is not None:
            if self.admin_override == 1:
                return "Correct (Admin Override)"
            return "Incorrect (Admin Override)"
        if self.is_correct is not None:
            if self.is_correct == 1:
                return "Correct"
            return "Incorrect"
        return "Not graded"


class EvaluationQueue(models.Model):
    """
    Global, attempt-based evaluation queue.

    Each row tracks the order and status of one model attempt. Tier priority
    is not stored here; it is derived through attempt -> model -> tier.
    Company concurrency limits are applied around this queue.
    """
    STATUS_CHOICES = [
        ('pending', 'Pending'),
        ('running', 'Running'),
        ('completed', 'Completed'),
        ('failed', 'Failed'),
        ('cancelled', 'Cancelled'),
    ]

    id = models.AutoField(primary_key=True)
    attempt = models.ForeignKey(ModelAttempt, on_delete=models.CASCADE, db_column='attempt_id')
    # Populated automatically by the database.
    submitted_at = models.DateTimeField()
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, blank=True, null=True)
    started_at = models.DateTimeField(blank=True, null=True)
    completed_at = models.DateTimeField(blank=True, null=True)
    error_message = models.TextField(blank=True, null=True)
    # Retry fields removed in migration 0009_remove_retry_fields

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'evaluation_queue'
        # The default ordering is by submission time only. Ordering by tier
        # priority (attempt.model.tier.tier_number) is applied in queries.
        ordering = ['submitted_at']

    @property
    def model(self):
        """Model being evaluated, reached through the attempt."""
        attempt = self.attempt
        return attempt.model

    @property
    def question(self):
        """Question being evaluated, reached through the attempt."""
        attempt = self.attempt
        return attempt.question

    @property
    def tier_priority(self):
        """Tier number of the attempt's model (attempt -> model -> tier)."""
        tier = self.attempt.model.tier
        return tier.tier_number

    @property
    def attempt_number(self):
        """Attempt number of the queued attempt."""
        attempt = self.attempt
        return attempt.attempt_number

    def __str__(self):
        """Return ``Queue #<id>: <attempt> [<status>]``."""
        queue_id = self.id
        return f"Queue #{queue_id}: {self.attempt} [{self.status}]"


class CompanyExecutionLock(models.Model):
    """
    Per-company lock that prevents concurrent evaluations.

    The intent is that only one evaluation per company runs at a time, to
    respect API rate limits, avoid quota exhaustion and keep resource
    allocation fair.
    """
    id = models.AutoField(primary_key=True)
    company = models.ForeignKey(Company, on_delete=models.CASCADE, db_column='company_id')
    # Populated automatically by the database.
    locked_at = models.DateTimeField()
    # Queue entry holding the lock; nulled if that entry is deleted.
    locked_by_queue = models.ForeignKey(
        EvaluationQueue,
        on_delete=models.SET_NULL,
        db_column='locked_by_queue_id',
        blank=True,
        null=True,
    )

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'company_execution_locks'
        ordering = ['-locked_at']

    def __str__(self):
        """Return ``<company name> locked by Queue #<queue id>``."""
        holder_name = self.company.company_name
        return f"{holder_name} locked by Queue #{self.locked_by_queue_id}"




class ExecutionTracker(models.Model):
    """
    Record of a currently running evaluation, used for parallelism control.

    Intended as a more flexible replacement for CompanyExecutionLock that:
    - Tracks all running evaluations globally (max 4)
    - Enforces per-company limits (max 2)
    - Provides better cancellation handling with subprocess PIDs

    The limits themselves are not enforced in this class.
    """
    id = models.AutoField(primary_key=True)
    queue = models.ForeignKey(EvaluationQueue, on_delete=models.CASCADE, db_column='queue_id')
    company = models.ForeignKey(Company, on_delete=models.CASCADE, db_column='company_id')
    model = models.ForeignKey(Model, on_delete=models.CASCADE, db_column='model_id')
    question = models.ForeignKey(Question, on_delete=models.CASCADE, db_column='question_id')
    attempt_number = models.IntegerField()
    # PID of the evaluation subprocess, when known.
    subprocess_pid = models.IntegerField(blank=True, null=True)
    started_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'execution_tracker'
        ordering = ['-started_at']

    def __str__(self):
        """Return ``Execution: <model name> on Q<question id> (PID: <pid>)``."""
        model_name = self.model.model_name
        return f"Execution: {model_name} on Q{self.question_id} (PID: {self.subprocess_pid})"


# ============================================================================
# GRADING SYSTEM MODELS
# ============================================================================

class GradingState(models.Model):
    """
    Lookup table of valid grading states.

    Grading records reference these rows by foreign key, which keeps grading
    responses constrained to known states: true, false, not_sure,
    not_applicable.
    """
    id = models.AutoField(primary_key=True)
    # Machine code: 'true', 'false', 'not_sure', 'not_applicable'.
    # Stored in the 'state_name' column.
    state_code = models.TextField(db_column='state_name', unique=True)
    # Human-readable label: 'Yes', 'No', 'Not Sure', 'Not Applicable'.
    # Stored in the 'display_name' column.
    state_label = models.TextField(db_column='display_name')
    # 'binary' or 'special'.
    state_type = models.TextField()
    # Free-form descriptive text.
    description = models.TextField(blank=True, null=True)
    # Restored after being accidentally removed in consolidation.
    is_active = models.BooleanField(default=True)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'grading_states'
        ordering = ['id']

    def __str__(self):
        """Return ``<label> (<code>)``."""
        label = self.state_label
        code = self.state_code
        return f"{label} ({code})"


class ModelGradingSession(models.Model):
    """
    Grading session tying one grader to one question.

    Covers the overall grading workflow for the question's model answers:
    alias generation, progress tracking and finalization.
    """
    id = models.AutoField(primary_key=True)
    grader = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, db_column='grader_id')
    question = models.ForeignKey(Question, on_delete=models.CASCADE, db_column='question_id')
    # 'active' or 'finalized'.
    session_status = models.TextField(default='active')
    created_at = models.DateTimeField(auto_now_add=True)
    finalized_at = models.DateTimeField(blank=True, null=True)
    # Grader's personal notes on grading criteria.
    grader_notes = models.TextField(blank=True, null=True)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'model_grading_sessions'
        unique_together = [['grader', 'question']]
        ordering = ['-created_at']

    def __str__(self):
        """Return ``Grading Session #<id>: <grader email> - Q<question id> [<status>]``."""
        if self.session_status == 'finalized':
            status = "Finalized"
        else:
            status = "Active"
        return f"Grading Session #{self.id}: {self.grader.email} - Q{self.question_id} [{status}]"

    @property
    def is_finalized(self):
        """True when ``session_status`` is 'finalized'."""
        return self.session_status == 'finalized'


class ModelGradingAlias(models.Model):
    """
    Anonymized alias of a model answer, used for blind grading.

    Each model answer in a session is shown under an alias such as
    "Answer A" or "Answer B", with the randomization kept consistent per
    grader across sessions.
    """
    id = models.AutoField(primary_key=True)
    session = models.ForeignKey(
        ModelGradingSession,
        on_delete=models.CASCADE,
        db_column='session_id',
        related_name='aliases',
    )
    model_answer = models.ForeignKey(
        ModelAnswer,
        on_delete=models.CASCADE,
        db_column='model_answer_id',
    )
    # "Answer A", "Answer B", etc. Stored in the 'alias_name' column.
    alias = models.TextField(db_column='alias_name')
    # Keeps UI ordering consistent. Stored in the 'sort_order' column.
    alias_order = models.IntegerField(db_column='sort_order')
    # 1 for Tier 1, 2 for Tiers 2-3.
    tier_group = models.IntegerField()

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'model_grading_aliases'
        unique_together = [
            ['session', 'model_answer'],
            ['session', 'alias'],
        ]
        ordering = ['alias_order']

    def __str__(self):
        """Return ``<alias> → <underlying model name>``."""
        underlying_name = self.model_answer.model.model_name
        return f"{self.alias} → {underlying_name}"


class ModelGrading(models.Model):
    """
    A single grader's grading record for one model answer.

    Stores 8 binary category judgements (foreign keys into grading_states),
    an overall progress grade, and supporting metadata.
    """
    GRADING_STATUS_CHOICES = [
        ('not_started', 'Not Started'),
        ('in_progress', 'In Progress'),
        ('completed', 'Completed'),
        ('given_up', 'Given Up'),
    ]

    id = models.AutoField(primary_key=True)
    session = models.ForeignKey(
        ModelGradingSession,
        on_delete=models.CASCADE,
        db_column='session_id',
        related_name='gradings',
    )
    model_answer = models.ForeignKey(
        ModelAnswer,
        on_delete=models.CASCADE,
        db_column='model_answer_id',
    )
    grader = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, db_column='grader_id')

    # Workflow status of this grading record.
    grading_status = models.TextField(choices=GRADING_STATUS_CHOICES, default='not_started')

    # Binary grading categories. The Python attribute names are the legacy
    # ones, kept for backward compatibility. Each maps onto a newer, simpler
    # database column name via db_column, so the two names intentionally
    # differ.
    error_incorrect_logic = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='correct_answer',
    )
    error_hallucinated = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='uses_correct_method',
    )
    error_calculation = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='logical_reasoning',
    )
    error_conceptual = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='addresses_question',
    )
    achievement_understanding = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='mathematically_sound',
    )
    achievement_correct_result = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='clear_communication',
    )
    achievement_insight = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='uses_appropriate_tools',
    )
    achievement_usefulness = models.ForeignKey(
        GradingState, null=True, blank=True, on_delete=models.SET_NULL,
        related_name='+', db_column='demonstrates_understanding',
    )

    # Overall progress grade on a 0-3 scale.
    progress_grade = models.IntegerField(blank=True, null=True)

    # Restored after being accidentally removed in consolidation.
    comments = models.TextField(blank=True, null=True)
    flag_for_organizers = models.BooleanField(default=False)

    # N/A toggle.
    not_applicable = models.BooleanField(default=False)

    # Timestamps.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    finalized_at = models.DateTimeField(blank=True, null=True)

    class Meta:
        # Table is owned outside Django migrations.
        managed = False
        db_table = 'model_gradings'
        unique_together = [['grader', 'model_answer']]
        ordering = ['-updated_at']

    def __str__(self):
        """Return ``Grading #<id>: <grader email> - <model answer> [<status>]``."""
        return f"Grading #{self.id}: {self.grader.email} - {self.model_answer} [{self.grading_status}]"

    @property
    def is_complete(self):
        """
        True when every required grading field has a value.

        Requires a progress grade (0 counts as set) and all 8 binary
        category foreign keys. Only the raw ``*_id`` values are inspected,
        so no related rows are loaded.
        """
        # A grade of 0 is valid, so compare against None rather than truthiness.
        if self.progress_grade is None:
            return False

        category_ids = (
            self.error_incorrect_logic_id,
            self.error_hallucinated_id,
            self.error_calculation_id,
            self.error_conceptual_id,
            self.achievement_understanding_id,
            self.achievement_correct_result_id,
            self.achievement_insight_id,
            self.achievement_usefulness_id,
        )
        for category_id in category_ids:
            if category_id is None:
                return False
        return True

    def get_progress_grade_display(self):
        """Return the label for ``progress_grade``, or "Not Graded" if unset/unknown."""
        labels_by_grade = {
            0: "No Progress",
            1: "Minor Progress",
            2: "Major Progress",
            3: "Complete Solution",
        }
        return labels_by_grade.get(self.progress_grade, "Not Graded")


class ModelArg(models.Model):
    """
    Arguments passed to model client initialization (AsyncOpenAI, AsyncAnthropic, etc.)

    Stores configuration parameters that are used when creating the API client instance.
    Each parameter is stored as a separate row with JSON-encoded values.
    """
    id = models.AutoField(primary_key=True)
    model = models.ForeignKey(Model, on_delete=models.CASCADE, db_column='model_id', related_name='model_args')
    arg_name = models.TextField()
    arg_value = models.TextField()  # JSON-encoded value
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        managed = False
        db_table = 'model_args'
        unique_together = [['model', 'arg_name']]
        ordering = ['arg_name']

    def __str__(self):
        """Return ``<model display or raw name>: <arg_name>=<raw arg_value>``."""
        return f"{self.model.display_name or self.model.model_name}: {self.arg_name}={self.arg_value}"

    def get_value(self):
        """
        Decode ``arg_value`` from JSON into a Python object.

        Returns:
            The decoded object, or the raw stored value unchanged when it
            cannot be decoded: text that is not valid JSON, or a
            non-string such as None.
        """
        try:
            return json.loads(self.arg_value)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError (malformed JSON text).
            # TypeError covers a NULL/non-string value, which the unmanaged
            # table cannot rule out. Without it, one bad row would break
            # every caller that builds a dict of all args.
            return self.arg_value

    def set_value(self, value):
        """Store ``value`` in ``arg_value`` as a JSON string (does not save the row)."""
        self.arg_value = json.dumps(value)


class ReasoningArg(models.Model):
    """
    Arguments passed directly to eval() function

    Stores configuration parameters that are passed to the Inspect eval() function,
    such as reasoning_effort, max_tokens, cache_prompt, etc.
    Each parameter is stored as a separate row with JSON-encoded values.
    """
    id = models.AutoField(primary_key=True)
    model = models.ForeignKey(Model, on_delete=models.CASCADE, db_column='model_id', related_name='reasoning_args')
    arg_name = models.TextField()
    arg_value = models.TextField()  # JSON-encoded value
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        managed = False
        db_table = 'reasoning_args'
        unique_together = [['model', 'arg_name']]
        ordering = ['arg_name']

    def __str__(self):
        """Return ``<model display or raw name>: <arg_name>=<raw arg_value>``."""
        return f"{self.model.display_name or self.model.model_name}: {self.arg_name}={self.arg_value}"

    def get_value(self):
        """
        Decode ``arg_value`` from JSON into a Python object.

        Returns:
            The decoded object, or the raw stored value unchanged when it
            cannot be decoded: text that is not valid JSON, or a
            non-string such as None.
        """
        try:
            return json.loads(self.arg_value)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError (malformed JSON text).
            # TypeError covers a NULL/non-string value, which the unmanaged
            # table cannot rule out. Without it, one bad row would break
            # every caller that builds a dict of all args.
            return self.arg_value

    def set_value(self, value):
        """Store ``value`` in ``arg_value`` as a JSON string (does not save the row)."""
        self.arg_value = json.dumps(value)