from datetime import datetime
from typing import List, Dict, Any, Optional, Union, Tuple
from pydantic import BaseModel, Field, field_validator
from enum import Enum
import uuid

from fortress.common.constants import SPLIT_DATABASE, SPLIT_BENCHMARK

class DecisionLabel(str, Enum):
    """Enumeration for decision labels.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``DecisionLabel.SAFE == "SAFE"``).
    """
    SAFE = "SAFE"
    UNSAFE = "UNSAFE"
    AMBIGUOUS = "AMBIGUOUS"
    # NOTE(review): presumably used when detection itself failed — confirm with the producers of this value.
    ERROR = "ERROR" 

class InputPromptRecord(BaseModel):
    """A prompt together with its binary label, split name and source file.

    ``label`` and ``split`` are validated on construction; ``prompt_id`` is
    generated with ``uuid.uuid4`` when the caller does not supply one.
    """
    original_prompt: str
    # 0 (safe) or 1 (unsafe); enforced by ``label_must_be_0_or_1``.
    label: int
    # Must equal SPLIT_DATABASE or SPLIT_BENCHMARK; enforced by ``split_must_be_valid``.
    split: str
    source_file: str
    prompt_category: Optional[str] = None
    prompt_style: Optional[str] = None
    prompt_id: uuid.UUID = Field(default_factory=uuid.uuid4)

    # Validators are declared as explicit classmethods (decorator order:
    # ``@field_validator`` outermost), as the Pydantic v2 documentation shows,
    # so that type checkers and linters accept the ``cls`` first argument.
    @field_validator('label')
    @classmethod
    def label_must_be_0_or_1(cls, v: int) -> int:
        """Return ``v`` unchanged if it is 0 or 1.

        Raises:
            ValueError: if ``v`` is any other value.
        """
        if v not in (0, 1):
            raise ValueError('label must be 0 (safe) or 1 (unsafe)')
        return v

    @field_validator('split')
    @classmethod
    def split_must_be_valid(cls, v: str) -> str:
        """Return ``v`` unchanged if it names one of the two known splits.

        Raises:
            ValueError: if ``v`` is neither SPLIT_DATABASE nor SPLIT_BENCHMARK.
        """
        if v not in (SPLIT_DATABASE, SPLIT_BENCHMARK):
            raise ValueError(f'split must be "{SPLIT_DATABASE}" or "{SPLIT_BENCHMARK}"')
        return v

class NLPFeatures(BaseModel):
    """Optional NLP-derived features; every field defaults to ``None``.

    Serves as a base class for ``DatabasePromptRecord`` and ``QueryFeatures``.
    """
    # Mapping of score name -> float; the set of keys is not defined here.
    sentiment_scores: Optional[Dict[str, float]] = None
    # Mapping of statistic name -> int or float.
    # NOTE(review): presumably character-level statistics of the prompt text — confirm against the producer.
    char_level_stats: Optional[Dict[str, Union[float, int]]] = None
    # NOTE(review): presumably a language code or name; the format is not fixed here.
    dominant_language: Optional[str] = None
    # NOTE(review): presumably a language-model perplexity for the prompt — confirm model and scale.
    perplexity: Optional[float] = None

class DatabasePromptRecord(InputPromptRecord, NLPFeatures):
    """Prompt record combining ``InputPromptRecord`` and ``NLPFeatures`` fields, plus an optional embedding."""
    # Vector of floats; its length is not constrained by this model.
    embedding: Optional[List[float]] = None

class QueryFeatures(NLPFeatures):
    """``NLPFeatures`` extended with optional category and adversarial-probability fields.

    Every field added here defaults to ``None``.
    """
    prompt_category: Optional[str] = None
    # List of 2-tuples whose second element is a float.
    # NOTE(review): presumably (category, weight) pairs; the first element is typed ``Any`` — confirm.
    prompt_categories_with_weights: Optional[List[Tuple[Any, float]]] = None
    # NOTE(review): presumably one value per token — confirm alignment with the tokenizer used.
    token_source_log_probabilities: Optional[List[float]] = None
    # NOTE(review): presumably one value per token — confirm.
    token_adversarial_probabilities: Optional[List[float]] = None
    # Single float for the whole input; its range is not validated here.
    sentence_adversarial_probability: Optional[float] = None

class DetectionResult(BaseModel):
    """Result of a detection step: a decision, a confidence and an explanation.

    ``decision``, ``confidence`` and ``explanation`` are required; all other
    fields are optional and default to ``None``.
    """
    decision: DecisionLabel
    # Plain float; no range check is applied by this model.
    confidence: float
    explanation: str
    # Free-form extra data; schema is not defined here.
    details: Optional[Dict[str, Any]] = None
    perplexity_analysis_details: Optional[str] = None
    sentence_adversarial_probability: Optional[float] = None
    # NOTE(review): presumably the label a perplexity-based check would assign on its own — confirm.
    predicted_label_by_perplexity: Optional[DecisionLabel] = None
    confidence_from_perplexity: Optional[float] = None

class FinalDetectionOutput(BaseModel):
    """Final detection output for one query text.

    ``query_text``, ``final_decision``, ``overall_confidence`` and
    ``is_ambiguous`` are required; all other fields default to ``None``.
    """
    query_text: str 
    final_decision: DecisionLabel 
    # Plain float; no range check is applied by this model.
    overall_confidence: float 
    # Stored independently of ``final_decision``; this model does not tie it
    # to ``DecisionLabel.AMBIGUOUS``.
    is_ambiguous: bool 
    justification: Optional[str] = None 
    # Free-form mapping; schema is not defined here.
    detection_stages_summary: Optional[Dict[str, Any]] = None 
    error_info: Optional[str] = None 
    query_features: Optional[QueryFeatures] = None
    # List of free-form dicts; the per-item schema is not defined here.
    primary_detector_top_k_results: Optional[List[Dict[str, Any]]] = None 
    primary_ensemble_strategy_used: Optional[str] = None
    primary_ensemble_confidence: Optional[float] = None

class BenchmarkSingleResult(BaseModel):
    """Benchmark outcome for a single prompt: true vs. predicted label plus diagnostics."""
    # A string here, whereas ``InputPromptRecord.prompt_id`` is a ``uuid.UUID``.
    prompt_id: str
    original_prompt: str
    # NOTE(review): presumably 0/1 like ``InputPromptRecord.label``; not validated in this model.
    true_label: int
    predicted_label: int
    confidence: float
    # List of free-form dicts; the per-item schema is not defined here.
    primary_detector_top_k_results: Optional[List[Dict[str, Any]]] = None
    secondary_analyzer_output: Optional[Dict[str, Any]] = None
    final_decision_rationale: Optional[str] = None
    # Required field even though it is declared after optional ones.
    execution_time_ms: float
    sentence_adversarial_probability_calc: Optional[float] = None

class BenchmarkMetrics(BaseModel):
    """Aggregate classification metrics for a benchmark run.

    All fields are required except ``roc_auc_unsafe``. Values are stored as
    given; this model computes nothing and checks no ranges.
    """
    accuracy: float
    precision_unsafe: float
    recall_unsafe: float
    f1_unsafe: float
    # NOTE(review): presumably the false-positive rate — confirm which class counts as positive.
    fpr: float
    # NOTE(review): presumably the false-negative rate — confirm.
    fnr: float
    # Two-level mapping str -> str -> int; the key names are not defined here.
    confusion_matrix: Dict[str, Dict[str, int]]
    roc_auc_unsafe: Optional[float] = None

def _naive_utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same kind of value as ``datetime.utcnow()`` (UTC wall clock,
    ``tzinfo=None``) without calling that function, which is deprecated since
    Python 3.12. The result is kept naive so existing consumers of
    ``BenchmarkSuiteResults.timestamp`` see the same shape as before.
    """
    # Local import: only ``datetime`` is imported at file level.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class BenchmarkSuiteResults(BaseModel):
    """Results of one benchmark suite run: per-prompt results plus overall metrics.

    ``timestamp`` defaults to the current UTC time (naive ``datetime``) when
    the instance is created; ``config_snapshot`` is optional.
    """
    suite_name: str
    timestamp: datetime = Field(default_factory=_naive_utc_now)
    # Stored as given; not checked against ``len(individual_results)``.
    total_prompts_evaluated: int
    individual_results: List[BenchmarkSingleResult]
    overall_metrics: BenchmarkMetrics
    # Free-form mapping; schema is not defined here.
    config_snapshot: Optional[Dict[str, Any]] = None
