"""Utilities for component evaluation including caching and data management."""

import json
import hashlib
import time
from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime

from .config import EvaluationConfig, ModelConfigLoader


class EvaluationCache:
    """Hierarchical file-based cache for evaluation results.

    Every result lives in its own JSON file at
    ``<base_dir>/<component>/<model_dir>/<video_id>.json`` where ``base_dir``
    is either the traditional or the LLM-judge cache directory and
    ``model_dir`` is the model identifier reduced by ``_sanitize_model_name``.
    """

    def __init__(self):
        """Read both cache roots from ``EvaluationConfig`` and create them."""
        self.traditional_cache_dir = EvaluationConfig.TRADITIONAL_CACHE_DIR
        self.llm_judge_cache_dir = EvaluationConfig.LLM_JUDGE_CACHE_DIR

        # Ensure base cache directories exist
        self.traditional_cache_dir.mkdir(parents=True, exist_ok=True)
        self.llm_judge_cache_dir.mkdir(parents=True, exist_ok=True)

    def _base_dir_for(self, eval_type: str) -> Path:
        """Return the cache root for an evaluation type.

        Any value other than "traditional" selects the LLM-judge directory.
        """
        if eval_type == "traditional":
            return self.traditional_cache_dir
        return self.llm_judge_cache_dir

    def _sanitize_model_name(self, model: str) -> str:
        """Convert a model identifier to its cache directory name.

        Args:
            model: Model identifier (e.g., "openai:gpt-4.1",
                "gateway:anthropic/claude-opus-4-1-20250805")

        Returns:
            The part after the last "/" when the identifier contains one,
            otherwise the part after the last ":" (or the identifier itself
            when it contains neither), e.g. "gpt-4.1",
            "claude-opus-4-1-20250805".
        """
        if "/" in model:
            # provider/model form: keep only the model part
            return model.split("/")[-1]
        # platform:model form; without a colon split() yields the whole string
        return model.split(":")[-1]

    def get_cache_path(self, component: str, model: str, video_id: str, eval_type: str = "traditional") -> Path:
        """Get file path for cached evaluation result.

        Args:
            component: Component name (e.g., "violation", "accident")
            model: Model identifier (e.g., "openai:gpt-4.1")
            video_id: Video identifier (e.g., "0000_cut_off_accident")
            eval_type: Type of evaluation ("traditional" or "llm_judge")

        Returns:
            Path to cache file, e.g.
            <traditional_cache_dir>/violation/gpt-4.1/0000_cut_off_accident.json
        """
        model_dir_name = self._sanitize_model_name(model)
        return self._base_dir_for(eval_type) / component / model_dir_name / f"{video_id}.json"

    def get_cached_result(self, component: str, model: str, video_id: str,
                         eval_type: str = "traditional") -> Optional[Dict[str, Any]]:
        """Load a cached entry with a single file read.

        Args:
            component: Component name
            model: Model identifier
            video_id: Video identifier
            eval_type: Type of evaluation ("traditional" or "llm_judge")

        Returns:
            The parsed JSON content of the cache file (as written by
            ``cache_result`` this is the metadata wrapper whose "result" key
            holds the evaluation result), or None when the file is missing,
            unreadable, or not valid JSON.
        """
        cache_path = self.get_cache_path(component, model, video_id, eval_type)

        try:
            with open(cache_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # Plain cache miss: nothing to report
            return None
        except (ValueError, OSError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError, so a
            # corrupt or truncated file is treated as a miss instead of crashing.
            print(f"⚠️  Invalid cache file {cache_path}: {e}")
            return None

    def cache_result(self, component: str, model: str, video_id: str, result: Dict[str, Any],
                    eval_type: str = "traditional"):
        """Write an evaluation result to its cache file.

        The entry is written to a temporary sibling file and then renamed over
        the target, so a failed write never replaces an existing good entry
        with a truncated one. Write failures are reported and swallowed.

        Args:
            component: Component name
            model: Model identifier
            video_id: Video identifier
            result: Evaluation result to cache
            eval_type: Type of evaluation ("traditional" or "llm_judge")
        """
        cache_path = self.get_cache_path(component, model, video_id, eval_type)

        # Create directory structure if it doesn't exist
        cache_path.parent.mkdir(parents=True, exist_ok=True)

        # Wrap the result with the metadata needed to identify the entry
        cache_entry = {
            "component": component,
            "model": model,
            "video_id": video_id,
            "result": result,
            "cached_at": time.time(),
            "eval_type": eval_type
        }

        # The ".tmp" suffix keeps the scratch file out of "*.json" globs
        tmp_path = cache_path.with_name(f"{cache_path.name}.tmp")
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(cache_entry, f, indent=2, ensure_ascii=False, default=str)
            tmp_path.replace(cache_path)
        except Exception as e:
            print(f"⚠️  Failed to save cache result to {cache_path}: {e}")
            try:
                tmp_path.unlink()
            except OSError:
                # Scratch file was never created or cannot be removed;
                # the failure itself has already been reported above.
                pass

    def clear_cache(self, component: Optional[str] = None, model: Optional[str] = None,
                   eval_type: Optional[str] = None):
        """Clear cached results with selective filtering.

        Args:
            component: Optional component filter (all components if None)
            model: Optional model filter (all models if None). When given
                without a component, that model's directory is removed from
                every component.
            eval_type: Optional evaluation type filter ("traditional",
                "llm_judge", or None for both)
        """
        import shutil

        eval_types = ["traditional", "llm_judge"] if eval_type is None else [eval_type]
        model_dir_name = None if model is None else self._sanitize_model_name(model)

        for e_type in eval_types:
            base_dir = self._base_dir_for(e_type)

            if component is None and model_dir_name is None:
                # No filters: wipe the whole evaluation type and recreate its root
                if base_dir.exists():
                    shutil.rmtree(base_dir)
                    base_dir.mkdir(parents=True, exist_ok=True)
                continue

            if model_dir_name is None:
                # Component filter only: drop the entire component directory
                component_dir = base_dir / component
                if component_dir.exists():
                    shutil.rmtree(component_dir)
                continue

            # Model filter: restrict to one component, or apply to all of them
            if component is not None:
                component_dirs = [base_dir / component]
            elif base_dir.exists():
                component_dirs = [d for d in base_dir.iterdir() if d.is_dir()]
            else:
                component_dirs = []

            for component_dir in component_dirs:
                model_dir = component_dir / model_dir_name
                if model_dir.exists():
                    shutil.rmtree(model_dir)

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics for monitoring and debugging.

        Returns:
            Dictionary keyed by evaluation type ("traditional", "llm_judge").
            Each value holds "components" (component name -> model directory
            name -> number of ``*.json`` files) and "total_files".
        """
        stats = {
            "traditional": {"components": {}, "total_files": 0},
            "llm_judge": {"components": {}, "total_files": 0}
        }
        roots = [("traditional", self.traditional_cache_dir),
                 ("llm_judge", self.llm_judge_cache_dir)]

        for eval_type, base_dir in roots:
            if not base_dir.exists():
                continue

            type_stats = stats[eval_type]
            for component_dir in base_dir.iterdir():
                if not component_dir.is_dir():
                    continue

                per_model = {}
                type_stats["components"][component_dir.name] = per_model

                for model_dir in component_dir.iterdir():
                    if not model_dir.is_dir():
                        continue
                    file_count = sum(1 for _ in model_dir.glob("*.json"))
                    per_model[model_dir.name] = file_count
                    type_stats["total_files"] += file_count

        return stats


class EvaluationStateManager:
    """Manages evaluation state for resumption.

    State is a JSON document stored at ``EvaluationConfig.STATE_FILE``.
    Completion of a component/model pair is derived from the evaluation
    cache rather than from the state file.
    """

    def __init__(self):
        """Resolve the state file location and make sure its directory exists."""
        self.state_file = EvaluationConfig.STATE_FILE
        self.state_file.parent.mkdir(parents=True, exist_ok=True)
        # Loaded lazily by get_models_config()
        self.models_config = None

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same format as ``datetime.utcnow().isoformat()`` without
        calling the deprecated ``utcnow``.
        """
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def load_state(self) -> Dict[str, Any]:
        """Load evaluation state from disk.

        Returns:
            The stored state dictionary. When the state file is missing,
            unreadable, not valid JSON, or does not contain a JSON object, a
            fresh state with "created_at" and an empty "components" mapping
            is returned instead.
        """
        state = None
        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                state = json.load(f)
        except FileNotFoundError:
            # First run: no state has been saved yet
            pass
        except (ValueError, OSError) as e:
            # ValueError covers JSONDecodeError and UnicodeDecodeError
            print(f"⚠️  Ignoring unreadable state file {self.state_file}: {e}")
        else:
            if not isinstance(state, dict):
                print(f"⚠️  Ignoring state file {self.state_file}: expected a JSON object")
                state = None

        if state is not None:
            return state

        return {
            "created_at": self._utc_timestamp(),
            "components": {}
        }

    def save_state(self, state: Dict[str, Any]):
        """Save evaluation state to disk.

        Sets ``state["updated_at"]`` on the passed dictionary, then writes it
        to a temporary sibling file and renames that over the state file, so
        a failed write leaves the previously saved state intact. Failures are
        reported and swallowed.

        Args:
            state: State dictionary to persist (modified in place)
        """
        state["updated_at"] = self._utc_timestamp()

        tmp_path = self.state_file.with_name(f"{self.state_file.name}.tmp")
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(state, f, indent=2, ensure_ascii=False)
            tmp_path.replace(self.state_file)
        except Exception as e:
            print(f"⚠️  Failed to save state: {e}")
            try:
                tmp_path.unlink()
            except OSError:
                # Scratch file was never created or cannot be removed;
                # the failure itself has already been reported above.
                pass

    def get_models_config(self) -> Dict[str, List[str]]:
        """Get current models configuration.

        Returns:
            The result of ``ModelConfigLoader.get_all_models()``, fetched on
            first use and reused on later calls.
        """
        if self.models_config is None:
            self.models_config = ModelConfigLoader.get_all_models()
        return self.models_config

    def is_evaluation_complete(self, component: str, model: str) -> bool:
        """Check if evaluation is complete for a component-model pair.

        Args:
            component: Component name
            model: Model identifier

        Returns:
            True if every ground truth video has both a traditional and an
            LLM-judge result in the cache; False otherwise, including when
            there are no ground truth files.
        """
        # One video per ground truth file; the file stem is the video id
        video_ids = [gt_file.stem for gt_file in EvaluationConfig.GROUND_TRUTH_DIR.glob("*.json")]

        if not video_ids:
            return False

        # The cache resolves the model identifier to its directory name itself
        cache = EvaluationCache()

        return all(
            cache.get_cached_result(component, model, video_id, eval_type) is not None
            for video_id in video_ids
            for eval_type in ("traditional", "llm_judge")
        )

    def get_progress_summary(self) -> Dict[str, Any]:
        """Get evaluation progress summary.

        Returns:
            Mapping of component name to a dictionary with 'total_models',
            'completed_models', 'pending_models' and 'completion_percentage'
            (0 when the component has no models).
        """
        summary = {}

        for component, models in self.get_models_config().items():
            completed = []
            pending = []

            for model in models:
                bucket = completed if self.is_evaluation_complete(component, model) else pending
                bucket.append(model)

            summary[component] = {
                'total_models': len(models),
                'completed_models': completed,
                'pending_models': pending,
                'completion_percentage': len(completed) / len(models) * 100 if models else 0
            }

        return summary

    def get_completed_models(self, component: str) -> List[str]:
        """Get list of completed models for a component.

        Args:
            component: Component name

        Returns:
            List of completed model identifiers (empty when the component is
            not in the models configuration)
        """
        models = self.get_models_config().get(component, [])
        return [model for model in models if self.is_evaluation_complete(component, model)]


class DataLoader:
    """Loads ground truth and system output data.

    Directories are read from ``EvaluationConfig``. Files and directories are
    visited in sorted order so results do not depend on filesystem ordering.
    """

    @staticmethod
    def _model_dir_name(model: str) -> str:
        """Convert a model identifier to its system-output directory name.

        Args:
            model: Model identifier (e.g., "openai:gpt-4o",
                "gateway:anthropic/claude-opus-4-1-20250805")

        Returns:
            The part after the last "/" when the identifier contains one,
            otherwise the part after the last ":" (or the identifier itself
            when it contains neither).
        """
        if "/" in model:
            # provider/model form: keep only the model part
            return model.split("/")[-1]
        # platform:model form; without a colon split() yields the whole string
        return model.split(":")[-1]

    @staticmethod
    def load_ground_truth_files() -> List[Tuple[str, Dict[str, Any]]]:
        """Load all ground truth files.

        Files that cannot be read or parsed are reported and skipped.

        Returns:
            List of (video_id, ground_truth_data) tuples, ordered by file
            path; the video id is the file name without its extension
        """
        ground_truth_dir = EvaluationConfig.GROUND_TRUTH_DIR
        ground_truth_files = []

        for gt_file in sorted(ground_truth_dir.glob("*.json")):
            try:
                with open(gt_file, 'r', encoding='utf-8') as f:
                    gt_data = json.load(f)
            except Exception as e:
                print(f"⚠️  Failed to load {gt_file}: {e}")
            else:
                ground_truth_files.append((gt_file.stem, gt_data))

        return ground_truth_files

    @staticmethod
    def load_system_output(component: str, model: str, video_id: str) -> Optional[Dict[str, Any]]:
        """Load system output for a specific component, model, and video.

        Args:
            component: Component name
            model: Model identifier
            video_id: Video identifier

        Returns:
            System output data if available, None when the file is missing
            or cannot be loaded
        """
        system_output_file = (EvaluationConfig.SYSTEM_OUTPUTS_DIR /
                             component / DataLoader._model_dir_name(model) / f"{video_id}.json")

        if not system_output_file.exists():
            return None

        try:
            with open(system_output_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            print(f"⚠️  Failed to load system output {system_output_file}: {e}")
            return None

    @staticmethod
    def get_available_system_outputs(component: str) -> Dict[str, List[str]]:
        """Get available system outputs by model.

        Args:
            component: Component name

        Returns:
            Dictionary mapping model directory names to the sorted list of
            video IDs that have a ``*.json`` output; model directories with
            no outputs are omitted
        """
        component_dir = EvaluationConfig.SYSTEM_OUTPUTS_DIR / component
        available_outputs = {}

        if not component_dir.exists():
            return available_outputs

        for model_dir in sorted(component_dir.iterdir()):
            if not model_dir.is_dir():
                continue
            video_ids = sorted(output_file.stem for output_file in model_dir.glob("*.json"))
            if video_ids:
                available_outputs[model_dir.name] = video_ids

        return available_outputs