"""Ground Truth Manager for handling ground truth data persistence.

This manager handles loading, saving, and managing ground truth data
in the standard format compatible with the RAGAS evaluation pipeline.
"""

import json
import logging
from pathlib import Path
from typing import Dict, List, Optional, Any
from datetime import datetime

# Import models from parent module
import sys
sys.path.append(str(Path(__file__).parent.parent))
from models import GroundTruthData

logger = logging.getLogger(__name__)


class GroundTruthManager:
    """Manages ground truth data persistence and format compatibility."""
    
    def __init__(self, ground_truth_dir: str = "data/evaluation/ground_truth"):
        """Initialize the ground truth manager.
        
        Args:
            ground_truth_dir (str): Directory containing ground truth files.
        """
        self.ground_truth_dir = Path(ground_truth_dir)
        self.ground_truth_dir.mkdir(parents=True, exist_ok=True)
        
        # Ground truth data cache
        self._gt_cache: Dict[str, GroundTruthData] = {}
    
    def load_ground_truth(self, video_id: str) -> Optional[GroundTruthData]:
        """Load ground truth data for a video clip.
        
        Args:
            video_id (str): Video clip identifier.
            
        Returns:
            Optional[GroundTruthData]: Ground truth data if exists, None otherwise.
        """
        # Check cache first
        if video_id in self._gt_cache:
            return self._gt_cache[video_id]
        
        gt_file = self.ground_truth_dir / f"{video_id}.json"
        
        if not gt_file.exists():
            logger.debug(f"No ground truth file found for {video_id}")
            return None
        
        try:
            with open(gt_file, 'r', encoding='utf-8') as f:
                gt_data_raw = json.load(f)
            
            gt_data = GroundTruthData(
                video_id=gt_data_raw.get('video_id', video_id),
                video_path=gt_data_raw.get('video_path', f"data/dashcam/{video_id}.mp4"),
                ground_truth=gt_data_raw.get('ground_truth', {})
            )
            
            # Cache the result
            self._gt_cache[video_id] = gt_data
            
            return gt_data
            
        except Exception as e:
            logger.error(f"Failed to load ground truth for {video_id}: {e}")
            return None
    
    def save_ground_truth(self, video_id: str, video_path: str, 
                         ground_truth_data: Dict[str, Any]) -> bool:
        """Save ground truth data for a video clip.
        
        Args:
            video_id (str): Video clip identifier.
            video_path (str): Path to the video file.
            ground_truth_data (Dict[str, Any]): Ground truth data to save.
            
        Returns:
            bool: True if saved successfully, False otherwise.
        """
        try:
            gt_data = GroundTruthData(
                video_id=video_id,
                video_path=video_path,
                ground_truth=ground_truth_data
            )
            
            gt_file = self.ground_truth_dir / f"{video_id}.json"
            
            # Convert to standard format
            output_data = {
                "video_id": gt_data.video_id,
                "video_path": gt_data.video_path,
                "ground_truth": gt_data.ground_truth
            }
            
            # Add evaluation criteria for RAGAS compatibility
            if "evaluation_criteria" not in output_data:
                output_data["evaluation_criteria"] = {
                    "annotation_quality": "How accurate is the scene description?",
                    "scene_extraction": "Are all important scenes identified?", 
                    "violation_detection": "Are traffic violations correctly identified?",
                    "accident_assessment": "Are accident risks properly evaluated?",
                    "safety_scoring": "Is the safety score appropriate?",
                    "advice_relevance": "Are recommendations actionable and relevant?"
                }
            
            # Save to file
            with open(gt_file, 'w', encoding='utf-8') as f:
                json.dump(output_data, f, indent=2, ensure_ascii=False)
            
            # Update cache
            self._gt_cache[video_id] = gt_data
            
            logger.info(f"Saved ground truth for {video_id}")
            return True
            
        except Exception as e:
            logger.error(f"Failed to save ground truth for {video_id}: {e}")
            return False
    
    def update_ground_truth(self, video_id: str, step: int, step_data: Dict[str, Any]) -> bool:
        """Update ground truth data for a specific step.
        
        Args:
            video_id (str): Video clip identifier.
            step (int): Step number (1-5).
            step_data (Dict[str, Any]): Data to update for this step.
            
        Returns:
            bool: True if updated successfully, False otherwise.
        """
        # Load existing ground truth or create new
        gt_data = self.load_ground_truth(video_id)
        
        if gt_data is None:
            # Create new ground truth structure
            gt_data = GroundTruthData(
                video_id=video_id,
                video_path=f"data/dashcam/{video_id}.mp4",
                ground_truth={}
            )
        
        # Update specific step data
        if step == 1:  # Annotation
            if 'annotation' in step_data:
                gt_data.ground_truth['annotation'] = step_data['annotation']
        
        elif step == 2:  # Scenes
            if 'scenes' in step_data:
                gt_data.ground_truth['scenes'] = step_data['scenes']
        
        elif step == 3:  # Violations
            if 'violations' in step_data:
                gt_data.ground_truth['violations'] = step_data['violations']
        
        elif step == 4:  # Accidents
            if 'accidents' in step_data:
                gt_data.ground_truth['accidents'] = step_data['accidents']
        
        elif step == 5:  # Assessment
            if 'assessment' in step_data:
                gt_data.ground_truth['assessment'] = step_data['assessment']
        
        # Save updated data
        return self.save_ground_truth(video_id, gt_data.video_path, gt_data.ground_truth)
    
    def get_step_data(self, video_id: str, step: int) -> Optional[Any]:
        """Get data for a specific step.
        
        Args:
            video_id (str): Video clip identifier.
            step (int): Step number (1-5).
            
        Returns:
            Optional[Any]: Step data if exists, None otherwise.
        """
        gt_data = self.load_ground_truth(video_id)
        
        if gt_data is None:
            return None
        
        step_keys = {
            1: 'annotation',
            2: 'scenes', 
            3: 'violations',
            4: 'accidents',
            5: 'assessment'
        }
        
        step_key = step_keys.get(step)
        if step_key:
            return gt_data.ground_truth.get(step_key)
        
        return None
    
    def is_step_populated(self, video_id: str, step: int) -> bool:
        """Check if a step has been populated with data.
        
        Args:
            video_id (str): Video clip identifier.
            step (int): Step number (1-5).
            
        Returns:
            bool: True if step is populated with real data.
        """
        step_data = self.get_step_data(video_id, step)
        
        if step_data is None:
            return False
        
        # Check for placeholder content
        if step == 1:  # Annotation
            return (isinstance(step_data, str) and 
                   step_data not in ["MANUAL_ANNOTATION_REQUIRED", ""] and
                   len(step_data.strip()) > 0)
        
        elif step == 2:  # Scenes
            return (isinstance(step_data, list) and 
                   len(step_data) > 0 and
                   not any("Example:" in scene for scene in step_data))
        
        elif step in [3, 4]:  # Violations, Accidents
            if not isinstance(step_data, list) or len(step_data) == 0:
                return False
            
            # Check for non-placeholder content
            for item in step_data:
                if isinstance(item, dict):
                    status = item.get('violation' if step == 3 else 'accident', '')
                    reason = item.get('reason' if step == 3 else 'consequence', '')
                    
                    if (status in ['found/not_found', ''] or 
                        reason in ['Specific violation description', 'Potential accident description', '']):
                        return False
            return True
        
        elif step == 5:  # Assessment
            if not isinstance(step_data, dict):
                return False
            
            overall_eval = step_data.get('overall_evaluation', '')
            safety_score = step_data.get('safety_score', 0)
            
            placeholder_evals = [
                "Manual evaluation of driving behavior",
                "Manual evaluation required", 
                "Assessment generation failed - manual evaluation required"
            ]
            
            return (overall_eval not in placeholder_evals and
                   len(overall_eval.strip()) > 0 and
                   isinstance(safety_score, int) and
                   1 <= safety_score <= 10)
        
        return False
    
    def get_completion_status(self, video_id: str) -> Dict[str, Any]:
        """Get completion status for all steps of a clip.
        
        Args:
            video_id (str): Video clip identifier.
            
        Returns:
            Dict[str, Any]: Completion status for each step.
        """
        status = {}
        
        for step in range(1, 6):
            step_names = {
                1: "annotation",
                2: "scenes",
                3: "violations", 
                4: "accidents",
                5: "assessment"
            }
            
            step_name = step_names[step]
            is_populated = self.is_step_populated(video_id, step)
            step_data = self.get_step_data(video_id, step)
            
            status[step_name] = {
                'step_number': step,
                'populated': is_populated,
                'has_data': step_data is not None,
                'data_type': type(step_data).__name__ if step_data is not None else None
            }
        
        # Overall completion
        completed_steps = sum(1 for s in status.values() if s['populated'])
        status['overall'] = {
            'completed_steps': completed_steps,
            'total_steps': 5,
            'completion_percentage': (completed_steps / 5) * 100,
            'is_complete': completed_steps == 5
        }
        
        return status
    
    def export_for_ragas(self, video_id: str, export_dir: Optional[str] = None) -> Optional[str]:
        """Export ground truth data in RAGAS-compatible format.
        
        Args:
            video_id (str): Video clip identifier.
            export_dir (str, optional): Directory to export to. Defaults to ground_truth_dir.
            
        Returns:
            Optional[str]: Path to exported file if successful, None otherwise.
        """
        gt_data = self.load_ground_truth(video_id)
        
        if gt_data is None:
            logger.error(f"No ground truth data found for {video_id}")
            return None
        
        if export_dir:
            export_path = Path(export_dir)
            export_path.mkdir(parents=True, exist_ok=True)
            export_file = export_path / f"{video_id}.json"
        else:
            export_file = self.ground_truth_dir / f"{video_id}.json"
        
        # Standard RAGAS format
        ragas_data = {
            "video_id": gt_data.video_id,
            "video_path": gt_data.video_path,
            "ground_truth": gt_data.ground_truth,
            "evaluation_criteria": {
                "annotation_quality": "How accurate is the scene description?",
                "scene_extraction": "Are all important scenes identified?",
                "violation_detection": "Are traffic violations correctly identified?",
                "accident_assessment": "Are accident risks properly evaluated?",
                "safety_scoring": "Is the safety score appropriate?",
                "advice_relevance": "Are recommendations actionable and relevant?"
            },
            "exported_at": datetime.now().isoformat(),
            "format_version": "1.0"
        }
        
        try:
            with open(export_file, 'w', encoding='utf-8') as f:
                json.dump(ragas_data, f, indent=2, ensure_ascii=False)
            
            logger.info(f"Exported RAGAS-compatible ground truth for {video_id}")
            return str(export_file)
            
        except Exception as e:
            logger.error(f"Failed to export ground truth for {video_id}: {e}")
            return None
    
    def list_all_clips(self) -> List[str]:
        """List all video IDs that have ground truth files.
        
        Returns:
            List[str]: List of video IDs with ground truth data.
        """
        gt_files = list(self.ground_truth_dir.glob("*.json"))
        return [f.stem for f in gt_files]
    
    def get_statistics(self) -> Dict[str, Any]:
        """Get statistics about ground truth data.
        
        Returns:
            Dict[str, Any]: Statistics about ground truth completion.
        """
        all_clips = self.list_all_clips()
        
        if not all_clips:
            return {
                'total_clips': 0,
                'completion_stats': {},
                'step_completion': {}
            }
        
        step_completion = {i: 0 for i in range(1, 6)}
        overall_completion = 0
        
        for video_id in all_clips:
            status = self.get_completion_status(video_id)
            
            if status['overall']['is_complete']:
                overall_completion += 1
            
            for step in range(1, 6):
                step_names = {1: "annotation", 2: "scenes", 3: "violations", 4: "accidents", 5: "assessment"}
                step_name = step_names[step]
                
                if status[step_name]['populated']:
                    step_completion[step] += 1
        
        return {
            'total_clips': len(all_clips),
            'completion_stats': {
                'fully_completed': overall_completion,
                'completion_percentage': (overall_completion / len(all_clips)) * 100
            },
            'step_completion': {
                f'step_{step}_{["annotation", "scenes", "violations", "accidents", "assessment"][step-1]}': {
                    'completed': count,
                    'percentage': (count / len(all_clips)) * 100
                }
                for step, count in step_completion.items()
            }
        }