"""Video discovery and metadata integration (read-only)."""

import json
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import logging
from datetime import datetime
import random
from collections import defaultdict

from evaluation.make_dataset.s2_video_reviewer.models import VideoMetadata, VideoReviewStatus, VideoQueue

logger = logging.getLogger(__name__)


class VideoManager:
    """Manages video discovery and metadata integration (read-only access)."""
    
    def __init__(self, project_root: Optional[Path] = None):
        """Set up the directory layout used for discovery and review progress.

        Args:
            project_root: Root directory of the project. When omitted, the
                root is derived from the location of this module on disk.
        """
        # This module lives in evaluation/make_dataset/s2_video_reviewer/, so
        # the project root is four directory levels above the file.
        root = project_root if project_root is not None else Path(__file__).parent.parent.parent.parent
        self.project_root = Path(root)

        data_prepare_dir = self.project_root / "data" / "data_prepare"
        self.videos_dir = data_prepare_dir / "raw_videos"
        self.checkpoint_file = self.videos_dir / ".download_checkpoint.json"
        self.review_progress_dir = data_prepare_dir / "review_progress"

        # The review progress directory is the one location this class
        # creates itself; the videos directory is only ever read.
        self.review_progress_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"VideoManager initialized with videos_dir: {self.videos_dir}")
    
    def load_existing_metadata(self) -> Dict[str, VideoMetadata]:
        """Load video metadata from the existing download checkpoint (read-only).

        The checkpoint is read as UTF-8 JSON and each entry of its
        ``video_metadata`` mapping is converted to a ``VideoMetadata`` object.
        Entries that cannot be converted are logged and skipped. A missing or
        unreadable checkpoint produces an empty dictionary instead of an
        exception, so callers can proceed without metadata.

        Returns:
            Dictionary mapping video_id to VideoMetadata
        """
        if not self.checkpoint_file.exists():
            logger.warning(f"Checkpoint file not found: {self.checkpoint_file}")
            return {}

        try:
            # JSON is UTF-8 by specification. Naming the encoding keeps
            # non-ASCII titles readable on platforms whose default text
            # encoding differs, and matches how the review marking files are
            # opened elsewhere in this class.
            with open(self.checkpoint_file, 'r', encoding='utf-8') as f:
                checkpoint_data = json.load(f)

            video_metadata = checkpoint_data.get('video_metadata', {})
            logger.info(f"Loaded metadata for {len(video_metadata)} videos from checkpoint")

            metadata_dict = {}
            for key, meta in video_metadata.items():
                try:
                    # Keys have the form "category:video_id"; a key without a
                    # colon is used as the video ID unchanged.
                    video_id = key.split(':', 1)[-1]
                    metadata_dict[video_id] = VideoMetadata(**meta)
                except Exception as e:
                    # Best effort: one malformed entry must not discard the rest.
                    logger.warning(f"Failed to parse metadata for {key}: {e}")

            return metadata_dict

        except Exception as e:
            logger.error(f"Failed to load checkpoint file: {e}")
            return {}
    
    def discover_video_files(self) -> List[Tuple[Path, str]]:
        """Walk the videos directory and collect every video file (read-only).

        A file counts as a video when its suffix, compared case-insensitively,
        is one of .mp4, .mkv, .webm, .avi or .mov.

        Returns:
            List of tuples (video_path, video_id), in the order the directory
            walk yields them. An empty list if the videos directory is missing.
        """
        if not self.videos_dir.exists():
            logger.warning(f"Videos directory not found: {self.videos_dir}")
            return []

        recognised_suffixes = ('.mp4', '.mkv', '.webm', '.avi', '.mov')

        # File names follow "video_id_title.ext": everything before the first
        # underscore of the stem is the video ID, and a stem without an
        # underscore is the ID as a whole.
        # NOTE(review): an ID that itself contains an underscore would be cut
        # short here - confirm the naming scheme rules that out.
        video_files = [
            (entry, entry.stem.partition('_')[0])
            for entry in self.videos_dir.rglob('*')
            if entry.suffix.lower() in recognised_suffixes and entry.is_file()
        ]

        logger.info(f"Discovered {len(video_files)} video files")
        return video_files
    
    def build_video_queue(self) -> VideoQueue:
        """Build a video review queue from discovered videos and metadata.

        NOTE(review): a second ``build_video_queue`` (accepting
        ``include_completed``) is defined later in this class body. A later
        ``def`` with the same name rebinds the class attribute, so that later
        definition is the one callers get and this body is effectively
        unreachable. It performs no completion filtering and sets no review
        status.

        Returns:
            VideoQueue with all discoverable videos
        """
        # Load existing metadata
        metadata_dict = self.load_existing_metadata()
        
        # Discover video files
        video_files = self.discover_video_files()
        
        # Build video review status objects
        video_statuses = []
        
        for video_path, video_id in video_files:
            try:
                # Get metadata if available
                metadata = metadata_dict.get(video_id)
                
                # Determine video category from path: the parent folder name,
                # or "unknown" for files sitting directly in the videos directory
                category = video_path.parent.name if video_path.parent != self.videos_dir else "unknown"
                
                # Create VideoReviewStatus; without metadata the file stem
                # stands in for the title and the remaining fields get defaults
                status = VideoReviewStatus(
                    video_id=video_id,
                    video_path=str(video_path),
                    video_category=category,
                    video_title=metadata.title if metadata else video_path.stem,
                    video_duration=metadata.duration if metadata and metadata.duration else 0.0,
                    
                    # Copy metadata fields if available
                    relevance_score=metadata.relevance_score if metadata else 0,
                    safety_keywords=metadata.safety_keywords if metadata else [],
                    weather_mentioned=metadata.weather_mentioned if metadata else [],
                    time_of_day=metadata.time_of_day if metadata else []
                )
                
                video_statuses.append(status)
                
            except Exception as e:
                # A single video that cannot be processed is logged and skipped
                logger.warning(f"Failed to process video {video_path}: {e}")
                continue
        
        # Sort by priority (relevance score desc, then by category, then by video ID)
        video_statuses.sort(key=lambda v: (-v.relevance_score, v.video_category, v.video_id))
        
        # Create video queue
        queue = VideoQueue(videos=video_statuses)
        logger.info(f"Built video queue with {len(video_statuses)} videos")
        
        return queue

    def is_video_completed(self, video_id: str, clip_markings: Optional[Dict] = None) -> bool:
        """Check whether a video already has clip markings.

        A video counts as completed only when at least one clip marking is
        recorded for it. Videos reviewed with no clips, or skipped, are not
        recognised as completed by this check.

        Args:
            video_id: The video ID to check
            clip_markings: Optional mapping of video_id to its clip markings.
                Pass it when checking many videos to avoid re-reading the
                marking files on every call. When omitted, the markings are
                loaded from the review progress directory.

        Returns:
            True if at least one clip marking exists for the video
        """
        if clip_markings is None:
            # Reuse the shared loader so both code paths read the marking
            # files the same way.
            clip_markings = self._load_individual_video_markings()

        # An absent entry and an empty list both mean "no clips recorded".
        return bool(clip_markings.get(video_id))
    
    def build_video_queue(self, include_completed: bool = False) -> VideoQueue:
        """Assemble the review queue from video files, checkpoint metadata and saved markings.

        Args:
            include_completed: When False (the default), videos that already
                have clip markings are left out. When True they are kept and
                tagged with their review status and clip count.

        Returns:
            VideoQueue whose videos are ordered with unreviewed entries first,
            then by descending relevance score, category and video ID.
        """
        metadata_dict = self.load_existing_metadata()
        clip_markings = self._load_individual_video_markings()
        video_files = self.discover_video_files()

        video_statuses = []
        completed_count = 0

        for video_path, video_id in video_files:
            try:
                already_done = self.is_video_completed(video_id, clip_markings)
                if already_done and not include_completed:
                    completed_count += 1
                    logger.debug(f"Skipping completed video: {video_id}")
                    continue

                metadata = metadata_dict.get(video_id)

                # Files sitting directly in the videos directory have no
                # category folder of their own.
                if video_path.parent != self.videos_dir:
                    category = video_path.parent.name
                else:
                    category = "unknown"

                review_status, clips_marked = "not_reviewed", 0
                if already_done and video_id in clip_markings:
                    clips_marked = len(clip_markings[video_id])
                    review_status = "has_clips" if clips_marked > 0 else "no_clips"

                # Without metadata the file stem stands in for the title and
                # every other descriptive field falls back to an empty default.
                if metadata:
                    title = metadata.title
                    duration = metadata.duration or 0.0
                    relevance = metadata.relevance_score
                    keywords = metadata.safety_keywords
                    weather = metadata.weather_mentioned
                    daytime = metadata.time_of_day
                else:
                    title, duration, relevance = video_path.stem, 0.0, 0
                    keywords, weather, daytime = [], [], []

                video_statuses.append(VideoReviewStatus(
                    video_id=video_id,
                    video_path=str(video_path),
                    video_category=category,
                    video_title=title,
                    video_duration=duration,
                    review_status=review_status,
                    clips_marked=clips_marked,
                    relevance_score=relevance,
                    safety_keywords=keywords,
                    weather_mentioned=weather,
                    time_of_day=daytime,
                ))

            except Exception as e:
                logger.warning(f"Failed to process video {video_path}: {e}")
                continue

        def queue_priority(entry):
            # False sorts before True, so unreviewed videos lead the queue.
            return (
                entry.review_status != "not_reviewed",
                -entry.relevance_score,
                entry.video_category,
                entry.video_id,
            )

        video_statuses.sort(key=queue_priority)

        queue = VideoQueue(videos=video_statuses)

        if completed_count > 0:
            logger.info(f"Built video queue with {len(video_statuses)} videos (filtered out {completed_count} completed videos)")
        else:
            logger.info(f"Built video queue with {len(video_statuses)} videos")

        return queue
    
    def build_shuffled_video_queue(self, include_completed: bool = False) -> VideoQueue:
        """Build a video review queue with categories interleaved for variety.

        Unreviewed videos are grouped by category and emitted round-robin, so
        consecutive videos come from different categories where possible.
        The category order is shuffled with a private random generator seeded
        from the current calendar date. Rebuilding the queue from the same
        videos on the same day therefore gives the same order, and the
        process-wide ``random`` state is left untouched.

        Args:
            include_completed: If True, include videos that have already been reviewed

        Returns:
            VideoQueue with already-reviewed videos first (in standard queue
            order) followed by the interleaved unreviewed videos. Its
            ``first_unreviewed_index`` attribute marks where the unreviewed
            part starts. If there are no videos at all, the standard queue is
            returned unchanged and that attribute is not set.
        """
        standard_queue = self.build_video_queue(include_completed=include_completed)

        if not standard_queue.videos:
            return standard_queue

        # Split on review status, keeping the standard queue order in each half.
        reviewed_videos = [v for v in standard_queue.videos if v.review_status != "not_reviewed"]
        unreviewed_videos = [v for v in standard_queue.videos if v.review_status == "not_reviewed"]

        videos_by_category = defaultdict(list)
        for video in unreviewed_videos:
            videos_by_category[video.video_category].append(video)

        # Within a category, the most relevant videos come first.
        for group in videos_by_category.values():
            group.sort(key=lambda v: (-v.relevance_score, v.video_id))

        # A dedicated generator produces the same sequence as seeding the
        # module-level one, without resetting the random state that other
        # code in the process may depend on.
        seed_date = datetime.now().strftime("%Y-%m-%d")
        rng = random.Random(f"driveguard-shuffle-{seed_date}")

        categories = list(videos_by_category.keys())
        rng.shuffle(categories)

        # Round-robin: take the n-th video of every category that still has one.
        shuffled_unreviewed = []
        longest_group = max((len(group) for group in videos_by_category.values()), default=0)
        for round_idx in range(longest_group):
            for category in categories:
                group = videos_by_category[category]
                if round_idx < len(group):
                    shuffled_unreviewed.append(group[round_idx])

        shuffled_queue = VideoQueue(
            queue_id=standard_queue.queue_id,
            created_at=standard_queue.created_at,
            last_updated=datetime.now(),
            videos=reviewed_videos + shuffled_unreviewed,
            current_index=0,  # Will be set to first unreviewed video by caller
            shuffle_categories=True
        )

        # Store the index where unreviewed videos begin for caller use
        shuffled_queue.first_unreviewed_index = len(reviewed_videos)

        logger.info(f"Built shuffled video queue: {len(reviewed_videos)} reviewed videos first, "
                   f"then {len(shuffled_unreviewed)} unreviewed videos from {len(categories)} categories")

        return shuffled_queue

    def get_video_by_id(self, video_id: str) -> Optional[VideoReviewStatus]:
        """Get a specific video by ID.

        The lookup covers every discovered video, including ones that already
        have clip markings. The default queue leaves those out, which would
        make an existing, reviewed video look as if it did not exist.

        Args:
            video_id: The video ID to find

        Returns:
            VideoReviewStatus if found, None otherwise
        """
        queue = self.build_video_queue(include_completed=True)
        return next((video for video in queue.videos if video.video_id == video_id), None)
    
    def get_videos_by_category(self, category: str) -> List[VideoReviewStatus]:
        """Get all videos in a specific category.

        Videos that already have clip markings are included. The default
        queue filters them out, which would silently shrink the category.

        Args:
            category: The category to filter by

        Returns:
            List of VideoReviewStatus objects in queue order (unreviewed
            videos first). Empty if the category has no videos.
        """
        queue = self.build_video_queue(include_completed=True)
        return [video for video in queue.videos if video.video_category == category]
    
    def get_categories(self) -> List[str]:
        """Get all available video categories.

        Categories are collected from every discovered video, including ones
        that already have clip markings, so a category does not disappear
        once all of its videos have been reviewed.

        Returns:
            Alphabetically sorted list of distinct category names
        """
        queue = self.build_video_queue(include_completed=True)
        return sorted({video.video_category for video in queue.videos})
    
    def get_queue_statistics(self) -> Dict:
        """Summarise the default video queue.

        NOTE(review): the queue is built with its default arguments, so
        videos that already have clip markings are not counted - confirm
        that is the intended scope of these statistics.

        Returns:
            Dictionary with the number of videos, total duration in hours,
            average relevance score, number of categories, per-category
            counts, per-score counts and the number of videos whose relevance
            score is above zero (reported as ``videos_with_metadata``).
        """
        videos = self.build_video_queue().videos

        per_category = defaultdict(int)
        per_score = defaultdict(int)
        duration_seconds = 0.0
        relevance_sum = 0
        scored_videos = 0

        # One pass gathers every tally the summary needs.
        for entry in videos:
            per_category[entry.video_category] += 1
            per_score[entry.relevance_score] += 1
            duration_seconds += entry.video_duration
            relevance_sum += entry.relevance_score
            if entry.relevance_score > 0:
                scored_videos += 1

        if videos:
            average_relevance = relevance_sum / len(videos)
        else:
            average_relevance = 0

        return {
            'total_videos': len(videos),
            'total_duration_hours': duration_seconds / 3600.0,
            'average_relevance_score': average_relevance,
            'categories': len(per_category),
            # Plain dicts are handed back, as callers received before.
            'category_counts': dict(per_category),
            'relevance_distribution': dict(per_score),
            'videos_with_metadata': scored_videos,
        }
    
    def _load_individual_video_markings(self) -> Dict[str, List]:
        """Read the per-video marking files from the review progress directory.

        Every ``*_review_markings.json`` file is parsed. Files without a
        video ID, files with an empty clip list and files that fail to load
        contribute nothing to the result.

        Returns:
            Dictionary mapping video_id to list of clip markings (as dicts)
        """
        markings = {}

        for marking_file in self.review_progress_dir.glob("*_review_markings.json"):
            try:
                with open(marking_file, 'r', encoding='utf-8') as handle:
                    payload = json.load(handle)

                video_id = payload.get('video_info', {}).get('video_id')
                if not video_id:
                    logger.warning(f"No video_id found in {marking_file.name}, skipping")
                    continue

                # The raw dicts are kept as they are; nothing here needs
                # richer clip objects.
                clips_data = payload.get('clip_markings', [])
                if not clips_data:
                    continue

                markings[video_id] = clips_data
                logger.debug(f"Loaded {len(clips_data)} clips for video {video_id}")

            except Exception as e:
                # Best effort: one unreadable file must not hide the others.
                logger.warning(f"Failed to load markings from {marking_file.name}: {e}")
                continue

        return markings
    
    def validate_video_access(self, video_path: str) -> bool:
        """Report whether a path points at an existing, non-empty regular file.

        Args:
            video_path: Path to the video file

        Returns:
            True if the file exists, is a regular file and has a size above
            zero. False otherwise, including when the check itself fails.
        """
        try:
            candidate = Path(video_path)
            if not candidate.exists():
                return False
            if not candidate.is_file():
                return False
            return candidate.stat().st_size > 0
        except Exception as e:
            logger.warning(f"Video access validation failed for {video_path}: {e}")
            return False
    
    def get_video_info(self, video_path: str) -> Dict:
        """Collect basic file-system facts about a video file.

        Args:
            video_path: Path to the video file

        Returns:
            Dictionary with the size in MiB, the modification time as an
            ISO-8601 string, the file name, the parent directory name and the
            suffix. If the file is missing or cannot be inspected, a
            dictionary with a single ``error`` entry is returned instead.
        """
        try:
            target = Path(video_path)
            if not target.exists():
                return {'error': 'File not found'}

            file_stat = target.stat()
            size_mb = file_stat.st_size / (1024 * 1024)
            modified_at = datetime.fromtimestamp(file_stat.st_mtime)

            return {
                'exists': True,
                'size_mb': size_mb,
                'modified': modified_at.isoformat(),
                'filename': target.name,
                'directory': target.parent.name,
                'extension': target.suffix,
            }

        except Exception as e:
            logger.error(f"Failed to get video info for {video_path}: {e}")
            return {'error': str(e)}