"""Dashcam video annotation agent for traffic scene analysis."""

import base64
import cv2

from .agent_prompt import dashcam_annotation_prompt
from ..llms import get_llm
from ...utils.settings import settings

# Per-request image caps for model families known to enforce one.
# Every family listed below currently shares the same cap.
_RESTRICTED_IMAGE_CAP = 16

MODEL_IMAGE_LIMITS = dict.fromkeys(
    (
        "gemma",       # Nebius Gemma models
        "llama",       # conservative cap for Llama models
        "qwen2.5-vl",  # Qwen2.5-VL models specifically
        "qwen2-5-vl",  # alternative naming format (qwen2-5-vl-72b-instruct)
    ),
    _RESTRICTED_IMAGE_CAP,
)
# Families not listed (gpt, claude, gemini) are treated as having no
# practical limit.


class VideoAnnotator:
    """Agent for annotating dashcam videos with traffic scene analysis.

    Typical use is ``VideoAnnotator().annotate_video(path)``, which samples
    frames from the video, encodes them as base64 JPEGs and sends them to
    the configured multimodal LLM together with the annotation prompt.
    """

    def __init__(self, model_id=None):
        """Initialize the video annotator agent.

        Args:
            model_id (str, optional): Model ID to use. Defaults to the
                ``multimodal`` entry of the application LLM settings.
        """
        # Resolve the model once so the client and the stored id always agree.
        self.model_id = model_id or settings.app.llm['multimodal']
        self.client = get_llm(self.model_id).with_retry()

    @staticmethod
    def _frame_step(native_fps, fps):
        """Return how many source frames to advance per kept frame.

        The result is always at least 1: when the container reports no
        usable frame rate (0, negative or NaN), or a rate lower than the
        requested one, every frame is kept instead of dividing by zero.
        """
        if not native_fps > 0:
            return 1
        return max(1, int(native_fps / fps))

    @staticmethod
    def _media_type_for(model_id):
        """Return the data-URL media type to use for ``model_id``.

        Gateway-routed Claude and Gemini models get ``image/jpeg``; every
        other model gets ``image/jpg``.
        """
        # NOTE(review): only the "gateway:" prefix is inspected here; confirm
        # whether directly addressed Claude/Gemini models also need image/jpeg.
        platform, separator, model_name = model_id.partition(":")
        if separator and platform == "gateway":
            lowered = model_name.lower()
            if any(provider in lowered for provider in ("claude", "gemini")):
                return "image/jpeg"
        return "image/jpg"

    @staticmethod
    def video2frames(video_path, fps=2):
        """
        Converts a video into a list of base64 encoded frames.

        Args:
            video_path (str): Path to the video file.
            fps (int): The desired sampling rate (frames per second).

        Returns:
            list: A list of base64 encoded JPEG frames. Empty when the video
                cannot be opened or contains no readable frames.

        Raises:
            ValueError: If ``fps`` is not positive.
        """
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps!r}")

        video = cv2.VideoCapture(video_path)
        base64_frames = []
        try:
            # The native frame rate does not change while reading, so the
            # sampling stride is computed once rather than on every frame.
            step = VideoAnnotator._frame_step(video.get(cv2.CAP_PROP_FPS), fps)
            frame_count = 0
            while video.isOpened():
                success, frame = video.read()
                if not success:
                    break
                # Keep every ``step``-th frame to approximate the desired FPS.
                if frame_count % step == 0:
                    _, buffer = cv2.imencode(".jpg", frame)
                    base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
                frame_count += 1
        finally:
            # Release the capture handle even if decoding/encoding fails.
            video.release()
        return base64_frames

    def get_max_images_for_model(self, model_id):
        """Get maximum images allowed for a model.

        Args:
            model_id (str): Model ID, optionally prefixed with ``platform:``.

        Returns:
            int | None: The limit of the first ``MODEL_IMAGE_LIMITS`` key found
                as a substring of the lowercased model name, or ``None`` when
                ``model_id`` is empty or no key matches (no limit).
        """
        if not model_id:
            return None

        # Drop the "platform:" prefix, if any, before matching.
        model_name = model_id.split(":", 1)[1] if ":" in model_id else model_id
        model_name = model_name.lower()

        for model_key, limit in MODEL_IMAGE_LIMITS.items():
            if model_key in model_name:
                return limit
        return None  # No limit for other models

    def sample_frames_evenly(self, frames, max_frames):
        """Sample frames evenly distributed across the video timeline.

        Args:
            frames (list): Frames in chronological order.
            max_frames (int): Maximum number of frames to keep.

        Returns:
            list: ``frames`` itself when it already fits within
                ``max_frames``; otherwise ``max_frames`` items picked at
                evenly spaced positions (an empty list when ``max_frames``
                is not positive).
        """
        if len(frames) <= max_frames:
            return frames
        if max_frames <= 0:
            # Nothing may be kept; also avoids dividing by zero below.
            return []

        # Fractional stride so the picks span the whole timeline.
        step = len(frames) / max_frames
        return [frames[int(i * step)] for i in range(max_frames)]

    def annotate(self, base64_frames, prompt=None, model_id=None, fps=2):
        """Annotate video frames with traffic scene analysis.

        Args:
            base64_frames (list): List of base64 encoded frames.
            prompt (str, optional): Custom prompt. Defaults to dashcam_annotation_prompt.
            model_id (str, optional): Model ID used to pick the image limit
                and media type. Defaults to instance model_id. The request
                itself is always sent through the instance's client.
            fps (int): Sampling rate the frames were extracted at; it is
                quoted to the model in the user message. Defaults to 2.

        Returns:
            str: Traffic scene annotation.
        """
        annotation_prompt = prompt if prompt else dashcam_annotation_prompt
        if model_id is None:
            model_id = self.model_id

        # Apply model-specific image limits
        max_images = self.get_max_images_for_model(model_id)
        if max_images and len(base64_frames) > max_images:
            original_count = len(base64_frames)
            base64_frames = self.sample_frames_evenly(base64_frames, max_images)
            print(f"  Frame sampling: {original_count} → {len(base64_frames)} frames (model limit: {max_images})")

        media_type = self._media_type_for(model_id)

        # NOTE: the quoted rate is the nominal extraction rate; it is not
        # adjusted when the frames were thinned out above.
        user_content = [
            {"type": "text",
             "text": f"Dashcam Video frames from the ego vehicle. ({fps:g} frames/second)"},
        ]
        user_content.extend(
            {"type": "image_url",
             "image_url": {"url": f'data:{media_type};base64,{frame}', "detail": "high"}}
            for frame in base64_frames
        )

        input_data = [
            {"role": "system", "content": annotation_prompt},
            {"role": "user", "content": user_content},
        ]
        return self.client.invoke(input_data).content

    def annotate_video(self, video_path, fps=2, prompt=None):
        """Complete agent workflow: convert video to frames and annotate.

        Args:
            video_path (str): Path to the video file.
            fps (int): Frames per second to sample. Defaults to 2.
            prompt (str, optional): Custom prompt. Defaults to dashcam_annotation_prompt.

        Returns:
            str: Traffic scene annotation of the video.
        """
        base64_frames = self.video2frames(video_path, fps)
        # Forward the sampling rate so the prompt describes the real cadence.
        return self.annotate(base64_frames, prompt, fps=fps)