'''
GUI Agent System - Caption, Action, and Decision Agents
'''

import os
import json
from typing import List, Dict, Any, Optional, Tuple
from PIL import Image
import logging

class CaptionAgent:
    """Builds the chat payload that asks a model to describe a GUI screenshot."""

    def __init__(self, prompts_file: str = "prompts.json"):
        """Load the captioning prompt.

        Args:
            prompts_file: Path to a UTF-8 JSON file; the prompt is read from
                its ``["caption_agent"]["prompt"]`` entry.
        """
        with open(prompts_file, 'r', encoding='utf-8') as handle:
            loaded = json.load(handle)
        caption_section = loaded["caption_agent"]
        self.caption_prompt = caption_section["prompt"]

    def generate_messages(self, screenshot_path: str) -> List[Dict[str, Any]]:
        """Return a one-message conversation requesting a caption.

        Args:
            screenshot_path: Path of the screenshot; it is embedded in the
                message as given (the image itself is not opened here).

        Returns:
            A list holding a single ``user`` message whose content is the
            caption prompt followed by the image reference.

        Raises:
            FileNotFoundError: If ``screenshot_path`` does not exist.
        """
        try:
            # Fail early so the caller never sends a dangling image path.
            if not os.path.exists(screenshot_path):
                raise FileNotFoundError(f"Screenshot not found: {screenshot_path}")

            text_part = {"type": "text", "text": self.caption_prompt}
            image_part = {"type": "image", "image": screenshot_path}
            user_turn = {"role": "user", "content": [text_part, image_part]}
            return [user_turn]

        except Exception as err:
            # Log for diagnostics, then let the caller decide what to do.
            logging.error(f"Error generating messages for screenshot {screenshot_path}: {err}")
            raise


class ActionAgent:
    """Builds the chat payload that asks a model what an action changed on screen."""

    def __init__(self, prompts_file: str = "prompts.json"):
        """Load the action-effect prompt template.

        Args:
            prompts_file: Path to a UTF-8 JSON file; the template is read from
                its ``["action_agent"]["prompt"]`` entry.
        """
        with open(prompts_file, 'r', encoding='utf-8') as handle:
            loaded = json.load(handle)
        action_section = loaded["action_agent"]
        self.action_prompt = action_section["prompt"]

    def generate_messages(self,
                         before_screenshot: str,
                         after_screenshot: str,
                         action: str,
                         instruction: str) -> List[Dict[str, Any]]:
        """Return a one-message conversation comparing two screenshots.

        Args:
            before_screenshot: Path of the screenshot taken before the action.
            after_screenshot: Path of the screenshot taken after the action.
            action: Text substituted for ``{action}`` in the template.
            instruction: Text substituted for ``{instruction}`` in the template.

        Returns:
            A list holding a single ``user`` message: the formatted prompt,
            then the "before" image reference, then the "after" one.

        Raises:
            FileNotFoundError: If either screenshot path does not exist.
        """
        try:
            # Both files must be present; the "before" one is checked first.
            if not os.path.exists(before_screenshot):
                raise FileNotFoundError(f"Before screenshot not found: {before_screenshot}")
            if not os.path.exists(after_screenshot):
                raise FileNotFoundError(f"After screenshot not found: {after_screenshot}")

            prompt_text = self.action_prompt.format(instruction=instruction, action=action)
            content_parts = [
                {"type": "text", "text": prompt_text},
                {"type": "image", "image": before_screenshot},
                {"type": "image", "image": after_screenshot},
            ]
            return [{"role": "user", "content": content_parts}]

        except Exception as err:
            # Log for diagnostics, then let the caller decide what to do.
            logging.error(f"Error generating messages for action effect: {err}")
            raise


class DecisionAgent:
    """Agent responsible for making decisions based on current state and history.

    Four histories are kept. On every successful ``step`` the screenshot,
    caption and action histories each gain one entry; the action-summary
    history gains one entry on every step that has both an earlier screenshot
    and an earlier action to compare against.
    """

    def __init__(self, prompts_file: str = "prompts.json"):
        """Initialize the decision agent, its histories and its helper agents.

        Args:
            prompts_file: Path to a UTF-8 JSON file; the decision template is
                read from its ``["decision_agent"]["prompt"]`` entry. The same
                path is handed to the caption and action agents.
        """
        with open(prompts_file, 'r', encoding='utf-8') as f:
            prompts = json.load(f)
        self.decision_prompt = prompts["decision_agent"]["prompt"]

        # Initialize state history
        self.screenshot_history: List[str] = []
        self.caption_history: List[str] = []
        self.action_history: List[str] = []
        self.action_summary_history: List[str] = []

        # Initialize other agents
        self.caption_agent = CaptionAgent(prompts_file)
        self.action_agent = ActionAgent(prompts_file)

    def get_state(self) -> str:
        """Get current screenshot - to be implemented.

        Returns:
            Currently always the placeholder path ``"current_screenshot.jpg"``.
        """
        # TODO: Implement actual screenshot capture
        # For now, return a placeholder
        return "current_screenshot.jpg"

    def execute_action(self, action: str) -> None:
        """Execute the given action - to be implemented (currently only logs it)."""
        # TODO: Implement actual action execution
        logging.info(f"Executing action: {action}")

    def generate_messages(self, instruction: str, current_screenshot: str) -> List[Dict[str, Any]]:
        """Generate messages for decision making.

        Args:
            instruction: Text substituted for ``{instruction}`` in the template.
            current_screenshot: Path embedded as the message's image reference.

        Returns:
            A list holding a single ``user`` message: the formatted decision
            prompt (including one ``"Action N: ..."`` line per stored action
            summary, numbered from 1) followed by the image reference.
        """
        try:
            # Single join instead of repeated string concatenation.
            action_summaries = "".join(
                f"Action {number}: {summary}\n"
                for number, summary in enumerate(self.action_summary_history, start=1)
            )
            prompt_text = self.decision_prompt.format(
                action_summaries=action_summaries,
                instruction=instruction,
            )
            return [
                {"role": "user", "content": [
                    {"type": "text", "text": prompt_text},
                    {"type": "image", "image": current_screenshot},
                ]},
            ]

        except Exception as e:
            logging.error(f"Error generating messages for decision: {e}")
            raise

    @staticmethod
    def _run_inference(vllm_inference: Any,
                       messages: List[Dict[str, Any]],
                       max_tokens: int,
                       temperature: float,
                       fallback: str) -> str:
        """Run one single-item inference batch and return its stripped text.

        Args:
            vllm_inference: Object exposing ``inference_batch``; it is expected
                to return a sequence whose first item is a string.
            messages: Conversation to send as the only dataset item.
            max_tokens: Generation length limit passed through.
            temperature: Sampling temperature passed through.
            fallback: Value returned when the backend yields an empty result.
        """
        results = vllm_inference.inference_batch(
            dataset=[{"messages": messages}],
            batch_size=1,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
        )
        return results[0].strip() if results else fallback

    def _observe_and_decide(self, instruction: str, vllm_inference: Any) -> str:
        """Capture state, caption it, summarize the last action and pick the next.

        The chosen action is appended to ``action_history`` and returned. If
        anything fails before that point, entries this call added to the
        screenshot, caption and action-summary histories are removed again so
        the histories stay aligned for a retry.
        """
        shots_before = len(self.screenshot_history)
        captions_before = len(self.caption_history)
        summaries_before = len(self.action_summary_history)
        try:
            # 1. Get current screenshot and add to history
            current_screenshot = self.get_state()
            self.screenshot_history.append(current_screenshot)

            # 2. Generate caption for current screenshot
            caption_messages = self.caption_agent.generate_messages(current_screenshot)
            self.caption_history.append(self._run_inference(
                vllm_inference, caption_messages,
                max_tokens=256, temperature=0.3,
                fallback="Failed to generate caption",
            ))

            # 3. Generate action summary if not initial state
            if len(self.screenshot_history) > 1 and len(self.action_history) > 0:
                action_messages = self.action_agent.generate_messages(
                    before_screenshot=self.screenshot_history[-2],
                    after_screenshot=current_screenshot,
                    action=self.action_history[-1],
                    instruction=instruction,
                )
                self.action_summary_history.append(self._run_inference(
                    vllm_inference, action_messages,
                    max_tokens=256, temperature=0.3,
                    fallback="Failed to generate action summary",
                ))

            # 4. Generate decision (reads the summary appended just above)
            decision_messages = self.generate_messages(instruction, current_screenshot)
            current_action = self._run_inference(
                vllm_inference, decision_messages,
                max_tokens=1024, temperature=0.7,
                fallback="wait()",
            )
        except Exception:
            # Undo this call's partial bookkeeping; the caller logs the error.
            del self.screenshot_history[shots_before:]
            del self.caption_history[captions_before:]
            del self.action_summary_history[summaries_before:]
            raise

        self.action_history.append(current_action)
        return current_action

    def step(self, instruction: str, vllm_inference) -> str:
        """Execute one step of the decision process.

        Args:
            instruction: The task instruction given to the prompts.
            vllm_inference: Object exposing ``inference_batch(dataset=...,
                batch_size=..., max_tokens=..., temperature=..., top_p=...)``.

        Returns:
            The action text chosen for this step (``"wait()"`` when the
            backend returns an empty result).

        Raises:
            Exception: Any error is logged and re-raised. If it happens before
                an action was chosen, the histories are left as they were
                before the call; an error raised by ``execute_action`` leaves
                the already-recorded step in the histories.
        """
        try:
            current_action = self._observe_and_decide(instruction, vllm_inference)

            # 5. Execute action
            self.execute_action(current_action)

            return current_action

        except Exception as e:
            logging.error(f"Error in decision step: {e}")
            raise

    def reset(self):
        """Reset the agent's history (all four lists are emptied in place)."""
        self.screenshot_history.clear()
        self.caption_history.clear()
        self.action_history.clear()
        self.action_summary_history.clear()

    def get_history(self) -> Dict[str, List[str]]:
        """Get the complete history as shallow copies of the four lists."""
        return {
            'screenshots': self.screenshot_history.copy(),
            'captions': self.caption_history.copy(),
            'actions': self.action_history.copy(),
            'action_summaries': self.action_summary_history.copy()
        }