from agent import Agent
from utils.llm import rules, prepend_history, query_llm, models_cost
from levels.utils import convert_to_prompt
from prompts.planner_agent import planner_info_prompt, planner_examples, executor_info_prompt, executor_examples
import json
from collections import defaultdict
import os


class PlannerAgent(Agent):
    """LLM-backed planner that writes one high-level plan for all agents.

    The conversation history starts with the game rules, the planner
    instructions (historical summary and model costs filled in) and the
    few-shot planner examples. Each call to ``plan`` adds one user prompt and
    one assistant reply to that history.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Store the configuration and build the initial prompt history.

        Agent.__init__ is not called; the attributes this class reads are
        assigned directly below.
        """
        self.env = env
        self.model = model
        self.agent_id = agent_id
        self.total_num_agents = total_num_agents
        self.with_feedback = with_feedback
        self.with_notes = with_notes
        self.look_ahead_steps = look_ahead_steps

        # Load historical summary based on number of agents
        self.historical_summary = self.load_historical_summary()

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Number of leading history entries that are always sent to the LLM.
        self.initial_history_length = len(self.history)
        self.prompt_history = []
        self.previous_actions = []
        self.last_plan = ""

    def load_historical_summary(self):
        """Load the historical summary for the current number of agents.

        Reads ``summary_report_<total_num_agents>agent.md`` relative to the
        current working directory.

        Returns:
            str: The stripped file content, or the placeholder text
            "No historical summary available." when the file is missing or
            cannot be read. A warning is printed in both failure cases.
        """
        summary_file = f"summary_report_{self.total_num_agents}agent.md"
        try:
            # Explicit encoding so the report is read the same way on every platform.
            with open(summary_file, 'r', encoding='utf-8') as f:
                return f.read().strip()
        except FileNotFoundError:
            print(f"Warning: Summary file not found: {summary_file}")
        except (OSError, UnicodeError) as e:
            print(f"Warning: Could not load historical summary: {e}")
        return "No historical summary available."

    def initialize_prompt(self):
        """Build the initial history and the default feedback/suggestion headers.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is
            the rules prompt, the formatted planner info prompt and the
            planner examples, and the two strings are empty-list placeholders
            when ``with_feedback`` is set, otherwise empty strings.
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))

        # Format model costs information exactly like baseline.
        # Every agent is listed with this planner's model, so the price lookup
        # is loop-invariant; models missing from models_cost are shown as $0.
        costs = models_cost.get(self.model, {"input": 0, "output": 0})
        cost_line = (
            f"    Input: ${costs['input']:.3f}/1M tokens, "
            f"Output: ${costs['output']:.3f}/1M tokens\n"
        )
        model_costs_str = "".join(
            f"  - Agent {agent_idx}'s Model ({self.model}):\n{cost_line}"
            for agent_idx in range(self.total_num_agents)
        )

        info_prompt = ("user", planner_info_prompt.format(
            total_num_agents=self.total_num_agents,
            historical_summary=self.historical_summary,
            model_costs=model_costs_str
        ))
        history = [pre_prompt] + [info_prompt] + planner_examples

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def plan(self, obs, step, verbose=False):
        """
        Generate a high-level plan for all agents based on the current game state.
        This method is called when a new plan is needed - at the start of the game and
        when there are changes to dish orders (new, cancelled, or completed).

        Args:
            obs: The current observation of the game state
            step: The current step number; feedback and suggestions are only
                refreshed from the environment when it is not 0
            verbose: Whether to print debug information

        Returns:
            tuple: (plan, price, tokens) containing the stripped plan string and
            the price and token values reported by query_llm
        """
        # update history
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += '  --  agent ids cannot be the same. \n'

            # Add information about dish events
            if obs.just_new_task:
                self.suggestions += '  --  A new dish order has been received. Adjust the plan accordingly.\n'
            if obs.just_failed:
                self.suggestions += '  --  A dish order has been canceled. Adjust the plan accordingly.\n'
            if obs.task_just_success:
                self.suggestions += '  --  A dish has been completed. Adjust the plan accordingly.\n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-plan:\n'

        # Update history with the prompt
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)

        # Manage history length for LLM query: keep the initial prompts plus the
        # most recent entries. The window is clamped to at least 1 so the prompt
        # just added is always sent; an unclamped 0 would turn the slice into
        # history[-0:], i.e. the whole history, duplicating the initial prompts.
        window = max(self.look_ahead_steps, 1)
        if len(self.history) > window + self.initial_history_length:
            query_history = (self.history[:self.initial_history_length] +
                             self.history[-window:])
        else:
            query_history = self.history

        # GENERATE PLAN
        if verbose:
            print("Generating new plan for all agents...")

        plan_response, price, tokens = query_llm(query_history, max_tokens=400, model=self.model, stop=None)
        plan = plan_response.strip()
        print("PLAN tokens: ", tokens)
        print("PLAN price: ", price)

        # Update history with the plan response
        self.history = prepend_history(self.history, plan, role='assistant', verbose=verbose)

        self.last_plan = plan

        if verbose:
            # Only the first 100 characters are shown for long plans.
            print("Generated plan:", plan[:100] + "..." if len(plan) > 100 else plan)

        return plan, price, tokens

    def clean_plan(self, plan):
        """
        Just return the plan as is (whitespace-stripped), since we're now using
        a single paragraph format.
        """
        return plan.strip()
    
    


# Instruction prompt for a single executor agent. It is filled in with
# agent_id and total_num_agents by ExecutorAgent.initialize_prompt.
# NOTE: this assignment rebinds the executor_info_prompt name that is also
# imported from prompts.planner_agent at the top of this file; from here on
# the module uses the text below.
executor_info_prompt = """You are an executor agent (Agent {agent_id}) in a kitchen environment with {total_num_agents} total agents.
Your task is to determine your next action based on your specific role within the overall plan.

You have been given a concise, high-level plan that describes how all agents should work together to complete dish orders.
As Agent {agent_id}, focus ONLY on the parts of the plan that involve your agent.
Convert the high-level instructions into a specific action for this turn.
DO NOT try to execute actions for other agents.

Your available actions are:
- noop_agent{agent_id}: Do nothing
- goto_agent{agent_id}_location: Move to position X
- put_agent{agent_id}_object_location: Put held item on position X
- activate_agent{agent_id}_tool: Activate tool at position X
- get_agent{agent_id}_object_location: Pick up item at position X

Return ONLY your next action in the format: action_agent{agent_id}_parameters
"""

# Few-shot examples for the executor agents: alternating ("user", prompt) and
# ("assistant", action) tuples that ExecutorAgent.initialize_prompt places in
# the initial history.
# NOTE: this assignment rebinds the executor_examples name that is also
# imported from prompts.planner_agent at the top of this file.
executor_examples = [
    # Step 0: both agents idle at the serving table; the expected reply is an
    # agent0 action.
    ("user", """
THIS IS AN EXAMPLE 
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "goto_agent0_storage0"),
    
    # Step 1 with the same plan. The expected reply is again an agent0 action
    # (not an agent1 one).
    # NOTE(review): "get_agent0_tuna" has no location part, unlike the
    # get_agent{agent_id}_object_location form listed in executor_info_prompt
    # -- confirm which format the environment accepts.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "get_agent0_tuna"),
    
    # Step 2 for Agent 0 with an updated plan (a second dish has appeared) and
    # the per-agent reminder line in the suggestions section.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  You are Agent 0. Follow your role in the plan.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, raw_tuna)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 should place the raw tuna on the chopboard immediately, then go to storage to fetch rice for onigiri while Agent 1 chops the tuna, delivers the sashimi to the serving table, and then assists with mixing rice and nori for the onigiri which has more time remaining.

-action:
"""),
    ("assistant", "goto_agent0_chopboard0")
]
    

class ExecutorAgent(Agent):
    """LLM-backed executor that turns the shared plan into one action for its agent.

    The initial history is the game rules, the few-shot executor examples and
    the executor instructions formatted for this agent. Each call to ``step``
    adds one user prompt and one assistant reply to that history.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Initialise the base Agent and build the executor prompt history.

        NOTE(review): ``look_ahead_steps`` is accepted but not forwarded; the
        base class always receives 5 -- confirm this is intended.
        """
        super().__init__(env, model, agent_id, total_num_agents, with_feedback, with_notes, 5)

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Number of leading history entries that are always sent to the LLM.
        self.initial_history_length = len(self.history)
        self.prompt_history = []

    def initialize_prompt(self):
        """Build the initial history and the default feedback/suggestion headers.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is
            the rules prompt, the executor examples and the formatted executor
            info prompt, and the two strings are empty-list placeholders when
            ``with_feedback`` is set, otherwise empty strings.
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", executor_info_prompt.format(total_num_agents=self.total_num_agents, agent_id=self.agent_id))
        history = [pre_prompt] + executor_examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def step(self, obs, step, plan, verbose=False):
        """
        Generate the next action for this agent based on the overall plan and current state.

        Args:
            obs: The current observation of the game state
            step: The current step number; feedback and suggestions are only
                refreshed from the environment when it is not 0
            plan: The high-level plan generated by the planner agent
            verbose: Whether to print debug information

        Returns:
            tuple: (parsed_actions, price, tokens) where parsed_actions holds at
            most one action (an empty list when the reply could not be parsed),
            and price and tokens are the values reported by query_llm
        """
        # update history
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'
            self.suggestions += f'  --  You are Agent {self.agent_id}. Follow your role in the plan.\n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-overall plan:\n' + plan.strip() + '\n\n-action:\n'

        # Update history with the prompt
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)

        # Manage history length for LLM query: keep the initial prompts plus the
        # most recent entries. The window is clamped to at least 1 so the prompt
        # just added is always sent; an unclamped 0 would turn the slice into
        # history[-0:], i.e. the whole history, duplicating the initial prompts.
        window = max(self.look_ahead_steps, 1)
        if len(self.history) > window + self.initial_history_length:
            query_history = (self.history[:self.initial_history_length] +
                             self.history[-window:])
        else:
            query_history = self.history

        # GENERATE ACTION
        if verbose:
            print(f"Generating action for Agent {self.agent_id} based on plan...")

        action_response, price, tokens = query_llm(query_history, model=self.model)
        print(f"ACTION [{self.agent_id}] tokens: ", tokens)
        print(f"ACTION [{self.agent_id}] price: ", price)

        # Update history with the action response
        self.history = prepend_history(self.history, action_response, role='assistant', verbose=verbose)

        if verbose:
            print(f"Agent {self.agent_id} action: {action_response}")

        try:
            parsed_actions = self.extract_actions(action_response)
            if parsed_actions:
                # Only the first parsed action is executed this turn.
                parsed_actions = [parsed_actions[0]]
        except Exception as e:
            # Best effort: an unparsable reply yields no action, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"Warning: Could not parse action for Agent {self.agent_id}: {e}")
            parsed_actions = []

        return parsed_actions, price, tokens