from agent import Agent
from utils.llm import rules, prepend_history, query_llm, models_cost
from levels.utils import convert_to_prompt
import re


# Instruction message for the planner agent. PlannerAgent.initialize_prompt
# fills the {total_num_agents} and {model_costs} fields with str.format.
# NOTE(review): the text opens with an "END OF EXAMPLES" marker, yet
# PlannerAgent.initialize_prompt places this message *before* planner_examples
# in the history -- confirm which order is intended.
planner_info_prompt = """
----- END OF EXAMPLES -----

You are a strategic planner for a kitchen environment with {total_num_agents} agents.
Your task is to create a high-level plan for all agents to efficiently complete dish orders.

Model Cost Information (per million tokens):
{model_costs}

Current information about the kitchen environment:
- The layout of the kitchen and the position of all ingredients, cooking stations, and agents
- Any active dish orders, their requirements, and time limits
- The status of dishes in progress
- Any recently completed or canceled orders

Generate a CONCISE, one-paragraph plan that efficiently assigns tasks to all agents to complete the dishes.
Your plan should prioritize dishes based on time constraints, assign specific roles to each agent,
and coordinate agent movements to avoid inefficiencies.

Remember that your plan will be triggered at the start and whenever a dish is added, removed, or completed.
Consider the cost of each agent's actions when making your plan, as each agent uses a different model with different costs.
"""

# Few-shot examples for the planner agent: a list of (role, content) pairs in
# which each "user" observation prompt is followed by the "assistant" plan.
# NOTE(review): the live prompt built in PlannerAgent.plan also carries a
# "-performance history:" section that these example prompts do not show.
planner_examples = [
    # Initial plan example (game step 0, a single dish order)
    ("user", """
     THIS IS AN EXAMPLE FOR 2 AGENTS
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-plan:
"""),
    ("assistant", """Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires."""),

    # Example with a new dish added (the suggestions section announces the new order)
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  A new dish order has been received. Adjust the plan accordingly.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, chopboard0)
hold(agent0, raw_tuna)
at(agent1, chopboard0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-plan:
"""),
    ("assistant", """Agent 0 should place the raw tuna on the chopboard immediately, then go to storage to fetch rice for onigiri while Agent 1 chops the tuna, delivers the sashimi to the serving table, and then assists with mixing rice and nori for the onigiri which has more time remaining.""")
]


class PlannerAgent(Agent):
    """Planner that asks an LLM for one high-level plan covering all agents.

    Every query consists of a fixed prefix (rules, planner instructions and
    few-shot examples) followed by the most recent conversation messages.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Store the configuration and build the fixed prompt prefix.

        Args:
            env: Environment object. It is passed to ``rules`` and ``plan``
                reads ``env.feedback`` and ``env.suggestions`` from it.
            model: Model identifier passed to ``query_llm`` and used as the
                lookup key into ``models_cost``.
            agent_id: Identifier of this agent (stored only).
            total_num_agents: Number of agents the plan has to cover.
            with_feedback: Whether execution error messages are refreshed from
                the environment; also enables the initial placeholder sections.
            with_notes: Whether execution suggestions are refreshed from the
                environment; also forwarded to ``rules``.
            look_ahead_steps: Number of most recent history messages sent to
                the model on top of the fixed prefix.
        """
        # NOTE(review): unlike ExecutorAgent, Agent.__init__ is not called
        # here; every attribute this class reads is assigned below.
        self.env = env
        self.model = model
        self.agent_id = agent_id
        self.total_num_agents = total_num_agents
        self.with_feedback = with_feedback
        self.with_notes = with_notes
        self.look_ahead_steps = look_ahead_steps

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Length of the fixed prefix that is always resent with each query.
        self.initial_history_length = len(self.history)
        self.prompt_history = []
        self.previous_actions = []
        self.last_plan = ""

    def initialize_prompt(self):
        """Build the fixed conversation prefix and the initial feedback text.

        Returns:
            tuple: ``(history, feedback, suggestions)``. ``history`` is a list
            of ``(role, content)`` pairs; the two strings are the placeholder
            sections used until they are refreshed in ``plan``.
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))

        # NOTE(review): every agent is listed with the planner's own model,
        # although the prompt text speaks of a different model per agent.
        costs = models_cost.get(self.model, {"input": 0, "output": 0})
        model_costs_str = "".join(
            f"  - Agent {agent_idx}'s Model ({self.model}):\n"
            f"    Input: ${costs['input']:.3f}/1M tokens, Output: ${costs['output']:.3f}/1M tokens\n"
            for agent_idx in range(self.total_num_agents)
        )

        info_prompt = ("user", planner_info_prompt.format(
            total_num_agents=self.total_num_agents,
            model_costs=model_costs_str,
            # The template currently has no {performance_history} field, so
            # str.format ignores this argument; the per-call value is added
            # to the prompt in plan() instead.
            performance_history="No performance history yet."
        ))
        history = [pre_prompt, info_prompt, *planner_examples]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def _recent_window(self):
        """Return the messages for the next query: fixed prefix plus recent tail."""
        # Keep at least the newest message: with a window of 0 the slice
        # ``history[-0:]`` is the whole list, which would resend the prefix twice.
        window = max(self.look_ahead_steps, 1)
        prefix_len = self.initial_history_length
        if len(self.history) > window + prefix_len:
            return self.history[:prefix_len] + self.history[-window:]
        return self.history

    def plan(self, obs, step, verbose=False, performance_history=None):
        """Generate a high-level plan for all agents from the current game state.

        The docstring of the original code states that this is called at the
        start of the game and whenever dish orders change (new, cancelled or
        completed); the caller is responsible for that scheduling.

        Args:
            obs: The current observation of the game state. ``just_new_task``,
                ``just_failed`` and ``task_just_success`` are read from it and
                it is rendered with ``convert_to_prompt``.
            step: The current step number; at step 0 the initial placeholder
                feedback/suggestions are used unchanged.
            verbose: Whether to print debug information.
            performance_history: Text describing agent performance; a default
                sentence is used when ``None``.

        Returns:
            tuple: ``(plan, price, tokens)`` -- the stripped plan text and the
            price and token values reported by ``query_llm``.
        """
        # Refresh the feedback/suggestion sections from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += '  --  agent ids cannot be the same. \n'

            # Tell the model which dish event triggered this re-plan.
            if obs.just_new_task:
                self.suggestions += '  --  A new dish order has been received. Adjust the plan accordingly.\n'
            if obs.just_failed:
                self.suggestions += '  --  A dish order has been canceled. Adjust the plan accordingly.\n'
            if obs.task_just_success:
                self.suggestions += '  --  A dish has been completed. Adjust the plan accordingly.\n'

        if performance_history is None:
            performance_history = "No performance history available."

        prompt = (
            self.feedback +
            self.suggestions +
            convert_to_prompt(obs) +
            f'-performance history:\n{performance_history}\n' +
            '-plan:\n'
        )

        # Record the prompt, then pick the slice of history that is sent.
        # Assumes prepend_history returns the history with the new message
        # last -- TODO confirm against utils.llm.
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)
        query_history = self._recent_window()

        if verbose:
            print("Generating new plan for all agents...")

        plan_response, price, tokens = query_llm(query_history, max_tokens=400, model=self.model, stop=None)
        plan = plan_response.strip()
        print("PLAN tokens: ", tokens)
        print("PLAN price: ", price)

        # Record the answer so that later queries can see it.
        self.history = prepend_history(self.history, plan, role='assistant', verbose=verbose)
        self.last_plan = plan

        if verbose:
            preview = plan[:100] + "..." if len(plan) > 100 else plan
            print("Generated plan:", preview)

        return plan, price, tokens

    def clean_plan(self, plan):
        """Return the plan with surrounding whitespace removed.

        No further parsing is done because the plan is a single paragraph.
        """
        return plan.strip()
    
    


# Instruction message for an executor agent. ExecutorAgent.initialize_prompt
# fills the {agent_id} and {total_num_agents} fields with str.format and
# appends the result after executor_examples.
executor_info_prompt = """You are an executor agent (Agent {agent_id}) in a kitchen environment with {total_num_agents} total agents.
Your task is to determine your next action based on your specific role within the overall plan.

You have been given a concise, high-level plan that describes how all agents should work together to complete dish orders.
As Agent {agent_id}, focus ONLY on the parts of the plan that involve your agent.
Convert the high-level instructions into a specific action for this turn.
DO NOT try to execute actions for other agents.

Your available actions are:
- noop_agent{agent_id}: Do nothing
- goto_agent{agent_id}_location: Move to position X
- put_agent{agent_id}_object_location: Put held item on position X
- activate_agent{agent_id}_tool: Activate tool at position X
- get_agent{agent_id}_object_location: Pick up item at position X

Return ONLY your next action in the format: action_agent{agent_id}_parameters
"""

# Few-shot examples for the executor agents: a list of (role, content) pairs in
# which each "user" prompt (state plus overall plan) is followed by the single
# action given as the "assistant" answer. All three answers are for agent0.
executor_examples = [
    # Example for Agent 0 (game step 0)
    ("user", """
THIS IS AN EXAMPLE 
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "goto_agent0_storage0"),
    
    # Second example, again for Agent 0 (same plan, game step 1).
    # NOTE(review): the answer below has no location part, while
    # executor_info_prompt lists get_agent{agent_id}_object_location --
    # confirm which action format the environment expects.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "get_agent0_tuna"),
    
    # Example for Agent 0 with an updated plan (game step 2, second dish added)
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  You are Agent 0. Follow your role in the plan.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, raw_tuna)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 should place the raw tuna on the chopboard immediately, then go to storage to fetch rice for onigiri while Agent 1 chops the tuna, delivers the sashimi to the serving table, and then assists with mixing rice and nori for the onigiri which has more time remaining.

-action:
"""),
    ("assistant", "goto_agent0_chopboard0")
]
    

class ExecutorAgent(Agent):
    """Executor that turns the shared plan into one action for a single agent."""

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Initialise the base agent and build the fixed prompt prefix.

        Args:
            env: Environment object, forwarded to the base class.
            model: Model identifier, forwarded to the base class.
            agent_id: Identifier of the agent this executor controls.
            total_num_agents: Total number of agents in the environment.
            with_feedback: Whether execution error messages are included.
            with_notes: Whether execution suggestions are included.
            look_ahead_steps: Accepted for signature compatibility; see the
                review note below.
        """
        # NOTE(review): ``look_ahead_steps`` is not forwarded -- the base class
        # always receives the literal 5. Confirm whether that is intended.
        super().__init__(env, model, agent_id, total_num_agents, with_feedback, with_notes, 5)

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Length of the fixed prefix that is always resent with each query.
        self.initial_history_length = len(self.history)
        self.prompt_history = []

    def initialize_prompt(self):
        """Build the fixed conversation prefix and the initial feedback text.

        Returns:
            tuple: ``(history, feedback, suggestions)``. ``history`` is a list
            of ``(role, content)`` pairs holding the rules, the few-shot
            examples and the executor instructions, in that order.
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", executor_info_prompt.format(total_num_agents=self.total_num_agents, agent_id=self.agent_id))
        history = [pre_prompt, *executor_examples, info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def step(self, obs, step, plan, verbose=False):
        """Generate this agent's next action from the overall plan and state.

        Args:
            obs: The current observation of the game state.
            step: The current step number; at step 0 the initial placeholder
                feedback/suggestions are used unchanged.
            plan: The high-level plan generated by the planner agent.
            verbose: Whether to print debug information.

        Returns:
            tuple: ``(parsed_actions, price, tokens)`` -- a list holding at
            most the first parsed action (empty when parsing raised), plus
            the price and token values reported by ``query_llm``.
        """
        # Refresh the feedback/suggestion sections from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'
            self.suggestions += f'  --  You are Agent {self.agent_id}. Follow your role in the plan.\n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-overall plan:\n' + plan.strip() + '\n\n-action:\n'

        # Record the prompt. Assumes prepend_history returns the history with
        # the new message last -- TODO confirm against utils.llm.
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)

        # Send the fixed prefix plus the most recent messages. The window is
        # clamped to 1 because ``history[-0:]`` would be the whole list.
        window = max(self.look_ahead_steps, 1)
        prefix_len = self.initial_history_length
        if len(self.history) > window + prefix_len:
            query_history = self.history[:prefix_len] + self.history[-window:]
        else:
            query_history = self.history

        if verbose:
            print(f"Generating action for Agent {self.agent_id} based on plan...")

        action_response, price, tokens = query_llm(query_history, model=self.model)
        print(f"ACTION [{self.agent_id}] tokens: ", tokens)
        print(f"ACTION [{self.agent_id}] price: ", price)

        # Record the answer so that later queries can see it.
        self.history = prepend_history(self.history, action_response, role='assistant', verbose=verbose)

        if verbose:
            print(f"Agent {self.agent_id} action: {action_response}")

        # Parsing stays best-effort, but only ordinary errors are absorbed so
        # that KeyboardInterrupt/SystemExit still propagate.
        # extract_actions is not defined in this class -- presumably inherited
        # from Agent.
        try:
            parsed_actions = self.extract_actions(action_response)
            if parsed_actions:
                # Only one action per turn is returned for this agent.
                parsed_actions = [parsed_actions[0]]
        except Exception as err:
            if verbose:
                print(f"Agent {self.agent_id} action could not be parsed: {err}")
            parsed_actions = []

        return parsed_actions, price, tokens