from agent import Agent
from utils.llm import rules, prepend_history, query_llm
from levels.utils import convert_to_prompt
import re


# Default instruction prompt for the planner.
# PlannerAgent.initialize_prompt fills the {total_num_agents} placeholder with
# str.format and stores the result in the conversation history as a "user"
# message. The text is sent to the LLM verbatim, so every character here
# (including the leading banner line) is part of the program's behavior.
planner_info_prompt = """
----- END OF EXAMPLES -----

You are a strategic planner for a kitchen environment with {total_num_agents} agents.
Your task is to create a high-level plan for all agents to efficiently complete dish orders.

Current information about the kitchen environment:
- The layout of the kitchen and the position of all ingredients, cooking stations, and agents
- Any active dish orders, their requirements, and time limits
- The status of dishes in progress
- Any recently completed or canceled orders

Generate a CONCISE, one-paragraph plan that efficiently assigns tasks to all agents to complete the dishes.
Your plan should prioritize dishes based on time constraints, assign specific roles to each agent,
and coordinate agent movements to avoid inefficiencies.

Remember that your plan will be triggered at the start and whenever a dish is added, removed, or completed.
"""

# Alternative phrasings of the planner instruction prompt.
# PlannerAgent.plan cycles through this list (round-robin, one step per call
# with step > 0) when the agent was created with ablation=True, replacing the
# info prompt stored in its history. Each entry takes the same single
# {total_num_agents} placeholder as planner_info_prompt.
planner_info_prompts = [
    """
----- END OF EXAMPLES -----

You are the strategic planner for a kitchen staffed by {total_num_agents} agents.
Your goal is to generate a high-level plan that enables agents to fulfill dish orders efficiently.

You have access to:
- The full kitchen layout, including agents, ingredients, and appliances
- Details on all active orders, including deadlines and required steps
- The progress status of any ongoing dishes
- Notifications about completed or canceled dishes

Write a single, clear paragraph that assigns roles and actions to each agent.
Your plan should optimize time by prioritizing urgent orders and minimizing movement overlap.

This planning routine is activated at the start and whenever an order is updated, completed, or canceled.
""",

"""
----- END OF EXAMPLES -----

As the strategic planner in a kitchen with {total_num_agents} agents, your job is to design an efficient high-level plan to complete all active dish orders.

You are provided with:
- The spatial layout of the kitchen, including agents, tools, and ingredient locations
- A list of current orders, their components, and any timing constraints
- The current state of dishes being prepared
- A record of recently completed or canceled dishes

Compose a concise paragraph outlining how to allocate tasks and coordinate agents to complete the orders efficiently.
Your plan should account for timing, minimize idle time, and assign roles to avoid conflicts.

This planning step will run at initialization and every time there is a change to the set of orders.
""",

"""
----- END OF EXAMPLES -----

You are a high-level planner for a kitchen with {total_num_agents} agents.
Your role is to generate a brief paragraph describing how agents should cooperate to complete dish orders efficiently.

You will receive:
- A detailed map of the kitchen, including all agent and item locations
- A list of active dish orders with deadlines and required steps
- The progress status of any ongoing preparations
- Updates about completed or canceled dishes

Create a clear, task-oriented plan that assigns responsibilities to agents, prioritizes dishes by urgency, and avoids agent overlap.

This planning is executed when the kitchen starts and whenever dish orders change.
"""
]

# Few-shot examples for the planner agent.
# A flat list of (role, content) tuples forming two user/assistant exchanges;
# PlannerAgent.initialize_prompt splices the list into the initial history.
# Each "user" entry ends with "-plan:\n", the same suffix PlannerAgent.plan
# appends to its live prompts, and each "assistant" entry is the one-paragraph
# plan expected in response.
planner_examples = [
    # Exchange 1: initial plan at game step 0 with a single dish order.
    ("user", """
     THIS IS AN EXAMPLE FOR 2 AGENTS
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-plan:
"""),
    ("assistant", """Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires."""),

    # Exchange 2: re-plan at game step 2 after a second dish (onigiri) is
    # added; the suggestions section carries the "new dish order" notice.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  A new dish order has been received. Adjust the plan accordingly.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, chopboard0)
hold(agent0, raw_tuna)
at(agent1, chopboard0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-plan:
"""),
    ("assistant", """Agent 0 should place the raw tuna on the chopboard immediately, then go to storage to fetch rice for onigiri while Agent 1 chops the tuna, delivers the sashimi to the serving table, and then assists with mixing rice and nori for the onigiri which has more time remaining.""")
]


class PlannerAgent(Agent):
    """LLM-backed planner that writes one high-level plan for all agents.

    The agent keeps a running conversation history. Its fixed prefix is the
    environment rules, the few-shot ``planner_examples`` and the planner
    instruction prompt; every call to :meth:`plan` adds one user prompt and
    one assistant reply after that prefix.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps, ablation=False):
        """Store the configuration and build the initial prompt history.

        Args:
            env: Environment object; ``env.feedback`` and ``env.suggestions``
                are read in :meth:`plan`, and ``env`` is passed to ``rules``.
            model: Model identifier forwarded to ``query_llm``.
            agent_id: Identifier of this agent.
            total_num_agents: Number of agents, substituted into the prompt.
            with_feedback: Include execution error messages in prompts.
            with_notes: Include execution suggestions in prompts (also
                forwarded to ``rules``).
            look_ahead_steps: Number of most recent history entries sent to
                the LLM in addition to the fixed prefix.
            ablation: When true, rotate through ``planner_info_prompts`` on
                every call to :meth:`plan` with ``step > 0``.
        """
        self.env = env
        self.model = model
        self.agent_id = agent_id
        self.total_num_agents = total_num_agents
        self.with_feedback = with_feedback
        self.with_notes = with_notes
        self.ablation = ablation
        self.current_prompt_index = 0
        self.look_ahead_steps = look_ahead_steps

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Length of the fixed prefix (rules + examples + info prompt); entries
        # below this index are always sent to the LLM.
        self.initial_history_length = len(self.history)
        self.prompt_history = []
        self.previous_actions = []
        self.last_plan = ""

    def initialize_prompt(self):
        """Build the fixed history prefix and the initial feedback sections.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is a
            list of ``(role, content)`` tuples and the two strings are the
            empty-list placeholders (or ``''`` when feedback is disabled).
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", planner_info_prompt.format(total_num_agents=self.total_num_agents))
        # The info prompt opens with an "END OF EXAMPLES" banner, so it has to
        # come after the few-shot examples (same layout as ExecutorAgent).
        # plan() relies on it being the LAST entry of the fixed prefix.
        history = [pre_prompt] + planner_examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def _windowed_history(self):
        """Return the fixed prefix plus the most recent history entries."""
        prefix_len = self.initial_history_length
        if len(self.history) <= self.look_ahead_steps + prefix_len:
            return self.history
        # Slice from an explicit start index: ``history[-0:]`` would return
        # the whole list and duplicate the prefix. At least the newest entry
        # (the prompt being answered) is always kept.
        window = max(self.look_ahead_steps, 1)
        return self.history[:prefix_len] + self.history[len(self.history) - window:]

    def plan(self, obs, step, verbose=False):
        """Generate a high-level plan for all agents from the current state.

        Intended to be called when a new plan is needed: at the start of the
        game and when dish orders change (new, cancelled, or completed).

        Args:
            obs: Current observation. ``obs.just_new_task``,
                ``obs.just_failed`` and ``obs.task_just_success`` are read,
                and ``obs`` is rendered with ``convert_to_prompt``.
            step: Current step number; feedback, suggestions and prompt
                rotation are only refreshed when it is non-zero.
            verbose: Whether to print debug information.

        Returns:
            tuple: ``(plan, price, tokens)`` - the stripped plan text followed
            by the price and token values reported by ``query_llm``.
        """
        # Refresh the feedback / suggestion sections from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += '  --  agent ids cannot be the same. \n'

            # Tell the model which dish event triggered this re-plan.
            if obs.just_new_task:
                self.suggestions += '  --  A new dish order has been received. Adjust the plan accordingly.\n'
            if obs.just_failed:
                self.suggestions += '  --  A dish order has been canceled. Adjust the plan accordingly.\n'
            if obs.task_just_success:
                self.suggestions += '  --  A dish has been completed. Adjust the plan accordingly.\n'

        if self.ablation and step > 0:
            # Rotate to the next phrasing and overwrite the info prompt, which
            # is the last entry of the fixed prefix.
            self.current_prompt_index = (self.current_prompt_index + 1) % len(planner_info_prompts)
            variant = planner_info_prompts[self.current_prompt_index]
            self.history[self.initial_history_length - 1] = (
                "user",
                variant.format(total_num_agents=self.total_num_agents),
            )

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-plan:\n'

        # Record the new user prompt in the history.
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)

        # Limit what is sent to the LLM to the prefix plus a recent window.
        query_history = self._windowed_history()

        if verbose:
            print("Generating new plan for all agents...")

        plan_response, price, tokens = query_llm(query_history, max_tokens=400, model=self.model, stop=None)
        plan = plan_response.strip()
        print("PLAN tokens: ", tokens)
        print("PLAN price: ", price)

        # Record the model's reply in the history.
        self.history = prepend_history(self.history, plan, role='assistant', verbose=verbose)

        self.last_plan = plan

        if verbose:
            preview = plan[:100] + "..." if len(plan) > 100 else plan
            print("Generated plan:", preview)

        return plan, price, tokens

    def clean_plan(self, plan):
        """Return the plan with surrounding whitespace removed.

        No further parsing is done because plans are a single paragraph.
        """
        return plan.strip()
    
    


# Default instruction prompt for an executor agent.
# ExecutorAgent.initialize_prompt fills the {agent_id} and {total_num_agents}
# placeholders with str.format and appends the result to the initial history
# as a "user" message, after the few-shot examples. The text is sent to the
# LLM verbatim.
executor_info_prompt = """You are an executor agent (Agent {agent_id}) in a kitchen environment with {total_num_agents} total agents.
Your task is to determine your next action based on your specific role within the overall plan.

You have been given a concise, high-level plan that describes how all agents should work together to complete dish orders.
As Agent {agent_id}, focus ONLY on the parts of the plan that involve your agent.
Convert the high-level instructions into a specific action for this turn.
DO NOT try to execute actions for other agents.

Your available actions are:
- noop_agent{agent_id}: Do nothing
- goto_agent{agent_id}_location: Move to position X
- put_agent{agent_id}_object_location: Put held item on position X
- activate_agent{agent_id}_tool: Activate tool at position X
- get_agent{agent_id}_object_location: Pick up item at position X

Return ONLY your next action in the format: action_agent{agent_id}_parameters
"""

# Alternative phrasings of the executor instruction prompt.
# ExecutorAgent.step cycles through this list (round-robin, one step per call
# with step > 0) when the agent was created with ablation=True, overwriting
# the info prompt at the end of its initial history. Each entry takes the same
# {agent_id} and {total_num_agents} placeholders as executor_info_prompt.
executor_info_prompts = ["""You are Agent {agent_id}, one of {total_num_agents} executor agents operating in a kitchen environment.
Your job is to decide your next move based on your assigned role in the overall plan.

You've received a high-level plan that outlines what each agent should do to fulfill dish orders.
Your focus should be ONLY on the instructions that apply to Agent {agent_id}.
Translate those into a single, concrete action for this step.
Ignore actions meant for other agents.

Your available action types include:
- noop_agent{agent_id}: Take no action
- goto_agent{agent_id}_location: Move to a specific location
- put_agent{agent_id}_object_location: Place a held item at a location
- activate_agent{agent_id}_tool: Use a tool at a location
- get_agent{agent_id}_object_location: Pick up an item at a location

Respond with only your next action in this format: action_agent{agent_id}_parameters
""", 
"""You are Agent {agent_id}, functioning as one of {total_num_agents} executor agents in a coordinated kitchen setting.
Your task is to determine the most appropriate next action based solely on your designated responsibilities.

A high-level plan has been shared that details how all agents should contribute to completing dish orders.
Identify only the instructions relevant to you, Agent {agent_id}, and translate them into one concrete action.
Do not interpret or carry out actions assigned to other agents.

Available actions for your agent:
- noop_agent{agent_id}: Remain idle
- goto_agent{agent_id}_location: Navigate to a given location
- put_agent{agent_id}_object_location: Place your item somewhere
- activate_agent{agent_id}_tool: Operate a tool at a specific spot
- get_agent{agent_id}_object_location: Pick up an object at a location

Only return a single action in this exact format: action_agent{agent_id}_parameters
""",
"""You are Agent {agent_id}, one of {total_num_agents} agents executing tasks in a shared kitchen environment.
Your job is to decide your next action based on your role in the given high-level plan.

Read the plan carefully and focus ONLY on the parts assigned to Agent {agent_id}.
Turn those instructions into a single action for this step.
Ignore all actions intended for other agents.

Your action options are:
- noop_agent{agent_id}: Do nothing
- goto_agent{agent_id}_location: Move to a location
- put_agent{agent_id}_object_location: Drop an object at a location
- activate_agent{agent_id}_tool: Use a tool
- get_agent{agent_id}_object_location: Pick up an item
 
Reply with exactly one action using this format: action_agent{agent_id}_parameters
"""
]



# Few-shot examples for the executor agents.
# A flat list of (role, content) tuples forming three user/assistant
# exchanges; ExecutorAgent.initialize_prompt splices the list into the initial
# history. Each "user" entry ends with "-overall plan:\n<plan>\n\n-action:\n",
# the same layout ExecutorAgent.step builds for live prompts, and each
# "assistant" entry is the single action expected in response.
# All three expected replies are agent0 actions.
executor_examples = [
    # Exchange 1 (game step 0): agent0 heads to storage as the plan instructs.
    ("user", """
THIS IS AN EXAMPLE 
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "goto_agent0_storage0"),
    
    # Exchange 2 (game step 1, same plan): agent0, now at storage, picks up
    # the tuna. The expected reply is an agent0 action, not an agent1 one.
    # NOTE(review): "get_agent0_tuna" carries no location, whereas
    # executor_info_prompt advertises get_agent{agent_id}_object_location -
    # confirm against the environment's action grammar.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "get_agent0_tuna"),
    
    # Exchange 3 (game step 2): the plan was updated after a second dish was
    # added; agent0, holding raw tuna, moves to the chopboard. The suggestions
    # section includes the "You are Agent 0" reminder that ExecutorAgent.step
    # adds to live prompts.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  You are Agent 0. Follow your role in the plan.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, raw_tuna)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 should place the raw tuna on the chopboard immediately, then go to storage to fetch rice for onigiri while Agent 1 chops the tuna, delivers the sashimi to the serving table, and then assists with mixing rice and nori for the onigiri which has more time remaining.

-action:
"""),
    ("assistant", "goto_agent0_chopboard0")
]
    

class ExecutorAgent(Agent):
    """LLM-backed executor that turns the shared plan into one action per step.

    The agent keeps a running conversation history. Its fixed prefix is the
    environment rules, the few-shot ``executor_examples`` and the executor
    instruction prompt (last); every call to :meth:`step` adds one user prompt
    and one assistant reply after that prefix.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps, ablation=False):
        """Initialise the base agent and build the initial prompt history.

        Args:
            env: Environment object; ``env.feedback`` and ``env.suggestions``
                are read in :meth:`step`.
            model: Model identifier forwarded to ``query_llm``.
            agent_id: Identifier of the agent this executor controls.
            total_num_agents: Number of agents, substituted into the prompt.
            with_feedback: Include execution error messages in prompts.
            with_notes: Include execution suggestions in prompts.
            look_ahead_steps: Accepted for signature parity with
                ``PlannerAgent``; see the note below.
            ablation: When true, rotate through ``executor_info_prompts`` on
                every call to :meth:`step` with ``step > 0``.
        """
        # NOTE(review): the ``look_ahead_steps`` argument is not forwarded; a
        # literal 5 is passed to the base class instead. Presumably that
        # positional slot is the base class's look-ahead window - confirm
        # against Agent.__init__ and decide whether the caller's value should
        # be honoured.
        super().__init__(env, model, agent_id, total_num_agents, with_feedback, with_notes, 5)

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Length of the fixed prefix; the info prompt sits at index
        # ``initial_history_length - 1``.
        self.initial_history_length = len(self.history)
        self.prompt_history = []
        self.ablation = ablation
        self.current_prompt_index = 0

    def initialize_prompt(self):
        """Build the fixed history prefix and the initial feedback sections.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is a
            list of ``(role, content)`` tuples and the two strings are the
            empty-list placeholders (or ``''`` when feedback is disabled).
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", executor_info_prompt.format(total_num_agents=self.total_num_agents, agent_id=self.agent_id))
        # The info prompt goes last so it follows the few-shot examples;
        # step() relies on this position when rotating prompts.
        history = [pre_prompt] + executor_examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def _windowed_history(self):
        """Return the fixed prefix plus the most recent history entries."""
        prefix_len = self.initial_history_length
        if len(self.history) <= self.look_ahead_steps + prefix_len:
            return self.history
        # Slice from an explicit start index: ``history[-0:]`` would return
        # the whole list and duplicate the prefix. At least the newest entry
        # (the prompt being answered) is always kept.
        window = max(self.look_ahead_steps, 1)
        return self.history[:prefix_len] + self.history[len(self.history) - window:]

    def step(self, obs, step, plan, verbose=False):
        """Generate this agent's next action from the plan and current state.

        Args:
            obs: Current observation, rendered with ``convert_to_prompt``.
            step: Current step number; feedback, suggestions and prompt
                rotation are only refreshed when it is non-zero.
            plan: High-level plan text produced by the planner agent.
            verbose: Whether to print debug information.

        Returns:
            tuple: ``(parsed_actions, price, tokens)``. ``parsed_actions``
            holds at most one action (the first one ``extract_actions``
            found) and is ``[]`` when extraction raises; ``price`` and
            ``tokens`` are the values reported by ``query_llm``.
        """
        # Refresh the feedback / suggestion sections from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'
            self.suggestions += f'  --  You are Agent {self.agent_id}. Follow your role in the plan.\n'

        if self.ablation and step > 0:
            # Rotate to the next phrasing and overwrite the info prompt, which
            # is the last entry of the fixed prefix.
            self.current_prompt_index = (self.current_prompt_index + 1) % len(executor_info_prompts)
            variant = executor_info_prompts[self.current_prompt_index]
            self.history[self.initial_history_length - 1] = (
                "user",
                variant.format(total_num_agents=self.total_num_agents, agent_id=self.agent_id),
            )

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-overall plan:\n' + plan.strip() + '\n\n-action:\n'

        # Record the new user prompt in the history.
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)

        # Limit what is sent to the LLM to the prefix plus a recent window.
        query_history = self._windowed_history()

        if verbose:
            print(f"Generating action for Agent {self.agent_id} based on plan...")

        action_response, price, tokens = query_llm(query_history, model=self.model)
        print(f"ACTION [{self.agent_id}] tokens: ", tokens)
        print(f"ACTION [{self.agent_id}] price: ", price)

        # Record the model's reply in the history.
        self.history = prepend_history(self.history, action_response, role='assistant', verbose=verbose)

        if verbose:
            print(f"Agent {self.agent_id} action: {action_response}")

        try:
            parsed_actions = self.extract_actions(action_response)
            if parsed_actions:
                # An executor controls a single agent, so keep only the first
                # action.
                parsed_actions = [parsed_actions[0]]
        except Exception as exc:
            # Best effort: an unparsable reply yields no action. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            if verbose:
                print(f"Agent {self.agent_id} action could not be parsed: {exc!r}")
            parsed_actions = []

        return parsed_actions, price, tokens