from agent import Agent
from utils.llm import rules, prepend_history, query_llm
from levels.utils import convert_to_prompt
import re


# Instruction message for the planner LLM.
# PlannerAgent.initialize_prompt renders it with str.format, so
# {total_num_agents} is the only placeholder; any literal brace added to this
# text later would have to be doubled.
# The text opens with an "END OF EXAMPLES" marker, i.e. it is worded to be read
# after the few-shot examples in the message history.
# The section headers listed under "Follow the format" are the ones
# PlannerAgent.clean_plan searches for when filtering the model's reply.
planner_info_prompt = """
----- END OF EXAMPLES -----

You are a strategic planner for a kitchen environment with {total_num_agents} agents.
Your task is to create a high-level plan for all agents to efficiently complete dish orders.

Current information about the kitchen environment:
- The layout of the kitchen and the position of all ingredients, cooking stations, and agents
- Any active dish orders, their requirements, and time limits
- The status of dishes in progress
- Any recently completed or canceled orders

Generate a clear, organized plan that:
1. Prioritizes dishes based on time constraints
2. Assigns specific roles to each agent (e.g., Agent 0 handles chopping, Agent 1 handles cooking)
3. Coordinates agent movements to avoid collisions and inefficient paths
4. Adapts to new orders, cancellations, or completed dishes

Your plan should be strategic and high-level, not detailed movement instructions.
Executor agents will use your plan to determine their specific actions.

Follow the format:
```
OVERALL STRATEGY:
[Brief description of the overall approach]

DISH PRIORITIES:
[List of dishes in order of priority with reasoning]

AGENT ASSIGNMENTS:
Agent 0: [Role and responsibilities]
Agent 1: [Role and responsibilities]
...
```

Remember that your plan will be triggered at the start and whenever a dish is added, removed, or completed.
"""

# Few-shot conversation for the planner agent: a list of (role, content)
# tuples that PlannerAgent.initialize_prompt splices into the initial history.
# Each "user" entry has the layout PlannerAgent.plan assembles (error
# messages, suggestions, state text, then "-plan:"); each "assistant" entry is
# a plan written with the section headers PlannerAgent.clean_plan keeps.
# NOTE(review): the game/agent/kitchen state sections are hand-written here and
# presumably mirror what convert_to_prompt(obs) emits -- verify if that
# function's output format changes.
planner_examples = [
    # Example 1: initial plan at game step 0 with a single dish order.
    ("user", """
     THIS IS AN EXAMPLE FOR 2 AGENTS
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-plan:
"""),
    ("assistant", """
OVERALL STRATEGY:
We need to prepare tuna sashimi efficiently within the given lifetime. This requires getting raw tuna from storage, chopping it on the chopboard, and serving it on the serving table.

DISH PRIORITIES:
1. Tuna Sashimi - This is our only active order and we need to complete it before the lifetime expires.

AGENT ASSIGNMENTS:
Agent 0: Primary ingredient handler - Responsible for getting the raw tuna from storage and bringing it to the chopping board.
Agent 1: Primary preparation agent - Responsible for chopping the tuna and delivering the finished sashimi to the serving table.
"""),

    # Example 2: re-plan at game step 2 after a second dish (onigiri) is added.
    # The extra suggestion line is the same text PlannerAgent.plan appends when
    # obs.just_new_task is set.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  A new dish order has been received. Adjust the plan accordingly.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, chopboard0)
hold(agent0, raw_tuna)
at(agent1, chopboard0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-plan:
"""),
    ("assistant", """
OVERALL STRATEGY:
We need to efficiently complete both the tuna sashimi (which is already in progress) and the new onigiri order. Given the current state, we should first complete the tuna sashimi process since Agent 0 is already holding the raw tuna, then move on to preparing the onigiri.

DISH PRIORITIES:
1. Tuna Sashimi - This dish is already in progress with Agent 0 holding the raw tuna, and has less remaining lifetime.
2. Onigiri - New order with more time remaining.

AGENT ASSIGNMENTS:
Agent 0: Complete the tuna sashimi preparation by placing the raw tuna on the chopboard, then transition to getting rice for the onigiri.
Agent 1: Chop the tuna on the chopboard once Agent 0 places it, deliver the sashimi to the serving table, then assist with onigiri preparation.
""")
]


class PlannerAgent(Agent):
    """Planner that asks an LLM for one high-level plan covering every agent.

    The message history starts with the game rules, the few-shot
    ``planner_examples`` and the planner instructions. Each call to
    :meth:`plan` adds a prompt describing the current state and the plan the
    model produced for it.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Store the configuration and build the initial prompt history.

        Args:
            env: Environment object. It is passed to ``rules`` and
                :meth:`plan` reads ``env.feedback`` / ``env.suggestions``.
            model: Model identifier forwarded to ``query_llm``.
            agent_id: Identifier of this agent (stored; not read by this class).
            total_num_agents: Number of agents, substituted into the planner
                instructions.
            with_feedback: When true, execution error messages are included
                in every prompt.
            with_notes: When true, execution suggestions and dish-event notes
                are included in prompts; also forwarded to ``rules``.
            look_ahead_steps: Size of the rolling window of non-initial
                messages kept in the history.
        """
        # The base-class initializer is not invoked here; every attribute this
        # class reads is assigned below.
        self.env = env
        self.model = model
        self.agent_id = agent_id
        self.total_num_agents = total_num_agents
        self.with_feedback = with_feedback
        self.with_notes = with_notes
        self.look_ahead_steps = look_ahead_steps

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Messages up to this index are the fixed preamble and are never trimmed.
        self.initial_history_length = len(self.history)
        self.prompt_history = []
        self.previous_actions = []
        self.last_plan = ""

    def initialize_prompt(self):
        """Build the initial history and the default feedback/suggestion text.

        Returns:
            A ``(history, feedback, suggestions)`` tuple. ``history`` is a list
            of ``(role, content)`` tuples; the two strings are the empty
            "error messages" / "suggestions" sections used for the first
            prompt (both ``''`` when ``with_feedback`` is false).
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", planner_info_prompt.format(total_num_agents=self.total_num_agents))
        # planner_info_prompt opens with an "END OF EXAMPLES" marker, so it
        # must come after the few-shot examples (the same order ExecutorAgent
        # uses); placing it first put the marker in front of the examples.
        history = [pre_prompt] + planner_examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def plan(self, obs, step, verbose=False):
        """
        Generate a high-level plan for all agents based on the current game state.
        This method is called when a new plan is needed - at the start of the game and
        when there are changes to dish orders (new, cancelled, or completed).

        Args:
            obs: The current observation of the game state. Its
                ``just_new_task``, ``just_failed`` and ``task_just_success``
                flags are read when ``with_notes`` is enabled.
            step: The current step number; at step 0 the default (empty)
                feedback and suggestion sections are used.
            verbose: Whether to print debug information

        Returns:
            A ``(plan, price)`` tuple: the cleaned plan text and the second
            value returned by ``query_llm``.
        """
        # Refresh the feedback / suggestion sections from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += '  --  agent ids cannot be the same. \n'

            # Tell the model which dish event triggered this re-plan.
            if obs.just_new_task:
                self.suggestions += '  --  A new dish order has been received. Adjust the plan accordingly.\n'
            if obs.just_failed:
                self.suggestions += '  --  A dish order has been canceled. Adjust the plan accordingly.\n'
            if obs.task_just_success:
                self.suggestions += '  --  A dish has been completed. Adjust the plan accordingly.\n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-plan:\n'

        # Cap the message length: once the rolling window is full, keep the
        # fixed preamble plus the most recent (look_ahead_steps - 1) messages.
        if len(self.history) >= self.look_ahead_steps + self.initial_history_length:
            keep = self.look_ahead_steps - 1
            # history[-0:] is the WHOLE list, so a window of one (keep == 0)
            # must be handled explicitly or the preamble would be duplicated
            # and the history would grow without bound.
            recent = self.history[-keep:] if keep > 0 else []
            self.history = self.history[:self.initial_history_length] + recent
        self.history = prepend_history(self.history, prompt, verbose=verbose)

        # GENERATE PLAN
        if verbose:
            print("Generating new plan for all agents...")

        plan, price = query_llm(self.history, max_tokens=900, model=self.model, stop=None)
        plan = self.clean_plan(plan)

        self.last_plan = plan

        # Add the plan to the history
        self.history = prepend_history(self.history, plan, role='assistant', verbose=verbose)

        if verbose:
            print("Generated plan:", plan[:100] + "..." if len(plan) > 100 else plan)

        return plan, price

    def clean_plan(self, plan):
        """
        Clean the plan to only contain the required sections.

        A section starts at any line containing one of the required header
        names and ends at the next blank line. Lines outside a section are
        dropped, and so are Markdown code-fence lines (the format template in
        the planner instructions is fenced, so replies often are too).

        Args:
            plan: Raw plan text returned by the model.

        Returns:
            The kept lines joined with newlines ('' if no header was found).
        """
        required_sections = ("OVERALL STRATEGY", "DISH PRIORITIES", "AGENT ASSIGNMENTS", "COORDINATION NOTES")
        cleaned_plan = []
        in_section = False

        for line in plan.split('\n'):
            stripped = line.strip()

            if any(section in line for section in required_sections):
                # Header line: opens (or re-opens) a section and is kept.
                in_section = True
                cleaned_plan.append(line)
            elif not stripped:
                # A blank line closes the current section.
                in_section = False
            elif stripped.startswith('```'):
                # Code fence copied from the format template; not plan content
                # and it does not open or close a section.
                continue
            elif in_section:
                cleaned_plan.append(line)

        return '\n'.join(cleaned_plan)

    def update_history(self, actions, role='assistant', verbose=False):
        """Record the most recent plan in the history.

        Args:
            actions: Unused; accepted for signature compatibility with callers.
            role: Role to record the plan under.
            verbose: Forwarded to ``prepend_history``.

        Note:
            :meth:`plan` already records the plan it returns, so calling this
            afterwards passes the same plan to ``prepend_history`` again.
        """
        self.history = prepend_history(self.history, self.last_plan, role, verbose=verbose)


# Instruction message for an executor LLM.
# ExecutorAgent.initialize_prompt renders it with str.format, filling
# {agent_id} and {total_num_agents}; any literal brace added to this text later
# would have to be doubled.
# NOTE(review): the text promises "Coordination notes" in the plan, but the
# output format in planner_info_prompt only lists OVERALL STRATEGY, DISH
# PRIORITIES and AGENT ASSIGNMENTS -- confirm whether the planner is expected
# to emit a coordination section.
executor_info_prompt = """You are an executor agent (Agent {agent_id}) in a kitchen environment with {total_num_agents} total agents.
Your task is to determine your next action based on your specific role within the overall plan.

You have been given a high-level plan that outlines:
- The overall strategy for completing all dishes
- Dish priorities
- Agent assignments and roles
- Coordination notes for all agents

As Agent {agent_id}, focus ONLY on your assigned responsibilities from the plan.
Convert the high-level instructions into a specific action for this turn.
DO NOT try to execute actions for other agents.

Your available actions are:
- noop_agent{agent_id}: Do nothing
- goto_agent{agent_id}_location: Move to position X
- put_agent{agent_id}_object_location: Put held item on position X
- activate_agent{agent_id}_tool: Activate tool at position X
- get_agent{agent_id}_object_location: Pick up item at position X

Return ONLY your next action in the format: action_agent{agent_id}_parameters
"""

# Few-shot conversation for the executor agents: a list of (role, content)
# tuples that ExecutorAgent.initialize_prompt places between the rules and the
# executor instructions. Each "user" entry has the layout ExecutorAgent.step
# assembles (error messages, suggestions, state text, "-overall plan:" with the
# planner's plan, then "-action:"); each "assistant" entry is a single action.
# NOTE(review): every expected answer below is an agent0 action, yet the same
# list is given to executors of every agent_id -- confirm this is intended.
executor_examples = [
    # Example 1: game step 0, single-dish plan; Agent 0 heads to storage.
    ("user", """
THIS IS AN EXAMPLE 
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
OVERALL STRATEGY:
We need to prepare tuna sashimi efficiently within the given lifetime. This requires getting raw tuna from storage, chopping it on the chopboard, and serving it on the serving table.

DISH PRIORITIES:
1. Tuna Sashimi - This is our only active order and we need to complete it before the lifetime expires.

AGENT ASSIGNMENTS:
Agent 0: Primary ingredient handler - Responsible for getting the raw tuna from storage and bringing it to the chopping board.
Agent 1: Primary preparation agent - Responsible for chopping the tuna and delivering the finished sashimi to the serving table.

-action:
"""),
    ("assistant", "goto_agent0_storage0"),
    
    # Example 2: game step 1 with the same plan. The expected answer is again
    # an agent0 action (not agent1).
    # NOTE(review): "get_agent0_tuna" has no location suffix, unlike the
    # get_agent{agent_id}_object_location form listed in executor_info_prompt
    # -- confirm which form the environment accepts.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
OVERALL STRATEGY:
We need to prepare tuna sashimi efficiently within the given lifetime. This requires getting raw tuna from storage, chopping it on the chopboard, and serving it on the serving table.

DISH PRIORITIES:
1. Tuna Sashimi - This is our only active order and we need to complete it before the lifetime expires.

AGENT ASSIGNMENTS:
Agent 0: Primary ingredient handler - Responsible for getting the raw tuna from storage and bringing it to the chopping board.
Agent 1: Primary preparation agent - Responsible for chopping the tuna and delivering the finished sashimi to the serving table.

-action:
"""),
    ("assistant", "get_agent0_tuna"),
    
    # Example 3: game step 2 with the updated two-dish plan. The suggestions
    # include the "You are Agent 0" reminder in the same wording
    # ExecutorAgent.step appends when with_notes is enabled.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  You are Agent 0. Follow your role in the plan.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, raw_tuna)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
OVERALL STRATEGY:
We need to efficiently complete both the tuna sashimi (which is already in progress) and the new onigiri order. Given the current state, we should first complete the tuna sashimi process since Agent 0 is already holding the raw tuna, then move on to preparing the onigiri.

DISH PRIORITIES:
1. Tuna Sashimi - This dish is already in progress with Agent 0 holding the raw tuna, and has less remaining lifetime.
2. Onigiri - New order with more time remaining.

AGENT ASSIGNMENTS:
Agent 0: Complete the tuna sashimi preparation by placing the raw tuna on the chopboard, then transition to getting rice for the onigiri.
Agent 1: Chop the tuna on the chopboard once Agent 0 places it, deliver the sashimi to the serving table, then assist with onigiri preparation.

-action:
"""),
    ("assistant", "goto_agent0_chopboard0")
]
    

class ExecutorAgent(Agent):
    """Executor that turns the planner's high-level plan into one action per step.

    The message history starts with the game rules, the few-shot
    ``executor_examples`` and the executor instructions for this agent id.
    Each call to :meth:`step` adds a prompt with the current state and plan.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Initialise the base agent, then rebuild the prompt state for the executor role.

        Args:
            env: Environment object. It is passed to ``rules`` and
                :meth:`step` reads ``env.feedback`` / ``env.suggestions``.
            model: Model identifier forwarded to ``query_llm``.
            agent_id: Identifier of the agent this executor controls.
            total_num_agents: Number of agents in the environment.
            with_feedback: When true, execution error messages are included
                in every prompt.
            with_notes: When true, execution suggestions are included in
                prompts; also forwarded to ``rules``.
            look_ahead_steps: Size of the rolling window of non-initial
                messages kept in the history.
        """
        super().__init__(env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps)

        # Set the prompt state from the executor-specific prompts, after the
        # base initializer has run.
        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Messages up to this index are the fixed preamble and are never trimmed.
        self.initial_history_length = len(self.history)
        self.prompt_history = []

    def initialize_prompt(self):
        """Build the initial history and the default feedback/suggestion text.

        Returns:
            A ``(history, feedback, suggestions)`` tuple. ``history`` is a list
            of ``(role, content)`` tuples; the two strings are the empty
            "error messages" / "suggestions" sections used for the first
            prompt (both ``''`` when ``with_feedback`` is false).
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", executor_info_prompt.format(total_num_agents=self.total_num_agents, agent_id=self.agent_id))
        history = [pre_prompt] + executor_examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def step(self, obs, step, plan, verbose=False):
        """
        Generate the next action for this agent based on the overall plan and current state.

        Args:
            obs: The current observation of the game state
            step: The current step number; at step 0 the default (empty)
                feedback and suggestion sections are used.
            plan: The high-level plan generated by the planner agent
            verbose: Whether to print debug information

        Returns:
            A ``(actions, price)`` tuple. ``actions`` is a one-element list
            with the first parsed action, or the empty/falsy parse result when
            no action could be extracted; ``price`` is the second value
            returned by ``query_llm``.
        """
        # Refresh the feedback / suggestion sections from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'
            self.suggestions += f'  --  You are Agent {self.agent_id}. Follow your role in the plan.\n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-overall plan:\n' + plan.strip() + '\n\n-action:\n'

        # Cap the message length: once the rolling window is full, keep the
        # fixed preamble plus the most recent (look_ahead_steps - 1) messages.
        if len(self.history) >= self.look_ahead_steps + self.initial_history_length:
            keep = self.look_ahead_steps - 1
            # history[-0:] is the WHOLE list, so a window of one (keep == 0)
            # must be handled explicitly or the preamble would be duplicated
            # and the history would grow without bound.
            recent = self.history[-keep:] if keep > 0 else []
            self.history = self.history[:self.initial_history_length] + recent
        self.history = prepend_history(self.history, prompt, verbose=verbose)

        # GENERATE ACTION
        if verbose:
            print(f"Generating action for Agent {self.agent_id} based on plan...")

        action, price = query_llm(self.history, model=self.model)

        if verbose:
            print(f"Agent {self.agent_id} action: {action}")

        # Parsing is best-effort: a malformed reply yields "no action" rather
        # than an error. Only Exception is caught so that KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            parsed_actions = self.extract_actions(action)
            if parsed_actions:
                # The executor acts one step at a time; keep the first action.
                parsed_actions = [parsed_actions[0]]
        except Exception as exc:
            if verbose:
                print(f"Agent {self.agent_id} action could not be parsed: {exc}")
            parsed_actions = []

        if not parsed_actions:
            return parsed_actions, price

        self.update_history(parsed_actions, role='assistant', verbose=verbose)
        return [parsed_actions[0]], price