from my_agent.agent import Agent
from utils.llm import rules, prepend_history, query_llm
from levels.utils import convert_to_prompt
import re


# Instruction message placed AFTER the few-shot examples in the initial
# history (see OrchestratorAgent.initialize_prompt), which is why it opens
# with the "END OF EXAMPLES" marker.
# The only placeholder is {total_num_agents}; it is filled in with str.format,
# so any additional literal curly braces added to this text must be doubled.
orchestrator_info_prompt = """
----- END OF EXAMPLES -----

In this game, there are {total_num_agents} agents available, so you should generate the actions for all the {total_num_agents} agents.

When asked for reasoning, you will explain your thought process about the current state, what needs to be done by each agent, and why.
When asked for actions, you will provide the actions for all {total_num_agents} agents, one action per line.

Possible action types include:
- goto_agent[id]_[location]
- get_agent[id]_[item]_[location]
- put_agent[id]_[location]
- activate_agent[id]_[appliance]
- noop_agent[id]

Follow the formats exactly as shown in the examples, as your responses will be automatically parsed.
"""

# Few-shot conversation shown to the model before the real episode starts.
# It is a list of (role, content) pairs covering three game steps of a
# 2-agent episode; every step is made of four turns:
#   user      -> observation ending in "-reasoning:"
#   assistant -> free-form reasoning
#   user      -> "-actions:"
#   assistant -> one action per agent, one per line
# The ordered dish stays "salmonMeatcake" for the whole example (only its
# lifetime counts down) so the observations agree with the reasoning turns.
orchestrator_examples = [
("user",
"""----- START OF EXAMPLE -----
There are 2 agents available. So you can execute 2 actions at a time.
This is an example you can use as a reference for a different level.

-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant",
"""Both agents need to work together to make a salmonMeatcake. Looking at the current state, both agents are at the servingtable0 and not holding anything. The first step is to get the ingredients needed for the salmonMeatcake. Both agents should go to storage0 to get the required ingredients. Agent0 should get salmon and agent1 should get flour."""),

("user", "-actions:\n"),

("assistant",
"""goto_agent0_storage0
goto_agent1_storage0"""),

("user",
"""-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant","""Now both agents have reached the storage. They need to collect the ingredients for the salmonMeatcake. Agent0 should get the salmon and agent1 should get the flour, as these are the main ingredients needed for the dish."""),

("user", "-actions:\n"),

("assistant","""get_agent0_salmon_storage0
get_agent1_flour_storage0"""),

("user", """-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 8
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, salmon)
at(agent1, storage0)
hold(agent1, flour)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant","""Both agents now have their respective ingredients. Agent0 has salmon and Agent1 has flour. The next step is to process these ingredients in a blender to make the salmonMeatcake. Both agents should go to the blender to combine these ingredients."""),

("user", "-actions:\n"),

("assistant","""goto_agent0_blender0
goto_agent1_blender0""")]


class OrchestratorAgent(Agent):
    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Store the run configuration and build the initial prompt history.

        Args:
            env: Game environment; passed to rules() and later read for
                feedback / suggestions in step().
            model: Model identifier forwarded to query_llm.
            agent_id: Id interpolated into the single-agent hint in step().
            total_num_agents: Number of agents, substituted into the info prompt.
            with_feedback: Whether execution error messages are put in the prompt.
            with_notes: Passed to rules(); also enables execution suggestions.
            look_ahead_steps: How many of the most recent history entries are
                sent to the model in addition to the initial prompt.
        """
        # Configuration must be in place first: initialize_prompt() reads
        # env, with_notes, total_num_agents and with_feedback from self.
        self.env, self.model = env, model
        self.agent_id, self.total_num_agents = agent_id, total_num_agents
        self.with_feedback, self.with_notes = with_feedback, with_notes
        self.look_ahead_steps = look_ahead_steps

        initial_history, initial_feedback, initial_suggestions = self.initialize_prompt()
        self.history = initial_history
        self.feedback = initial_feedback
        self.suggestions = initial_suggestions
        # Length of the fixed prefix (rules + examples + info prompt) that
        # step() always keeps when it trims the conversation.
        self.initial_history_length = len(initial_history)

        # Per-episode bookkeeping, empty until the first step.
        self.prompt_history = []
        self.previous_actions = []
        self.previous_reasoning = ""


    def initialize_prompt(self):
        """Build the fixed conversation prefix and the step-0 prompt headers.

        Returns:
            tuple: (history, feedback, suggestions). history is the list of
                (role, content) pairs made of the game rules, the few-shot
                examples and the orchestrator info prompt, in that order.
                feedback and suggestions are the empty-list header strings
                when with_feedback is set, otherwise empty strings.
        """
        history = [
            ("user", rules(self.env, self.with_notes)),
            *orchestrator_examples,
            ("user", orchestrator_info_prompt.format(total_num_agents=self.total_num_agents)),
        ]

        if not self.with_feedback:
            return history, '', ''

        return (
            history,
            '-execution error messages:\n  --  []\n',
            '-execution suggestions:\n  --  []\n',
        )

    
    def step(self, obs, step, verbose=False):
        """
        Plan one environment step for all agents using two LLM queries.

        The model is first asked to reason about the current state and is then
        asked, in a follow-up turn, for the concrete actions. Both exchanges
        are recorded in self.history.

        Args:
            obs (object): The current observation of the environment.
            step (int): The current step number in the simulation.
            verbose (bool, optional): Forwarded to prepend_history. Defaults to False.

        Returns:
            tuple: (parsed_actions, total_price, tokens) where parsed_actions is
                   the result of self.extract_actions on the action response,
                   total_price is the sum of the costs of both LLM queries, and
                   tokens is a dict with keys 'reasoning' and 'actions' holding
                   the third value returned by query_llm for each query.
        """
        # Refresh the prompt headers from the environment. At step 0 the empty
        # placeholders created by initialize_prompt() are kept.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                # NOTE(review): this hint limits planning to a single agent,
                # while the orchestrator info prompt asks for actions for all
                # agents -- confirm this wording is intended here.
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'

        # First query: ask for reasoning about the current observation.
        reasoning_prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-reasoning:\n'
        self.history = prepend_history(self.history, reasoning_prompt, role='user', verbose=verbose)

        reasoning_response, reasoning_price, reasoning_tokens = query_llm(
            self._windowed_history(), model=self.model, stop=None, max_tokens=400)
        reasoning = reasoning_response.strip()
        self.history = prepend_history(self.history, reasoning_response, role='assistant', verbose=verbose)

        # Second query: ask for the actions. The prompt text must be the same
        # "-actions:" turn used in the few-shot examples, otherwise the live
        # conversation does not follow the format the model was shown.
        action_prompt = "-actions:\n"
        self.history = prepend_history(self.history, action_prompt, role='user', verbose=verbose)

        actions_response, actions_price, actions_tokens = query_llm(
            self._windowed_history(), model=self.model, stop=None, max_tokens=100)
        self.history = prepend_history(self.history, actions_response, role='assistant', verbose=verbose)

        # Parse actions from response
        parsed_actions = self.extract_actions(actions_response)

        # Remember the latest reasoning; the previous actions are only
        # overwritten when parsing produced something.
        self.previous_reasoning = reasoning
        if parsed_actions:
            self.previous_actions = parsed_actions

        total_price = reasoning_price + actions_price
        tokens = {'reasoning': reasoning_tokens, 'actions': actions_tokens}

        return parsed_actions, total_price, tokens


    def _windowed_history(self):
        """Return the messages to send to the LLM for the next query.

        The fixed prefix (first initial_history_length entries) is always
        kept; from the rest only the last look_ahead_steps entries are sent
        once the history has grown beyond prefix + window.
        """
        window = self.look_ahead_steps
        # A zero window cannot be honoured: history[-0:] is the WHOLE list, so
        # the prefix would be sent twice. Fall back to the full history, which
        # is also what is sent while the history still fits in the window.
        if window <= 0 or len(self.history) <= window + self.initial_history_length:
            return self.history

        # Keep initial instructions plus most recent conversations
        return self.history[:self.initial_history_length] + self.history[-window:]
        
    