from my_agent.agent import Agent
from utils.llm import rules, prepend_history, query_llm
from levels.utils import convert_to_prompt
from prompts.orchestrator_agent_action_only import orchestrator_examples, orchestrator_info_prompt


# Instruction message that closes the few-shot section of the conversation.
# ``OrchestratorAgent.initialize_prompt`` fills the ``{total_num_agents}``
# placeholders with ``str.format`` and appends the result after
# ``orchestrator_examples``.
# NOTE(review): the name ``orchestrator_info_prompt`` is also imported from
# ``prompts.orchestrator_agent_action_only`` at the top of this file; this
# assignment rebinds it, so the imported value is not the one used below.
# Confirm which definition is meant to win.
orchestrator_info_prompt = """
----- END OF EXAMPLES -----

In this game, there are {total_num_agents} agents available, so you should generate the actions for all the {total_num_agents} agents.
Your task is to generate the actions for all the agents given the state of the game.
Follow the return format from the example. Return only the action.
"""

# Few-shot demonstration given to the LLM as a list of ``(role, content)``
# tuples. It shows three consecutive turns of a 2-agent ``level_4`` game:
# each "user" message is a rendered game state ending in an ``-action:`` cue,
# and each "assistant" message is the expected reply -- one action per agent,
# one per line.
# NOTE(review): the name ``orchestrator_examples`` is also imported from
# ``prompts.orchestrator_agent_action_only`` at the top of this file; this
# assignment rebinds it.
# NOTE(review): the assistant replies are not formatted uniformly -- the
# first has no leading newline, the second and third start with one, and the
# third also ends with one; the second user message ends with "-action: "
# (trailing space) while the others end with "-action:". Confirm whether
# this is intentional before normalising, since the text is sent verbatim.
orchestrator_examples = [
# Turn 1 (game step 0): both agents idle at the serving table.
("user", 
"""----- START OF EXAMPLE -----
There are 2 agents available. So you can execute 2 actions at a time.
This is an example you can use as a reference for a different level.

-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-action:
"""),

# Expected reply: send both agents to the storage.
("assistant",
"""goto_agent0_storage0
goto_agent1_storage0"""),

# Turn 2 (game step 1): both agents are now at the storage, hands empty.
("user",
"""-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-action: 
"""),

# Expected reply: both agents take tuna from the storage.
("assistant","""
get_agent0_tuna_storage0
get_agent1_tuna_storage0"""),

# Turn 3 (game step 2): both agents hold tuna.
("user", """-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, tuna)
at(agent1, storage0)
hold(agent1, tuna)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-action:
"""),

# Expected reply: both agents walk to the chopping board.
("assistant","""
goto_agent0_chopboard0
goto_agent1_chopboard0
""")]


class OrchestratorAgent(Agent):
    """Agent that asks an LLM for the next actions of every agent in the game.

    The conversation sent to the model always starts with a fixed prefix (the
    game rules, the few-shot ``orchestrator_examples`` and the formatted
    ``orchestrator_info_prompt``) followed by a bounded window of the most
    recent game-state prompts and replies.

    ``extract_actions`` and ``update_history`` are called on ``self`` but are
    not defined in this part of the file; they are expected to come from
    ``Agent`` or from elsewhere in this class.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Store the configuration and build the fixed prompt prefix.

        Args:
            env: Game environment. It is passed to ``rules`` and its
                ``feedback`` / ``suggestions`` attributes are read in ``step``.
            model: Model identifier forwarded to ``query_llm``.
            agent_id: Id of this agent; used only in a suggestion message.
            total_num_agents: Number of agents, substituted into the info prompt.
            with_feedback: If true, execution error messages from the
                environment are included in every prompt.
            with_notes: If true, execution suggestions from the environment
                are included in every prompt; also forwarded to ``rules``.
            look_ahead_steps: Size, in messages, of the rolling window kept
                after the fixed prefix.
        """
        # NOTE(review): ``Agent.__init__`` is not called here; confirm the base
        # class does not need to initialise any state of its own.
        self.env = env
        self.model = model
        self.agent_id = agent_id
        self.total_num_agents = total_num_agents
        self.with_feedback = with_feedback
        self.with_notes = with_notes
        self.look_ahead_steps = look_ahead_steps

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        # Length of the fixed prefix; everything after it is the rolling window.
        self.initial_history_length = len(self.history)
        self.prompt_history = []
        self.previous_actions = []

    def initialize_prompt(self):
        """Build the fixed conversation prefix and the initial feedback text.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is a
            list of ``(role, content)`` tuples (rules, few-shot examples, info
            prompt) and ``feedback`` / ``suggestions`` are the empty
            placeholder sections used for the first prompt, or empty strings
            when ``with_feedback`` is false.
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", orchestrator_info_prompt.format(total_num_agents=self.total_num_agents))
        history = [pre_prompt] + orchestrator_examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def step(self, obs, step, verbose=False):
        """Query the LLM for the actions of all agents at the given step.

        Args:
            obs (object): The current observation of the environment; it is
                rendered with ``convert_to_prompt``.
            step (int): The current step number in the simulation. At step 0
                the placeholder feedback/suggestions from ``initialize_prompt``
                are used instead of the environment's.
            verbose (bool, optional): Forwarded to the history helpers and
                enables a message when the LLM output cannot be parsed.
                Defaults to False.

        Returns:
            tuple: ``(parsed_actions, price)``. ``parsed_actions`` is the
            result of ``self.extract_actions`` on the raw LLM output, or an
            empty list if parsing raised. ``price`` is the second value
            returned by ``query_llm`` (presumably the cost of the call).
        """
        # Refresh the feedback sections from the environment (not on the very
        # first step, where nothing has been executed yet).
        if step != 0:
            if self.with_feedback:
                self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'
            if self.with_notes:
                self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
                if 'agent ids cannot be the same' in self.feedback:
                    self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-action:\n'

        # Cap the message length: once the rolling window is full, keep the
        # fixed prefix plus the newest ``look_ahead_steps - 1`` messages so
        # that the prompt added below fills the window again.
        if len(self.history) >= self.look_ahead_steps + self.initial_history_length:
            keep = max(self.look_ahead_steps - 1, 0)
            # ``history[-0:]`` would be the whole list, not an empty tail, so
            # the zero case has to be handled explicitly.
            recent = self.history[-keep:] if keep else []
            self.history = self.history[:self.initial_history_length] + recent
        self.history = prepend_history(self.history, prompt, verbose=verbose)

        # Generate the action.
        action, price = query_llm(self.history, model=self.model, stop=None)

        # Malformed model output is treated as "no actions" rather than a
        # crash, but interpreter-level signals (KeyboardInterrupt, SystemExit)
        # are allowed to propagate.
        try:
            parsed_actions = self.extract_actions(action)
        except Exception as exc:
            if verbose:
                print(f"Failed to parse LLM output: {exc!r}")
            parsed_actions = []

        # Only record the reply when something usable was parsed.
        if parsed_actions:
            self.update_history(parsed_actions, role='assistant', verbose=verbose)
            self.previous_actions = parsed_actions

        return parsed_actions, price