from my_agent.agent import Agent
from utils.llm import rules, prepend_history, query_llm
from levels.utils import convert_to_prompt
import re


# Default instruction block placed right after the few-shot examples in the
# orchestrator's initial history. It is a str.format template: the only
# placeholder is {total_num_agents}. The text is sent to the LLM verbatim, so
# any edit here changes model-facing behaviour.
orchestrator_info_prompt = """
----- END OF EXAMPLES -----

In this game, there are {total_num_agents} agents available, so you should generate the actions for all the {total_num_agents} agents.

When asked for reasoning, you will explain your thought process about the current state, what needs to be done by each agent, and why.
When asked for actions, you will provide the actions for all {total_num_agents} agents, one action per line.

Possible action types include:
- goto_agent[id]_[location]
- get_agent[id]_[item]_[location]
- put_agent[id]_[location]
- activate_agent[id]_[appliance]
- noop_agent[id]

Follow the formats exactly as shown in the examples, as your responses will be automatically parsed.
"""

# Paraphrased variants of the default instruction block, used only when the
# agent runs with ablation enabled: entry 0 seeds the initial history and
# OrchestratorAgent.step cycles through the list on later steps. Each entry is
# a str.format template with the single placeholder {total_num_agents} and
# lists the same five action formats as the default prompt.
orchestrator_info_prompts = [
    """
----- END OF EXAMPLES -----

This game involves {total_num_agents} agents. Your task is to generate actions for all {total_num_agents} agents.

If prompted for reasoning, clearly describe your thought process, including the current situation, each agent's responsibilities, and the rationale behind the plan.

If prompted for actions, list one action per agent, with each on a separate line.

Permitted action types are:
- goto_agent[id]_[location]
- get_agent[id]_[item]_[location]
- put_agent[id]_[location]
- activate_agent[id]_[appliance]
- noop_agent[id]

Be precise. Match the formatting shown in the examples exactly, as responses are parsed automatically.
""",

"""
----- END OF EXAMPLES -----

You are participating in a scenario involving {total_num_agents} agents. You are responsible for generating an action for each of the {total_num_agents} agents.

When reasoning is requested, outline your analysis of the current context, what tasks are needed, which agents should perform them, and your justification.

When actions are requested, return one action per agent, formatted as one action per line.

Supported action types are:
- goto_agent[id]_[location]
- get_agent[id]_[item]_[location]
- put_agent[id]_[location]
- activate_agent[id]_[appliance]
- noop_agent[id]

Adhere strictly to the formats provided above—responses are programmatically parsed.
""",

"""
----- END OF EXAMPLES -----

There are {total_num_agents} agents in this game. You must output actions for each of the {total_num_agents} agents.

If prompted for reasoning, explain the current situation, the goal for each agent, and your decision-making process.

If prompted for actions, return exactly one action per agent, listed on separate lines.

The allowed action formats are:
- goto_agent[id]_[location]
- get_agent[id]_[item]_[location]
- put_agent[id]_[location]
- activate_agent[id]_[appliance]
- noop_agent[id]

Do not deviate from these formats—responses will be parsed by a system expecting this exact structure.
"""
]


# Few-shot dialogue shown to the LLM before the live game starts. It is a list
# of (role, text) tuples alternating "user" and "assistant": each game step is
# a state dump ending in "-reasoning:", the assistant's reasoning, a
# "-actions:" request, and the assistant's one-action-per-line answer.
#
# The dish name is kept as salmonMeatcake across all three steps: the lifetime
# counts down 10 -> 9 -> 8, so it is the same order, and the assistant turns
# refer to salmonMeatcake throughout.
orchestrator_examples = [
("user",
"""----- START OF EXAMPLE -----
There are 2 agents available. So you can execute 2 actions at a time.
This is an example you can use as a reference for a different level.

-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant",
"""Both agents need to work together to make a salmonMeatcake. Looking at the current state, both agents are at the servingtable0 and not holding anything. The first step is to get the ingredients needed for the salmonMeatcake. Both agents should go to storage0 to get the required ingredients. Agent0 should get salmon and agent1 should get flour."""),

("user", "-actions:\n"),

("assistant",
"""goto_agent0_storage0
goto_agent1_storage0"""),

("user",
"""-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant","""Now both agents have reached the storage. They need to collect the ingredients for the salmonMeatcake. Agent0 should get the salmon and agent1 should get the flour, as these are the main ingredients needed for the dish."""),

("user", "-actions:\n"),

("assistant","""get_agent0_salmon_storage0
get_agent1_flour_storage0"""),

("user", """-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 8
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, salmon)
at(agent1, storage0)
hold(agent1, flour)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant","""Both agents now have their respective ingredients. Agent0 has salmon and Agent1 has flour. The next step is to process these ingredients in a blender to make the salmonMeatcake. Both agents should go to the blender to combine these ingredients."""),

("user", "-actions:\n"),

("assistant","""goto_agent0_blender0
goto_agent1_blender0""")]


class OrchestratorAgent(Agent):
    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps, ablation=False):
        """
        Store the run configuration and seed the conversation history.

        Args:
            env: Environment object; passed to rules() and later read for
                its feedback / suggestions attributes.
            model: Model identifier forwarded to query_llm.
            agent_id: Id used in the "agent{id}" hint added to suggestions.
            total_num_agents: Number of agents, substituted into the info prompt.
            with_feedback: Whether execution error messages are included in prompts.
            with_notes: Whether execution suggestions are included in prompts.
            look_ahead_steps: Number of most recent history messages kept
                (besides the initial prompt) when querying the LLM.
            ablation: When True, the paraphrased info prompts are used
                instead of the default one.
        """
        # Configuration comes first: initialize_prompt() reads these attributes.
        self.env, self.model = env, model
        self.agent_id, self.total_num_agents = agent_id, total_num_agents
        self.with_feedback, self.with_notes = with_feedback, with_notes
        self.look_ahead_steps = look_ahead_steps
        self.ablation = ablation
        self.current_prompt_index = 0

        seed_history, seed_feedback, seed_suggestions = self.initialize_prompt()
        self.history = seed_history
        self.feedback = seed_feedback
        self.suggestions = seed_suggestions
        # Length of the fixed preamble (rules + examples + info prompt).
        self.initial_history_length = len(seed_history)

        # Per-run bookkeeping, empty until the first step completes.
        self.prompt_history = []
        self.previous_actions = []
        self.previous_reasoning = ""


    def initialize_prompt(self):
        """
        Build the fixed preamble of the conversation and the default headers.

        The preamble is: the rules message, the few-shot example turns, and
        one info prompt formatted with the number of agents. With ablation
        enabled the first paraphrased info prompt is used instead of the
        default one.

        Returns:
            tuple: (history, feedback, suggestions). history is a list of
                (role, text) tuples. feedback and suggestions are the empty
                "[]" headers when with_feedback is set, otherwise empty strings.
        """
        rules_message = ("user", rules(self.env, self.with_notes))

        template = orchestrator_info_prompts[0] if self.ablation else orchestrator_info_prompt
        info_message = ("user", template.format(total_num_agents=self.total_num_agents))

        history = [rules_message, *orchestrator_examples, info_message]

        if not self.with_feedback:
            return history, '', ''

        return (
            history,
            '-execution error messages:\n  --  []\n',
            '-execution suggestions:\n  --  []\n',
        )

    
    def step(self, obs, step, verbose=False):
        """
        Generate a plan for the orchestrator agent for a given step.

        Two LLM queries are made: the first asks for reasoning about the
        current state, the second asks for the actions. Both the prompts and
        the responses are recorded in self.history.

        Args:
            obs (object): The current observation of the environment; it is
                rendered into text with convert_to_prompt.
            step (int): The current step number in the simulation. Feedback,
                suggestions and the ablation prompt are only refreshed when
                this is non-zero.
            verbose (bool, optional): Forwarded to prepend_history. Defaults to False.

        Returns:
            tuple: (parsed_actions, total_price, token_usage) where
                parsed_actions is the result of self.extract_actions on the
                action response, total_price is the sum of the costs of both
                LLM queries, and token_usage is a dict with the keys
                'reasoning' and 'actions' holding the token counts reported
                by query_llm for each query.
        """

        def windowed_history():
            # Keep the fixed preamble plus the most recent messages once the
            # history outgrows the window.
            # NOTE(review): assumes prepend_history returns the history with
            # the new message at the END - confirm against utils.llm.
            if len(self.history) <= self.look_ahead_steps + self.initial_history_length:
                return self.history
            # Slice by explicit start index with at least one message kept:
            # history[-0:] would return the WHOLE list and duplicate the
            # preamble when look_ahead_steps is 0.
            tail_length = max(self.look_ahead_steps, 1)
            return (self.history[:self.initial_history_length] +
                    self.history[len(self.history) - tail_length:])

        # Refresh the feedback / suggestion headers from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'

        # If ablation is true, rotate to the next paraphrased info prompt.
        if self.ablation and step > 0:
            self.current_prompt_index = (self.current_prompt_index + 1) % len(orchestrator_info_prompts)
            rotated_template = orchestrator_info_prompts[self.current_prompt_index]
            new_info_prompt = ("user", rotated_template.format(total_num_agents=self.total_num_agents))
            # The info prompt is the last message of the fixed preamble.
            self.history[self.initial_history_length - 1] = new_info_prompt

        # First query: reasoning.
        reasoning_prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-reasoning:\n'
        self.history = prepend_history(self.history, reasoning_prompt, role='user', verbose=verbose)

        reasoning_response, reasoning_price, reasoning_tokens = query_llm(
            windowed_history(), model=self.model, stop=None, max_tokens=400)
        reasoning = reasoning_response.strip()
        self.history = prepend_history(self.history, reasoning_response, role='assistant', verbose=verbose)

        # Second query: actions. The marker must be "-actions:" (plural) to
        # match the turn marker used in the few-shot examples the model is
        # told to follow exactly.
        action_prompt = "-actions:\n"
        self.history = prepend_history(self.history, action_prompt, role='user', verbose=verbose)

        actions_response, actions_price, actions_tokens = query_llm(
            windowed_history(), model=self.model, stop=None, max_tokens=100)
        self.history = prepend_history(self.history, actions_response, role='assistant', verbose=verbose)

        # Parse actions from response
        parsed_actions = self.extract_actions(actions_response)

        # Remember this step's outputs; previous_actions only changes when
        # parsing produced something.
        self.previous_reasoning = reasoning
        if parsed_actions:
            self.previous_actions = parsed_actions

        total_price = reasoning_price + actions_price

        return parsed_actions, total_price, {'reasoning': reasoning_tokens, 'actions': actions_tokens}
        
    