import re 

from utils.llm import rules, prepend_history, query_llm
from levels.utils import convert_to_prompt


# Per-agent instruction template. Agent.initialize_prompt fills the
# {total_num_agents} and {agent_id} placeholders via str.format and places the
# result *after* the few-shot examples, which is why the text opens with the
# END OF EXAMPLES marker. The action formats listed here use the same
# "<verb>_agent<N>[_<args>]" shape that Agent.extract_actions parses.
agent_info_prompt = """
----- END OF EXAMPLES -----

In this game, there are {total_num_agents} agents available, but you are controlling agent{agent_id} and can only plan and execute actions for this agent.

When asked for reasoning, you will explain your thought process about the current state, what needs to be done, and why.
When asked for an action, you will provide just the action for your agent based on your reasoning.

Possible action types include:
- goto_agent{agent_id}_[location]
- get_agent{agent_id}_[item]_[location]
- put_agent{agent_id}_[location]
- activate_agent{agent_id}_[appliance]
- noop_agent{agent_id}

Follow the formats exactly as shown in the examples, as your responses will be automatically parsed.
"""

# Few-shot conversation shown to the model before the real game starts.
# Each entry is a (role, text) pair; the trace covers three consecutive game
# steps, and every step is two exchanges: state prompt -> reasoning, then
# "-action:" prompt -> a single action string.
#
# The trace is written from agent0's point of view; Agent.initialize_prompt
# rewrites the agent names for the agent that is actually being controlled.
#
# The state prompts must stay self-consistent across steps (same pending dish
# with a decreasing lifetime) and must end exactly like the prompt built at
# runtime ("-reasoning:\n"), because the model imitates this format.
agent_examples = [
("user",
"""----- START OF EXAMPLE -----
There are 2 agents available. You are controlling agent0. You can execute 1 action at a time.
This is an example you can use as a reference for a different level.

-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant",
"""I need to make a salmonMeatcake. Looking at my current state, I'm at the servingtable0 and not holding anything. The first step is to get the ingredients needed for the salmonMeatcake. I should go to storage0 to get salmon, which is the main ingredient for the dish."""),

("user", "-action:\n"),

("assistant", "goto_agent0_storage0"),

("user",
"""-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant", "Now I've reached storage0. Since I need to make a salmonMeatcake, I need to get the salmon from storage0. I see I'm at the right location, so I should get the salmon now."),

("user", "-action:\n"),

("assistant", "get_agent0_salmon_storage0"),

("user", """-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_1
current dishes:
    name: salmonMeatcake lifetime: 8
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, salmon)
at(agent1, storage0)
hold(agent1, flour)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(blender0, None)
inside(blender1, None)

-accomplished task:

-reasoning:
"""),

("assistant", "I now have the salmon in my hands. The next step is to process it in a blender to make the salmonMeatcake. I need to go to one of the blenders, and blender0 is available. I should head there next."),

("user", "-action:\n"),

("assistant", "goto_agent0_blender0")]


class Agent():
    """LLM-backed controller for a single agent in a multi-agent game.

    Every call to :meth:`step` makes two LLM queries -- one asking for
    free-form reasoning and one asking for the action -- and records both
    exchanges in ``self.history`` as ``(role, text)`` messages.

    Attributes:
        history: Conversation so far; starts as rules + few-shot examples +
            per-agent instructions.
        initial_history_length: Number of leading ``history`` entries that are
            always sent to the model.
        feedback / suggestions: Text blocks placed in front of the next state
            prompt (empty strings when the matching option is disabled).
        previous_reasoning: Stripped reasoning text of the latest step, or
            ``None`` before the first step.
        previous_actions: Action list of the latest step that produced a
            parsable action (``[]`` until then).
        prompt_history: Initialised to an empty list; not written by this class.
    """

    # Verbs of the action grammar "<verb>_agent<N>[_<args>]".
    _ACTION_TYPES = ("noop", "goto", "put", "activate", "get")
    # Single source of truth for recognising an action string. Group 0 (and
    # group 1) is the complete action, e.g. "get_agent0_salmon_storage0".
    _ACTION_PATTERN = re.compile(
        r'((' + '|'.join(_ACTION_TYPES) + r')_agent\d+(_[a-zA-Z0-9_]+)?)'
    )
    # Matches any agent name ("agent0", "agent12", ...) inside example text.
    _AGENT_NAME_PATTERN = re.compile(r'agent\d+')

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        """Store the configuration and build the initial prompt history.

        Args:
            env: Game environment; passed to ``rules`` and read for
                ``env.feedback`` / ``env.suggestions`` during :meth:`step`.
            model: Model identifier forwarded to ``query_llm``.
            agent_id: Id of the agent this instance controls.
            total_num_agents: Number of agents in the game (prompt text only).
            with_feedback: If true, execution error messages are included in
                the state prompts.
            with_notes: If true, execution suggestions are included in the
                state prompts (also forwarded to ``rules``).
            look_ahead_steps: Number of most recent history *messages* (not
                game steps) sent to the model in addition to the initial
                prompt once the history grows beyond that size.
        """
        self.env = env
        self.model = model
        self.agent_id = agent_id
        self.total_num_agents = total_num_agents
        self.with_feedback = with_feedback
        self.with_notes = with_notes
        self.look_ahead_steps = look_ahead_steps

        # Defined up front so readers never hit AttributeError before the
        # first (successful) step.
        self.previous_reasoning = None
        self.previous_actions = []

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        self.initial_history_length = len(self.history)
        self.prompt_history = []

    def initialize_prompt(self):
        """Build the initial conversation and the default feedback blocks.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is
            ``[rules] + few-shot examples + [agent instructions]`` and the two
            strings are empty "no errors / no suggestions" blocks when
            ``with_feedback`` is set, otherwise empty strings.
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", agent_info_prompt.format(total_num_agents=self.total_num_agents, agent_id=self.agent_id))
        examples = self._personalize_examples(agent_examples)
        history = [pre_prompt] + examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def _personalize_examples(self, examples):
        """Rewrite examples written for agent0 so they describe this agent.

        The names ``agent0`` and ``agent<agent_id>`` are *swapped* rather than
        one-way replaced: a plain replace would merge the controlled agent
        with a teammate that already carries this agent's name in the example
        (e.g. two ``agent1`` entries with contradictory state). Other agent
        names are left untouched.

        Args:
            examples: Iterable of ``(role, text)`` pairs.

        Returns:
            list: New ``(role, text)`` pairs with the names swapped.
        """
        own_name = f"agent{self.agent_id}"
        swap = {"agent0": own_name, own_name: "agent0"}

        def _swap_name(match):
            name = match.group(0)
            return swap.get(name, name)

        return [(role, self._AGENT_NAME_PATTERN.sub(_swap_name, text)) for role, text in examples]

    def _query_history(self):
        """Return the messages to send to the LLM for the next query.

        While the history is short it is sent whole. Once it exceeds
        ``initial_history_length + look_ahead_steps`` messages, only the
        initial prompt plus the last ``look_ahead_steps`` messages are sent.
        """
        if len(self.history) > self.look_ahead_steps + self.initial_history_length:
            # NOTE(review): for look_ahead_steps == 0 the slice below is
            # history[-0:], i.e. the *whole* history, so the initial prompt is
            # sent twice. Behaviour kept as-is -- confirm whether 0 is a
            # supported setting.
            return (self.history[:self.initial_history_length]
                    + self.history[-(self.look_ahead_steps):])
        return self.history

    def step(self, obs, step, verbose=False):
        """Run one reasoning query and one action query for this agent.

        Args:
            obs: Observation for the current step; rendered into prompt text
                by ``convert_to_prompt``.
            step (int): Current step number. Feedback and suggestions are
                refreshed from ``self.env`` only when this is non-zero.
            verbose (bool, optional): Forwarded to ``prepend_history``.
                Defaults to False.

        Returns:
            tuple: ``(parsed_actions, total_price, tokens)`` where
            ``parsed_actions`` is a list holding at most one action string
            (empty if none could be parsed), ``total_price`` is the sum of the
            prices reported by both LLM queries, and ``tokens`` is
            ``{'reasoning': ..., 'actions': ...}`` with the token figures
            reported by each query.
        """
        # Refresh the feedback blocks from the environment.
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                # The model tried to act for another agent; remind it of its scope.
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'

        # First exchange: ask for reasoning about the current state.
        # NOTE(review): prepend_history is assumed to return the history with
        # the new message added at the END (the windowing in _query_history
        # treats the tail as most recent) -- confirm against utils.llm.
        reasoning_prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-reasoning:\n'
        self.history = prepend_history(self.history, reasoning_prompt, role='user', verbose=verbose)
        reasoning_response, reasoning_price, reasoning_tokens = query_llm(
            self._query_history(), model=self.model, stop=None, max_tokens=400)
        self.history = prepend_history(self.history, reasoning_response, role='assistant', verbose=verbose)

        # Second exchange: ask for the action itself.
        self.history = prepend_history(self.history, "-action:\n", role='user', verbose=verbose)
        action_response, action_price, action_tokens = query_llm(
            self._query_history(), model=self.model, stop=None, max_tokens=100)
        self.history = prepend_history(self.history, action_response, role='assistant', verbose=verbose)

        # Only the first action found in the reply is executed.
        parsed_actions = self.extract_actions(action_response)[:1]

        self.previous_reasoning = reasoning_response.strip()
        if parsed_actions:
            # Keep the last successfully parsed action when this reply had none.
            self.previous_actions = parsed_actions

        total_price = reasoning_price + action_price

        return parsed_actions, total_price, {'reasoning': reasoning_tokens, 'actions': action_tokens}

    def extract_actions_and_reasoning(self, text):
        """Split a combined LLM reply into reasoning text and action strings.

        If the reply contains both ``REASONING START`` and ``REASONING END``,
        the reasoning is the text between them and the actions are every
        action string found after ``REASONING END``.

        Otherwise the reply is scanned line by line: everything before the
        first line that starts with an action is the reasoning, and every
        line from there on that starts with an action contributes that
        action. If no line starts with an action, the whole reply is returned
        as reasoning with an empty action list.

        Args:
            text (str): The text response from the LLM.

        Returns:
            tuple: ``(reasoning, actions)`` -- a string and a list of complete
            action strings such as ``"goto_agent0_storage0"``.
        """
        reasoning_start = "REASONING START"
        reasoning_end = "REASONING END"

        if reasoning_start in text and reasoning_end in text:
            # Both markers are present, so each split yields two parts.
            reasoning = text.split(reasoning_start, 1)[1].split(reasoning_end, 1)[0].strip()
            actions_text = text.split(reasoning_end, 1)[1]
            return reasoning, self.extract_actions(actions_text)

        # Fallback: no markers, look for lines that begin with an action.
        lines = text.split('\n')
        line_matches = [self._ACTION_PATTERN.match(line.strip()) for line in lines]
        first_action_idx = next((i for i, m in enumerate(line_matches) if m), None)

        if first_action_idx is None:
            return text.strip(), []

        reasoning = '\n'.join(lines[:first_action_idx]).strip()
        # group(0) is the complete action, not just the verb.
        actions = [m.group(0) for m in line_matches if m]
        return reasoning, actions

    def extract_actions(self, text):
        """Return every action string found anywhere in ``text``, in order.

        Args:
            text (str): Text to scan, typically the LLM's action reply.

        Returns:
            list: Complete action strings (empty if none are found).
        """
        return [match.group(0) for match in self._ACTION_PATTERN.finditer(text)]
    

