from agent import Agent
from utils.llm import rules, prepend_history, query_llm
from levels.utils import convert_to_prompt
from prompts.planner_agent import planner_info_prompt, planner_examples, executor_info_prompt, executor_examples
import json
from collections import defaultdict
import os


class PlannerAgent(Agent):
    """Planner that queries an LLM for one high-level plan covering all agents.

    On construction it loads per-team-size history from
    ``granular_history_{total_num_agents}agent.json`` (if present), condenses
    it into a text summary and embeds that summary in the initial prompt.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        # NOTE(review): Agent.__init__ is not called here; attributes are set
        # directly. initialize_prompt() below reads self.historical_summary, so
        # the history has to be loaded first -- confirm the base class requires
        # no additional setup.
        self.env = env
        self.model = model
        self.agent_id = agent_id
        self.total_num_agents = total_num_agents
        self.with_feedback = with_feedback
        self.with_notes = with_notes
        self.look_ahead_steps = look_ahead_steps

        # Load historical data first
        self.historical_data = self.load_historical_data()
        self.historical_summary = self.analyze_historical_data()

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        self.initial_history_length = len(self.history)
        self.prompt_history = []
        self.previous_actions = []
        self.last_plan = ""

    def load_historical_data(self):
        """Load historical data for the current number of agents.

        Returns:
            The parsed JSON content of ``granular_history_{N}agent.json``
            (resolved relative to the current working directory), or ``{}``
            when the file is missing or cannot be read/parsed. Failures are
            reported with a printed warning and never raised.
        """
        history_file = f"granular_history_{self.total_num_agents}agent.json"
        try:
            with open(history_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            print(f"Warning: Historical data file not found: {history_file}")
        except Exception as e:
            # Best effort: a broken history file must not prevent the agent
            # from being constructed.
            print(f"Warning: Could not load historical data: {e}")
        return {}

    @staticmethod
    def _action_type(action):
        """Return the verb prefix of an action string (e.g. ``get``), or None.

        None is returned for empty, ``noop`` and non-string entries so callers
        can skip them uniformly.
        """
        if not isinstance(action, str) or not action or action.startswith('noop'):
            return None
        return action.split('_')[0]

    def analyze_historical_data(self):
        """Analyze historical data to create a useful summary for the planner.

        Each step record is read as ``[plan, *agent_actions, completed_orders,
        completed_this_step]``. An action is counted as successful when it
        occurred in one of the (up to) three steps preceding a step whose
        ``completed_this_step`` flag is truthy.

        Returns:
            str: A multi-line summary with per-agent success rates and up to
            three example action sequences, or a fixed message when no
            historical data is available.
        """
        if not self.historical_data:
            return "No historical data available."

        # Track success rates for different actions
        action_stats = defaultdict(lambda: defaultdict(lambda: {'success': 0, 'total': 0}))
        successful_sequences = []

        # Analyze each level
        for level_data in self.historical_data.values():
            # Per-step action lists seen so far in this level.
            seen_actions = []
            # Indices into seen_actions that were already credited, so two
            # completions close together never credit the same step twice.
            credited = set()
            for step in level_data:
                # Skip malformed records instead of aborting construction.
                if not isinstance(step, (list, tuple)) or len(step) < 3:
                    continue
                agent_actions = list(step[1:-2])
                completed_this_step = step[-1]

                if completed_this_step:
                    # The actions in the last few steps contributed to success.
                    window_start = max(0, len(seen_actions) - 3)
                    for idx in range(window_start, len(seen_actions)):
                        if idx in credited:
                            continue
                        credited.add(idx)
                        for agent_idx, action in enumerate(seen_actions[idx]):
                            action_type = self._action_type(action)
                            if action_type is not None:
                                # 'total' was already incremented when this
                                # step was first processed; only mark success.
                                action_stats[f"Agent{agent_idx}"][action_type]['success'] += 1

                    # Record the successful sequence (nothing to show when the
                    # completion happened before any step was recorded).
                    if seen_actions:
                        successful_sequences.append(seen_actions[-3:])

                # Track all actions
                for agent_idx, action in enumerate(agent_actions):
                    action_type = self._action_type(action)
                    if action_type is not None:
                        action_stats[f"Agent{agent_idx}"][action_type]['total'] += 1

                seen_actions.append(agent_actions)

        # Format the summary
        summary = ["Agent Performance Statistics:"]
        for agent in sorted(action_stats):
            summary.append(f"\n{agent}:")
            for action_type, stats in action_stats[agent].items():
                success_rate = (stats['success'] / stats['total'] * 100) if stats['total'] > 0 else 0
                summary.append(f"- {action_type}: {success_rate:.1f}% success rate ({stats['success']}/{stats['total']} successful)")

        summary.append("\nSuccessful Patterns:")
        # Add a few example successful sequences
        for seq in successful_sequences[:3]:  # Show up to 3 examples
            summary.append("\nSuccessful sequence example:")
            for step_actions in seq:
                actions = [a for a in step_actions if self._action_type(a) is not None]
                if actions:
                    summary.append("  " + " | ".join(actions))

        return "\n".join(summary)

    def initialize_prompt(self):
        """Build the initial conversation and the default feedback strings.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is
            the rules prompt, the formatted planner info prompt and the
            planner examples, and the two strings are empty placeholders
            (or ``''`` when feedback is disabled).
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", planner_info_prompt.format(
            total_num_agents=self.total_num_agents,
            historical_summary=self.historical_summary,
            budget=0,  # This will be updated during planning
            costs="No cost data available yet."
        ))
        history = [pre_prompt] + [info_prompt] + planner_examples

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def plan(self, obs, step, verbose=False):
        """
        Generate a high-level plan for all agents based on the current game state.
        This method is called when a new plan is needed - at the start of the game and
        when there are changes to dish orders (new, cancelled, or completed).

        Args:
            obs: The current observation of the game state
            step: The current step number
            verbose: Whether to print debug information

        Returns:
            tuple: (plan, price, tokens) containing the high-level plan string,
            and the price and token values reported by query_llm
        """
        # update history
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += '  --  agent ids cannot be the same. \n'

            # Add information about dish events
            if obs.just_new_task:
                self.suggestions += '  --  A new dish order has been received. Adjust the plan accordingly.\n'
            if obs.just_failed:
                self.suggestions += '  --  A dish order has been canceled. Adjust the plan accordingly.\n'
            if obs.task_just_success:
                self.suggestions += '  --  A dish has been completed. Adjust the plan accordingly.\n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-plan:\n'

        # Update history with the prompt
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)

        # Manage history length for LLM query: keep the initial prompt block
        # plus the most recent look_ahead_steps messages.
        if len(self.history) > self.look_ahead_steps + self.initial_history_length:
            # Compute the start index explicitly: history[-0:] would return
            # the whole list (duplicating the initial prompts) when
            # look_ahead_steps is 0.
            recent_start = len(self.history) - self.look_ahead_steps
            query_history = (self.history[:self.initial_history_length] +
                             self.history[recent_start:])
        else:
            query_history = self.history

        # GENERATE PLAN
        if verbose:
            print("Generating new plan for all agents...")

        plan_response, price, tokens = query_llm(query_history, max_tokens=400, model=self.model, stop=None)
        plan = plan_response.strip()
        print("PLAN tokens: ", tokens)
        print("PLAN price: ", price)

        # Update history with the plan response
        self.history = prepend_history(self.history, plan, role='assistant', verbose=verbose)

        self.last_plan = plan

        if verbose:
            preview = plan[:100] + "..." if len(plan) > 100 else plan
            print("Generated plan:", preview)

        return plan, price, tokens

    def clean_plan(self, plan):
        """
        Just return the plan as is, since we're now using a single paragraph format.
        """
        return plan.strip()
    
    


# Prompt template for executor agents. ExecutorAgent.initialize_prompt fills
# the {agent_id} and {total_num_agents} placeholders via str.format.
# NOTE(review): this module-level assignment rebinds the name that is also
# imported from prompts.planner_agent at the top of the file, so the imported
# template is not the one used below -- confirm which definition is meant to
# be authoritative.
executor_info_prompt = """You are an executor agent (Agent {agent_id}) in a kitchen environment with {total_num_agents} total agents.
Your task is to determine your next action based on your specific role within the overall plan.

You have been given a concise, high-level plan that describes how all agents should work together to complete dish orders.
As Agent {agent_id}, focus ONLY on the parts of the plan that involve your agent.
Convert the high-level instructions into a specific action for this turn.
DO NOT try to execute actions for other agents.

Your available actions are:
- noop_agent{agent_id}: Do nothing
- goto_agent{agent_id}_location: Move to position X
- put_agent{agent_id}_object_location: Put held item on position X
- activate_agent{agent_id}_tool: Activate tool at position X
- get_agent{agent_id}_object_location: Pick up item at position X

Return ONLY your next action in the format: action_agent{agent_id}_parameters
"""

# Few-shot examples for the executor agents: a flat list of (role, content)
# tuples alternating "user" prompts and "assistant" answers.
# ExecutorAgent.initialize_prompt splices this list into the initial history.
# NOTE(review): like executor_info_prompt, this rebinds a name that is also
# imported from prompts.planner_agent at the top of the file.
executor_examples = [
    # Example 1: step 0, expected answer is an action for agent0
    ("user", """
THIS IS AN EXAMPLE 
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 10
current game step: 0
maximum game steps: 60

-agent state:
at(agent0, servingtable0)
hold(agent0, None)
at(agent1, servingtable0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "goto_agent0_storage0"),
    
    # Example 2: one step later with the same plan; the expected answer is
    # again an action for agent0 (not agent1).
    # NOTE(review): "get_agent0_tuna" has no location segment, whereas the
    # action list in executor_info_prompt shows get_agent{id}_object_location
    # -- confirm against the environment's action grammar.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 9
current game step: 1
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, None)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 will retrieve raw tuna from storage and bring it to the chopboard while Agent 1 moves to the chopboard to prepare for chopping. Once Agent 0 places the tuna on the chopboard, Agent 1 will chop it and deliver the finished sashimi to the serving table before the 10-step lifetime expires.

-action:
"""),
    ("assistant", "get_agent0_tuna"),
    
    # Example 3: agent0 with an updated plan after a second dish (onigiri)
    # appears; the prompt also carries the per-agent reminder suggestion.
    ("user", """
-execution error messages:
  --  []
-execution suggestions:
  --  []
  --  You are Agent 0. Follow your role in the plan.
-game state:
current game level: level_4
current dishes:
    name: tunaSashimi lifetime: 8
    name: onigiri lifetime: 15
current game step: 2
maximum game steps: 60

-agent state:
at(agent0, storage0)
hold(agent0, raw_tuna)
at(agent1, storage0)
hold(agent1, None)

-kitchen state:
inside(storage0, None)
inside(servingtable0, None)
inside(chopboard0, None)
inside(mixer0, None)

-accomplished task:

-overall plan:
Agent 0 should place the raw tuna on the chopboard immediately, then go to storage to fetch rice for onigiri while Agent 1 chops the tuna, delivers the sashimi to the serving table, and then assists with mixing rice and nori for the onigiri which has more time remaining.

-action:
"""),
    ("assistant", "goto_agent0_chopboard0")
]
    

class ExecutorAgent(Agent):
    """Per-agent executor that turns the overall plan into one action per step.

    Each call to ``step`` sends the current state plus the planner's plan to
    the LLM and returns at most one parsed action for this agent.
    """

    def __init__(self, env, model, agent_id, total_num_agents, with_feedback, with_notes, look_ahead_steps):
        # NOTE(review): the caller's look_ahead_steps is accepted for signature
        # compatibility, but a fixed value of 5 is forwarded to the base class
        # (unchanged from the original behaviour) -- confirm this is intended.
        super().__init__(env, model, agent_id, total_num_agents, with_feedback, with_notes, 5)

        self.history, self.feedback, self.suggestions = self.initialize_prompt()
        self.initial_history_length = len(self.history)
        self.prompt_history = []

    def initialize_prompt(self):
        """Build the initial conversation and the default feedback strings.

        Returns:
            tuple: ``(history, feedback, suggestions)`` where ``history`` is
            the rules prompt, the executor examples and the formatted
            executor info prompt, and the two strings are empty placeholders
            (or ``''`` when feedback is disabled).
        """
        pre_prompt = ("user", rules(self.env, self.with_notes))
        info_prompt = ("user", executor_info_prompt.format(total_num_agents=self.total_num_agents, agent_id=self.agent_id))
        history = [pre_prompt] + executor_examples + [info_prompt]

        if self.with_feedback:
            feedback = '-execution error messages:\n  --  []\n'
            suggestions = '-execution suggestions:\n  --  []\n'
        else:
            feedback = ''
            suggestions = ''

        return history, feedback, suggestions

    def step(self, obs, step, plan, verbose=False):
        """
        Generate the next action for this agent based on the overall plan and current state.

        Args:
            obs: The current observation of the game state
            step: The current step number
            plan: The high-level plan generated by the planner agent
            verbose: Whether to print debug information

        Returns:
            tuple: (parsed_actions, price, tokens) containing the next action
            for this agent (a list with at most one entry; empty if the
            response could not be parsed), and the price and token values
            reported by query_llm
        """
        # update history
        if self.with_feedback and step != 0:
            self.feedback = '-execution error messages:\n  --  ' + str(self.env.feedback) + '\n'

        if self.with_notes and step != 0:
            self.suggestions = '-execution suggestions:\n  --  ' + str(self.env.suggestions) + '\n'
            if 'agent ids cannot be the same' in self.feedback:
                self.suggestions += f'  --  You can only control and plan the actions for agent{self.agent_id}. \n'
            self.suggestions += f'  --  You are Agent {self.agent_id}. Follow your role in the plan.\n'

        prompt = self.feedback + self.suggestions + convert_to_prompt(obs) + '-overall plan:\n' + plan.strip() + '\n\n-action:\n'

        # Update history with the prompt
        self.history = prepend_history(self.history, prompt, role='user', verbose=verbose)

        # Manage history length for LLM query: keep the initial prompt block
        # plus the most recent look_ahead_steps messages.
        if len(self.history) > self.look_ahead_steps + self.initial_history_length:
            # Compute the start index explicitly: history[-0:] would return
            # the whole list (duplicating the initial prompts) when
            # look_ahead_steps is 0.
            recent_start = len(self.history) - self.look_ahead_steps
            query_history = (self.history[:self.initial_history_length] +
                             self.history[recent_start:])
        else:
            query_history = self.history

        # GENERATE ACTION
        if verbose:
            print(f"Generating action for Agent {self.agent_id} based on plan...")

        action_response, price, tokens = query_llm(query_history, model=self.model)
        print(f"ACTION [{self.agent_id}] tokens: ", tokens)
        print(f"ACTION [{self.agent_id}] price: ", price)

        # Update history with the action response
        self.history = prepend_history(self.history, action_response, role='assistant', verbose=verbose)

        if verbose:
            print(f"Agent {self.agent_id} action: {action_response}")

        try:
            parsed_actions = self.extract_actions(action_response)
            if parsed_actions:
                # Only the first action is executed per step.
                parsed_actions = [parsed_actions[0]]
        except Exception as exc:
            # Best effort: an unparsable response yields no action. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            if verbose:
                print(f"Agent {self.agent_id} could not parse action: {exc}")
            parsed_actions = []

        return parsed_actions, price, tokens