import numpy as np

def planning_function(processed_state):
    """
    Determine a task for each agent based on the current state.

    All agents are sent to the same food: the highest-level food whose level
    does not exceed the summed level of all agents. If no food satisfies that,
    the food closest to any agent is targeted instead. Agents within Euclidean
    distance 1 of the target are told to pick up; the others to approach it.

    Args:
        processed_state: A tuple ``(food_info, agents_info)``.
            ``food_info`` maps a food id to ``(position, level)`` or ``None``
            (``None`` entries are ignored). ``agents_info`` maps an agent id
            to ``(position, level)``.

    Returns:
        dict: Task for each agent ('No op','Target food 0','Target food 1','Pickup').
            Every agent gets 'No op' when no food remains.
    """
    food_info, agents_info = processed_state

    # Only foods that are still present can be targeted.
    available = {
        food_id: food_data
        for food_id, food_data in food_info.items()
        if food_data is not None
    }
    if not available or not agents_info:
        # Nothing to target (or nobody to plan for); previously the fallback
        # min() below raised ValueError on an empty sequence in this case.
        return {agent_id: "No op" for agent_id in agents_info}

    total_agent_level = sum(agent[1] for agent in agents_info.values())

    # Highest-level food the whole team could pick up together; max() keeps
    # the first food on ties, matching iteration order of food_info.
    pickable = [
        food_id
        for food_id, (_, level) in available.items()
        if level <= total_agent_level
    ]
    if pickable:
        target_food = max(pickable, key=lambda food_id: available[food_id][1])
    else:
        # No food can be picked up: head for the food nearest to any agent.
        target_food = min(
            available,
            key=lambda food_id: min(
                np.linalg.norm(np.array(agent[0]) - np.array(available[food_id][0]))
                for agent in agents_info.values()
            ),
        )

    food_pos = np.array(available[target_food][0])
    # The task label uses the last element of the food id; this relies on ids
    # being indexable (e.g. strings ending in the food index).
    approach_task = f"Target food {target_food[-1]}"

    llm_tasks = {}
    for agent_id, (agent_pos, _) in agents_info.items():
        if np.linalg.norm(np.array(agent_pos) - food_pos) <= 1:
            llm_tasks[agent_id] = "Pickup"
        else:
            llm_tasks[agent_id] = approach_task

    return llm_tasks

def compute_reward(processed_state, llm_actions, actions):
    """
    Calculate per-agent rewards from the current state and the actions taken.

    Two terms are applied:
      * +1.0 to every agent that chose the pickup action, once for each food
        that all such agents are within Euclidean distance 1 of and whose
        level does not exceed their summed level. This only applies when more
        than one agent chose pickup.
      * -0.1 to every agent that did not choose pickup while being farther
        than distance 1 from its closest remaining food.

    Args:
        processed_state: returned from function process_state(state, p, f);
            a tuple ``(food_info, agents_info)`` where ``food_info`` maps a
            food id to ``(position, level)`` or ``None`` and ``agents_info``
            maps an agent id to ``(position, level)``.
        llm_actions (dict): dictionary of list of integers which means the
            suggested actions from llm for each agent. Currently unused.
        actions (dict): dictionary of the integer action actually performed
            by each agent.

    Returns:
        reward: Dict containing rewards for each agent.
    """
    pickup_action = 5  # action id treated as "pickup" by this reward

    food_info, agents_info = processed_state
    reward = {agent_id: 0 for agent_id in agents_info}

    # Foods still present, as (position, level) pairs.
    remaining_foods = [
        food_data for food_data in food_info.values() if food_data is not None
    ]

    # Reward joint pickups.
    # NOTE(review): a lone picking agent is never rewarded here (requires
    # more than one) - confirm this cooperation-only rule is intended.
    pickup_agents = [
        agent_id for agent_id, action in actions.items() if action == pickup_action
    ]
    if len(pickup_agents) > 1:
        pickup_positions = [
            np.array(agents_info[agent_id][0]) for agent_id in pickup_agents
        ]
        combined_level = sum(agents_info[agent_id][1] for agent_id in pickup_agents)

        for food_pos, food_level in remaining_foods:
            food_vec = np.array(food_pos)
            all_adjacent = all(
                np.linalg.norm(agent_vec - food_vec) <= 1
                for agent_vec in pickup_positions
            )
            if all_adjacent and combined_level >= food_level:
                for agent_id in pickup_agents:
                    reward[agent_id] += 1.0  # Large reward for successful pickup

    # With no food left there is nothing to approach, so no penalty applies;
    # previously min() over the empty food list raised ValueError here.
    if not remaining_foods:
        return reward

    # Penalize agents that are away from their closest food and not picking up.
    for agent_id, (agent_pos, _) in agents_info.items():
        agent_vec = np.array(agent_pos)
        closest_distance = min(
            np.linalg.norm(agent_vec - np.array(food_pos))
            for food_pos, _ in remaining_foods
        )
        if actions[agent_id] != pickup_action and closest_distance > 1:
            reward[agent_id] -= 0.1  # Small penalty for staying away from food

    return reward