import numpy as np

def planning_function(processed_state):
    """
    Determine a task for each agent based on the current state.

    The food item with the highest level that does not exceed the combined
    level of all agents is chosen as the single shared target. Agents within
    Euclidean distance 1 of that food are told to pick it up; every other
    agent is told to move towards it. If no food qualifies, every agent
    idles.

    Args:
        processed_state: A pair ``(food_info, agents_info)``.
            ``food_info`` maps a food id to ``(position, level)`` or to
            ``None`` (such entries are skipped). ``agents_info`` maps an
            agent id to ``(position, level)``.

    Returns:
        dict: Maps each agent id to one of ``"Pickup"``,
        ``f"Target {food_id}"`` or ``"No op"``.
    """
    food_info, agents_info = processed_state

    # Combined level of every agent; a food is only considered liftable if
    # its level does not exceed this sum.
    total_agent_level = sum(agent[1] for agent in agents_info.values())

    # Pick the highest-level food the team can lift. On ties the first one
    # encountered wins because the comparison below is strict.
    target_food = None
    for food_id, food_data in food_info.items():
        if food_data is not None and food_data[1] <= total_agent_level:
            if target_food is None or food_data[1] > food_info[target_food][1]:
                target_food = food_id

    # Compare against None explicitly: a legitimate food id can be falsy
    # (for example the integer 0), and must not be mistaken for "no target".
    if target_food is None:
        return {agent_id: "No op" for agent_id in agents_info}

    target_food_pos = np.array(food_info[target_food][0])

    llm_tasks = {}
    for agent_id, (agent_pos, _) in agents_info.items():
        distance = np.linalg.norm(np.array(agent_pos) - target_food_pos)
        if distance <= 1:
            llm_tasks[agent_id] = "Pickup"
        else:
            llm_tasks[agent_id] = f"Target {target_food}"

    return llm_tasks

def compute_reward(processed_state, llm_actions, actions):
    """
    Compute a per-agent reward from the chosen actions and the LLM suggestions.

    Two components are summed for each agent:

    * Coordinated pickup: when every agent's action is 5 (treated here as
      the pickup action), each present food item that lies within Euclidean
      distance 1 of all agents contributes +10 to every agent if the
      combined agent level is at least the food level, otherwise -5.
    * Suggestion following: +1 if the agent's action is among its suggested
      actions, -1 otherwise.

    Args:
        processed_state: A pair ``(food_info, agents_info)``; food entries are
            ``(position, level)`` or ``None``, agent entries are
            ``(position, level)``.
        llm_actions (dict): Maps each agent id to the collection of actions
            suggested for that agent.
        actions (dict): Maps each agent id to the single action it performed.

    Returns:
        dict: Maps each agent id in ``agents_info`` to its reward.
    """
    food_info, agents_info = processed_state
    reward = dict.fromkeys(agents_info, 0)

    # The coordinated-pickup term only applies when nobody did anything else.
    if all(chosen == 5 for chosen in actions.values()):
        positions = [info[0] for info in agents_info.values()]
        for food_data in food_info.values():
            if food_data is None:
                continue
            food_pos, food_level = food_data
            surrounded = all(
                np.linalg.norm(np.array(pos) - np.array(food_pos)) <= 1
                for pos in positions
            )
            if not surrounded:
                continue
            team_level = sum(info[1] for info in agents_info.values())
            # Enough combined level -> shared bonus; otherwise shared penalty.
            bonus = 10 if team_level >= food_level else -5
            for agent_id in reward:
                reward[agent_id] += bonus

    # Small shaping term for agreeing (or not) with the LLM's suggestion.
    for agent_id, chosen in actions.items():
        delta = 1 if chosen in llm_actions[agent_id] else -1
        reward[agent_id] += delta

    return reward