import numpy as np

def planning_function(processed_state):
    """
    Assign a task to every agent for the current step.

    The lowest-level food still present is chosen as the single shared
    target. An agent that is within distance 1 of that food is told to pick
    it up, provided the combined level of *all* agents is high enough;
    every other agent is told to move toward the target.

    Args:
        processed_state: A tuple ``(food_info, agents_info)``.
            ``food_info`` maps a food id to ``(position, level)``, or to
            ``None`` when that food is no longer available.
            ``agents_info`` maps an agent id to ``(position, level)``.

    Returns:
        dict: One task string per agent id, either 'No op' (no food left),
        'Pickup', or 'Target food <index>'.
    """
    food_info, agents_info = processed_state

    available_food = [
        (food_id, info) for food_id, info in food_info.items() if info is not None
    ]
    if not available_food:
        return {agent: 'No op' for agent in agents_info}

    # min() returns the first minimal element, i.e. the same food a stable
    # ascending sort by level would put in front.
    target_food_id, target_food_info = min(available_food, key=lambda item: item[1][1])
    target_food_pos, target_food_level = target_food_info

    # Derive the food index from the id. Using the whole run of trailing
    # digits (instead of only the last character) keeps multi-digit ids such
    # as 'food_10' intact, and str() lets plain integer ids work as well.
    id_text = str(target_food_id)
    trailing_digits = id_text[len(id_text.rstrip('0123456789')):]
    food_label = str(int(trailing_digits)) if trailing_digits else id_text[-1]
    approach_task = f'Target food {food_label}'

    # Feasibility is judged on the whole team so that agents already next to
    # the food keep attempting the pickup while the others approach.
    total_agent_level = sum(agent[1] for agent in agents_info.values())
    can_pickup = total_agent_level >= target_food_level

    target_pos = np.array(target_food_pos)
    llm_tasks = {}
    for agent_id, (agent_pos, _agent_level) in agents_info.items():
        distance = np.linalg.norm(np.array(agent_pos) - target_pos)
        if distance <= 1 and can_pickup:
            llm_tasks[agent_id] = 'Pickup'
        else:
            llm_tasks[agent_id] = approach_task
    return llm_tasks

def compute_reward(processed_state, llm_actions, actions):
    """
    Compute a small shaped reward for every agent.

    Three terms are accumulated per agent and the sum is finally clamped to
    the range [0, 0.01]:

    * +0.01 when the executed action is one of the LLM-suggested actions;
    * +0.01 when the agent executed the pickup action (5) next to a food
      whose level is covered by the pickup agents adjacent to that food;
    * -0.01 when the agent moved (actions 1-4) although pickup (5) was
      among the LLM suggestions.

    Because of the final clamp a penalty can only cancel a bonus; the
    returned value is never negative and never exceeds 0.01.

    Args:
        processed_state: A tuple ``(food_info, agents_info)``.
            ``food_info`` maps a food id to ``(position, level)`` or ``None``;
            ``agents_info`` maps an agent id to ``(position, level)``.
        llm_actions (dict): For each agent, the collection of integer actions
            suggested by the LLM. Agents without an entry (or with ``None``)
            are treated as having no suggestion.
        actions (dict): For each agent, the integer action actually executed.

    Returns:
        dict: Reward for each agent listed in ``agents_info``.
    """
    food_info, agents_info = processed_state
    reward = {agent: 0 for agent in agents_info}

    # Reward for following LLM suggestions. An agent with no executed action
    # simply does not match any suggestion.
    for agent, llm_action in llm_actions.items():
        if llm_action is not None and actions.get(agent) in llm_action:
            reward[agent] += 0.01

    # Reward for successful pickup. Each food is evaluated only against the
    # pickup agents standing next to it, so a distant agent pressing pickup
    # (or a second group loading another food) cannot void a valid pickup.
    pickup_agents = [agent for agent, action in actions.items() if action == 5]
    rewarded = set()
    if pickup_agents:
        for food_data in food_info.values():
            if food_data is None:
                continue
            food_pos, food_level = food_data
            food_xy = np.array(food_pos)
            loaders = [
                agent for agent in pickup_agents
                if np.linalg.norm(np.array(agents_info[agent][0]) - food_xy) <= 1
            ]
            if loaders and sum(agents_info[agent][1] for agent in loaders) >= food_level:
                rewarded.update(loaders)
    # Granted once per agent, even if the agent qualifies for several foods.
    for agent in rewarded:
        reward[agent] += 0.01

    # Penalty for unnecessary movement: the agent moved although the LLM
    # suggested picking up.
    for agent, action in actions.items():
        suggested = llm_actions.get(agent)
        if suggested is not None and action in (1, 2, 3, 4) and 5 in suggested:
            reward[agent] -= 0.01

    # Normalize rewards into [0, 0.01].
    for agent in reward:
        reward[agent] = max(0, min(reward[agent], 0.01))

    return reward