import numpy as np

def planning_function(processed_state):
    """
    Determines optimal tasks for each agent based on the current state.

    The remaining food with the lowest level is targeted (ties are broken by
    the distance from the food to its nearest agent). Agents standing next to
    that food are told to pick it up once the agents next to it are jointly
    strong enough; every other agent is sent towards it.

    Args:
        processed_state: A tuple ``(food_info, agents_info)``.
            ``food_info`` maps a food key that ends in the food's index
            (e.g. ``'food_0'``) to ``(position, level)``, or to ``None`` when
            that food is no longer available.
            ``agents_info`` maps an agent id to ``(position, level)``.

    Returns:
        dict: Optimal tasks for each agent ('No op','Target food 0','Target food 1','Pickup').
        Empty when there are no agents.
    """
    food_info, agents_info = processed_state

    # Without agents there is nothing to plan, and the nearest-agent min()
    # below would raise on an empty sequence.
    if not agents_info:
        return {}

    available_food = [
        (food_key, food) for food_key, food in food_info.items() if food is not None
    ]
    if not available_food:
        return {agent: 'No op' for agent in agents_info}

    def distance_between(pos_a, pos_b):
        return np.linalg.norm(np.array(pos_a) - np.array(pos_b))

    def priority(entry):
        # Lowest level first, then the food that some agent is closest to.
        _, food = entry
        nearest_agent = min(
            distance_between(agent_pos, food[0])
            for agent_pos, _ in agents_info.values()
        )
        return food[1], nearest_agent

    # min() returns the first minimal entry, i.e. the same element a stable
    # sort would place first.
    target_key, target_food = min(available_food, key=priority)
    target_food_pos = target_food[0]
    target_food_level = target_food[1]

    # Read the whole trailing digit run so that multi-digit indices and plain
    # integer keys work, not only a single last character.
    key_text = str(target_key)
    target_food_id = int(key_text[len(key_text.rstrip('0123456789')):])

    distances = {
        agent_id: distance_between(agent_pos, target_food_pos)
        for agent_id, (agent_pos, _) in agents_info.items()
    }

    # Only agents already next to the food can contribute to a pickup, so the
    # level check uses their combined level rather than the whole team's.
    # NOTE(review): assumes the environment sums the levels of adjacent agents
    # only (as in Level-Based Foraging) - confirm against the environment.
    adjacent_level = sum(
        agent_level
        for agent_id, (_, agent_level) in agents_info.items()
        if distances[agent_id] <= 1
    )
    pickup_possible = adjacent_level >= target_food_level

    llm_tasks = {}
    for agent_id in agents_info:
        if pickup_possible and distances[agent_id] <= 1:
            llm_tasks[agent_id] = 'Pickup'
        else:
            llm_tasks[agent_id] = f'Target food {target_food_id}'

    return llm_tasks

def compute_reward(processed_state, llm_actions, actions):
    """
    Calculate rewards based on the tasks assigned and their outcomes.

    Reward components:
      * Shared pickup outcome: when any agent performed action 5 (pickup),
        every agent gets +10 if at least one entry of ``food_info`` is
        ``None``, otherwise every agent gets -5.
      * +1 for an agent whose performed action is among its suggested actions.
      * +2 for an agent with a suggested movement action (1-4) when every
        agent's suggestion list contains the same ``'Target food N'`` label.

    Agents that appear in ``llm_actions`` but not in ``agents_info`` are
    ignored; an agent without an entry in ``actions`` simply earns no
    "followed the suggestion" bonus.

    Args:
        processed_state: returned from function process_state(state, p, f);
            unpacked here as ``(food_info, agents_info)``.
        llm_actions (dict): dictionary of list of integers which means the suggested actions from llm for each agent.
        actions (dict): dictionary of an integer action that was actually performed by each agent.

    Returns:
        reward: Dict containing rewards for each agent in ``agents_info``.
    """
    food_info, agents_info = processed_state
    reward = {agent: 0 for agent in agents_info}

    # Shared outcome of a pickup attempt.
    # NOTE(review): success is inferred from *any* food entry being None, so
    # a food that was already gone before this step also counts - confirm
    # that processed_state is the post-step state and that this is intended.
    if any(action == 5 for action in actions.values()):
        food_picked = any(food is None for food in food_info.values())
        shared_reward = 10 if food_picked else -5
        for agent in reward:
            reward[agent] += shared_reward

    movement_actions = (1, 2, 3, 4)

    for agent, suggested in llm_actions.items():
        if agent not in reward:
            # No reward slot for agents missing from the processed state.
            continue

        # Reward for following the LLM suggestion.
        if agent in actions and actions[agent] in suggested:
            reward[agent] += 1

        # The coordination bonus only applies to agents told to move.
        if not any(action in movement_actions for action in suggested):
            continue

        # NOTE(review): this looks for task-label strings inside the
        # suggestion lists. If those lists hold only integers, as documented,
        # the label is never found and the bonus cannot be earned - confirm
        # what llm_actions really contains.
        target_food = 0 if 'Target food 0' in suggested else 1
        label = 'Target food ' + str(target_food)
        if all(label in llm_actions.get(other_agent, ()) for other_agent in agents_info):
            reward[agent] += 2  # Reward for coordinated targeting

    return reward