import numpy as np

def planning_function(processed_state):
    """
    Determine a task for each agent based on the current state.

    The combined level of all agents is compared against every remaining
    food item; the highest-level food whose level does not exceed that sum
    becomes the shared target (the first such food wins ties). Agents within
    Euclidean distance 1 of the target are told to pick it up, all others
    are told to move towards it.

    Args:
        processed_state: A 2-tuple ``(food_info, agents_info)``.
            ``food_info`` maps a food id to ``None`` or to a sequence whose
            item 0 is the food position and item 1 is the food level.
            ``agents_info`` maps an agent id to ``(position, level)``.

    Returns:
        dict: Task for each agent ('No op', 'Target food 0',
        'Target food 1', 'Pickup'). Every agent gets 'No op' when no food
        can be picked up by the whole team.
    """
    food_info, agents_info = processed_state

    # Calculate total agent level
    total_agent_level = sum(info[1] for info in agents_info.values())

    # Foods that still exist and that the whole team could lift together
    candidates = [
        food_id
        for food_id, food_data in food_info.items()
        if food_data is not None and food_data[1] <= total_agent_level
    ]
    if not candidates:
        # If no food can be picked up, assign 'No op' to all agents
        return {agent_id: 'No op' for agent_id in agents_info}

    # max() returns the first maximal item, matching a strict '>' scan
    target_food = max(candidates, key=lambda food_id: food_info[food_id][1])
    target_food_pos = np.array(food_info[target_food][0])

    llm_tasks = {}
    for agent_id, (agent_pos, _) in agents_info.items():
        if np.linalg.norm(np.array(agent_pos) - target_food_pos) <= 1:
            llm_tasks[agent_id] = 'Pickup'
        else:
            # str() lets integer food ids (0, 1) work as well as string ids
            # such as 'food_0'. Only the last character is used, so ids with
            # more than one trailing digit would collide.
            llm_tasks[agent_id] = f'Target food {str(target_food)[-1]}'

    return llm_tasks

def compute_reward(processed_state, llm_actions, actions):
    """
    Calculate shaped rewards from the suggested and the performed actions.

    Three independent bonuses are accumulated per agent:

    * +0.1 when the performed action is among the LLM-suggested actions.
    * +0.2 when every agent performed a move action (1-4) and this agent's
      move reduced its Euclidean distance to the first food in
      ``food_info`` that is not ``None``.
    * +1.0 for each agent performing action 5 when at least two agents do
      so and all of them are within Euclidean distance 1 of the first such
      agent.

    Args:
        processed_state: returned from function process_state(state, p, f);
            a 2-tuple ``(food_info, agents_info)`` where ``food_info`` maps
            a food id to ``None`` or a sequence whose item 0 is a position,
            and ``agents_info`` maps an agent id to a sequence whose item 0
            is the agent position.
        llm_actions (dict): for each agent, the list of integer actions
            suggested by the LLM.
        actions (dict): for each agent, the integer action it actually
            performed.

    Returns:
        reward: Dict containing rewards for each agent.
    """
    food_info, agents_info = processed_state
    reward = {agent: 0 for agent in agents_info}

    # Reward for following LLM suggestions
    for agent, llm_action in llm_actions.items():
        if actions[agent] in llm_action:
            reward[agent] += 0.1

    # Reward for coordinated movement towards food: the reference food is
    # the first entry that is still present.
    target_food = next(
        (food_id for food_id, food_data in food_info.items()
         if food_data is not None),
        None,
    )

    # Compare against None explicitly: a falsy id such as 0 is a valid key.
    if target_food is not None:
        target_pos = np.array(food_info[target_food][0])
        # Position change applied by each move action (index 0, index 1)
        move_deltas = {1: (-1, 0), 2: (1, 0), 3: (0, -1), 4: (0, 1)}
        if all(action in move_deltas for action in actions.values()):
            for agent, action in actions.items():
                agent_pos = agents_info[agent][0]
                delta = move_deltas[action]
                new_pos = list(agent_pos)
                new_pos[0] += delta[0]
                new_pos[1] += delta[1]

                old_distance = np.linalg.norm(np.array(agent_pos) - target_pos)
                new_distance = np.linalg.norm(np.array(new_pos) - target_pos)

                if new_distance < old_distance:
                    reward[agent] += 0.2

    # Reward for coordinated pickup
    # NOTE(review): proximity is measured between the pickup agents
    # themselves (relative to the first one), not to any food - confirm
    # this is the intended notion of "coordinated".
    pickup_agents = [agent for agent, action in actions.items() if action == 5]
    if len(pickup_agents) > 1:
        anchor = np.array(agents_info[pickup_agents[0]][0])
        if all(
            np.linalg.norm(np.array(agents_info[agent][0]) - anchor) <= 1
            for agent in pickup_agents
        ):
            for agent in pickup_agents:
                reward[agent] += 1.0

    return reward