import numpy as np

def planning_function(processed_state):
    """
    Determine a target task for each agent from the current state.

    Agents are handled in order of the distance to their nearest landmark.
    An agent already sitting on a landmark (closer than 0.1) keeps it and is
    told to do nothing, but only one agent may hold a given landmark this
    way. Every remaining agent is sent to its closest landmark that is still
    unclaimed, or told to do nothing when no landmark is left.

    Args:
        processed_state (dict): maps an agent id ('agent_0', 'agent_1', ...)
            to a list whose first three entries are that agent's landmark
            observations [landmark0_2_agent, landmark1_2_agent,
            landmark2_2_agent], followed by the other-agent observations.
            The Euclidean norm of each landmark entry is used as the
            agent-to-landmark distance.

    Returns:
        dict: task for each agent, one of 'No op', 'Landmark_0',
            'Landmark_1', 'Landmark_2'.
    """
    num_landmarks = 3
    occupation_threshold = 0.1

    # Distance from every agent to every landmark.
    distances = {
        agent_id: [np.linalg.norm(agent_obs[i]) for i in range(num_landmarks)]
        for agent_id, agent_obs in processed_state.items()
    }

    # Agents nearest to some landmark get first pick.
    sorted_agents = sorted(
        distances, key=lambda agent_id: min(distances[agent_id])
    )

    assigned_landmarks = set()
    llm_tasks = {}

    # Pass 1: agents already on a landmark keep it. Only the first (closest)
    # agent may hold a landmark; a second agent stacked on the same landmark
    # must move on, otherwise a landmark would be left uncovered.
    for agent_id in sorted_agents:
        agent_distances = distances[agent_id]
        closest_idx = min(range(num_landmarks), key=agent_distances.__getitem__)
        if (
            agent_distances[closest_idx] < occupation_threshold
            and closest_idx not in assigned_landmarks
        ):
            llm_tasks[agent_id] = "No op"
            assigned_landmarks.add(closest_idx)

    # Pass 2: everyone else goes to the closest landmark nobody has claimed.
    # Running this after pass 1 guarantees a moving agent is never sent to a
    # landmark another agent is already sitting on.
    for agent_id in sorted_agents:
        if agent_id in llm_tasks:
            continue
        ranked = sorted(enumerate(distances[agent_id]), key=lambda pair: pair[1])
        for landmark_idx, _ in ranked:
            if landmark_idx not in assigned_landmarks:
                llm_tasks[agent_id] = f"Landmark_{landmark_idx}"
                assigned_landmarks.add(landmark_idx)
                break
        else:
            # Every landmark is already claimed.
            llm_tasks[agent_id] = "No op"

    return llm_tasks

def compute_reward(processed_state, llm_actions, actions):
    """
    Calculate a reward for each agent from the state and the actions taken.

    Each agent's reward is the sum of:
      * an equal share of the global reward, which is the negated sum over
        the three landmarks of the distance from the closest agent;
      * -1 for every other-agent observation closer than 0.3 (collision);
      * +1 if the agent is closer than 0.1 to any landmark (occupation);
      * +0.5 if the action it actually took is among the LLM's suggestions.

    Args:
        processed_state: returned from function process_state(state, p, f).
            Dict mapping agent id to a list whose first three entries are the
            landmark observations and whose remaining entries are the
            other-agent observations; the Euclidean norm of an entry is used
            as a distance.
        llm_actions (dict): dictionary of lists of integers, the actions
            suggested by the LLM for each agent.
            E.g. {"agent_0": [2, 3], "agent_1": [4], ...}. An agent without
            an entry simply receives no suggestion bonus.
        actions (dict): dictionary of the integer action actually performed
            by each agent. E.g. {"agent_0": 2, "agent_1": 4, ...}. Agents
            not present in processed_state are ignored.

        **Note: the index of this action space is
        [no_action, move_left, move_right, move_down, move_up]**

    Returns:
        dict: reward for each agent in processed_state. For example:
            {'agent_0': reward1, 'agent_1': reward2, ...}. Empty when
            processed_state is empty.
    """
    # Without agents there is nothing to score (and min() below would fail).
    if not processed_state:
        return {}

    num_agents = len(processed_state)
    num_landmarks = 3
    collision_threshold = 0.3
    occupation_threshold = 0.1

    # Agent-to-landmark distances, computed once and reused below.
    landmark_distances = {
        agent_id: [np.linalg.norm(agent_obs[i]) for i in range(num_landmarks)]
        for agent_id, agent_obs in processed_state.items()
    }

    # Global reward: negated sum of each landmark's distance to its
    # closest agent.
    global_reward = 0
    for landmark_idx in range(num_landmarks):
        global_reward -= min(
            agent_dists[landmark_idx]
            for agent_dists in landmark_distances.values()
        )

    # Every agent starts from an equal share of the global reward.
    share = global_reward / num_agents
    reward = {agent_id: share for agent_id in processed_state}

    for agent_id, agent_obs in processed_state.items():
        # Entries after the landmarks are the other-agent observations.
        for other_agent_pos in agent_obs[num_landmarks:]:
            if np.linalg.norm(other_agent_pos) < collision_threshold:
                reward[agent_id] -= 1  # Penalty for collision

        # Reward for occupying a landmark
        if min(landmark_distances[agent_id]) < occupation_threshold:
            reward[agent_id] += 1

    # Bonus for following an LLM suggestion. A missing suggestion list or an
    # unknown agent means no bonus rather than a KeyError.
    for agent_id, action in actions.items():
        if agent_id in reward and action in llm_actions.get(agent_id, ()):
            reward[agent_id] += 0.5

    return reward