import numpy as np

def planning_function(processed_state):
    """
    Greedily pair agents with landmarks, closest pair first.

    Each round picks the unassigned agent / free landmark pair with the
    smallest Euclidean distance, binds them together, and removes both from
    further consideration. Agents left without a landmark (for instance when
    there are more agents than landmarks) receive 'No op'.

    Args:
        processed_state: dict keyed by agent name (e.g. 'agent_0', 'agent_1',
            'agent_2'). Each value is an indexable observation whose first
            three entries are the landmark-to-agent vectors; any following
            entries (other-agent vectors) are ignored here.

    Returns:
        dict: agent name -> task string, one of 'No op', 'Landmark_0',
        'Landmark_1', 'Landmark_2'.
    """
    landmark_count = 3
    unreachable = float('inf')

    # Distance from every agent to each of the landmark slots.
    dist_table = {
        name: [np.linalg.norm(obs[k]) for k in range(landmark_count)]
        for name, obs in processed_state.items()
    }

    chosen = {}
    free = set(range(landmark_count))

    # At most one assignment per round, one round per agent.
    for _ in range(len(processed_state)):
        # Only pairs with a distance strictly below infinity compete; this
        # also leaves out NaN distances, which never compare as smaller.
        candidates = [
            (dist_table[name][lm], name, lm)
            for name in dist_table
            if name not in chosen
            for lm in free
            if dist_table[name][lm] < unreachable
        ]
        if not candidates:
            # Nothing can change in later rounds, so stop early.
            break

        # min() keeps the first of equally small entries, so ties go to the
        # earliest agent and then the earliest landmark in iteration order.
        _, winner, target = min(candidates, key=lambda entry: entry[0])
        chosen[winner] = target
        free.remove(target)

    # Translate assignments into task labels.
    return {
        name: f"Landmark_{chosen[name]}" if name in chosen else "No op"
        for name in processed_state
    }

def compute_reward(processed_state, llm_actions, actions):
    """
    Score each agent from its observation and the action it took.

    The per-agent reward is built up in this order:
      * +0.5 if the executed action is among the LLM-suggested actions,
      * minus the distance to the nearest landmark,
      * +2 if that nearest landmark is closer than 0.1,
      * -1 for every other agent closer than 0.3.

    Args:
        processed_state: returned from function process_state(state, p, f);
            dict of agent name -> observation whose first three entries are
            landmark vectors and whose remaining entries are other-agent
            vectors.
        llm_actions (dict): agent name -> list of integer actions suggested
            by the LLM, e.g. {"agent_0": [2, 3], "agent_1": [4], ...}.
        actions (dict): agent name -> integer action actually performed,
            e.g. {"agent_0": 2, "agent_1": 4, ...}.

        **Note: the index of this action space is
        [no_action, move_left, move_right, move_down, move_up]**

    Returns:
        dict: agent name -> reward, e.g. {'agent_0': reward0, 'agent_1': reward1, ...}
    """
    landmark_count = 3
    too_close = 0.3
    reached = 0.1

    reward = {}
    for name, obs in processed_state.items():
        score = 0

        # Bonus for following one of the LLM's suggested actions.
        if actions[name] in llm_actions[name]:
            score += 0.5

        # Penalise by the distance to whichever landmark is closest.
        closest = min(np.linalg.norm(obs[k]) for k in range(landmark_count))
        score -= closest

        # Bonus once the agent is effectively on a landmark.
        if closest < reached:
            score += 2

        # Entries past the landmark slots are other-agent vectors; each one
        # within the collision radius costs one point.
        for offset in obs[landmark_count:]:
            if np.linalg.norm(offset) < too_close:
                score -= 1

        reward[name] = score

    return reward