import numpy as np

def planning_function(processed_global_state):
    """Assign a high-level task to every agent for the current step.

    Args:
        processed_global_state: 4-tuple
            ``(available_move_actions, enemy_info, ally_info, own_info)``.
            Each element is a mapping keyed by agent id. As indexed by this
            function: ``own_info[a][0][0]`` is the agent's health,
            ``own_info[a][1][0]`` / ``own_info[a][2][0]`` its position and
            ``own_info[a][4]`` its alive flag; ``enemy_info[a]`` maps enemy
            ids to records where ``[0][0] == 1`` marks "in attack range",
            ``[5][0]`` is health and ``[6][0]`` / ``[7][0]`` the position.
            NOTE(review): whether positions are absolute or agent-relative
            is not visible here -- confirm against the state pre-processor.

    Returns:
        dict mapping each agent id in ``own_info`` to ``'none'`` (dead
        agent), ``'attack'`` or ``'move'``.
    """
    available_move_actions, enemy_info, _ally_info, own_info = processed_global_state
    llm_tasks = {}

    # Only enemies with positive health are worth chasing: the reward code in
    # this file treats health == 0 as "killed", and a plain min-by-health
    # would otherwise always select such an enemy as the "weakest" one.
    living_enemies = [
        enemy
        for agent_enemies in enemy_info.values()
        for enemy in agent_enemies.values()
        if enemy[5][0] > 0
    ]
    # default=None keeps the planner from crashing when no enemy is left.
    weakest_enemy = min(living_enemies, key=lambda enemy: enemy[5][0], default=None)
    weakest_enemy_pos = (
        None if weakest_enemy is None
        else (weakest_enemy[6][0], weakest_enemy[7][0])
    )

    for agent_id, agent_info in own_info.items():
        if not agent_info[4]:  # dead agents get no task
            llm_tasks[agent_id] = 'none'
            continue

        agent_pos = (agent_info[1][0], agent_info[2][0])
        agent_health = agent_info[0][0]
        agent_moves = available_move_actions[agent_id]

        # Enemies this agent can attack right now.
        enemies_in_range = [
            enemy for enemy in enemy_info[agent_id].values() if enemy[0][0] == 1
        ]

        if enemies_in_range:
            # Threshold presumably assumes health normalised to [0, 1] -- TODO confirm.
            if agent_health > 0.3:
                llm_tasks[agent_id] = 'attack'
            else:
                # Low health: retreat if any move is possible, otherwise fight.
                move_direction = get_safe_direction(agent_pos, enemies_in_range, agent_moves)
                llm_tasks[agent_id] = 'move' if move_direction else 'attack'
        elif weakest_enemy_pos is None:
            # No living enemy to approach: apply the same move-else-attack
            # fallback as the other branches so live agents always receive a
            # task from the usual vocabulary.
            llm_tasks[agent_id] = 'move' if len(agent_moves) > 0 else 'attack'
        else:
            # Nothing in range: close in on the weakest living enemy.
            move_direction = get_move_towards(agent_pos, weakest_enemy_pos, agent_moves)
            llm_tasks[agent_id] = 'move' if move_direction else 'attack'

    return llm_tasks

def get_safe_direction(agent_pos, enemies, available_moves):
    enemy_positions = [(enemy[6][0], enemy[7][0]) for enemy in enemies]
    directions = {'North': (0, 1), 'South': (0, -1), 'East': (1, 0), 'West': (-1, 0)}
    
    max_distance = -1
    best_direction = None
    
    for move in available_moves:
        new_pos = (agent_pos[0] + directions[move][0], agent_pos[1] + directions[move][1])
        min_distance = min(np.linalg.norm(np.array(new_pos) - np.array(enemy_pos)) for enemy_pos in enemy_positions)
        
        if min_distance > max_distance:
            max_distance = min_distance
            best_direction = move
    
    return best_direction

def get_move_towards(agent_pos, target_pos, available_moves):
    """Pick the available move that brings the agent closest to a target.

    Args:
        agent_pos: ``(x, y)`` position of the agent.
        target_pos: ``(x, y)`` position to approach.
        available_moves: iterable of move labels. Only ``'North'``,
            ``'South'``, ``'East'`` and ``'West'`` can be evaluated; any
            other label is skipped.

    Returns:
        The label of the move whose one-unit step ends nearest to
        ``target_pos`` (the first such move wins ties), or ``None`` when no
        usable move exists.
    """
    directions = {'North': (0, 1), 'South': (0, -1), 'East': (1, 0), 'West': (-1, 0)}

    # The target is the same for every candidate, so convert it once.
    target = np.array(target_pos)

    min_distance = float('inf')
    best_direction = None

    for move in available_moves:
        offset = directions.get(move)
        if offset is None:
            # A label without a known offset cannot be scored; skip it
            # instead of failing with a KeyError.
            continue

        new_pos = np.array((agent_pos[0] + offset[0], agent_pos[1] + offset[1]))
        distance = np.linalg.norm(new_pos - target)

        # Strict comparison keeps the earliest move on ties.
        if distance < min_distance:
            min_distance = distance
            best_direction = move

    return best_direction

def compute_reward(processed_state, llm_tasks, tasks):
    """Compute a shaped reward for every agent listed in ``tasks``.

    Args:
        processed_state: 4-tuple
            ``(available_move_actions, enemy_info, ally_info, own_info)``
            of per-agent mappings. As indexed here: ``own_info[a][4]`` is
            the alive flag and ``own_info[a][1][0]`` / ``own_info[a][2][0]``
            the agent position; enemy records expose health at ``[5][0]``
            and position at ``[6][0]`` / ``[7][0]``; ally records expose
            position at ``[6][0]`` / ``[7][0]``.
            NOTE(review): distances assume agent, ally and enemy positions
            share one coordinate frame -- confirm against the pre-processor.
        llm_tasks: mapping of agent id to the task suggested by the planner.
        tasks: mapping of agent id to the task the agent actually chose.

    Returns:
        dict mapping each agent id in ``tasks`` to its reward.
    """
    reward = {}
    available_move_actions, enemy_info, ally_info, own_info = processed_state

    for agent_id, task in tasks.items():
        if own_info[agent_id][4]:  # agent is alive
            if task == llm_tasks[agent_id]:
                reward[agent_id] = 0.1  # small bonus for following the suggested task
            elif task == 'stop':
                reward[agent_id] = -0.05  # small penalty for stopping
            else:
                reward[agent_id] = 0  # deviating is neither rewarded nor punished
        else:
            reward[agent_id] = -0.1  # small penalty for being dead

        # Outcome-based bonuses are added on top of the base reward.
        if task == 'attack':
            for enemy in enemy_info[agent_id].values():
                enemy_health = enemy[5][0]
                if enemy_health < 1:  # enemy is not at full health
                    reward[agent_id] += 0.2
                if enemy_health == 0:  # enemy is killed (stacks with the bonus above)
                    reward[agent_id] += 1.0

        elif task == 'move':
            # Bonus for standing close to an ally while staying away from enemies.
            agent_pos = np.array((own_info[agent_id][1][0], own_info[agent_id][2][0]))
            ally_distances = [
                np.linalg.norm(agent_pos - np.array((ally[6][0], ally[7][0])))
                for ally in ally_info[agent_id].values()
            ]
            enemy_distances = [
                np.linalg.norm(agent_pos - np.array((enemy[6][0], enemy[7][0])))
                for enemy in enemy_info[agent_id].values()
            ]

            # An infinite default keeps min() from raising on an empty list:
            # with no allies the agent is never "near an ally", and with no
            # enemies it is trivially "far from enemies".
            nearest_ally = min(ally_distances, default=float('inf'))
            nearest_enemy = min(enemy_distances, default=float('inf'))

            if nearest_ally < 3 and nearest_enemy > 5:
                reward[agent_id] += 0.1

    return reward