import numpy as np
import torch

def top_k_accuracy(logits: np.ndarray, labels: np.ndarray, k: int = 5) -> float:
    """Return the fraction of rows whose label is among the k highest scores.

    Args:
        logits: 2-D array of scores with one row per sample and one column
            per class index.
        labels: 1-D array-like of integer class indices, one per row of
            ``logits``.
        k: Number of highest-scoring columns to consider per row. Must be at
            least 1; values larger than the number of columns simply select
            every column.

    Returns:
        The share of samples, in ``[0, 1]``, whose label index appears among
        the ``k`` largest entries of the corresponding ``logits`` row.

    Raises:
        ValueError: If ``labels`` is empty or not 1-D, if ``k`` is smaller
            than 1, or if ``logits`` is not 2-D with one row per label.
    """
    labels = np.asarray(labels)
    logits = np.asarray(logits)

    if labels.size == 0:
        raise ValueError("labels array is empty")
    if labels.ndim != 1:
        raise ValueError(f"labels must be 1-D, got {labels.ndim} dimension(s)")
    # k == 0 would turn the slice below into ``[:, -0:]`` (i.e. every column)
    # and silently report a perfect score; negative k is equally meaningless.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    # Without this check a row-count mismatch can broadcast in the comparison
    # below and yield a plausible-looking but wrong accuracy.
    if logits.ndim != 2 or logits.shape[0] != labels.shape[0]:
        raise ValueError(
            "logits must be 2-D with one row per label, got logits shape "
            f"{logits.shape} for {labels.shape[0]} labels"
        )

    # argsort is ascending, so the last k columns hold the top-k class indices.
    top_k_preds = np.argsort(logits, axis=1)[:, -k:]
    hits = np.any(top_k_preds == labels[:, None], axis=1)
    return float(np.sum(hits) / labels.shape[0])

def combined_vision_location_encoding_evaluation(
    location_embeddings: np.ndarray,
    vision_embeddings: np.ndarray,
    labels: np.ndarray,
    k: int = 5
) -> tuple:
    """Score location, vision and their element-wise product with top-1/top-k accuracy.

    The location scores are passed through an element-wise sigmoid and each
    row is then divided by its sum along axis 1. The result is multiplied
    element-wise with ``vision_embeddings`` to form the combined scores.
    Accuracy is computed with ``top_k_accuracy`` for each of the three score
    arrays, once with ``k=1`` and once with the supplied ``k``.

    Args:
        location_embeddings: NumPy array of location scores; at least 2-D
            since it is summed along axis 1.
            NOTE(review): presumably (n_samples, n_classes) - confirm.
        vision_embeddings: Array multiplied element-wise with the normalized
            location scores and also scored on its own.
        labels: Ground-truth labels forwarded unchanged to ``top_k_accuracy``.
        k: Cutoff used for the "top-k" half of each accuracy pair.

    Returns:
        A 6-tuple ``(combined_top1, combined_topk, vision_top1, vision_topk,
        location_top1, location_topk)``.
    """
    # The sigmoid is evaluated in torch; it is not in-place, so the caller's
    # array is left untouched even though from_numpy shares its memory.
    loc_scores = torch.from_numpy(location_embeddings).sigmoid().numpy()
    row_totals = loc_scores.sum(axis=1, keepdims=True)
    loc_scores = loc_scores / row_totals

    fused_scores = loc_scores * vision_embeddings

    # Outer loop fixes the score source, inner loop the cutoff, which yields
    # the documented order: combined, vision, location - each as (top-1, top-k).
    return tuple(
        top_k_accuracy(scores, labels, k=cutoff)
        for scores in (fused_scores, vision_embeddings, loc_scores)
        for cutoff in (1, k)
    )