# src/eval_utils.py

import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, confusion_matrix

def test_acc_and_confusion_matrix(model, test_emb: torch.Tensor,
                                  test_labels: torch.Tensor,
                                  batch_size: int = 4):
    """
    Evaluate final model accuracy and confusion matrix.

    The features are fed to ``model`` in order (no shuffling), the predicted
    class of each sample is taken as the argmax over dimension 1 of the model
    output, and the predictions are compared against ``test_labels`` with
    scikit-learn.

    If ``model`` is a ``torch.nn.Module`` that is currently in training mode,
    it is switched to eval mode for the duration of the evaluation (so that
    dropout is disabled and BatchNorm neither uses batch statistics nor
    updates its running statistics on test data) and switched back to
    training mode afterwards.

    Args:
        model: Trained classifier model. Called as ``model(features)``; the
            output must support ``argmax(dim=1)`` (presumably logits or
            probabilities of shape ``(batch, num_classes)``).
        test_emb (torch.Tensor): Feature tensor for test samples, indexed
            along its first dimension. No device transfer is performed, so
            it must already live where the model expects its input.
        test_labels (torch.Tensor): Ground-truth labels, one per sample.
        batch_size (int): Batch size for evaluation.

    Returns:
        (float, np.ndarray): Accuracy score and confusion matrix (computed by
        scikit-learn with ``test_labels`` as the true labels and the model
        predictions as the predicted labels).
    """
    predictions = []
    # detach() shares storage with test_emb; a full clone is unnecessary
    # because the data is only read during evaluation.
    test_dataset = TensorDataset(test_emb.detach())
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Only nn.Module instances carry a train/eval flag; plain callables are
    # left untouched.
    was_training = isinstance(model, torch.nn.Module) and model.training
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            # TensorDataset yields 1-tuples, so each batch is a 1-element
            # sequence holding the feature tensor.
            for (features,) in test_loader:
                preds = model(features).argmax(dim=1)
                predictions.extend(preds.tolist())
    finally:
        # Restore the caller's mode even if the forward pass raised.
        if was_training:
            model.train()

    # Move the labels to host memory once and reuse them for both metrics.
    true_labels = test_labels.detach().cpu().numpy()
    acc = accuracy_score(true_labels, predictions)
    cm = confusion_matrix(true_labels, predictions)
    return acc, cm
