import torch
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score


def _squeeze_keep_batch(tensor):
    """Drop every size-1 dimension of ``tensor`` except the leading one."""
    kept_dims = [size for size in tensor.shape[1:] if size != 1]
    return tensor.reshape(tensor.shape[0], *kept_dims)


def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect its outputs and labels.

    The model is switched to eval mode (and left in it) and the forward
    passes run under ``torch.no_grad()``.

    Args:
        model: Object exposing ``eval()`` that is callable on a batch of
            inputs and returns a tensor.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        A ``(test_outputs, test_labels)`` tuple of CPU tensors. Each is the
        concatenation of all batches along dim 0 with singleton trailing
        dimensions removed; the leading (sample) dimension is always kept,
        even when the dataloader yields a single sample in total.

    Note:
        An empty ``dataloader`` is not handled here: ``torch.cat`` rejects
        an empty list of tensors.
    """
    model.eval()
    batch_outputs = []
    batch_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs.to(device))
            # Move results to the CPU per batch so accelerator memory use
            # does not grow with the size of the dataset. Labels are only
            # collected, so they never need to visit ``device``.
            batch_outputs.append(outputs.cpu())
            batch_labels.append(labels.cpu())
    # A bare ``squeeze()`` would also drop the sample dimension when only
    # one sample was evaluated, yielding 0-d tensors that downstream metric
    # code cannot treat as a list of samples.
    test_outputs = _squeeze_keep_batch(torch.cat(batch_outputs))
    test_labels = _squeeze_keep_batch(torch.cat(batch_labels))
    return test_outputs, test_labels


def compute_metrics(test_outputs, test_labels, *, threshold=0.5):
    """Compute classification metrics from model scores and true labels.

    AUC is computed from the raw scores; F1, accuracy, precision and recall
    are computed from predictions obtained as ``test_outputs > threshold``.

    Args:
        test_outputs: Per-sample scores. Must support elementwise ``>``
            against a number (e.g. a CPU tensor or NumPy array).
        test_labels: Ground-truth labels aligned with ``test_outputs``.
        threshold: Scores strictly greater than this are predicted
            positive. Defaults to 0.5.
            NOTE(review): 0.5 presumably expects probability-like scores;
            raw logits would need a different cut-off - confirm against
            the model's output layer.

    Returns:
        Dict with keys ``'auc'``, ``'f1'``, ``'accuracy'``, ``'precision'``
        and ``'recall'``.
    """
    # Threshold once and reuse, so every thresholded metric is guaranteed
    # to be computed from the same predictions.
    test_preds = test_outputs > threshold

    return {
        'auc': roc_auc_score(test_labels, test_outputs),
        'f1': f1_score(test_labels, test_preds),
        'accuracy': accuracy_score(test_labels, test_preds),
        'precision': precision_score(test_labels, test_preds),
        'recall': recall_score(test_labels, test_preds),
    }
