from pathlib import Path

import pytest

from hallucinations.data.umwp import load_umwp_dataset

# Location of the UMWP JSONL file. The path is relative, so it is resolved
# against the current working directory of the pytest process.
UMWP_DATASET_PATH = Path("data") / "datasets" / "umwp" / "StandardDataset.jsonl"


@pytest.mark.skipif(not UMWP_DATASET_PATH.exists(), reason="UMWP dataset not available")
def test_load_umwp_dataset_smoke() -> None:
    """Check that the UMWP loader yields a non-empty dataset with the expected columns.

    The test is skipped when no file exists at ``UMWP_DATASET_PATH``.
    """
    expected_columns = {"id", "question_id", "question", "answer", "answerable", "category"}

    loaded = load_umwp_dataset(UMWP_DATASET_PATH)

    # The dataset must contain at least one record.
    assert len(loaded) > 0

    # Extra columns are allowed; only the expected ones must all be present.
    missing_columns = expected_columns - set(loaded.column_names)
    assert not missing_columns
