import os

import datasets
import jsonlines
# Load the "validation" split of the "trivia_qa" dataset, using the
# "unfiltered.nocontext" configuration.
val_data = datasets.load_dataset(
    "trivia_qa",
    "unfiltered.nocontext",
    split="validation",
)

# Translation table for normalize_quotes. Built once at import time rather
# than on every call, since the mapping never changes.
_QUOTE_TRANSLATION = str.maketrans({
    "\u2018": "'",   # ‘ left single quotation mark
    "\u2019": "'",   # ’ right single quotation mark
    "\u2032": "'",   # ′ prime
    "`": "'",        # backtick
    "\u201c": '"',   # “ left double quotation mark
    "\u201d": '"',   # ” right double quotation mark
})


def normalize_quotes(s: str) -> str:
    """Return *s* with typographic quote characters replaced by ASCII ones.

    U+2018, U+2019, U+2032 and the backtick become an ASCII apostrophe (');
    U+201C and U+201D become an ASCII double quote ("). Every other
    character is left untouched.

    >>> normalize_quotes("\u201cIt\u2019s\u201d")
    '"It\\'s"'
    """
    return s.translate(_QUOTE_TRANSLATION)

# One record per validation example: the question with its quotes normalized,
# paired with the accepted answer aliases. A bare-string alias is wrapped in
# a list so that "truthful answer" is a list in either case.
data = [
    {
        "query": normalize_quotes(question),
        "truthful answer": (
            [answer["aliases"]]
            if isinstance(answer["aliases"], str)
            else answer["aliases"]
        ),
    }
    for question, answer in zip(val_data["question"], val_data["answer"])
]

# Destination of the converted dataset, one JSON object per line.
output_path = "data/datasets/triviaqa/test.jsonl"

# Opening a file for writing does not create missing parent directories, so
# make sure the target directory exists instead of failing with
# FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with jsonlines.open(output_path, "w") as writer:
    writer.write_all(data)