{
    "config": {
        "model": {
            "name": "Llama-2-7b-chat-hf",
            "batch_size": 16,
            "prompt": "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Answer the following yes/no questions. You must give a simple yes or no answer. Always answer as helpfuly as possible.\n<</SYS>>\n\n[question] [/INST]\nAnswer:",
            "free_response_prompt": "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfuly as possible.\n<</SYS>>\n\n[question] [/INST]\nAnswer:",
            "cot": false,
            "dataset_model_name": "vicuna-7b-v1.3"
        },
        "task": {
            "name": "aita",
            "possible_shift_variables": [
                "a",
                "b",
                "c",
                "d",
                "e"
            ]
        },
        "baseline": {
            "embedder": {
                "name": "GloVe"
            },
            "retriever": {
                "name": "nearest_neighbor"
            },
            "explanation": {
                "name": "none",
                "requires_model_queries": false
            },
            "name": "nearest_neighbor_three",
            "class_name": "nearest_neighbor",
            "knn": 3
        },
        "distributional_shift": {
            "name": "new_combinations",
            "train_variables": 5,
            "new_values": 3
        },
        "dataset_construction": {
            "templates_per_topic": 15,
            "train_questions_per_template": 500,
            "test_questions_per_template": 50
        }
    },
    "n_templates_evaluated": 15,
    "templates_evaluated": [
        "aita_118",
        "aita_83",
        "aita_302",
        "aita_362",
        "aita_231",
        "aita_502",
        "aita_360",
        "aita_66",
        "aita_117",
        "aita_401",
        "aita_39",
        "aita_370",
        "aita_238",
        "aita_219",
        "aita_512"
    ],
    "results": {
        "log_score": -0.9742578591996031,
        "normalized_log_score": -0.8372018532653113,
        "kl_divergence": 0.8372018532653113,
        "accuracy": 0.6426666666666667,
        "tv_distance": 0.36233505564663454
    }
}