{
    "name": "heavy",
    "uuid": "bdad5aa7-c6b7-43db-8fee-4869a04621d0",
    "model": "allenai/OLMo-1.7-7B-hf",
    "creation_date": "2024_06_03-02_59_29",
    "eval_metrics": {
        "icl": {
            "mmlu_zeroshot": 0.4624800506913871,
            "hellaswag_zeroshot": 0.7600079774856567,
            "jeopardy": 0.43934719264507294,
            "bigbench_qa_wikidata": 0.7122188806533813,
            "arc_easy": 0.7647306323051453,
            "arc_challenge": 0.46587032079696655,
            "mmlu_fewshot": 0.5397106654811323,
            "bigbench_misconceptions": 0.543379008769989,
            "copa": 0.8299999833106995,
            "siqa": 0.8500511646270752,
            "commonsense_qa": 0.8845208883285522,
            "piqa": 0.8155604004859924,
            "openbook_qa": 0.4059999883174896,
            "bigbench_novel_concepts": 0.59375,
            "bigbench_strange_stories": 0.7011494040489197,
            "bigbench_strategy_qa": 0.6474443078041077,
            "lambada_openai": 0.7077430486679077,
            "hellaswag": 0.7819159626960754,
            "winograd": 0.8095238208770752,
            "winogrande": 0.6921862959861755,
            "bigbench_conlang_translation": 0.10975609719753265,
            "bigbench_language_identification": 0.34599998593330383,
            "bigbench_conceptual_combinations": 0.6796116232872009,
            "bigbench_elementary_math_qa": 0.29331761598587036,
            "bigbench_dyck_languages": 0.28299999237060547,
            "agi_eval_lsat_ar": 0.30000001192092896,
            "bigbench_cs_algorithms": 0.5507575869560242,
            "bigbench_logical_deduction": 0.26866665482521057,
            "bigbench_operators": 0.46666666865348816,
            "bigbench_repeat_copy_logic": 0.03125,
            "simple_arithmetic_nospaces": 0.14399999380111694,
            "simple_arithmetic_withspaces": 0.14100000262260437,
            "math_qa": 0.28897082805633545,
            "logi_qa": 0.3502304255962372,
            "pubmed_qa_labeled": 0.36800000071525574,
            "squad": 0.0,
            "agi_eval_lsat_rc": 0.5970149040222168,
            "agi_eval_lsat_lr": 0.41372549533843994,
            "coqa": 0.4143805503845215,
            "bigbench_understanding_fables": 0.6084656119346619,
            "boolq": 0.8525993824005127,
            "agi_eval_sat_en": 0.6941747665405273,
            "winogender_mc_female": 0.5166666507720947,
            "winogender_mc_male": 0.5166666507720947,
            "enterprise_pii_classification": 0.4662739336490631,
            "bbq": 0.5944152068008076,
            "gpqa_main": 0.25,
            "gpqa_diamond": 0.2070707082748413,
            "gsm8k_cot": 0.2653525471687317,
            "agi_eval_sat_math_cot": 0.08636363595724106,
            "aqua_cot": 0.05306122452020645,
            "svamp_cot": 0.41999998688697815,
            "triviaqa_sm_sub": 0.4546666741371155
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.5399169923122349,
        "language understanding": 0.5079368543601108,
        "reading comprehension": 0.407590590646131,
        "safety": 0.04701122099703009,
        "symbolic problem solving": 0.19933668133466756,
        "world knowledge": 0.32794739350415114
    },
    "aggregated_centered_results": 0.3415306516527822,
    "aggregated_results": 0.47999463040491647,
    "rw_small": 0.7275657653808594,
    "rw_small_centered": 0.5517702395455879,
    "95%_CI_above": 0.5595716101105337,
    "95%_CI_above_centered": 0.4294000278999167,
    "99%_CI_above": 0.5566327617220257,
    "99%_CI_above_centered": 0.45063365485539053,
    "low_variance_datasets": 0.5597399805079807,
    "low_variance_datasets_centered": 0.4702070104995895,
    "_filename": "exp_data/evals/evaluation_olmo-1.7-7b_heavy.json",
    "missing tasks": "[]",
    "Core": 0.4702070104995895,
    "Extended": 0.3415306516527822
}