{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "0fbb2bc5-b194-4dc0-a509-afed0665c086",
    "model": "d=512_l=8_h=4",
    "creation_date": "2024_01_29-19_30_42",
    "eval_metrics": {
        "perplexity": 4.304766794045766,
        "downstream_perpexity": {
            "mmlu": 5.278683722019196,
            "hellaswag": 3.036212941010793,
            "jeopardy_all": 4.185969823598862,
            "triviaqa_sm_sub": 4.494954228401184,
            "gsm8k": 3.2920319974422454,
            "agi_eval_sat_math": 3.8111725081096997,
            "aqua": 3.735347589181394,
            "svamp": 4.340516239802042,
            "bigbench_qa_wikidata": 5.800940648714701,
            "arc_easy": 3.9706326206525167,
            "arc_challenge": 3.8805115640163423,
            "bigbench_misconceptions": 6.102336900963631,
            "copa": 3.362064366340637,
            "siqa": 5.079966564973195,
            "commonsense_qa": 5.357826364040375,
            "piqa": 3.320584229628245,
            "openbook_qa": 5.192072226047516,
            "bigbench_novel_concepts": 3.8312977999448776,
            "bigbench_strange_stories": 4.363424527234044,
            "bigbench_strategy_qa": 2.7918802360693613,
            "lambada_openai": 3.0646334146459897,
            "winograd_wsc": 3.2385299537644716,
            "winogrande": 3.653505845864614,
            "bigbench_conlang_translation": 2.80613911151886,
            "bigbench_language_identification": 4.665547790129979,
            "bigbench_conceptual_combinations": 0.9583402490152896,
            "bigbench_elementary_math_qa": 4.5254721740881605,
            "bigbench_dyck_languages": 6.4749652993779225,
            "agi_eval_lsat_ar": 5.313559928147689,
            "bigbench_cs_algorithms": 6.228576672077179,
            "bigbench_logical_deduction": 1.048056727151076,
            "bigbench_operators": 6.467633694694156,
            "bigbench_repeat_copy_logic": 2.5820881091058254,
            "simple_arithmetic_nospaces": 7.508671416955836,
            "simple_arithmetic_withspaces": 6.7456241831058215,
            "math_qa": 4.9608998656272885,
            "logi_qa": 5.0850506477092265,
            "pubmed_qa_labeled": 7.271783547742026,
            "squad": 3.1477536459763846,
            "agi_eval_lsat_rc": 5.5318992467068915,
            "agi_eval_lsat_lr": 5.404990955427581,
            "coqa": 3.8393341819445292,
            "bigbench_understanding_fables": 5.356034818780485,
            "boolq": 5.520345711708069,
            "agi_eval_sat_en": 5.520447154646939,
            "winogender_mc_female": 3.7055022517840066,
            "winogender_mc_male": 3.2913995027542113,
            "enterprise_pii_classification": 2.2342695484558743,
            "bbq": 0.5740806398292383,
            "human_eval_return_complex": 4.423253517451249,
            "human_eval_return_simple": 7.02865507796004,
            "human_eval-0.5": 4.283961685692391,
            "human_eval-0.25": 4.3971329971057616,
            "human_eval-0.75": 4.447489280526231,
            "human_eval": 4.501776359430173,
            "processed_human_eval_cpp": 4.729903425489153,
            "processed_human_eval_js": 4.696410042483632
        },
        "icl": {
            "mmlu_zeroshot": 0.2312963293832645,
            "mmlu": 0.23798752445400806,
            "hellaswag_zeroshot": 0.29864567518234253,
            "jeopardy": 0.0,
            "triviaqa_sm_sub": 0.0010000000474974513,
            "gsm8k": 0.0015163002535700798,
            "agi_eval_sat_math": 0.013636363670229912,
            "aqua": 0.0,
            "svamp": 0.019999999552965164,
            "bigbench_qa_wikidata": 0.2565818727016449,
            "arc_easy": 0.3598484992980957,
            "arc_challenge": 0.2226962447166443,
            "bigbench_misconceptions": 0.4794520437717438,
            "copa": 0.6100000143051147,
            "siqa": 0.48464688658714294,
            "commonsense_qa": 0.2383292317390442,
            "piqa": 0.6273123025894165,
            "openbook_qa": 0.2800000011920929,
            "bigbench_novel_concepts": 0.375,
            "bigbench_strange_stories": 0.4712643623352051,
            "bigbench_strategy_qa": 0.4901703894138336,
            "lambada_openai": 0.2320978045463562,
            "hellaswag": 0.29904401302337646,
            "winograd": 0.5311355590820312,
            "winogrande": 0.5232833623886108,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.2558000087738037,
            "bigbench_conceptual_combinations": 0.223300963640213,
            "bigbench_elementary_math_qa": 0.25285640358924866,
            "bigbench_dyck_languages": 0.1120000034570694,
            "agi_eval_lsat_ar": 0.208695650100708,
            "bigbench_cs_algorithms": 0.3909091055393219,
            "bigbench_logical_deduction": 0.24799999594688416,
            "bigbench_operators": 0.0714285746216774,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.0,
            "simple_arithmetic_withspaces": 0.0020000000949949026,
            "math_qa": 0.2497485727071762,
            "logi_qa": 0.21966205537319183,
            "pubmed_qa_labeled": 0.5109999775886536,
            "squad": 0.002175969770178199,
            "agi_eval_lsat_rc": 0.2723880708217621,
            "agi_eval_lsat_lr": 0.26274511218070984,
            "coqa": 0.0227984469383955,
            "bigbench_understanding_fables": 0.29100528359413147,
            "boolq": 0.5287461876869202,
            "agi_eval_sat_en": 0.3009708821773529,
            "winogender_mc_female": 0.4833333194255829,
            "winogender_mc_male": 0.46666666865348816,
            "enterprise_pii_classification": 0.5039764642715454,
            "bbq": 0.40845710310069
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.11848669250806172,
        "language understanding": 0.056939789559692144,
        "reading comprehension": 0.0953682376712095,
        "safety": -0.06878322227434677,
        "symbolic problem solving": 0.052036135607502525,
        "world knowledge": 0.03569884467858083
    },
    "aggregated_centered_results": 0.057987405105009895,
    "aggregated_results": 0.2663883278781342,
    "rw_small": 0.4238699972629547,
    "95%_CI_above": 0.28562705906912345,
    "99%_CI_above": 0.274088850421831,
    "model_uuid": "52432727-eedb-4b3e-b9ef-cf73660109a9",
    "low_variance_datasets": 0.2759785694387657
}