{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "2e13a3dd-75b2-4d0a-a805-daaed0a8a0a3",
    "model": "d=96_l=8_h=4",
    "creation_date": "2024_01_29-15_21_15",
    "eval_metrics": {
        "perplexity": 7.255427984396617,
        "downstream_perpexity": {
            "mmlu": 5.574545300006866,
            "hellaswag": 5.696534955501557,
            "jeopardy_all": 8.359339988231659,
            "triviaqa_sm_sub": 8.028283369541168,
            "gsm8k": 6.990634079774221,
            "agi_eval_sat_math": 7.799927421049638,
            "aqua": 6.647586616204709,
            "svamp": 5.994692087173462,
            "bigbench_qa_wikidata": 10.546289785703022,
            "arc_easy": 7.032759328683217,
            "arc_challenge": 6.693987953662872,
            "bigbench_misconceptions": 6.186425287429601,
            "copa": 5.922754821777343,
            "siqa": 5.083433266480764,
            "commonsense_qa": 5.576065214474996,
            "piqa": 6.293343798319499,
            "openbook_qa": 7.742509762763977,
            "bigbench_novel_concepts": 5.801536649465561,
            "bigbench_strange_stories": 6.822923473928166,
            "bigbench_strategy_qa": 5.52195926507314,
            "lambada_openai": 9.410168333848317,
            "winograd_wsc": 5.0939436212127465,
            "winogrande": 5.469625627994537,
            "bigbench_conlang_translation": 7.4268099883707555,
            "bigbench_language_identification": 5.20652064482371,
            "bigbench_conceptual_combinations": 7.044321879599858,
            "bigbench_elementary_math_qa": 7.486663842201233,
            "bigbench_dyck_languages": 10.848302869235768,
            "agi_eval_lsat_ar": 5.457854161055192,
            "bigbench_cs_algorithms": 11.62894693215688,
            "bigbench_logical_deduction": 5.2896800835927325,
            "bigbench_operators": 7.012788513728551,
            "bigbench_repeat_copy_logic": 7.700827956199646,
            "simple_arithmetic_nospaces": 8.64743103500174,
            "simple_arithmetic_withspaces": 7.850777630545512,
            "math_qa": 4.703559219837189,
            "logi_qa": 5.66537721658815,
            "pubmed_qa_labeled": 8.795895652610715,
            "squad": 7.88753160238266,
            "agi_eval_lsat_rc": 5.4906727885132405,
            "agi_eval_lsat_lr": 5.615300784391516,
            "coqa": 7.523047800858816,
            "bigbench_understanding_fables": 4.912025005098373,
            "boolq": 8.482102382183076,
            "agi_eval_sat_en": 5.335501839813677,
            "winogender_mc_female": 9.063061634699503,
            "winogender_mc_male": 8.803047386805217,
            "enterprise_pii_classification": 8.606369670232137,
            "bbq": 7.59393976132075,
            "human_eval_return_complex": 8.911935986496331,
            "human_eval_return_simple": 11.251591708209064,
            "human_eval-0.5": 9.064939312818574,
            "human_eval-0.25": 8.973695632888049,
            "human_eval-0.75": 9.168518368790789,
            "human_eval": 8.965987193875197,
            "processed_human_eval_cpp": 9.246979014473672,
            "processed_human_eval_js": 9.335195500676225
        },
        "icl": {
            "mmlu_zeroshot": 0.23116857623844816,
            "mmlu": 0.23116857623844816,
            "hellaswag_zeroshot": 0.2443736344575882,
            "jeopardy": 0.0,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.0,
            "arc_easy": 0.2554713785648346,
            "arc_challenge": 0.22525596618652344,
            "bigbench_misconceptions": 0.5205479264259338,
            "copa": 0.47999998927116394,
            "siqa": 0.48413512110710144,
            "commonsense_qa": 0.1957411915063858,
            "piqa": 0.5157780051231384,
            "openbook_qa": 0.24199999868869781,
            "bigbench_novel_concepts": 0.1875,
            "bigbench_strange_stories": 0.5632184147834778,
            "bigbench_strategy_qa": 0.5351681709289551,
            "lambada_openai": 0.0,
            "hellaswag": 0.24367655813694,
            "winograd": 0.5201465487480164,
            "winogrande": 0.5098658204078674,
            "bigbench_conlang_translation": 0.0,
            "bigbench_language_identification": 0.2502000033855438,
            "bigbench_conceptual_combinations": 0.26213592290878296,
            "bigbench_elementary_math_qa": 0.2683176100254059,
            "bigbench_dyck_languages": 0.0,
            "agi_eval_lsat_ar": 0.22608695924282074,
            "bigbench_cs_algorithms": 0.0,
            "bigbench_logical_deduction": 0.2513333261013031,
            "bigbench_operators": 0.03809523954987526,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.0,
            "simple_arithmetic_withspaces": 0.0,
            "math_qa": 0.24840764701366425,
            "logi_qa": 0.20276497304439545,
            "pubmed_qa_labeled": 0.0,
            "squad": 0.0,
            "agi_eval_lsat_rc": 0.2761194109916687,
            "agi_eval_lsat_lr": 0.27843138575553894,
            "coqa": 0.0,
            "bigbench_understanding_fables": 0.25925925374031067,
            "boolq": 0.3782874643802643,
            "agi_eval_sat_en": 0.3009708821773529,
            "winogender_mc_female": 0.550000011920929,
            "winogender_mc_male": 0.4333333373069763,
            "enterprise_pii_classification": 0.5063328146934509,
            "bbq": 0.49599273096431384
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.06378179167707762,
        "language understanding": 0.007567445437113443,
        "reading comprehension": -0.011297978460788725,
        "safety": -0.0071705525571649775,
        "symbolic problem solving": 0.015321715207149585,
        "world knowledge": -0.004310602189253738
    },
    "aggregated_centered_results": 0.01268762293317201,
    "aggregated_results": 0.22384634583818439,
    "rw_small": 0.3488757088780403,
    "95%_CI_above": 0.20882244556013382,
    "99%_CI_above": 0.1832444627583027,
    "model_uuid": "bfbb401b-cd66-44d2-8096-28e6acf9e70f",
    "low_variance_datasets": 0.19658994352953
}