{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "a0b9927e-9951-4477-a470-a0cf2bd64e7d",
    "model": "d=1024_l=24_h=8",
    "creation_date": "2024_02_12-14_52_08",
    "eval_metrics": {
        "perplexity": 3.31946835120519,
        "downstream_perpexity": {
            "mmlu": 3.950016123056412,
            "hellaswag": 2.9090161363283795,
            "jeopardy_all": 3.9216318557659786,
            "triviaqa_sm_sub": 4.021080728371938,
            "gsm8k": 2.841562648614248,
            "agi_eval_sat_math": 2.371288286555897,
            "aqua": 3.167568228196125,
            "svamp": 3.6078965894381203,
            "bigbench_qa_wikidata": 5.441729382673899,
            "arc_easy": 3.75175927678744,
            "arc_challenge": 3.620997275908788,
            "bigbench_misconceptions": 5.9879424430463954,
            "copa": 3.214557902812958,
            "siqa": 4.411346584558487,
            "commonsense_qa": 4.840249544382095,
            "piqa": 3.2217750787734984,
            "openbook_qa": 4.94931012582779,
            "bigbench_novel_concepts": 3.701393112540245,
            "bigbench_strange_stories": 3.6220605811853517,
            "bigbench_strategy_qa": 2.1397339204947152,
            "lambada_openai": 2.6140548929572107,
            "winograd_wsc": 3.050773839810829,
            "winogrande": 3.487412925561269,
            "bigbench_conlang_translation": 2.6566275903364507,
            "bigbench_language_identification": 5.096365849177043,
            "bigbench_conceptual_combinations": 1.1264733145537886,
            "bigbench_elementary_math_qa": 3.916078116496404,
            "bigbench_dyck_languages": 5.092579792026712,
            "agi_eval_lsat_ar": 4.7828351083009135,
            "bigbench_cs_algorithms": 6.771997825304667,
            "bigbench_logical_deduction": 1.3066663215557734,
            "bigbench_operators": 5.883864003136044,
            "bigbench_repeat_copy_logic": 2.4144296422600746,
            "simple_arithmetic_nospaces": 7.137166516620572,
            "simple_arithmetic_withspaces": 6.881091464467409,
            "math_qa": 5.717128566900889,
            "logi_qa": 3.3437402398538665,
            "pubmed_qa_labeled": 5.515919584687016,
            "squad": 2.389951393008232,
            "agi_eval_lsat_rc": 4.565121607993966,
            "agi_eval_lsat_lr": 4.716555651496439,
            "coqa": 3.2181499689817428,
            "bigbench_understanding_fables": 5.816763184057972,
            "boolq": 4.2417008697986605,
            "agi_eval_sat_en": 4.761445960952241,
            "winogender_mc_female": 2.429897840817769,
            "winogender_mc_male": 2.53375869790713,
            "enterprise_pii_classification": 5.180903311570486,
            "bbq": 0.5819550922761361,
            "human_eval_return_complex": 4.062562751019095,
            "human_eval_return_simple": 7.324291048823176,
            "human_eval-0.5": 3.981562038747276,
            "human_eval-0.25": 4.104558655401555,
            "human_eval-0.75": 4.027657966788222,
            "human_eval": 4.270089467851127,
            "processed_human_eval_cpp": 3.5628888769919826,
            "processed_human_eval_js": 3.201204704075325
        },
        "icl": {
            "mmlu_zeroshot": 0.23157964071683718,
            "mmlu": 0.23919217183924557,
            "hellaswag_zeroshot": 0.3152758479118347,
            "jeopardy": 0.0023969837930053473,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.01666666753590107,
            "bigbench_qa_wikidata": 0.288814514875412,
            "arc_easy": 0.39099326729774475,
            "arc_challenge": 0.21928328275680542,
            "bigbench_misconceptions": 0.49771690368652344,
            "copa": 0.5600000023841858,
            "siqa": 0.48413512110710144,
            "commonsense_qa": 0.22358722984790802,
            "piqa": 0.6447225213050842,
            "openbook_qa": 0.27399998903274536,
            "bigbench_novel_concepts": 0.4375,
            "bigbench_strange_stories": 0.4655172526836395,
            "bigbench_strategy_qa": 0.5146352052688599,
            "lambada_openai": 0.2998253405094147,
            "hellaswag": 0.31428003311157227,
            "winograd": 0.593406617641449,
            "winogrande": 0.49723756313323975,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.25600001215934753,
            "bigbench_conceptual_combinations": 0.21359223127365112,
            "bigbench_elementary_math_qa": 0.24837526679039001,
            "bigbench_dyck_languages": 0.13600000739097595,
            "agi_eval_lsat_ar": 0.239130437374115,
            "bigbench_cs_algorithms": 0.4022727310657501,
            "bigbench_logical_deduction": 0.23466666042804718,
            "bigbench_operators": 0.1190476194024086,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.004999999888241291,
            "simple_arithmetic_withspaces": 0.003000000026077032,
            "math_qa": 0.2541066110134125,
            "logi_qa": 0.22887864708900452,
            "pubmed_qa_labeled": 0.5540000200271606,
            "squad": 0.019583728164434433,
            "agi_eval_lsat_rc": 0.24253731966018677,
            "agi_eval_lsat_lr": 0.29019609093666077,
            "coqa": 0.1047225371003151,
            "bigbench_understanding_fables": 0.28042328357696533,
            "boolq": 0.6000000238418579,
            "agi_eval_sat_en": 0.23786407709121704,
            "winogender_mc_female": 0.5,
            "winogender_mc_male": 0.5166666507720947,
            "enterprise_pii_classification": 0.4927835166454315,
            "bbq": 0.4624849557876587
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.12810039520263672,
        "language understanding": 0.07818829035386443,
        "reading comprehension": 0.11829175349945824,
        "safety": -0.014032438397407532,
        "symbolic problem solving": 0.05869105538974206,
        "world knowledge": 0.04938039035650892
    },
    "aggregated_centered_results": 0.07482201397169112,
    "aggregated_results": 0.2778269116308258,
    "rw_small": 0.4304649978876114,
    "95%_CI_above": 0.3055040507189309,
    "99%_CI_above": 0.29232121928595006,
    "model_uuid": "858c5303-b800-4fbb-a48e-c19c4c997a21",
    "low_variance_datasets": 0.2954809222772548
}