{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "a1922e06-d00c-414b-9d15-1c9f43978087",
    "model": "d=96_l=8_h=4",
    "creation_date": "2024_02_13-12_37_30",
    "eval_metrics": {
        "perplexity": 4.445184568564097,
        "downstream_perpexity": {
            "mmlu": 4.571799224615097,
            "hellaswag": 4.538831514120102,
            "jeopardy_all": 6.563713109493255,
            "triviaqa_sm_sub": 6.509380845228831,
            "gsm8k": 4.586485614379247,
            "agi_eval_sat_math": 3.3311555645682596,
            "aqua": 4.636651361231901,
            "svamp": 4.422645678520203,
            "bigbench_qa_wikidata": 7.99140956401825,
            "arc_easy": 5.713711965084076,
            "arc_challenge": 5.297485947608948,
            "bigbench_misconceptions": 5.625168970186416,
            "copa": 4.710454421043396,
            "siqa": 4.635654517014822,
            "commonsense_qa": 4.8775478521982825,
            "piqa": 5.132726788520813,
            "openbook_qa": 6.567792597770691,
            "bigbench_novel_concepts": 5.0565075278282166,
            "bigbench_strange_stories": 5.630778364751531,
            "bigbench_strategy_qa": 5.272606074810028,
            "lambada_openai": 5.7560855348904925,
            "winograd_wsc": 4.210086715090406,
            "winogrande": 4.5957058330376945,
            "bigbench_conlang_translation": 4.352883594792064,
            "bigbench_language_identification": 4.150483880440394,
            "bigbench_conceptual_combinations": 3.0311727593246016,
            "bigbench_elementary_math_qa": 5.783523901303609,
            "bigbench_dyck_languages": 6.773796739197579,
            "agi_eval_lsat_ar": 4.58548625137495,
            "bigbench_cs_algorithms": 8.343708678086598,
            "bigbench_logical_deduction": 1.761532693107923,
            "bigbench_operators": 6.7875271433875675,
            "bigbench_repeat_copy_logic": 4.4797023087739944,
            "simple_arithmetic_nospaces": 7.626575743450838,
            "simple_arithmetic_withspaces": 7.695875077187514,
            "math_qa": 4.44064616560936,
            "logi_qa": 5.1920620556434365,
            "pubmed_qa_labeled": 9.291023578964362,
            "squad": 4.778201947609584,
            "agi_eval_lsat_rc": 4.435249528778133,
            "agi_eval_lsat_lr": 4.6908975367452586,
            "coqa": 5.548120983441671,
            "bigbench_understanding_fables": 4.658836258782281,
            "boolq": 7.17024198770523,
            "agi_eval_sat_en": 4.663200793914425,
            "winogender_mc_female": 5.832546790440877,
            "winogender_mc_male": 5.02921085357666,
            "enterprise_pii_classification": 7.2666204611460365,
            "bbq": 2.497307059168816,
            "human_eval_return_complex": 3.4834291972513274,
            "human_eval_return_simple": 5.53688551928546,
            "human_eval-0.5": 3.3858553025780656,
            "human_eval-0.25": 3.4226572150137367,
            "human_eval-0.75": 3.475425909205181,
            "human_eval": 3.5236691018430197,
            "processed_human_eval_cpp": 3.06163986277136,
            "processed_human_eval_js": 2.6024208541323497
        },
        "icl": {
            "mmlu_zeroshot": 0.23127958779795127,
            "mmlu": 0.23303702199145368,
            "hellaswag_zeroshot": 0.25134435296058655,
            "jeopardy": 0.0,
            "triviaqa_sm_sub": 0.0006666666595265269,
            "gsm8k": 0.0007581501267850399,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.009999999776482582,
            "bigbench_qa_wikidata": 0.019438019022345543,
            "arc_easy": 0.27398988604545593,
            "arc_challenge": 0.21075084805488586,
            "bigbench_misconceptions": 0.5068492889404297,
            "copa": 0.41999998688697815,
            "siqa": 0.4943705201148987,
            "commonsense_qa": 0.2571662664413452,
            "piqa": 0.5554950833320618,
            "openbook_qa": 0.23800000548362732,
            "bigbench_novel_concepts": 0.21875,
            "bigbench_strange_stories": 0.48275861144065857,
            "bigbench_strategy_qa": 0.49235475063323975,
            "lambada_openai": 0.03997671231627464,
            "hellaswag": 0.24686317145824432,
            "winograd": 0.47985348105430603,
            "winogrande": 0.48460930585861206,
            "bigbench_conlang_translation": 0.0,
            "bigbench_language_identification": 0.25200000405311584,
            "bigbench_conceptual_combinations": 0.27184465527534485,
            "bigbench_elementary_math_qa": 0.26648321747779846,
            "bigbench_dyck_languages": 0.004000000189989805,
            "agi_eval_lsat_ar": 0.1608695685863495,
            "bigbench_cs_algorithms": 0.39393940567970276,
            "bigbench_logical_deduction": 0.2626666724681854,
            "bigbench_operators": 0.06666667014360428,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.003000000026077032,
            "simple_arithmetic_withspaces": 0.004999999888241291,
            "math_qa": 0.24371437728405,
            "logi_qa": 0.21658986806869507,
            "pubmed_qa_labeled": 0.029999999329447746,
            "squad": 0.0007568590226583183,
            "agi_eval_lsat_rc": 0.2761194109916687,
            "agi_eval_lsat_lr": 0.2803921699523926,
            "coqa": 0.002505323849618435,
            "bigbench_understanding_fables": 0.30687829852104187,
            "boolq": 0.3782874643802643,
            "agi_eval_sat_en": 0.3009708821773529,
            "winogender_mc_female": 0.4833333194255829,
            "winogender_mc_male": 0.550000011920929,
            "enterprise_pii_classification": 0.5063328146934509,
            "bbq": 0.4381141391667453
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.05273078506191572,
        "language understanding": -0.00021185027435421944,
        "reading comprehension": 0.0011230991440243105,
        "safety": -0.011109857396645967,
        "symbolic problem solving": 0.04231453972558181,
        "world knowledge": -0.004265034739700779
    },
    "aggregated_centered_results": 0.019319503133965167,
    "aggregated_results": 0.2324180843798035,
    "rw_small": 0.3364969740311305,
    "95%_CI_above": 0.22230855959457008,
    "99%_CI_above": 0.20126413498772308,
    "model_uuid": "e177b02d-5894-46a8-bd42-b6a149e8ad3a",
    "low_variance_datasets": 0.21529601885545577
}