{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "d4297e97-e27e-4c3b-93a8-d7e25ac21510",
    "model": "d=1024_l=24_h=8",
    "creation_date": "2024_02_12-09_18_48",
    "eval_metrics": {
        "perplexity": 2.519537311792374,
        "downstream_perpexity": {
            "mmlu": 3.3962035914262136,
            "hellaswag": 2.823373387257258,
            "jeopardy_all": 2.979023904601733,
            "triviaqa_sm_sub": 3.3628438889980314,
            "gsm8k": 2.4191139062245686,
            "agi_eval_sat_math": 1.8622365658933466,
            "aqua": 2.86578747496313,
            "svamp": 2.8569911988576253,
            "bigbench_qa_wikidata": 4.641228920221328,
            "arc_easy": 3.3274321963389712,
            "arc_challenge": 3.2658950626850127,
            "bigbench_misconceptions": 5.660932791287496,
            "copa": 3.0183710932731627,
            "siqa": 3.222509483496348,
            "commonsense_qa": 4.320584974686304,
            "piqa": 3.113336447874705,
            "openbook_qa": 4.7971806979179386,
            "bigbench_novel_concepts": 3.4086705297231674,
            "bigbench_strange_stories": 3.7136221315668916,
            "bigbench_strategy_qa": 2.106765705347061,
            "lambada_openai": 2.095911184946696,
            "winograd_wsc": 2.914974113087078,
            "winogrande": 3.4166778127352395,
            "bigbench_conlang_translation": 2.5096342301950223,
            "bigbench_language_identification": 3.764637458324432,
            "bigbench_conceptual_combinations": 0.7193801159997588,
            "bigbench_elementary_math_qa": 3.6354130188624065,
            "bigbench_dyck_languages": 4.685196633098506,
            "agi_eval_lsat_ar": 3.133602295751157,
            "bigbench_cs_algorithms": 6.73278101682663,
            "bigbench_logical_deduction": 0.9929639826218287,
            "bigbench_operators": 6.202595569973901,
            "bigbench_repeat_copy_logic": 1.9467885233461857,
            "simple_arithmetic_nospaces": 7.443890025635727,
            "simple_arithmetic_withspaces": 6.420637516414418,
            "math_qa": 4.3368168592453005,
            "logi_qa": 2.591804478758125,
            "pubmed_qa_labeled": 3.8805245534211648,
            "squad": 2.3979672133922576,
            "agi_eval_lsat_rc": 2.676459121170329,
            "agi_eval_lsat_lr": 2.5915258416942524,
            "coqa": 2.2825750688711803,
            "bigbench_understanding_fables": 3.0378847324028215,
            "boolq": 3.337830706437429,
            "agi_eval_sat_en": 3.4174932924289148,
            "winogender_mc_female": 1.5230704188346862,
            "winogender_mc_male": 1.6158035357793172,
            "enterprise_pii_classification": 2.212185819943746,
            "bbq": 0.47507547258088983,
            "human_eval_return_complex": 1.2815825047455436,
            "human_eval_return_simple": 3.6695662511361613,
            "human_eval-0.5": 1.1454477539149726,
            "human_eval-0.25": 1.1854427696728125,
            "human_eval-0.75": 1.1720695993522319,
            "human_eval": 1.2488846807945064,
            "processed_human_eval_cpp": 1.0956113856771719,
            "processed_human_eval_js": 1.0093606772219264
        },
        "icl": {
            "mmlu_zeroshot": 0.2357229679300074,
            "mmlu": 0.2401513776235413,
            "hellaswag_zeroshot": 0.3420633375644684,
            "jeopardy": 0.00945174815133214,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0030326005071401596,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.004081632476300001,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.4798484444618225,
            "arc_easy": 0.4524410665035248,
            "arc_challenge": 0.2397610992193222,
            "bigbench_misconceptions": 0.5022830963134766,
            "copa": 0.6299999952316284,
            "siqa": 0.5040941834449768,
            "commonsense_qa": 0.28091728687286377,
            "piqa": 0.6447225213050842,
            "openbook_qa": 0.2879999876022339,
            "bigbench_novel_concepts": 0.34375,
            "bigbench_strange_stories": 0.44252872467041016,
            "bigbench_strategy_qa": 0.4879860281944275,
            "lambada_openai": 0.38579466938972473,
            "hellaswag": 0.34534952044487,
            "winograd": 0.622710645198822,
            "winogrande": 0.5161799788475037,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.2554999887943268,
            "bigbench_conceptual_combinations": 0.26213592290878296,
            "bigbench_elementary_math_qa": 0.23912474513053894,
            "bigbench_dyck_languages": 0.23100000619888306,
            "agi_eval_lsat_ar": 0.19130434095859528,
            "bigbench_cs_algorithms": 0.459090918302536,
            "bigbench_logical_deduction": 0.24799999594688416,
            "bigbench_operators": 0.18095238506793976,
            "bigbench_repeat_copy_logic": 0.03125,
            "simple_arithmetic_nospaces": 0.003000000026077032,
            "simple_arithmetic_withspaces": 0.003000000026077032,
            "math_qa": 0.24740193784236908,
            "logi_qa": 0.2457757294178009,
            "pubmed_qa_labeled": 0.453000009059906,
            "squad": 0.06584673374891281,
            "agi_eval_lsat_rc": 0.21641790866851807,
            "agi_eval_lsat_lr": 0.25882354378700256,
            "coqa": 0.15182262659072876,
            "bigbench_understanding_fables": 0.25925925374031067,
            "boolq": 0.5051987767219543,
            "agi_eval_sat_en": 0.19417475163936615,
            "winogender_mc_female": 0.4333333373069763,
            "winogender_mc_male": 0.4833333194255829,
            "enterprise_pii_classification": 0.5033873319625854,
            "bbq": 0.4879861988804557
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.1256940762201945,
        "language understanding": 0.11864617482448618,
        "reading comprehension": 0.07324594166129827,
        "safety": -0.04597990621219983,
        "symbolic problem solving": 0.07233292549434635,
        "world knowledge": 0.08978771670063317
    },
    "aggregated_centered_results": 0.08156991240822141,
    "aggregated_results": 0.28682479575166264,
    "rw_small": 0.4476073756814003,
    "95%_CI_above": 0.32802533031046166,
    "99%_CI_above": 0.3205250411964953,
    "model_uuid": "d4eda944-bd23-43e2-a671-8783d5c04afd",
    "low_variance_datasets": 0.33223663987168534
}