{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "b02955e2-f4a3-429a-8458-efc4d4c3d8d7",
    "model": "d=1024_l=24_h=8",
    "creation_date": "2024_02_12-08_44_57",
    "eval_metrics": {
        "perplexity": 2.6024322470029193,
        "downstream_perpexity": {
            "mmlu": 1.810143357548444,
            "hellaswag": 2.44071551186942,
            "jeopardy_all": 2.2791872432787126,
            "triviaqa_sm_sub": 2.611622014959653,
            "gsm8k": 2.0154216653745767,
            "agi_eval_sat_math": 1.7790990032932974,
            "aqua": 2.507558600270018,
            "svamp": 2.4161226312319437,
            "bigbench_qa_wikidata": 3.5729048591647747,
            "arc_easy": 2.780241434321259,
            "arc_challenge": 2.8329139519164994,
            "bigbench_misconceptions": 5.761013033183198,
            "copa": 2.6937528359889984,
            "siqa": 1.691417954778232,
            "commonsense_qa": 1.87874609412569,
            "piqa": 2.702894105745218,
            "openbook_qa": 4.3595919589996335,
            "bigbench_novel_concepts": 2.843109592795372,
            "bigbench_strange_stories": 3.424634268914146,
            "bigbench_strategy_qa": 2.108652152313109,
            "lambada_openai": 1.7047209403367904,
            "winograd_wsc": 2.6303146217332216,
            "winogrande": 3.1712718294094,
            "bigbench_conlang_translation": 2.0763540885797362,
            "bigbench_language_identification": 5.625110585156811,
            "bigbench_conceptual_combinations": 1.1646940881766161,
            "bigbench_elementary_math_qa": 4.03409315145553,
            "bigbench_dyck_languages": 4.313431212902069,
            "agi_eval_lsat_ar": 1.8598920106887817,
            "bigbench_cs_algorithms": 5.644895832466356,
            "bigbench_logical_deduction": 1.1611345701217652,
            "bigbench_operators": 5.808703247706095,
            "bigbench_repeat_copy_logic": 1.6006507687270641,
            "simple_arithmetic_nospaces": 6.83725142288208,
            "simple_arithmetic_withspaces": 6.751215864658356,
            "math_qa": 6.085315931334897,
            "logi_qa": 1.9657368147245018,
            "pubmed_qa_labeled": 7.733960754871369,
            "squad": 2.0933655690785953,
            "agi_eval_lsat_rc": 2.0100549241500114,
            "agi_eval_lsat_lr": 1.97163272932464,
            "coqa": 2.9155612245741516,
            "bigbench_understanding_fables": 6.0998756216947365,
            "boolq": 3.127211407856839,
            "agi_eval_sat_en": 2.1656158132460512,
            "winogender_mc_female": 1.1555589735507965,
            "winogender_mc_male": 1.0615417202313742,
            "enterprise_pii_classification": 5.447938813446899,
            "bbq": 0.32420715843327663,
            "human_eval_return_complex": 2.9969045759186033,
            "human_eval_return_simple": 6.244848290005246,
            "human_eval-0.5": 2.907015226236204,
            "human_eval-0.25": 3.0764140704783,
            "human_eval-0.75": 2.885862931972597,
            "human_eval": 3.273264575295332,
            "processed_human_eval_cpp": 3.248482086643669,
            "processed_human_eval_js": 2.505025286500047
        },
        "icl": {
            "mmlu_zeroshot": 0.23807243793679958,
            "mmlu": 0.2409873808685102,
            "hellaswag_zeroshot": 0.5198167562484741,
            "jeopardy": 0.12735132798552512,
            "triviaqa_sm_sub": 0.0010000000474974513,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.5826976895332336,
            "arc_easy": 0.558501660823822,
            "arc_challenge": 0.282423198223114,
            "bigbench_misconceptions": 0.4611872136592865,
            "copa": 0.6800000071525574,
            "siqa": 0.4918116629123688,
            "commonsense_qa": 0.21130220592021942,
            "piqa": 0.707290530204773,
            "openbook_qa": 0.335999995470047,
            "bigbench_novel_concepts": 0.375,
            "bigbench_strange_stories": 0.517241358757019,
            "bigbench_strategy_qa": 0.4884229004383087,
            "lambada_openai": 0.4905880093574524,
            "hellaswag": 0.5168293118476868,
            "winograd": 0.7179487347602844,
            "winogrande": 0.5556432604789734,
            "bigbench_conlang_translation": 0.018292682245373726,
            "bigbench_language_identification": 0.2529999911785126,
            "bigbench_conceptual_combinations": 0.24271844327449799,
            "bigbench_elementary_math_qa": 0.24174527823925018,
            "bigbench_dyck_languages": 0.14499999582767487,
            "agi_eval_lsat_ar": 0.16521738469600677,
            "bigbench_cs_algorithms": 0.45075756311416626,
            "bigbench_logical_deduction": 0.2446666657924652,
            "bigbench_operators": 0.190476194024086,
            "bigbench_repeat_copy_logic": 0.03125,
            "simple_arithmetic_nospaces": 0.0020000000949949026,
            "simple_arithmetic_withspaces": 0.006000000052154064,
            "math_qa": 0.26181694865226746,
            "logi_qa": 0.27496159076690674,
            "pubmed_qa_labeled": 0.3970000147819519,
            "squad": 0.31854304671287537,
            "agi_eval_lsat_rc": 0.26119402050971985,
            "agi_eval_lsat_lr": 0.26862746477127075,
            "coqa": 0.25216084718704224,
            "bigbench_understanding_fables": 0.26455026865005493,
            "boolq": 0.590519905090332,
            "agi_eval_sat_en": 0.291262149810791,
            "winogender_mc_female": 0.4333333373069763,
            "winogender_mc_male": 0.5666666626930237,
            "enterprise_pii_classification": 0.49985271692276,
            "bbq": 0.46040007201108063
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.16490864753723145,
        "language understanding": 0.2207355023516963,
        "reading comprehension": 0.1578652821481228,
        "safety": -0.01987360553307968,
        "symbolic problem solving": 0.06841114912078612,
        "world knowledge": 0.13250871025264463
    },
    "aggregated_centered_results": 0.12460427358817715,
    "aggregated_results": 0.3183661635575238,
    "rw_small": 0.5311507980028788,
    "95%_CI_above": 0.37228911209512844,
    "99%_CI_above": 0.37308681425452234,
    "model_uuid": "3768017b-2341-4211-8766-1a4f66f01bb6",
    "low_variance_datasets": 0.39469625526531177
}