{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "33cdfc37-c414-4bcc-86c5-95edd9aa59cb",
    "model": "d=1024_l=24_h=8",
    "creation_date": "2024_02_12-16_59_49",
    "eval_metrics": {
        "perplexity": 2.2214492579301197,
        "downstream_perpexity": {
            "mmlu": 2.180930678906595,
            "hellaswag": 2.5388794399262236,
            "jeopardy_all": 2.2432632982364846,
            "triviaqa_sm_sub": 2.5663202837308248,
            "gsm8k": 2.0382706768319316,
            "agi_eval_sat_math": 1.6243988459760492,
            "aqua": 2.501720252815558,
            "svamp": 2.5660921303431192,
            "bigbench_qa_wikidata": 3.722773173364456,
            "arc_easy": 2.8723119520378435,
            "arc_challenge": 2.9046491256346068,
            "bigbench_misconceptions": 5.438334428012099,
            "copa": 2.7664009726047514,
            "siqa": 1.4074245014722662,
            "commonsense_qa": 2.065941070944046,
            "piqa": 2.7907029975355644,
            "openbook_qa": 4.428257345199585,
            "bigbench_novel_concepts": 2.8937880396842957,
            "bigbench_strange_stories": 3.339511197873916,
            "bigbench_strategy_qa": 2.1170505141003675,
            "lambada_openai": 1.6164171579323727,
            "winograd_wsc": 2.688452051236079,
            "winogrande": 3.2383293369762898,
            "bigbench_conlang_translation": 2.088235920522271,
            "bigbench_language_identification": 2.0865713072362055,
            "bigbench_conceptual_combinations": 1.0297839734160785,
            "bigbench_elementary_math_qa": 3.387481490814711,
            "bigbench_dyck_languages": 4.609787565469742,
            "agi_eval_lsat_ar": 2.1143641316372417,
            "bigbench_cs_algorithms": 7.154466147495039,
            "bigbench_logical_deduction": 1.1676997136274974,
            "bigbench_operators": 5.440411440531412,
            "bigbench_repeat_copy_logic": 1.5394982509315014,
            "simple_arithmetic_nospaces": 7.281423626899719,
            "simple_arithmetic_withspaces": 5.8728711767196655,
            "math_qa": 2.8546455281315497,
            "logi_qa": 2.2529152913760115,
            "pubmed_qa_labeled": 2.9975270223617554,
            "squad": 1.9570244692921526,
            "agi_eval_lsat_rc": 1.9069448220196055,
            "agi_eval_lsat_lr": 2.0063796838124595,
            "coqa": 1.6217292335328193,
            "bigbench_understanding_fables": 2.031385177026981,
            "boolq": 3.9268021659384447,
            "agi_eval_sat_en": 1.9598997379969625,
            "winogender_mc_female": 1.2577883044878642,
            "winogender_mc_male": 1.2392911473910013,
            "enterprise_pii_classification": 3.478532555247266,
            "bbq": 0.27001876252387663,
            "human_eval_return_complex": 0.9861686642714372,
            "human_eval_return_simple": 3.716927038656699,
            "human_eval-0.5": 0.8763871956162337,
            "human_eval-0.25": 0.919279908625091,
            "human_eval-0.75": 0.8857097523968395,
            "human_eval": 0.9862722318346907,
            "processed_human_eval_cpp": 0.882863317957576,
            "processed_human_eval_js": 0.8362004124536747
        },
        "icl": {
            "mmlu_zeroshot": 0.23602519720269924,
            "mmlu": 0.2362248118509326,
            "hellaswag_zeroshot": 0.45389366149902344,
            "jeopardy": 0.1673962265253067,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.603365957736969,
            "arc_easy": 0.5513467788696289,
            "arc_challenge": 0.2670648396015167,
            "bigbench_misconceptions": 0.4931506812572479,
            "copa": 0.6499999761581421,
            "siqa": 0.5,
            "commonsense_qa": 0.20147420465946198,
            "piqa": 0.6844395995140076,
            "openbook_qa": 0.33000001311302185,
            "bigbench_novel_concepts": 0.40625,
            "bigbench_strange_stories": 0.5,
            "bigbench_strategy_qa": 0.5294888615608215,
            "lambada_openai": 0.520667552947998,
            "hellaswag": 0.4616610109806061,
            "winograd": 0.721611738204956,
            "winogrande": 0.5564325451850891,
            "bigbench_conlang_translation": 0.018292682245373726,
            "bigbench_language_identification": 0.2558000087738037,
            "bigbench_conceptual_combinations": 0.28155338764190674,
            "bigbench_elementary_math_qa": 0.236897274851799,
            "bigbench_dyck_languages": 0.2720000147819519,
            "agi_eval_lsat_ar": 0.2130434811115265,
            "bigbench_cs_algorithms": 0.4522727131843567,
            "bigbench_logical_deduction": 0.23666666448116302,
            "bigbench_operators": 0.15714286267757416,
            "bigbench_repeat_copy_logic": 0.03125,
            "simple_arithmetic_nospaces": 0.0020000000949949026,
            "simple_arithmetic_withspaces": 0.0010000000474974513,
            "math_qa": 0.25879988074302673,
            "logi_qa": 0.24731183052062988,
            "pubmed_qa_labeled": 0.5120000243186951,
            "squad": 0.3866603672504425,
            "agi_eval_lsat_rc": 0.2723880708217621,
            "agi_eval_lsat_lr": 0.2764706015586853,
            "coqa": 0.27909308671951294,
            "bigbench_understanding_fables": 0.25925925374031067,
            "boolq": 0.5048929452896118,
            "agi_eval_sat_en": 0.276699036359787,
            "winogender_mc_female": 0.4333333373069763,
            "winogender_mc_male": 0.44999998807907104,
            "enterprise_pii_classification": 0.49985271692276,
            "bbq": 0.43812011859633704
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.16601948191722235,
        "language understanding": 0.2123657783959061,
        "reading comprehension": 0.16257858152190843,
        "safety": -0.0893469195474278,
        "symbolic problem solving": 0.07374369291743885,
        "world knowledge": 0.14311821460077975
    },
    "aggregated_centered_results": 0.12198870915800791,
    "aggregated_results": 0.3201602508376131,
    "rw_small": 0.497271791100502,
    "95%_CI_above": 0.3793428838523952,
    "99%_CI_above": 0.3791024926900864,
    "model_uuid": "f17f8e15-c808-4630-862c-ffd777d49feb",
    "low_variance_datasets": 0.3964322538538413
}