{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "2a6f7dcb-3f50-4b41-b353-2c82f5970e4a",
    "model": "d=1024_l=24_h=8",
    "creation_date": "2024_02_12-10_12_49",
    "eval_metrics": {
        "perplexity": 2.728167486190796,
        "downstream_perpexity": {
            "mmlu": 1.739460566583445,
            "hellaswag": 2.4700227510024533,
            "jeopardy_all": 2.4784568465426107,
            "triviaqa_sm_sub": 2.6883117583592733,
            "gsm8k": 2.01113117509398,
            "agi_eval_sat_math": 1.8143093125386671,
            "aqua": 2.5404684227340075,
            "svamp": 2.7247683453559874,
            "bigbench_qa_wikidata": 3.947240899417656,
            "arc_easy": 2.8034884847374477,
            "arc_challenge": 2.8841681490579156,
            "bigbench_misconceptions": 5.488823106844131,
            "copa": 2.7221675157546996,
            "siqa": 1.436187315621664,
            "commonsense_qa": 1.7208475047305996,
            "piqa": 2.752145148983977,
            "openbook_qa": 4.380518337726593,
            "bigbench_novel_concepts": 2.992038995027542,
            "bigbench_strange_stories": 3.4482079716934555,
            "bigbench_strategy_qa": 1.9988185774556098,
            "lambada_openai": 1.7634030223852821,
            "winograd_wsc": 2.668193824125297,
            "winogrande": 3.184520798211222,
            "bigbench_conlang_translation": 2.172178144135126,
            "bigbench_language_identification": 5.662341540993441,
            "bigbench_conceptual_combinations": 1.2277732532001235,
            "bigbench_elementary_math_qa": 4.388098141107419,
            "bigbench_dyck_languages": 4.548924263954163,
            "agi_eval_lsat_ar": 1.8930417454761008,
            "bigbench_cs_algorithms": 6.800135007771579,
            "bigbench_logical_deduction": 1.3700507748126984,
            "bigbench_operators": 5.478259456725348,
            "bigbench_repeat_copy_logic": 1.5403209887444973,
            "simple_arithmetic_nospaces": 7.0261404728889465,
            "simple_arithmetic_withspaces": 6.858932691574097,
            "math_qa": 5.653635834992191,
            "logi_qa": 1.9837888290622083,
            "pubmed_qa_labeled": 6.839900532722473,
            "squad": 2.061460359173218,
            "agi_eval_lsat_rc": 2.111350136016732,
            "agi_eval_lsat_lr": 1.9733516630004435,
            "coqa": 2.9958075586751667,
            "bigbench_understanding_fables": 5.892956857328062,
            "boolq": 3.5504740989171766,
            "agi_eval_sat_en": 2.2496584075168498,
            "winogender_mc_female": 1.2074019561211269,
            "winogender_mc_male": 1.076327101389567,
            "enterprise_pii_classification": 3.9706249930194972,
            "bbq": 0.36859428036173586,
            "human_eval_return_complex": 3.900907013359971,
            "human_eval_return_simple": 7.8771902806050065,
            "human_eval-0.5": 3.886809917484842,
            "human_eval-0.25": 4.007267334112307,
            "human_eval-0.75": 3.8882113273550822,
            "human_eval": 4.189544273585808,
            "processed_human_eval_cpp": 3.7817100412356925,
            "processed_human_eval_js": 2.8096039513262308
        },
        "icl": {
            "mmlu_zeroshot": 0.23578092926426938,
            "mmlu": 0.23872634533204531,
            "hellaswag_zeroshot": 0.49960166215896606,
            "jeopardy": 0.12090936079621314,
            "triviaqa_sm_sub": 0.0006666666595265269,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.00909090880304575,
            "aqua": 0.004081632476300001,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.566310703754425,
            "arc_easy": 0.5420875549316406,
            "arc_challenge": 0.2696245610713959,
            "bigbench_misconceptions": 0.5022830963134766,
            "copa": 0.6700000166893005,
            "siqa": 0.48106446862220764,
            "commonsense_qa": 0.30057328939437866,
            "piqa": 0.7078346014022827,
            "openbook_qa": 0.3059999942779541,
            "bigbench_novel_concepts": 0.34375,
            "bigbench_strange_stories": 0.5114942789077759,
            "bigbench_strategy_qa": 0.5163826942443848,
            "lambada_openai": 0.47545120120048523,
            "hellaswag": 0.4995020925998688,
            "winograd": 0.6959707140922546,
            "winogrande": 0.5248618721961975,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.2540999948978424,
            "bigbench_conceptual_combinations": 0.33980581164360046,
            "bigbench_elementary_math_qa": 0.25408804416656494,
            "bigbench_dyck_languages": 0.2160000056028366,
            "agi_eval_lsat_ar": 0.16521738469600677,
            "bigbench_cs_algorithms": 0.44999998807907104,
            "bigbench_logical_deduction": 0.25733333826065063,
            "bigbench_operators": 0.18095238506793976,
            "bigbench_repeat_copy_logic": 0.03125,
            "simple_arithmetic_nospaces": 0.0020000000949949026,
            "simple_arithmetic_withspaces": 0.0020000000949949026,
            "math_qa": 0.2557827830314636,
            "logi_qa": 0.21812595427036285,
            "pubmed_qa_labeled": 0.24300000071525574,
            "squad": 0.30245980620384216,
            "agi_eval_lsat_rc": 0.2798507511615753,
            "agi_eval_lsat_lr": 0.2705882489681244,
            "coqa": 0.22585494816303253,
            "bigbench_understanding_fables": 0.24867725372314453,
            "boolq": 0.5706421732902527,
            "agi_eval_sat_en": 0.24271844327449799,
            "winogender_mc_female": 0.5166666507720947,
            "winogender_mc_male": 0.4000000059604645,
            "enterprise_pii_classification": 0.50044184923172,
            "bbq": 0.479542764750394
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.1713312268257141,
        "language understanding": 0.21499886379266778,
        "reading comprehension": 0.12104733722905318,
        "safety": -0.05167436464266342,
        "symbolic problem solving": 0.06940216430359417,
        "world knowledge": 0.13425984724619827
    },
    "aggregated_centered_results": 0.11700849424965344,
    "aggregated_results": 0.3125753402510988,
    "rw_small": 0.5128029733896255,
    "95%_CI_above": 0.36540511301746875,
    "99%_CI_above": 0.36258184662461285,
    "model_uuid": "f2134fff-2fae-4804-a939-7bb77acaf54b",
    "low_variance_datasets": 0.38978201411664487
}