{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "0b648a69-a221-4e52-a198-839965b0b145",
    "model": "d=96_l=8_h=4",
    "creation_date": "2024_02_13-12_04_53",
    "eval_metrics": {
        "perplexity": 6.9417924602826435,
        "downstream_perpexity": {
            "mmlu": 5.576490302880605,
            "hellaswag": 5.734813495477041,
            "jeopardy_all": 8.400089927514394,
            "triviaqa_sm_sub": 8.012013109525045,
            "gsm8k": 6.656258829434713,
            "agi_eval_sat_math": 7.121620869636535,
            "aqua": 6.291206200268804,
            "svamp": 5.549886655807495,
            "bigbench_qa_wikidata": 10.324530498186748,
            "arc_easy": 7.213214973608653,
            "arc_challenge": 6.790886636575063,
            "bigbench_misconceptions": 5.719815844270192,
            "copa": 5.917082467079163,
            "siqa": 5.49925176302592,
            "commonsense_qa": 5.5232216397921245,
            "piqa": 6.430879068374634,
            "openbook_qa": 7.78425589466095,
            "bigbench_novel_concepts": 5.873168587684631,
            "bigbench_strange_stories": 6.626254868233341,
            "bigbench_strategy_qa": 5.608730912208557,
            "lambada_openai": 9.336032541592916,
            "winograd_wsc": 5.122327724219242,
            "winogrande": 5.556533793608348,
            "bigbench_conlang_translation": 7.372234905638346,
            "bigbench_language_identification": 5.259955648581187,
            "bigbench_conceptual_combinations": 7.128276477739649,
            "bigbench_elementary_math_qa": 7.377357228597005,
            "bigbench_dyck_languages": 6.7397543677762775,
            "agi_eval_lsat_ar": 5.471677166482676,
            "bigbench_cs_algorithms": 10.953286623954773,
            "bigbench_logical_deduction": 5.323640461762746,
            "bigbench_operators": 6.775233457202003,
            "bigbench_repeat_copy_logic": 7.666566222906113,
            "simple_arithmetic_nospaces": 8.01427629965694,
            "simple_arithmetic_withspaces": 7.295394394077173,
            "math_qa": 5.071088616053263,
            "logi_qa": 5.605059005514634,
            "pubmed_qa_labeled": 8.05048034471624,
            "squad": 7.982619786262513,
            "agi_eval_lsat_rc": 5.664972728757716,
            "agi_eval_lsat_lr": 5.741074691099279,
            "coqa": 7.556356557210287,
            "bigbench_understanding_fables": 5.080087876193738,
            "boolq": 9.072283029556274,
            "agi_eval_sat_en": 5.524673309140993,
            "winogender_mc_female": 9.252328077952066,
            "winogender_mc_male": 8.902828025817872,
            "enterprise_pii_classification": 8.599853388468425,
            "bbq": 7.6579295039176944,
            "human_eval_return_complex": 8.516907173817552,
            "human_eval_return_simple": 11.610000326826766,
            "human_eval-0.5": 8.595383225417718,
            "human_eval-0.25": 8.568997987886755,
            "human_eval-0.75": 8.661248899087674,
            "human_eval": 8.63390423030388,
            "processed_human_eval_cpp": 8.550420595251996,
            "processed_human_eval_js": 8.083104340041556
        },
        "icl": {
            "mmlu_zeroshot": 0.23116857623844816,
            "mmlu": 0.23116857623844816,
            "hellaswag_zeroshot": 0.24586735665798187,
            "jeopardy": 0.0,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.013636363670229912,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.0,
            "arc_easy": 0.2638888955116272,
            "arc_challenge": 0.23293516039848328,
            "bigbench_misconceptions": 0.4931506812572479,
            "copa": 0.47999998927116394,
            "siqa": 0.48413512110710144,
            "commonsense_qa": 0.1957411915063858,
            "piqa": 0.5179542899131775,
            "openbook_qa": 0.25200000405311584,
            "bigbench_novel_concepts": 0.1875,
            "bigbench_strange_stories": 0.5287356376647949,
            "bigbench_strategy_qa": 0.5321100950241089,
            "lambada_openai": 0.0,
            "hellaswag": 0.24337781965732574,
            "winograd": 0.4908424913883209,
            "winogrande": 0.4964483082294464,
            "bigbench_conlang_translation": 0.0,
            "bigbench_language_identification": 0.2502000033855438,
            "bigbench_conceptual_combinations": 0.28155338764190674,
            "bigbench_elementary_math_qa": 0.27368971705436707,
            "bigbench_dyck_languages": 0.0,
            "agi_eval_lsat_ar": 0.22608695924282074,
            "bigbench_cs_algorithms": 0.0,
            "bigbench_logical_deduction": 0.257999986410141,
            "bigbench_operators": 0.0714285746216774,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.0,
            "simple_arithmetic_withspaces": 0.0,
            "math_qa": 0.24840764701366425,
            "logi_qa": 0.20276497304439545,
            "pubmed_qa_labeled": 0.0,
            "squad": 0.0,
            "agi_eval_lsat_rc": 0.2761194109916687,
            "agi_eval_lsat_lr": 0.27843138575553894,
            "coqa": 0.0,
            "bigbench_understanding_fables": 0.25925925374031067,
            "boolq": 0.39418959617614746,
            "agi_eval_sat_en": 0.3009708821773529,
            "winogender_mc_female": 0.5166666507720947,
            "winogender_mc_male": 0.46666666865348816,
            "enterprise_pii_classification": 0.45655375719070435,
            "bbq": 0.48712263053113763
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.05660731717944145,
        "language understanding": 0.0003224611282348633,
        "reading comprehension": -0.00732244551181793,
        "safety": -0.03649514642628757,
        "symbolic problem solving": 0.01922011081543234,
        "world knowledge": -0.00847712828829981
    },
    "aggregated_centered_results": 0.00924240612915301,
    "aggregated_results": 0.22292363481412023,
    "rw_small": 0.3522215510408084,
    "95%_CI_above": 0.20861218266866424,
    "99%_CI_above": 0.18269257575273515,
    "model_uuid": "11303891-2d3e-4442-b970-293c5c16c4ba",
    "low_variance_datasets": 0.19822548363696446
}