{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "1f62183f-7d91-49c7-aee0-f770ce63a037",
    "model": "d=512_l=8_h=4",
    "creation_date": "2024_02_12-17_30_28",
    "eval_metrics": {
        "perplexity": 4.394369860490163,
        "downstream_perpexity": {
            "mmlu": 4.657142970959345,
            "hellaswag": 3.3478553632895154,
            "jeopardy_all": 5.0473936577637994,
            "triviaqa_sm_sub": 5.332881923516592,
            "gsm8k": 3.6540957351525623,
            "agi_eval_sat_math": 4.039288472045552,
            "aqua": 4.062540632364701,
            "svamp": 4.733891854286194,
            "bigbench_qa_wikidata": 6.855090037981669,
            "arc_easy": 4.469436065355937,
            "arc_challenge": 4.276058987776438,
            "bigbench_misconceptions": 5.696774983514934,
            "copa": 3.7182566261291505,
            "siqa": 5.161028548081716,
            "commonsense_qa": 5.171488813559214,
            "piqa": 3.6526123344898225,
            "openbook_qa": 5.526868267536163,
            "bigbench_novel_concepts": 4.284689337015152,
            "bigbench_strange_stories": 4.904594666656407,
            "bigbench_strategy_qa": 2.979192731777827,
            "lambada_openai": 3.8791241317987444,
            "winograd_wsc": 3.4413996934890747,
            "winogrande": 3.814916177590688,
            "bigbench_conlang_translation": 3.242558585434425,
            "bigbench_language_identification": 4.249521110455195,
            "bigbench_conceptual_combinations": 1.242166226349988,
            "bigbench_elementary_math_qa": 5.714663245280584,
            "bigbench_dyck_languages": 7.046217568281318,
            "agi_eval_lsat_ar": 4.919560055110765,
            "bigbench_cs_algorithms": 5.8395945231119795,
            "bigbench_logical_deduction": 0.9730624581376711,
            "bigbench_operators": 6.130074455624535,
            "bigbench_repeat_copy_logic": 2.821780152618885,
            "simple_arithmetic_nospaces": 7.1947176020686365,
            "simple_arithmetic_withspaces": 6.815603660936115,
            "math_qa": 4.509833127260208,
            "logi_qa": 4.572806346068551,
            "pubmed_qa_labeled": 8.969945177310654,
            "squad": 3.7513616025447845,
            "agi_eval_lsat_rc": 5.439938510531809,
            "agi_eval_lsat_lr": 5.367233012704288,
            "coqa": 4.217239737510681,
            "bigbench_understanding_fables": 5.279693785167876,
            "boolq": 5.844010047117869,
            "agi_eval_sat_en": 5.468166138361958,
            "winogender_mc_female": 3.445747399330139,
            "winogender_mc_male": 2.8986791094144184,
            "enterprise_pii_classification": 5.062188160419464,
            "bbq": 0.8243114861349264,
            "human_eval_return_complex": 4.651244632841095,
            "human_eval_return_simple": 7.488372609422013,
            "human_eval-0.5": 4.600611420666299,
            "human_eval-0.25": 4.717989712226681,
            "human_eval-0.75": 4.732852347013427,
            "human_eval": 4.794243090036439,
            "processed_human_eval_cpp": 5.045785279007432,
            "processed_human_eval_js": 5.1512462816587306
        },
        "icl": {
            "mmlu_zeroshot": 0.2322531057554379,
            "mmlu": 0.2411106296798639,
            "hellaswag_zeroshot": 0.2701652944087982,
            "jeopardy": 0.0,
            "triviaqa_sm_sub": 0.0010000000474974513,
            "gsm8k": 0.0015163002535700798,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.023333333432674408,
            "bigbench_qa_wikidata": 0.10821317881345749,
            "arc_easy": 0.31860268115997314,
            "arc_challenge": 0.22440272569656372,
            "bigbench_misconceptions": 0.465753436088562,
            "copa": 0.550000011920929,
            "siqa": 0.48822927474975586,
            "commonsense_qa": 0.28337427973747253,
            "piqa": 0.600108802318573,
            "openbook_qa": 0.26600000262260437,
            "bigbench_novel_concepts": 0.375,
            "bigbench_strange_stories": 0.4367816150188446,
            "bigbench_strategy_qa": 0.49060726165771484,
            "lambada_openai": 0.16844557225704193,
            "hellaswag": 0.2702648937702179,
            "winograd": 0.5457875728607178,
            "winogrande": 0.503551721572876,
            "bigbench_conlang_translation": 0.0,
            "bigbench_language_identification": 0.2538999915122986,
            "bigbench_conceptual_combinations": 0.291262149810791,
            "bigbench_elementary_math_qa": 0.26475366950035095,
            "bigbench_dyck_languages": 0.01899999938905239,
            "agi_eval_lsat_ar": 0.19565217196941376,
            "bigbench_cs_algorithms": 0.3954545557498932,
            "bigbench_logical_deduction": 0.23733332753181458,
            "bigbench_operators": 0.07619047909975052,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.004000000189989805,
            "simple_arithmetic_withspaces": 0.0010000000474974513,
            "math_qa": 0.24505531787872314,
            "logi_qa": 0.2058371752500534,
            "pubmed_qa_labeled": 0.48399999737739563,
            "squad": 0.0012298959773033857,
            "agi_eval_lsat_rc": 0.28731343150138855,
            "agi_eval_lsat_lr": 0.27450981736183167,
            "coqa": 0.03043968416750431,
            "bigbench_understanding_fables": 0.29100528359413147,
            "boolq": 0.5886850357055664,
            "agi_eval_sat_en": 0.29611650109291077,
            "winogender_mc_female": 0.4333333373069763,
            "winogender_mc_male": 0.5166666507720947,
            "enterprise_pii_classification": 0.46539026498794556,
            "bbq": 0.433319782668894
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.10174412031968434,
        "language understanding": 0.04765590839087963,
        "reading comprehension": 0.11145412837504409,
        "safety": -0.07564498213204471,
        "symbolic problem solving": 0.04386655209430804,
        "world knowledge": 0.007818196761732982
    },
    "aggregated_centered_results": 0.049113670267944505,
    "aggregated_results": 0.2580489346797694,
    "rw_small": 0.4092345784107844,
    "95%_CI_above": 0.2728611608212249,
    "99%_CI_above": 0.2555923382099718,
    "model_uuid": "3beb1f69-eb13-43e5-a648-5b3dd3de9b95",
    "low_variance_datasets": 0.25770311594136397
}