{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "74847e28-05eb-4a79-ac02-fcc2947f92d2",
    "model": "d=576_l=24_h=8",
    "creation_date": "2024_02_12-21_33_38",
    "eval_metrics": {
        "perplexity": 3.77912033200264,
        "downstream_perpexity": {
            "mmlu": 4.998983942618517,
            "hellaswag": 2.721985175264853,
            "jeopardy_all": 3.9189732546689773,
            "triviaqa_sm_sub": 3.9628554393425586,
            "gsm8k": 2.8084091894368135,
            "agi_eval_sat_math": 3.5292777386578646,
            "aqua": 3.4590645449502126,
            "svamp": 3.6941203045845032,
            "bigbench_qa_wikidata": 5.210217887601,
            "arc_easy": 4.0448998180954945,
            "arc_challenge": 3.7612945432345617,
            "bigbench_misconceptions": 5.9953347689484895,
            "copa": 3.083532083034515,
            "siqa": 4.601021139258128,
            "commonsense_qa": 5.085657109597196,
            "piqa": 3.0570559912848654,
            "openbook_qa": 5.080684768676758,
            "bigbench_novel_concepts": 3.6698324531316757,
            "bigbench_strange_stories": 4.45666715125928,
            "bigbench_strategy_qa": 2.0389302632026456,
            "lambada_openai": 2.4933840872022883,
            "winograd_wsc": 3.0275900267856026,
            "winogrande": 3.4356907682336026,
            "bigbench_conlang_translation": 2.6989503168478244,
            "bigbench_language_identification": 4.050933188213112,
            "bigbench_conceptual_combinations": 0.7314192924684692,
            "bigbench_elementary_math_qa": 4.152761794229973,
            "bigbench_dyck_languages": 6.673913223743439,
            "agi_eval_lsat_ar": 5.318010558252749,
            "bigbench_cs_algorithms": 5.504673389955,
            "bigbench_logical_deduction": 0.8018626635471979,
            "bigbench_operators": 5.605253037384578,
            "bigbench_repeat_copy_logic": 2.093510676175356,
            "simple_arithmetic_nospaces": 7.418699663639068,
            "simple_arithmetic_withspaces": 6.870314115524292,
            "math_qa": 4.578553146857205,
            "logi_qa": 4.806999325935376,
            "pubmed_qa_labeled": 5.101125185489654,
            "squad": 2.9319999608095797,
            "agi_eval_lsat_rc": 5.264786786107875,
            "agi_eval_lsat_lr": 5.28834082846548,
            "coqa": 3.021215408006348,
            "bigbench_understanding_fables": 4.341639410881769,
            "boolq": 3.4057449869424197,
            "agi_eval_sat_en": 5.194194337696705,
            "winogender_mc_female": 1.7042824357748032,
            "winogender_mc_male": 1.5095791220664978,
            "enterprise_pii_classification": 4.926563580081811,
            "bbq": 0.697315563997504,
            "human_eval_return_complex": 4.447932397286723,
            "human_eval_return_simple": 6.922256869238776,
            "human_eval-0.5": 4.269675549937458,
            "human_eval-0.25": 4.353119730949402,
            "human_eval-0.75": 4.543447096173356,
            "human_eval": 4.387052307768566,
            "processed_human_eval_cpp": 4.5560325510013175,
            "processed_human_eval_js": 4.580726808164178
        },
        "icl": {
            "mmlu_zeroshot": 0.23119556642415232,
            "mmlu": 0.2352484273805953,
            "hellaswag_zeroshot": 0.3726349472999573,
            "jeopardy": 0.0005361930467188359,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.005307050887495279,
            "agi_eval_sat_math": 0.00909090880304575,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.41508784890174866,
            "arc_easy": 0.4196127951145172,
            "arc_challenge": 0.22610922157764435,
            "bigbench_misconceptions": 0.4931506812572479,
            "copa": 0.6200000047683716,
            "siqa": 0.48106446862220764,
            "commonsense_qa": 0.23505324125289917,
            "piqa": 0.6800870299339294,
            "openbook_qa": 0.3160000145435333,
            "bigbench_novel_concepts": 0.375,
            "bigbench_strange_stories": 0.4482758641242981,
            "bigbench_strategy_qa": 0.5124508738517761,
            "lambada_openai": 0.3326217830181122,
            "hellaswag": 0.37422823905944824,
            "winograd": 0.5860806107521057,
            "winogrande": 0.5382794141769409,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.2563999891281128,
            "bigbench_conceptual_combinations": 0.2330097109079361,
            "bigbench_elementary_math_qa": 0.24848008155822754,
            "bigbench_dyck_languages": 0.09399999678134918,
            "agi_eval_lsat_ar": 0.1608695685863495,
            "bigbench_cs_algorithms": 0.37196969985961914,
            "bigbench_logical_deduction": 0.26866665482521057,
            "bigbench_operators": 0.16190476715564728,
            "bigbench_repeat_copy_logic": 0.03125,
            "simple_arithmetic_nospaces": 0.0,
            "simple_arithmetic_withspaces": 0.0010000000474974513,
            "math_qa": 0.24371437728405,
            "logi_qa": 0.20890937745571136,
            "pubmed_qa_labeled": 0.42500001192092896,
            "squad": 0.04342478886246681,
            "agi_eval_lsat_rc": 0.23507462441921234,
            "agi_eval_lsat_lr": 0.272549033164978,
            "coqa": 0.1168733537197113,
            "bigbench_understanding_fables": 0.2857142984867096,
            "boolq": 0.5379204750061035,
            "agi_eval_sat_en": 0.25242719054222107,
            "winogender_mc_female": 0.550000011920929,
            "winogender_mc_male": 0.46666666865348816,
            "enterprise_pii_classification": 0.5086892247200012,
            "bbq": 0.4921942786736922
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.1373117690285047,
        "language understanding": 0.11357093369588256,
        "reading comprehension": 0.09026991249993443,
        "safety": 0.008775091984055267,
        "symbolic problem solving": 0.05300573201901797,
        "world knowledge": 0.06897667730690903
    },
    "aggregated_centered_results": 0.08061217036218304,
    "aggregated_results": 0.2820853298686976,
    "rw_small": 0.45242614299058914,
    "95%_CI_above": 0.3131701507278238,
    "99%_CI_above": 0.3029416112378239,
    "model_uuid": "2a925d34-8650-497b-adcf-ea8439b0c51a",
    "low_variance_datasets": 0.3132247264793312
}