{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "dbf0ddde-01b2-49c2-9495-189b48eecf9b",
    "model": "d=1024_l=24_h=8",
    "creation_date": "2024_01_29-14_19_52",
    "eval_metrics": {
        "perplexity": 2.884554445743561,
        "downstream_perpexity": {
            "mmlu": 1.8387937108675638,
            "hellaswag": 2.643053295214971,
            "jeopardy_all": 3.075947908560435,
            "triviaqa_sm_sub": 3.2035565028587976,
            "gsm8k": 2.3800598859786986,
            "agi_eval_sat_math": 2.0069741400805388,
            "aqua": 2.774405558255254,
            "svamp": 2.776384580930074,
            "bigbench_qa_wikidata": 4.946338228384653,
            "arc_easy": 3.12030375401179,
            "arc_challenge": 3.1218254208564757,
            "bigbench_misconceptions": 5.94424810583733,
            "copa": 2.9061436796188356,
            "siqa": 2.8222058335940043,
            "commonsense_qa": 2.3205229232708615,
            "piqa": 2.9270791908105216,
            "openbook_qa": 4.633480904102325,
            "bigbench_novel_concepts": 3.2892132997512817,
            "bigbench_strange_stories": 3.562059690212381,
            "bigbench_strategy_qa": 2.1639339486757914,
            "lambada_openai": 2.0468532825509707,
            "winograd_wsc": 2.82230981474831,
            "winogrande": 3.3073857565720877,
            "bigbench_conlang_translation": 2.397403922749729,
            "bigbench_language_identification": 5.265239338080089,
            "bigbench_conceptual_combinations": 1.239750195475458,
            "bigbench_elementary_math_qa": 4.358167739709218,
            "bigbench_dyck_languages": 4.466862136325917,
            "agi_eval_lsat_ar": 2.418682861328125,
            "bigbench_cs_algorithms": 6.079064464569091,
            "bigbench_logical_deduction": 1.2353878398736318,
            "bigbench_operators": 5.6770954518091115,
            "bigbench_repeat_copy_logic": 1.8032036162912846,
            "simple_arithmetic_nospaces": 7.427705463742008,
            "simple_arithmetic_withspaces": 7.054391323017473,
            "math_qa": 5.61312719186147,
            "logi_qa": 2.0416849960563,
            "pubmed_qa_labeled": 7.272143965508757,
            "squad": 2.265050760904948,
            "agi_eval_lsat_rc": 2.506141206221794,
            "agi_eval_lsat_lr": 2.349104431563733,
            "coqa": 3.110510571797689,
            "bigbench_understanding_fables": 5.913072411976163,
            "boolq": 3.362195384502411,
            "agi_eval_sat_en": 2.4753488070756484,
            "winogender_mc_female": 1.6769103229045867,
            "winogender_mc_male": 1.628559293349584,
            "enterprise_pii_classification": 3.4860226074854532,
            "bbq": 0.4090614346166452,
            "human_eval_return_complex": 3.6486867882135345,
            "human_eval_return_simple": 7.015135288238525,
            "human_eval-0.5": 3.5570712322142066,
            "human_eval-0.25": 3.6997632805894063,
            "human_eval-0.75": 3.5815430385310476,
            "human_eval": 3.918072908389859,
            "processed_human_eval_cpp": 3.4697089550658045,
            "processed_human_eval_js": 2.7039512730226285
        },
        "icl": {
            "mmlu_zeroshot": 0.23154661739081667,
            "mmlu": 0.2351023633229105,
            "hellaswag_zeroshot": 0.40350526571273804,
            "jeopardy": 0.03209998300299048,
            "triviaqa_sm_sub": 0.0006666666595265269,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.4952020049095154,
            "arc_easy": 0.47727271914482117,
            "arc_challenge": 0.2542662024497986,
            "bigbench_misconceptions": 0.44748857617378235,
            "copa": 0.6000000238418579,
            "siqa": 0.48157626390457153,
            "commonsense_qa": 0.29320228099823,
            "piqa": 0.6898803114891052,
            "openbook_qa": 0.3100000023841858,
            "bigbench_novel_concepts": 0.28125,
            "bigbench_strange_stories": 0.4712643623352051,
            "bigbench_strategy_qa": 0.5107033848762512,
            "lambada_openai": 0.3984086811542511,
            "hellaswag": 0.40151363611221313,
            "winograd": 0.6007326245307922,
            "winogrande": 0.530386745929718,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.251800000667572,
            "bigbench_conceptual_combinations": 0.3106796145439148,
            "bigbench_elementary_math_qa": 0.24855870008468628,
            "bigbench_dyck_languages": 0.12099999934434891,
            "agi_eval_lsat_ar": 0.22173912823200226,
            "bigbench_cs_algorithms": 0.42500001192092896,
            "bigbench_logical_deduction": 0.2633333206176758,
            "bigbench_operators": 0.14761905372142792,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.0020000000949949026,
            "simple_arithmetic_withspaces": 0.0020000000949949026,
            "math_qa": 0.2504190504550934,
            "logi_qa": 0.23502303659915924,
            "pubmed_qa_labeled": 0.4560000002384186,
            "squad": 0.14891201257705688,
            "agi_eval_lsat_rc": 0.2276119440793991,
            "agi_eval_lsat_lr": 0.24117647111415863,
            "coqa": 0.1573343425989151,
            "bigbench_understanding_fables": 0.25925925374031067,
            "boolq": 0.4871559739112854,
            "agi_eval_sat_en": 0.23786407709121704,
            "winogender_mc_female": 0.550000011920929,
            "winogender_mc_male": 0.38333332538604736,
            "enterprise_pii_classification": 0.49455082416534424,
            "bbq": 0.47254341027953406
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.1357647975285848,
        "language understanding": 0.14535507078592974,
        "reading comprehension": 0.08638841224213441,
        "safety": -0.04978621412407269,
        "symbolic problem solving": 0.06135660832126935,
        "world knowledge": 0.08589954291634079
    },
    "aggregated_centered_results": 0.08526405600209154,
    "aggregated_results": 0.28936711481766936,
    "rw_small": 0.45876448849836987,
    "95%_CI_above": 0.3318660424889602,
    "99%_CI_above": 0.32583127313479776,
    "model_uuid": "d819d0b6-ddf0-4342-956c-8c09dedbaed5",
    "low_variance_datasets": 0.33850140930153433
}