{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "ec5e47a1-352a-497d-a954-1543e34d53b3",
    "model": "d=576_l=24_h=8",
    "creation_date": "2024_01_29-14_50_27",
    "eval_metrics": {
        "perplexity": 3.4650756577650705,
        "downstream_perpexity": {
            "mmlu": 4.005756614605586,
            "hellaswag": 3.0350899000962577,
            "jeopardy_all": 4.4275213519732155,
            "triviaqa_sm_sub": 4.412837866942088,
            "gsm8k": 3.073345265785853,
            "agi_eval_sat_math": 2.476862037181854,
            "aqua": 3.3501335114848856,
            "svamp": 4.083970612684886,
            "bigbench_qa_wikidata": 5.188274113337199,
            "arc_easy": 3.95991979042689,
            "arc_challenge": 3.7892889082431793,
            "bigbench_misconceptions": 5.493010248767731,
            "copa": 3.3953230428695678,
            "siqa": 4.852400398254394,
            "commonsense_qa": 4.752521399656931,
            "piqa": 3.393225250641505,
            "openbook_qa": 5.176269697189331,
            "bigbench_novel_concepts": 3.654243156313896,
            "bigbench_strange_stories": 3.8010817683976272,
            "bigbench_strategy_qa": 2.3000625987847645,
            "lambada_openai": 2.90017895847559,
            "winograd_wsc": 3.1603762195223855,
            "winogrande": 3.586646513144175,
            "bigbench_conlang_translation": 2.905040544707601,
            "bigbench_language_identification": 5.1267158468564356,
            "bigbench_conceptual_combinations": 1.289161692545252,
            "bigbench_elementary_math_qa": 4.415826757748921,
            "bigbench_dyck_languages": 5.185134720401604,
            "agi_eval_lsat_ar": 5.103736618290776,
            "bigbench_cs_algorithms": 6.185780509312948,
            "bigbench_logical_deduction": 1.4065571318070094,
            "bigbench_operators": 6.300038707823981,
            "bigbench_repeat_copy_logic": 2.3439075648784637,
            "simple_arithmetic_nospaces": 7.449471417094479,
            "simple_arithmetic_withspaces": 7.301367003376744,
            "math_qa": 5.384839979807536,
            "logi_qa": 4.152729671122292,
            "pubmed_qa_labeled": 7.058313755928969,
            "squad": 2.6591730316480002,
            "agi_eval_lsat_rc": 5.389921191912978,
            "agi_eval_lsat_lr": 5.47406470822353,
            "coqa": 3.4111159404118854,
            "bigbench_understanding_fables": 5.948698603917682,
            "boolq": 4.098112404346466,
            "agi_eval_sat_en": 5.378102334957679,
            "winogender_mc_female": 2.547007695833842,
            "winogender_mc_male": 2.5264920314153034,
            "enterprise_pii_classification": 4.1283974329630535,
            "bbq": 0.6994527368495862,
            "human_eval_return_complex": 4.26515899868462,
            "human_eval_return_simple": 7.301866118972366,
            "human_eval-0.5": 4.158818412117842,
            "human_eval-0.25": 4.3037223568776755,
            "human_eval-0.75": 4.2080338800825725,
            "human_eval": 4.447127702759533,
            "processed_human_eval_cpp": 3.861395468623001,
            "processed_human_eval_js": 3.4019248630942367
        },
        "icl": {
            "mmlu_zeroshot": 0.23158116011243118,
            "mmlu": 0.24656527502494946,
            "hellaswag_zeroshot": 0.29326826333999634,
            "jeopardy": 0.001556647615507245,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.019999999552965164,
            "bigbench_qa_wikidata": 0.27838197350502014,
            "arc_easy": 0.3712121248245239,
            "arc_challenge": 0.22098976373672485,
            "bigbench_misconceptions": 0.5022830963134766,
            "copa": 0.5299999713897705,
            "siqa": 0.4994882345199585,
            "commonsense_qa": 0.28091728687286377,
            "piqa": 0.6300326585769653,
            "openbook_qa": 0.257999986410141,
            "bigbench_novel_concepts": 0.3125,
            "bigbench_strange_stories": 0.44252872467041016,
            "bigbench_strategy_qa": 0.4757536053657532,
            "lambada_openai": 0.26295360922813416,
            "hellaswag": 0.2934674322605133,
            "winograd": 0.5384615659713745,
            "winogrande": 0.49250197410583496,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.25029999017715454,
            "bigbench_conceptual_combinations": 0.20388349890708923,
            "bigbench_elementary_math_qa": 0.2624213695526123,
            "bigbench_dyck_languages": 0.10599999874830246,
            "agi_eval_lsat_ar": 0.24782608449459076,
            "bigbench_cs_algorithms": 0.41969695687294006,
            "bigbench_logical_deduction": 0.2606666684150696,
            "bigbench_operators": 0.095238097012043,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.0020000000949949026,
            "simple_arithmetic_withspaces": 0.003000000026077032,
            "math_qa": 0.2467314749956131,
            "logi_qa": 0.2534562349319458,
            "pubmed_qa_labeled": 0.5509999990463257,
            "squad": 0.010122989304363728,
            "agi_eval_lsat_rc": 0.24626865983009338,
            "agi_eval_lsat_lr": 0.2647058963775635,
            "coqa": 0.09532757103443146,
            "bigbench_understanding_fables": 0.28042328357696533,
            "boolq": 0.6143730878829956,
            "agi_eval_sat_en": 0.23786407709121704,
            "winogender_mc_female": 0.4833333194255829,
            "winogender_mc_male": 0.4000000059604645,
            "enterprise_pii_classification": 0.49484536051750183,
            "bbq": 0.45048991116610443
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.0951036810874939,
        "language understanding": 0.04895434047405919,
        "reading comprehension": 0.11552657803986222,
        "safety": -0.08566570146517319,
        "symbolic problem solving": 0.06195606144869493,
        "world knowledge": 0.04732948900109371
    },
    "aggregated_centered_results": 0.05964687891548635,
    "aggregated_results": 0.26822532937387367,
    "rw_small": 0.4203908642133077,
    "95%_CI_above": 0.29340855508915736,
    "99%_CI_above": 0.2842469360921532,
    "model_uuid": "fff6723e-b3cf-425b-a488-fdbacacc0773",
    "low_variance_datasets": 0.2859376378801905
}