{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "45cd9729-f328-43cc-bbcb-d8fbaf5e7ebe",
    "model": "d=96_l=8_h=4",
    "creation_date": "2024_01_29-13_40_09",
    "eval_metrics": {
        "perplexity": 6.108402935663859,
        "downstream_perpexity": {
            "mmlu": 5.2394558946291605,
            "hellaswag": 4.91864474217097,
            "jeopardy_all": 7.240347862243652,
            "triviaqa_sm_sub": 6.834395758310953,
            "gsm8k": 5.6652191281318665,
            "agi_eval_sat_math": 6.1349276737733325,
            "aqua": 5.482354327610561,
            "svamp": 4.951870032946268,
            "bigbench_qa_wikidata": 9.427522166570027,
            "arc_easy": 6.21872710386912,
            "arc_challenge": 5.94592913389206,
            "bigbench_misconceptions": 6.019141543401431,
            "copa": 5.152074055671692,
            "siqa": 4.94459052880605,
            "commonsense_qa": 5.31091593503952,
            "piqa": 5.421374539534251,
            "openbook_qa": 7.005831627845764,
            "bigbench_novel_concepts": 5.500206083059311,
            "bigbench_strange_stories": 5.786444617413927,
            "bigbench_strategy_qa": 4.718573792775472,
            "lambada_openai": 8.061911209424336,
            "winograd_wsc": 4.5823400142865305,
            "winogrande": 4.911013672749202,
            "bigbench_conlang_translation": 6.562228679656982,
            "bigbench_language_identification": 4.3952924887339275,
            "bigbench_conceptual_combinations": 6.005166479684775,
            "bigbench_elementary_math_qa": 5.980989336967468,
            "bigbench_dyck_languages": 5.283968980572805,
            "agi_eval_lsat_ar": 5.433911997339,
            "bigbench_cs_algorithms": 10.460053141911825,
            "bigbench_logical_deduction": 4.445468815167745,
            "bigbench_operators": 6.336468464987618,
            "bigbench_repeat_copy_logic": 6.52769935131073,
            "simple_arithmetic_nospaces": 7.856193680222295,
            "simple_arithmetic_withspaces": 7.591787808081683,
            "math_qa": 4.997786724567414,
            "logi_qa": 4.78882077945176,
            "pubmed_qa_labeled": 7.404468649074811,
            "squad": 6.822764257589976,
            "agi_eval_lsat_rc": 5.830954203854746,
            "agi_eval_lsat_lr": 5.659293390722835,
            "coqa": 6.720981832345327,
            "bigbench_understanding_fables": 5.237108349169373,
            "boolq": 9.234119192759197,
            "agi_eval_sat_en": 5.492349981104286,
            "winogender_mc_female": 7.130841159820557,
            "winogender_mc_male": 6.805212338765462,
            "enterprise_pii_classification": 8.657877012093861,
            "bbq": 6.2569954752922055,
            "human_eval_return_complex": 7.646044573445958,
            "human_eval_return_simple": 10.814717576310441,
            "human_eval-0.5": 7.652650865112863,
            "human_eval-0.25": 7.671812906497863,
            "human_eval-0.75": 7.74566536414914,
            "human_eval": 7.690631671649654,
            "processed_human_eval_cpp": 7.268912955100492,
            "processed_human_eval_js": 6.7976734231158
        },
        "icl": {
            "mmlu_zeroshot": 0.23118784291702404,
            "mmlu": 0.2311782095777361,
            "hellaswag_zeroshot": 0.23760206997394562,
            "jeopardy": 0.0,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.0,
            "arc_easy": 0.26893940567970276,
            "arc_challenge": 0.20051194727420807,
            "bigbench_misconceptions": 0.4794520437717438,
            "copa": 0.5,
            "siqa": 0.48413512110710144,
            "commonsense_qa": 0.1957411915063858,
            "piqa": 0.554951012134552,
            "openbook_qa": 0.23800000548362732,
            "bigbench_novel_concepts": 0.28125,
            "bigbench_strange_stories": 0.517241358757019,
            "bigbench_strategy_qa": 0.47225862741470337,
            "lambada_openai": 0.000194061707588844,
            "hellaswag": 0.23859788477420807,
            "winograd": 0.5091575384140015,
            "winogrande": 0.5106551051139832,
            "bigbench_conlang_translation": 0.0,
            "bigbench_language_identification": 0.25040000677108765,
            "bigbench_conceptual_combinations": 0.3009708821773529,
            "bigbench_elementary_math_qa": 0.27025681734085083,
            "bigbench_dyck_languages": 0.0,
            "agi_eval_lsat_ar": 0.22608695924282074,
            "bigbench_cs_algorithms": 0.0,
            "bigbench_logical_deduction": 0.2606666684150696,
            "bigbench_operators": 0.08095238357782364,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.0,
            "simple_arithmetic_withspaces": 0.007000000216066837,
            "math_qa": 0.24840764701366425,
            "logi_qa": 0.20276497304439545,
            "pubmed_qa_labeled": 0.0,
            "squad": 0.0,
            "agi_eval_lsat_rc": 0.2761194109916687,
            "agi_eval_lsat_lr": 0.27843138575553894,
            "coqa": 0.0001252661895705387,
            "bigbench_understanding_fables": 0.25925925374031067,
            "boolq": 0.3782874643802643,
            "agi_eval_sat_en": 0.3009708821773529,
            "winogender_mc_female": 0.4833333194255829,
            "winogender_mc_male": 0.4833333194255829,
            "enterprise_pii_classification": 0.5063328146934509,
            "bbq": 0.44183329289609735
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.06381172935167949,
        "language understanding": 0.0095725592115438,
        "reading comprehension": -0.011282320187092408,
        "safety": -0.042583626779642975,
        "symbolic problem solving": 0.019647529234902723,
        "world knowledge": -0.016459088149398483
    },
    "aggregated_centered_results": 0.009598460259192709,
    "aggregated_results": 0.2237542149179092,
    "rw_small": 0.34933266540368396,
    "95%_CI_above": 0.21150709618928942,
    "99%_CI_above": 0.18584540440584532,
    "model_uuid": "2dcb8242-c72b-4e83-81c3-20e190ed2ee6",
    "low_variance_datasets": 0.19955465010108045
}