{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "ff251039-e9d5-4525-a664-a78c47d187da",
    "model": "open_lm_7b",
    "creation_date": "2024_03_04-12_45_54",
    "eval_metrics": {
        "icl": {
            "mmlu_zeroshot": 0.25800055422280965,
            "mmlu": 0.25836840546444845,
            "hellaswag_zeroshot": 0.6701852083206177,
            "jeopardy": 0.023775391280651093,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.004545454401522875,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.6333841681480408,
            "arc_easy": 0.6485690474510193,
            "arc_challenge": 0.3361774682998657,
            "bigbench_misconceptions": 0.4931506812572479,
            "copa": 0.75,
            "siqa": 0.48720574378967285,
            "commonsense_qa": 0.3013922870159149,
            "piqa": 0.7780196070671082,
            "openbook_qa": 0.37400001287460327,
            "bigbench_novel_concepts": 0.53125,
            "bigbench_strange_stories": 0.522988498210907,
            "bigbench_strategy_qa": 0.4971603453159332,
            "lambada_openai": 0.5812148451805115,
            "hellaswag": 0.679745078086853,
            "winograd": 0.7838827967643738,
            "winogrande": 0.5998421311378479,
            "bigbench_conlang_translation": 0.030487805604934692,
            "bigbench_language_identification": 0.2524000108242035,
            "bigbench_conceptual_combinations": 0.3203883469104767,
            "bigbench_elementary_math_qa": 0.25031447410583496,
            "bigbench_dyck_languages": 0.13099999725818634,
            "agi_eval_lsat_ar": 0.27826085686683655,
            "bigbench_cs_algorithms": 0.4439393877983093,
            "bigbench_logical_deduction": 0.24199999868869781,
            "bigbench_operators": 0.20476190745830536,
            "bigbench_repeat_copy_logic": 0.09375,
            "simple_arithmetic_nospaces": 0.004000000189989805,
            "simple_arithmetic_withspaces": 0.004999999888241291,
            "math_qa": 0.23466308414936066,
            "logi_qa": 0.28264209628105164,
            "pubmed_qa_labeled": 0.527999997138977,
            "squad": 0.2691580057144165,
            "agi_eval_lsat_rc": 0.20522387325763702,
            "agi_eval_lsat_lr": 0.25294119119644165,
            "coqa": 0.2814731299877167,
            "bigbench_understanding_fables": 0.2698412835597992,
            "boolq": 0.631192684173584,
            "agi_eval_sat_en": 0.1990291327238083,
            "winogender_mc_female": 0.5,
            "winogender_mc_male": 0.5,
            "enterprise_pii_classification": 0.4904271066188812,
            "bbq": 0.4607287645339966
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.20995059857765835,
        "language understanding": 0.32618050401409465,
        "reading comprehension": 0.15546630943814913,
        "safety": -0.024422064423561096,
        "symbolic problem solving": 0.06250027291890647,
        "world knowledge": 0.16399352781547605
    },
    "aggregated_centered_results": 0.15067732068337994,
    "aggregated_results": 0.34460419985390983,
    "rw_small": 0.6059944232304891,
    "rw_small_centered": 0.36144087711970013,
    "95%_CI_above": 0.42014402225613595,
    "95%_CI_above_centered": 0.2578814745777183,
    "99%_CI_above": 0.4204860445857048,
    "99%_CI_above_centered": 0.2923316837350527,
    "low_variance_datasets": 0.4430056373504075,
    "low_variance_datasets_centered": 0.3092108135196296,
    "model_uuid": "1877e30d-c4c9-4756-a4e3-9d9620a97578"
}