{
    "name": "/<scrub>/task_runtime/scrub/eval/heavy",
    "uuid": "37db576d-6f65-4b6a-b082-74c8d9e0d964",
    "model": "open_lm_1b",
    "creation_date": "2024_02_15-16_41_02",
    "eval_metrics": {
        "perplexity": 2.5776191393534345,
        "downstream_perpexity": {
            "mmlu": 1.6514112691084544,
            "hellaswag": 2.449658916890621,
            "jeopardy_all": 2.436510495096445,
            "triviaqa_sm_sub": 2.5536425379415353,
            "gsm8k": 2.0023816054066024,
            "agi_eval_sat_math": 1.8326589134606448,
            "aqua": 2.504969370608427,
            "svamp": 2.6507093048095705,
            "bigbench_qa_wikidata": 3.926298152903716,
            "arc_easy": 2.830902016411225,
            "arc_challenge": 2.8784812425573665,
            "bigbench_misconceptions": 5.717645828038046,
            "copa": 2.691913571357727,
            "siqa": 1.207287368675073,
            "commonsense_qa": 1.755708883702755,
            "piqa": 2.699453677733739,
            "openbook_qa": 4.387310722351074,
            "bigbench_novel_concepts": 2.861906796693802,
            "bigbench_strange_stories": 3.489493582440519,
            "bigbench_strategy_qa": 1.8942115634679795,
            "lambada_openai": 1.6582851272270394,
            "winograd_wsc": 2.6733247356100396,
            "winogrande": 3.169294589261214,
            "bigbench_conlang_translation": 2.117223932975676,
            "bigbench_language_identification": 5.815311791499456,
            "bigbench_conceptual_combinations": 1.0568427871731878,
            "bigbench_elementary_math_qa": 4.105769977966944,
            "bigbench_dyck_languages": 4.182462952534357,
            "agi_eval_lsat_ar": 1.818859124701956,
            "bigbench_cs_algorithms": 6.220571199059487,
            "bigbench_logical_deduction": 1.2158734291791915,
            "bigbench_operators": 5.366779504503523,
            "bigbench_repeat_copy_logic": 1.6755919829010963,
            "simple_arithmetic_nospaces": 6.933127081394195,
            "simple_arithmetic_withspaces": 6.618668407201767,
            "math_qa": 6.187715671459833,
            "logi_qa": 1.7659351345947079,
            "pubmed_qa_labeled": 6.696092424790065,
            "squad": 2.321437035997709,
            "agi_eval_lsat_rc": 1.6780933324080796,
            "agi_eval_lsat_lr": 1.5975333024473752,
            "coqa": 3.15378225594759,
            "bigbench_understanding_fables": 5.390544699613379,
            "boolq": 3.950864921013514,
            "agi_eval_sat_en": 1.6860671506344693,
            "winogender_mc_female": 1.2847780813773473,
            "winogender_mc_male": 1.2128370185693105,
            "enterprise_pii_classification": 3.5647235145171483,
            "bbq": 0.262440528604202,
            "human_eval_return_complex": 2.7319404590786913,
            "human_eval_return_simple": 4.9457553528450635,
            "human_eval-0.5": 2.6064284669189917,
            "human_eval-0.25": 2.7398332480977223,
            "human_eval-0.75": 2.6659744272871717,
            "human_eval": 2.8546273214061086,
            "processed_human_eval_cpp": 2.7721641122924616,
            "processed_human_eval_js": 2.3417154332486594
        },
        "icl": {
            "mmlu_zeroshot": 0.24947628378868103,
            "mmlu": 0.2551001723398242,
            "hellaswag_zeroshot": 0.5146385431289673,
            "jeopardy": 0.09729161635041236,
            "triviaqa_sm_sub": 0.00033333332976326346,
            "gsm8k": 0.0,
            "agi_eval_sat_math": 0.00909090880304575,
            "aqua": 0.0,
            "svamp": 0.0,
            "bigbench_qa_wikidata": 0.587126612663269,
            "arc_easy": 0.558080792427063,
            "arc_challenge": 0.27389079332351685,
            "bigbench_misconceptions": 0.4840182662010193,
            "copa": 0.6600000262260437,
            "siqa": 0.48106446862220764,
            "commonsense_qa": 0.3226863145828247,
            "piqa": 0.7219803929328918,
            "openbook_qa": 0.33000001311302185,
            "bigbench_novel_concepts": 0.34375,
            "bigbench_strange_stories": 0.522988498210907,
            "bigbench_strategy_qa": 0.47225862741470337,
            "lambada_openai": 0.49194642901420593,
            "hellaswag": 0.5148376822471619,
            "winograd": 0.6996337175369263,
            "winogrande": 0.5351223349571228,
            "bigbench_conlang_translation": 0.018292682245373726,
            "bigbench_language_identification": 0.25380000472068787,
            "bigbench_conceptual_combinations": 0.24271844327449799,
            "bigbench_elementary_math_qa": 0.24947589635849,
            "bigbench_dyck_languages": 0.210999995470047,
            "agi_eval_lsat_ar": 0.2347826063632965,
            "bigbench_cs_algorithms": 0.4598484933376312,
            "bigbench_logical_deduction": 0.25733333826065063,
            "bigbench_operators": 0.1666666716337204,
            "bigbench_repeat_copy_logic": 0.03125,
            "simple_arithmetic_nospaces": 0.0020000000949949026,
            "simple_arithmetic_withspaces": 0.0010000000474974513,
            "math_qa": 0.2507542669773102,
            "logi_qa": 0.3041474521160126,
            "pubmed_qa_labeled": 0.3449999988079071,
            "squad": 0.3317880928516388,
            "agi_eval_lsat_rc": 0.23507462441921234,
            "agi_eval_lsat_lr": 0.272549033164978,
            "coqa": 0.2453964650630951,
            "bigbench_understanding_fables": 0.22751322388648987,
            "boolq": 0.5498470664024353,
            "agi_eval_sat_en": 0.1893203854560852,
            "winogender_mc_female": 0.5,
            "winogender_mc_male": 0.4833333194255829,
            "enterprise_pii_classification": 0.5072165131568909,
            "bbq": 0.5044517733834006
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.173145721356074,
        "language understanding": 0.2101346809261789,
        "reading comprehension": 0.11514438067873319,
        "safety": -0.002499197017062804,
        "symbolic problem solving": 0.07986132982445675,
        "world knowledge": 0.13768985215636625
    },
    "aggregated_centered_results": 0.12307529882818466,
    "aggregated_results": 0.3176446308653237,
    "rw_small": 0.5152632594108582,
    "95%_CI_above": 0.37524524892595684,
    "99%_CI_above": 0.37560124686360363,
    "model_uuid": "adde67a9-ce24-40e1-b5bd-f51895bf663c",
    "low_variance_datasets": 0.39961884837936273
}