{
    "name": "/mnt/task_runtime/dcnlp/eval/heavy",
    "uuid": "73f645a5-be96-4102-a7b5-9afbf40ed36c",
    "model": "open_lm_1b_swiglutorch",
    "creation_date": "2024_06_03-18_41_52",
    "eval_metrics": {
        "icl": {
            "mmlu_zeroshot": 0.23875628960760018,
            "hellaswag_zeroshot": 0.5557658076286316,
            "jeopardy": 0.029043759685009718,
            "triviaqa_sm_sub": 0.11833333224058151,
            "gsm8k_cot": 0.005307050887495279,
            "agi_eval_sat_math_cot": 0.00909090880304575,
            "aqua_cot": 0.004081632476300001,
            "svamp_cot": 0.03999999910593033,
            "bigbench_qa_wikidata": 0.5646375417709351,
            "arc_easy": 0.5795454382896423,
            "arc_challenge": 0.2807167172431946,
            "mmlu_fewshot": 0.2574707094514579,
            "bigbench_misconceptions": 0.456620991230011,
            "copa": 0.6899999976158142,
            "siqa": 0.5148413777351379,
            "commonsense_qa": 0.2325962334871292,
            "piqa": 0.7328618168830872,
            "openbook_qa": 0.3540000021457672,
            "bigbench_novel_concepts": 0.5,
            "bigbench_strange_stories": 0.4597701132297516,
            "bigbench_strategy_qa": 0.48711228370666504,
            "lambada_openai": 0.47215214371681213,
            "hellaswag": 0.5596494674682617,
            "winograd": 0.7435897588729858,
            "winogrande": 0.5588003396987915,
            "bigbench_conlang_translation": 0.012195121496915817,
            "bigbench_language_identification": 0.25440001487731934,
            "bigbench_conceptual_combinations": 0.18446601927280426,
            "bigbench_elementary_math_qa": 0.2486635148525238,
            "bigbench_dyck_languages": 0.07599999755620956,
            "agi_eval_lsat_ar": 0.17826086282730103,
            "bigbench_cs_algorithms": 0.4166666567325592,
            "bigbench_logical_deduction": 0.2720000147819519,
            "bigbench_operators": 0.1190476194024086,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.0020000000949949026,
            "simple_arithmetic_withspaces": 0.003000000026077032,
            "math_qa": 0.24539054930210114,
            "logi_qa": 0.24731183052062988,
            "pubmed_qa_labeled": 0.5049999952316284,
            "squad": 0.2228003740310669,
            "agi_eval_lsat_rc": 0.24253731966018677,
            "agi_eval_lsat_lr": 0.30392158031463623,
            "coqa": 0.21558311581611633,
            "bigbench_understanding_fables": 0.2698412835597992,
            "boolq": 0.46727830171585083,
            "agi_eval_sat_en": 0.223300963640213,
            "winogender_mc_female": 0.5166666507720947,
            "winogender_mc_male": 0.4833333194255829,
            "enterprise_pii_classification": 0.523122251033783,
            "bbq": 0.45263090458783234,
            "gpqa_main": 0.2366071492433548,
            "gpqa_diamond": 0.2070707082748413
        }
    },
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.19581576724788513,
        "language understanding": 0.2502575642101434,
        "reading comprehension": 0.09703259916747348,
        "safety": -0.012123437090353545,
        "symbolic problem solving": 0.05496719053810351,
        "world knowledge": 0.1025479298970003
    },
    "aggregated_centered_results": 0.11596869535253125,
    "aggregated_results": 0.31271395909492117,
    "rw_small": 0.5220177670319875,
    "rw_small_centered": 0.16395645880559737,
    "95%_CI_above": 0.3910228298907471,
    "95%_CI_above_centered": 0.18946248779650726,
    "99%_CI_above": 0.39465731299360807,
    "99%_CI_above_centered": 0.22473572203917908,
    "low_variance_datasets": 0.37742708943022246,
    "low_variance_datasets_centered": 0.21806523189846327,
    "model_uuid": "818c88d3-3734-4096-b50c-40a1bd0b08c5",
    "_filename": "exp_data/evals/evaluation_c4_original-open_lm_1b_swiglutorch-warm=5000-lr=0p003-wd=0p0033-cd=3e-05-bs=256-mult=1-seed=124-tokens=28795904000_heavy.json",
    "missing tasks": "[]",
    "Core": 0.21806523189846327,
    "Extended": 0.11596869535253125
}