{
    "name": "/opt/ml/code/eval/heavy",
    "uuid": "13ef667b-2691-423e-b6a0-7ac0a17c0d6a",
    "model": "open_lm_7b_swiglutorch",
    "creation_date": "2024_06_11-17_08_00",
    "eval_metrics": {
        "icl": {
            "mmlu_zeroshot": 0.39392331007279846,
            "hellaswag_zeroshot": 0.6659032106399536,
            "jeopardy": 0.008282836573198438,
            "triviaqa_sm_sub": 0.0006666666595265269,
            "gsm8k_cot": 0.10538286715745926,
            "agi_eval_sat_math_cot": 0.04545454680919647,
            "aqua_cot": 0.016326529905200005,
            "svamp_cot": 0.12999999523162842,
            "bigbench_qa_wikidata": 0.5624231100082397,
            "arc_easy": 0.5997474789619446,
            "arc_challenge": 0.37457337975502014,
            "mmlu_fewshot": 0.4283623996010998,
            "bigbench_misconceptions": 0.5662100315093994,
            "copa": 0.800000011920929,
            "siqa": 0.7594677805900574,
            "commonsense_qa": 0.7231777310371399,
            "piqa": 0.7268770337104797,
            "openbook_qa": 0.4020000100135803,
            "bigbench_novel_concepts": 0.53125,
            "bigbench_strange_stories": 0.5804597735404968,
            "bigbench_strategy_qa": 0.728265643119812,
            "lambada_openai": 0.4713759124279022,
            "hellaswag": 0.6714797616004944,
            "winograd": 0.6739926934242249,
            "winogrande": 0.591949462890625,
            "bigbench_conlang_translation": 0.0,
            "bigbench_language_identification": 0.28299999237060547,
            "bigbench_conceptual_combinations": 0.41747573018074036,
            "bigbench_elementary_math_qa": 0.25353774428367615,
            "bigbench_dyck_languages": 0.023000000044703484,
            "agi_eval_lsat_ar": 0.239130437374115,
            "bigbench_cs_algorithms": 0.34545454382896423,
            "bigbench_logical_deduction": 0.2693333327770233,
            "bigbench_operators": 0.15238095819950104,
            "bigbench_repeat_copy_logic": 0.0,
            "simple_arithmetic_nospaces": 0.023000000044703484,
            "simple_arithmetic_withspaces": 0.03799999877810478,
            "math_qa": 0.26483404636383057,
            "logi_qa": 0.302611380815506,
            "pubmed_qa_labeled": 0.0020000000949949026,
            "squad": 0.00018921475566457957,
            "agi_eval_lsat_rc": 0.41417911648750305,
            "agi_eval_lsat_lr": 0.49215686321258545,
            "coqa": 0.041463110595941544,
            "bigbench_understanding_fables": 0.3968254029750824,
            "boolq": 0.7440366744995117,
            "agi_eval_sat_en": 0.41747573018074036,
            "winogender_mc_female": 0.6000000238418579,
            "winogender_mc_male": 0.5333333611488342,
            "enterprise_pii_classification": 0.49690720438957214,
            "bbq": 0.5286642990329049,
            "gpqa_main": 0.234375,
            "gpqa_diamond": 0.20202019810676575
        }
    },
    "missing tasks": "[]",
    "aggregated_task_categories_centered": {
        "commonsense reasoning": 0.4476610310366382,
        "language understanding": 0.3192866177506114,
        "reading comprehension": 0.1840396660849237,
        "safety": 0.0794524442065846,
        "symbolic problem solving": 0.08048113436017133,
        "world knowledge": 0.16811283649232683
    },
    "aggregated_centered_results": 0.2040387486062182,
    "aggregated_results": 0.36364031210460074,
    "rw_small": 0.6293683697779974,
    "rw_small_centered": 0.38287944612447283,
    "95%_CI_above": 0.4223075653534165,
    "95%_CI_above_centered": 0.26295791480744857,
    "99%_CI_above": 0.40505846855420435,
    "99%_CI_above_centered": 0.268127179666261,
    "low_variance_datasets": 0.41365625293785174,
    "low_variance_datasets_centered": 0.29010599259858644,
    "model_uuid": "2a671f95-5722-4c6a-89ae-d175f28fe818",
    "_filename": "exp_data/evals/evaluation_jsc_mix_sftv3_20percent_open_lm_7b_swiglutorch_heavy.json",
    "Core": 0.29010599259858644,
    "Extended": 0.2040387486062182
}