{
    "circa": {
        "max_acc": 0.8888240443536621,
        "min_acc": 0.08491391887948643,
        "extrapolation_auc": 0.6957127460420438,
        "human_eval": 0.923
    },
    "boolean_expressions": {
        "max_acc": 0.998,
        "min_acc": 0.633,
        "extrapolation_auc": 0.48378542881094205,
        "human_eval": 1.0
    },
    "mnli": {
        "max_acc": 0.8452,
        "min_acc": 0.712,
        "extrapolation_auc": 0.25969552747052965,
        "human_eval": 0.92
    },
    "mrpc": {
        "max_acc": 0.8701449275362318,
        "min_acc": 0.7710144927536232,
        "extrapolation_auc": 0.23371617846576287,
        "human_eval": 0.8701449275362318
    },
    "qqp": {
        "max_acc": 0.8518,
        "min_acc": 0.796,
        "extrapolation_auc": 0.2608561623992306,
        "human_eval": 0.8518
    },
    "reasoning_about_colored_objects": {
        "max_acc": 0.9175,
        "min_acc": 0.07875,
        "extrapolation_auc": 0.4050847493116487,
        "human_eval": 1.0
    },
    "hyperbaton": {
        "max_acc": 0.9996,
        "min_acc": 0.1628,
        "extrapolation_auc": 0.6668830536865006,
        "human_eval": 1.0
    },
    "anli": {
        "max_acc": 0.658,
        "min_acc": 0.405,
        "extrapolation_auc": 0.22954516571494626,
        "human_eval": 0.92
    },
    "fig_qa": {
        "max_acc": 0.9390495867768595,
        "min_acc": 0.7231404958677686,
        "extrapolation_auc": 0.49656486457324955,
        "human_eval": 0.9534
    },
    "sports_understanding": {
        "max_acc": 0.9609544468546638,
        "min_acc": 0.7429501084598699,
        "extrapolation_auc": 0.39164872377970966,
        "human_eval": 1.0
    },
    "banking77": {
        "max_acc": 0.9133116883116883,
        "min_acc": 0.05519480519480519,
        "extrapolation_auc": 0.5080100483971645,
        "human_eval": 0.9133116883116883
    },
    "formal_fallacies_syllogisms_negation": {
        "max_acc": 0.5211267605633803,
        "min_acc": 0.3647887323943662,
        "extrapolation_auc": 0.23513056834556623,
        "human_eval": 0.8
    },
    "tracking_shuffled_objects": {
        "max_acc": 0.21866666666666668,
        "min_acc": 0.0,
        "extrapolation_auc": 0.164,
        "human_eval": 1.0
    },
    "overruling": {
        "max_acc": 0.9833333333333333,
        "min_acc": 0.8083333333333333,
        "extrapolation_auc": 0.6101985955175594,
        "human_eval": 0.9833333333333333
    },
    "toxicchat0124": {
        "max_acc": 0.9668,
        "min_acc": 0.9098,
        "extrapolation_auc": 0.43194267047493484,
        "human_eval": 1.0
    },
    "temporal_sequences": {
        "max_acc": 0.99,
        "min_acc": 0.0,
        "extrapolation_auc": 0.7424999999999999,
        "human_eval": 1.0
    },
    "boolq": {
        "max_acc": 0.8695652173913043,
        "min_acc": 0.8260869565217391,
        "extrapolation_auc": 0.25275935010831146,
        "human_eval": 0.89
    },
    "wic": {
        "max_acc": 0.7955801104972375,
        "min_acc": 0.6040515653775322,
        "extrapolation_auc": 0.2624469881309198,
        "human_eval": 0.8
    },
    "sst2": {
        "max_acc": 0.9402,
        "min_acc": 0.8782,
        "extrapolation_auc": 0.3477714568551887,
        "human_eval": 0.978
    },
    "web_of_lies": {
        "max_acc": 0.9983221476510067,
        "min_acc": 0.5436241610738255,
        "extrapolation_auc": 0.28282760662919587,
        "human_eval": 1.0
    },
    "ade_corpus_v2_classification": {
        "max_acc": 0.9294217687074829,
        "min_acc": 0.4914965986394558,
        "extrapolation_auc": 0.5430125919296623,
        "human_eval": 0.9294217687074829
    },
    "object_counting": {
        "max_acc": 0.9044321329639889,
        "min_acc": 0.4120498614958449,
        "extrapolation_auc": 0.22744487066063185,
        "human_eval": 0.95
    },
    "mnist_ascii": {
        "max_acc": 0.9372,
        "min_acc": 0.0926,
        "extrapolation_auc": 0.3978971930553035,
        "human_eval": 0.98
    },
    "rte": {
        "max_acc": 0.891566265060241,
        "min_acc": 0.678714859437751,
        "extrapolation_auc": 0.4489155099727983,
        "human_eval": 0.936
    },
    "qnli": {
        "max_acc": 0.8972,
        "min_acc": 0.779,
        "extrapolation_auc": 0.470061859617137,
        "human_eval": 0.912
    },
    "quail": {
        "max_acc": 0.7619512195121951,
        "min_acc": 0.5434146341463415,
        "extrapolation_auc": 0.3595595144888182,
        "human_eval": 0.7619512195121951
    },
    "high": {
        "max_acc": 0.7575757575757576,
        "min_acc": 0.23013150371640936,
        "extrapolation_auc": 0.4302275261197421,
        "human_eval": 0.85
    },
    "medmcqa": {
        "max_acc": 0.5398,
        "min_acc": 0.16,
        "extrapolation_auc": 0.3409619573367052,
        "human_eval": 0.9
    },
    "mmlu": {
        "max_acc": 0.5781818181818181,
        "min_acc": 0.29818181818181816,
        "extrapolation_auc": 0.29727312669478945,
        "human_eval": 0.898
    },
    "commonsense_qa": {
        "max_acc": 0.7117948717948718,
        "min_acc": 0.2205128205128205,
        "extrapolation_auc": 0.4344958692622533,
        "human_eval": 0.89
    }
}