{
    "sports_understanding": {
        "max_acc": 0.9934924078091106,
        "min_acc": 0.6637744034707158,
        "extrapolation_auc": 0.33238561004698275,
        "human_eval": 1.0
    },
    "commonsense_qa": {
        "max_acc": 0.7671794871794871,
        "min_acc": 0.22666666666666666,
        "extrapolation_auc": 0.47384803013503385,
        "human_eval": 0.89
    },
    "anli": {
        "max_acc": 0.738,
        "min_acc": 0.385,
        "extrapolation_auc": 0.14223289025143546,
        "human_eval": 0.92
    },
    "banking77": {
        "max_acc": 0.8860389610389611,
        "min_acc": 0.32954545454545453,
        "extrapolation_auc": 0.41964364417853245,
        "human_eval": 0.8860389610389611
    },
    "toxicchat0124": {
        "max_acc": 0.9714,
        "min_acc": 0.8764,
        "extrapolation_auc": 0.46456927931065684,
        "human_eval": 1.0
    },
    "quail": {
        "max_acc": 0.7990243902439025,
        "min_acc": 0.29365853658536584,
        "extrapolation_auc": 0.4854499462293463,
        "human_eval": 0.7990243902439025
    },
    "temporal_sequences": {
        "max_acc": 1.0,
        "min_acc": 0.25,
        "extrapolation_auc": 0.75,
        "human_eval": 1.0
    },
    "tracking_shuffled_objects": {
        "max_acc": 0.9493333333333334,
        "min_acc": 0.19733333333333333,
        "extrapolation_auc": 0.19587145882276807,
        "human_eval": 1.0
    },
    "boolq": {
        "max_acc": 0.8759278897136797,
        "min_acc": 0.8525980911983033,
        "extrapolation_auc": 0.14602993704496706,
        "human_eval": 0.89
    },
    "mnli": {
        "max_acc": 0.8538,
        "min_acc": 0.635,
        "extrapolation_auc": 0.3841092910856081,
        "human_eval": 0.92
    },
    "mrpc": {
        "max_acc": 0.8753623188405797,
        "min_acc": 0.7472463768115942,
        "extrapolation_auc": 0.22771501301363245,
        "human_eval": 0.8753623188405797
    },
    "qqp": {
        "max_acc": 0.8496,
        "min_acc": 0.755,
        "extrapolation_auc": 0.382766080989747,
        "human_eval": 0.8496
    },
    "qnli": {
        "max_acc": 0.9222,
        "min_acc": 0.5654,
        "extrapolation_auc": 0.6462857331823739,
        "human_eval": 0.9222
    },
    "medmcqa": {
        "max_acc": 0.747,
        "min_acc": 0.4208,
        "extrapolation_auc": 0.3096702922681246,
        "human_eval": 0.9
    },
    "high": {
        "max_acc": 0.8421955403087479,
        "min_acc": 0.5045740423098913,
        "extrapolation_auc": 0.45760051250247263,
        "human_eval": 0.85
    },
    "ade_corpus_v2_classification": {
        "max_acc": 0.9477040816326531,
        "min_acc": 0.47278911564625853,
        "extrapolation_auc": 0.6359832472717216,
        "human_eval": 0.9477040816326531
    },
    "overruling": {
        "max_acc": 0.9708333333333333,
        "min_acc": 0.8208333333333333,
        "extrapolation_auc": 0.4726776467368354,
        "human_eval": 0.9708333333333333
    },
    "reasoning_about_colored_objects": {
        "max_acc": 0.94125,
        "min_acc": 0.3875,
        "extrapolation_auc": 0.28073454739626463,
        "human_eval": 1.0
    },
    "mmlu": {
        "max_acc": 0.6545454545454545,
        "min_acc": 0.2545454545454545,
        "extrapolation_auc": 0.36051652946916396,
        "human_eval": 0.898
    },
    "sst2": {
        "max_acc": 0.9324,
        "min_acc": 0.8908,
        "extrapolation_auc": 0.2066002432853412,
        "human_eval": 0.978
    },
    "object_counting": {
        "max_acc": 0.9238227146814404,
        "min_acc": 0.24722991689750692,
        "extrapolation_auc": 0.4953692719919115,
        "human_eval": 0.95
    },
    "mnist_ascii": {
        "max_acc": 0.9282,
        "min_acc": 0.0948,
        "extrapolation_auc": 0.34848156471494307,
        "human_eval": 0.98
    },
    "boolean_expressions": {
        "max_acc": 0.99,
        "min_acc": 0.713,
        "extrapolation_auc": 0.33449188313788225,
        "human_eval": 1.0
    },
    "wic": {
        "max_acc": 0.8084714548802947,
        "min_acc": 0.6169429097605893,
        "extrapolation_auc": 0.16817992813357657,
        "human_eval": 0.8084714548802947
    },
    "rte": {
        "max_acc": 0.8795180722891566,
        "min_acc": 0.26104417670682734,
        "extrapolation_auc": 0.4827788781588751,
        "human_eval": 0.936
    },
    "circa": {
        "max_acc": 0.910709074992705,
        "min_acc": 0.10854975196965276,
        "extrapolation_auc": 0.6433464356769344,
        "human_eval": 0.923
    },
    "fig_qa": {
        "max_acc": 0.9411157024793388,
        "min_acc": 0.5547520661157025,
        "extrapolation_auc": 0.39968233108651213,
        "human_eval": 0.9534
    },
    "hyperbaton": {
        "max_acc": 0.9982,
        "min_acc": 0.602,
        "extrapolation_auc": 0.5693484240531477,
        "human_eval": 1.0
    },
    "web_of_lies": {
        "max_acc": 1.0,
        "min_acc": 0.5151006711409396,
        "extrapolation_auc": 0.27888614249831767,
        "human_eval": 1.0
    },
    "formal_fallacies_syllogisms_negation": {
        "max_acc": 0.9732394366197183,
        "min_acc": 0.4802816901408451,
        "extrapolation_auc": 0.183502823317196,
        "human_eval": 0.9732394366197183
    }
}