{
  "results": {
    "bigbench_causal_judgement": {
      "multiple_choice_grade": 0.5631578947368421,
      "multiple_choice_grade_stderr": 0.036078330444807245
    },
    "bigbench_date_understanding": {
      "multiple_choice_grade": 0.5826558265582655,
      "multiple_choice_grade_stderr": 0.025705692903559226
    },
    "bigbench_disambiguation_qa": {
      "multiple_choice_grade": 0.3643410852713178,
      "multiple_choice_grade_stderr": 0.03001923241206336
    },
    "bigbench_dyck_languages": {
      "multiple_choice_grade": 0.123,
      "multiple_choice_grade_stderr": 0.010391293421849874
    },
    "bigbench_formal_fallacies_syllogisms_negation": {
      "multiple_choice_grade": 0.49915492957746477,
      "multiple_choice_grade_stderr": 0.0041960485493055645
    },
    "bigbench_geometric_shapes": {
      "multiple_choice_grade": 0.20334261838440112,
      "multiple_choice_grade_stderr": 0.021272007856536258,
      "exact_str_match": 0.12256267409470752,
      "exact_str_match_stderr": 0.017331879192703025
    },
    "bigbench_hyperbaton": {
      "multiple_choice_grade": 0.4936,
      "multiple_choice_grade_stderr": 0.002235907150490653
    },
    "bigbench_logical_deduction_five_objects": {
      "multiple_choice_grade": 0.24,
      "multiple_choice_grade_stderr": 0.01911886665375975
    },
    "bigbench_logical_deduction_seven_objects": {
      "multiple_choice_grade": 0.1657142857142857,
      "multiple_choice_grade_stderr": 0.014063673984033173
    },
    "bigbench_logical_deduction_three_objects": {
      "multiple_choice_grade": 0.38666666666666666,
      "multiple_choice_grade_stderr": 0.028163138908196852
    },
    "bigbench_movie_recommendation": {
      "multiple_choice_grade": 0.438,
      "multiple_choice_grade_stderr": 0.022210326363977417
    },
    "bigbench_navigate": {
      "multiple_choice_grade": 0.486,
      "multiple_choice_grade_stderr": 0.01581309754773099
    },
    "bigbench_reasoning_about_colored_objects": {
      "multiple_choice_grade": 0.2985,
      "multiple_choice_grade_stderr": 0.010234805842091589
    },
    "bigbench_ruin_names": {
      "multiple_choice_grade": 0.296875,
      "multiple_choice_grade_stderr": 0.021609729061250887
    },
    "bigbench_salient_translation_error_detection": {
      "multiple_choice_grade": 0.17935871743486975,
      "multiple_choice_grade_stderr": 0.012150393578288319
    },
    "bigbench_snarks": {
      "multiple_choice_grade": 0.5303867403314917,
      "multiple_choice_grade_stderr": 0.03719891321680327
    },
    "bigbench_sports_understanding": {
      "multiple_choice_grade": 0.4949290060851927,
      "multiple_choice_grade_stderr": 0.015930505328489487
    },
    "bigbench_temporal_sequences": {
      "multiple_choice_grade": 0.296,
      "multiple_choice_grade_stderr": 0.014442734941575016
    },
    "bigbench_tracking_shuffled_objects_five_objects": {
      "multiple_choice_grade": 0.1944,
      "multiple_choice_grade_stderr": 0.011197643581460408
    },
    "bigbench_tracking_shuffled_objects_seven_objects": {
      "multiple_choice_grade": 0.13428571428571429,
      "multiple_choice_grade_stderr": 0.008152809490408933
    },
    "bigbench_tracking_shuffled_objects_three_objects": {
      "multiple_choice_grade": 0.38666666666666666,
      "multiple_choice_grade_stderr": 0.028163138908196852
    }
  },
  "versions": {
    "bigbench_causal_judgement": 0,
    "bigbench_date_understanding": 0,
    "bigbench_disambiguation_qa": 0,
    "bigbench_dyck_languages": 0,
    "bigbench_formal_fallacies_syllogisms_negation": 0,
    "bigbench_geometric_shapes": 0,
    "bigbench_hyperbaton": 0,
    "bigbench_logical_deduction_five_objects": 0,
    "bigbench_logical_deduction_seven_objects": 0,
    "bigbench_logical_deduction_three_objects": 0,
    "bigbench_movie_recommendation": 0,
    "bigbench_navigate": 0,
    "bigbench_reasoning_about_colored_objects": 0,
    "bigbench_ruin_names": 0,
    "bigbench_salient_translation_error_detection": 0,
    "bigbench_snarks": 0,
    "bigbench_sports_understanding": 0,
    "bigbench_temporal_sequences": 0,
    "bigbench_tracking_shuffled_objects_five_objects": 0,
    "bigbench_tracking_shuffled_objects_seven_objects": 0,
    "bigbench_tracking_shuffled_objects_three_objects": 0
  },
  "config": {
    "model": "hf-causal-experimental",
    "model_args": "pretrained=mosaicml/mpt-7b,trust_remote_code=True",
    "num_fewshot": 3,
    "batch_size": "auto",
    "device": "cuda",
    "no_cache": true,
    "limit": null,
    "bootstrap_iters": 100000,
    "description_dict": {}
  }
}
