{
  "results": {
    "gsm8k": {
      "exact_match,strict-match": 0.3388931008339651,
      "exact_match_stderr,strict-match": 0.013037955768562502,
      "exact_match,flexible-extract": 0.34040940106141016,
      "exact_match_stderr,flexible-extract": 0.0130520971032991,
      "alias": "gsm8k"
    }
  },
  "group_subtasks": {
    "gsm8k": []
  },
  "configs": {
    "gsm8k": {
      "task": "gsm8k",
      "group": [
        "math_word_problems"
      ],
      "dataset_path": "gsm8k",
      "dataset_name": "main",
      "training_split": "train",
      "test_split": "test",
      "fewshot_split": "train",
      "doc_to_text": "Question: {{question}}\nAnswer:",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 5,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true,
          "ignore_case": true,
          "ignore_punctuation": false,
          "regexes_to_ignore": [
            ",",
            "\\$",
            "(?s).*#### ",
            "\\.$"
          ]
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Question:",
          "</s>",
          "<|im_end|>"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "filter_list": [
        {
          "name": "strict-match",
          "filter": [
            {
              "function": "regex",
              "regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
            },
            {
              "function": "take_first"
            }
          ]
        },
        {
          "name": "flexible-extract",
          "filter": [
            {
              "function": "regex",
              "group_select": -1,
              "regex_pattern": "(-?[$0-9.,]{2,})|(-?[0-9]+)"
            },
            {
              "function": "take_first"
            }
          ]
        }
      ],
      "should_decontaminate": false,
      "metadata": {
        "version": 3.0
      }
    }
  },
  "versions": {
    "gsm8k": 3.0
  },
  "n-shot": {
    "gsm8k": 5
  },
  "config": {
    "model": "hf",
    "model_args": "pretrained=/fast/redacted/Qwen1.5-1.8B/snapshots/model/,trust_remote_code=True",
    "batch_size": "1",
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": null,
    "bootstrap_iters": 100000,
    "gen_kwargs": null
  },
  "git_hash": "ea4fe0c"
}