---
# Experiment configuration for the `gsmhard` benchmark: a single `cot`
# prompt pattern, one model, and zero demonstrations (see `variables`).

benchmark: gsmhard

# No budget value is set; the growth strategy is named `double`.
budget: null
budget_growth: double

demonstrations_variable_name: demonstrations

# Test-set sizing bounds.
initial_test_set_size: 1
max_test_set_size: 1000

num_candidates: 1
# NOTE(review): `num_demonstrations` is also listed under `variables` below
# (both are 0) -- confirm which one the consumer reads and keep them in sync.
num_demonstrations: 0
parallelism: 5
shuffle_test: false
test_set_name: test
# NOTE(review): unit is not stated in this file -- presumably seconds; confirm.
timeout: 120
train_set_name: train

# NOTE(review): the prefix says `gpt4o`, but the only model listed under
# `variables.model` is a gpt-4o-mini snapshot -- confirm the naming is intended.
experiment_prefix: gpt4o_gsmhard_zero_shot_

# Each variable maps to a list of values; every list here has one entry.
variables:
  model:
    - openai/gpt-4o-mini-2024-07-18
  prompt_pattern:
    - cot
  num_demonstrations:
    - 0