---
# Run configuration for the benchmark named below.
# NOTE(review): the per-key notes describe what the key names and values
# suggest; the consuming tool is not visible from this file, so confirm there.

# Benchmark identifier.
benchmark: fever

# Explicit null: no budget value is set in this file.
budget: null
# The string "double"; presumably selects how the budget or test-set size
# grows between rounds -- TODO confirm.
budget_growth: double

# Name under which demonstrations are exposed (presumably to the prompt).
demonstrations_variable_name: demonstrations

# Test-set size bounds: starts at 2, capped at 32.
initial_test_set_size: 2
max_test_set_size: 32

num_candidates: 4
num_demonstrations: 5
parallelism: 1
shuffle_test: false
test_set_name: test
# Units are not stated in this file (presumably seconds -- TODO confirm).
timeout: 60
train_set_name: train

# Candidate values per variable. Commented-out entries are alternatives that
# are currently disabled; exactly one value per variable is active.
variables:
  model:
    # - watsonx/meta-llama/llama-3-1-8b-instruct
    # - watsonx/ibm/granite-20b-multilingual
    - watsonx/ibm/granite-34b-code-instruct
  prompt_pattern:
    # - cot
    # - react
    - rewoo

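# NOTE(review): everything below is a commented-out Python fragment
# (apparently an earlier, code-side way of choosing the model). It is not
# parsed as YAML and has no effect on this configuration.
# "model": self.sample_random_index(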
#     [
#         # "ibm/granite-13b-instruct-v2",
#         # "ibm/granite-13b-chat-v2",
#         # "meta-llama/llama-3-1-70b-instruct",
#         # "meta-llama/llama-3-405b-instruct",
#         "ibm/granite-20b-multilingual",
#         # "ibm/granite-20b-multilang-lab-rc",
#         # "ibm/granite-7b-lab",
#         # "meta-llama/llama-3-70b-instruct"
#         # str(Models.granite_34b_code_instruct),
#     ],
# ),