# General run settings.
task: openai_math
# Explicit null: per the original note, the value is determined by the dataset.
train_ratio: null

seed: 42

add_special_tokens: true

# Model checkpoint for the critic.
# NOTE(review): looks like a Hugging Face Hub model id — confirm with the loader.
load_critic_model: "microsoft/deberta-v3-large"

# Search method
# Alternative kept for reference:
# search_method: beam
# NOTE(review): the earlier note listed only "beam" and "next" as choices, yet
# "greedy" is the value in use — confirm the full set of accepted values.
search_method: greedy
mode: bbox  # can choose from bbox, new
# SEARCH PARAMS
# NOTE(review): beam_size presumably only matters for beam search — confirm.
beam_size: 3
num_candidates: 10
# max search depth
max_length: 15
early_stopping: true
only_eval_answers: false

# QUERY_PARAMS
# Alternative sampling settings kept for reference:
# temperature: 0.7
# top_p: 0.9
temperature: 0.0
top_p: 1.0
frequency_penalty: 0
presence_penalty: 0
# Stop sequences: one, two, or three consecutive newline characters.
stop:
  - "\n"
  - "\n\n"
  - "\n\n\n"
max_tokens: 512
max_tokens_cot: 25600

# Two-line prompt template. Written as a literal block scalar, so the backslash
# in \boxed{} is literal and "|-" strips the trailing newline.
# NOTE(review): <Q> and <A> look like placeholders filled in by the consumer — confirm.
qa_template: |-
  Q: <Q>
  A: Let's think step by step and output the final answer within \boxed{}. <A>

# WANDB
# Logging toggle plus the project / group / run names.
log_with_wandb: false
wandb_project: SearchLLM
wandb_group: Greedy
wandb_run_name: OpenAI_Math

# EVALUATION
# Boolean switches selecting what is evaluated, and the number of rounds.
eval_blackbox: true
eval_unfinetuned: false
num_eval_rounds: 1

# [Optional] Settings used when vLLM is not embedded.
# NOTE(review): presumably the port of a separately running vLLM server and the
# model it serves — confirm against the consumer.
port: 8000
whitebox: "Qwen2.5-Math-7B-Instruct"
