# Task selection and global settings.
task: aime24  # alternatives noted by the author: gpqa, openai_math, s1_eval
# Explicit null instead of a bare empty value; determined by dataset.
train_ratio: null

seed: 42

add_special_tokens: true

# MODEL ckpt
# Identifier of the critic model checkpoint to load.
load_critic_model: "answerdotai/ModernBERT-large"

# Search Method
search_method: beam  # can choose from beam, next
# NOTE(review): the original note listed only "bbox, new" as choices for
# `mode`, which does not include the configured value `linear` — confirm the
# full set of accepted values against the consuming code.
mode: linear  # author's listed choices: bbox, new

remove_root: true
normalize: true
# NOTE(review): alpha, sigma and c are not documented in this file; confirm
# their meaning and valid ranges against the search implementation.
alpha: 1
sigma: 1
c: 0

# SEARCH PARAMS
beam_size: 2
num_candidates: 5
# max search depth
max_length: 50  # alternative value noted by the author: 120
early_stopping: true
only_eval_answers: false

# QUERY_PARAMS
temperature: 0.7
top_p: 0.9
frequency_penalty: 0
presence_penalty: 0
# Stop strings: one, two and three consecutive newlines.
stop: ["\n", "\n\n", "\n\n\n"]

max_tokens: 2560  # alternative value noted by the author: 512
max_tokens_cot: 25600
ensure_length: 2048  # alternative value noted by the author: 256

# Prompt template. The value is the two lines below joined by a single newline;
# `|-` strips the trailing newline, so the string ends at "<A>".
# NOTE(review): <Q> and <A> look like placeholders substituted by the caller —
# confirm against the code that reads this key.
qa_template: |-
  Q: <Q>
  A: You MUST conclude the final answer after the phrase 'The final answer is'. <A>

# WANDB
# Logging flag is off in this config; project/group/run name are set below.
log_with_wandb: false
wandb_project: SearchLLM
wandb_group: Greedy
wandb_run_name: AIME

# EVALUATION
eval_blackbox: false  # alternative value noted by the author: true
eval_unfinetuned: true
num_eval_rounds: 1

# [Optional] Settings used for unembedded vllm.
proposal: "Qwen2.5-Math-7B-Instruct"
port: 8000
