# refer to src/sal/config.py for more options
# --- Search strategy ---
approach: beam_search
filter_duplicates: true
# NOTE(review): `n`, `beam_width`, and `num_iterations` are interpreted by the
# consumer (see src/sal/config.py); confirm their exact semantics there.
n: 64
beam_width: 4
num_iterations: 20
search_batch_size: 1
seed: 42

# --- Dataset ---
dataset_name: HuggingFaceH4/MATH-500
dataset_split: test
# NOTE(review): presumably the index range of examples to process; confirm
# whether `dataset_end` is inclusive or exclusive in the consumer.
dataset_start: 0
dataset_end: 100

# --- Generator model ---
# Previously used alternative: meta-llama/Llama-3.1-8B
# NOTE(review): the alternative above uses a lowercase `meta-llama` org name,
# while the active value uses `Meta-Llama`; confirm the casing resolves.
model_path: Meta-Llama/Llama-3.2-3B-Instruct
temperature: 0.8
max_tokens: 4096

# --- Process reward model (PRM) ---
prm_path: Qwen/Qwen2.5-Math-PRM-7B
# `prm_type` was previously declared twice with the same value; YAML forbids
# duplicate keys (most parsers silently keep the last), so it is set once here.
prm_type: discriminative
prm_temperature: 0.0
prm_batch_size: 1
prm_device: cuda:1

# --- Output ---
# Previously used output directory names (last path component):
#   beam_search_qwen_prm_AIME_sameques_100_CL4_OOD/
#   beam_search_qwen_prm_AIME_100_OOD/
output_dir: /cmlscratch/agrawal5/Reward_hacking/scripts/ThinkPRM/output/results/MATH-100/qwen_prm/beam_search_qwen_prm_100_OOD

# --- Prompt ---
# Literal block scalar with strip chomping (`|-`): newlines are kept verbatim
# and no trailing newline is appended, matching the former quoted string.
system_prompt: |-
  Solve the following math problem efficiently and clearly:

  - For simple problems (2 steps or fewer):
  Provide a concise solution with minimal explanation.

  - For complex problems (3 steps or more):
  Use this step-by-step format:

  ## Step 1: [Concise description]
  [Brief explanation and calculations]

  ## Step 2: [Concise description]
  [Brief explanation and calculations]

  ...

  Regardless of the approach, always conclude with:

  Therefore, the final answer is: $\boxed{answer}$. I hope it is correct.

  Where [answer] is just the final number or expression that solves the problem.
# is_lora: True
# lora_checkpoint: /cmlscratch/agrawal5/Reward_hacking/scripts/curriculum_learning/Qwen2.5-Math-PRM-7B-pref_0.0_to_0.1_CL3_sameques_aug/checkpoint-21500 #Qwen2.5-Math-PRM-7B-pref_samestep_aug/checkpoint-5849