# Refer to src/sal/config.py for the full list of options.

# Search settings
approach: best_of_n
n: 128  # alt: 64
filter_duplicates: true
search_batch_size: 1
seed: 42
temperature: 0.8

# Dataset selection
dataset_name: HuggingFaceH4/MATH-500
dataset_split: test
dataset_start: 0
dataset_end: 100  # alt: 30

# Models
model_path: Meta-Llama/Llama-3.2-3B-Instruct
prm_path: Qwen/Qwen2.5-Math-PRM-7B

# PRM settings (prm_type was previously listed twice with the same value;
# duplicate keys are invalid YAML, so it is declared only once here)
prm_type: discriminative
prm_batch_size: 1
prm_temperature: 0.0
prm_device: cuda:1

# Output and cache locations
output_dir: /cmlscratch/agrawal5/Reward_hacking/scripts/ThinkPRM/output/results/MATH-100/qwen_prm/best_of_n_qwen_prm_100_OOD/
# alt cached_solutions_path suffix: best_of_n_qwen_prm_CL2/best_of_n_completions.jsonl
cached_solutions_path: /cmlscratch/agrawal5/Reward_hacking/scripts/ThinkPRM/output/results/MATH-100/qwen_prm/best_of_n_qwen_prm_sameques_100_CL4_OOD/best_of_n_completions.jsonl

# Literal block scalar: newlines are kept verbatim and "|-" strips the
# trailing newline, so the value matches the original escaped string.
system_prompt: |-
  Solve the following math problem efficiently and clearly:

  - For simple problems (2 steps or fewer):
  Provide a concise solution with minimal explanation.

  - For complex problems (3 steps or more):
  Use this step-by-step format:

  ## Step 1: [Concise description]
  [Brief explanation and calculations]

  ## Step 2: [Concise description]
  [Brief explanation and calculations]

  ...

  Regardless of the approach, always conclude with:

  Therefore, the final answer is: $\boxed{answer}$. I hope it is correct.

  Where [answer] is just the final number or expression that solves the problem.

# is_lora: True
# lora_checkpoint: /cmlscratch/agrawal5/Reward_hacking/scripts/curriculum_learning/Qwen2.5-Math-PRM-7B-pref_0.0_to_0.1_CL3_sameques_aug/checkpoint-21500 #Qwen2.5-Math-PRM-7B-pref_samestep_aug/checkpoint-5849