#!/bin/bash

# Experiment configuration.
#
# Everything below is exported, so it is inherited by the processes this
# script launches. The recipe paths are relative, so the script has to be
# started from a directory that contains recipes/ (and scripts/, which is
# used further down).
echo "Output directories are created automatically under EXPERIMENT_DIR if they do not exist."

# Policy (generator) recipes and process-reward-model identifiers.
export SMALL_POLICY_MODEL=recipes/Qwen-2.5-Math-1.5B-Instruct/beam_search.yaml
export LARGE_POLICY_MODEL=recipes/Qwen-2.5-Math-7B-Instruct/beam_search.yaml
export SMALL_PRM=Skywork/Skywork-o1-Open-PRM-Qwen-2.5-1.5B
export LARGE_PRM=Skywork/Skywork-o1-Open-PRM-Qwen-2.5-7B
# Root directory for all outputs of this experiment.
export EXPERIMENT_DIR=./output/generator_verifier_tradeoff/

# Create the experiment root up front. Every output path used later is
# derived from it, so stop right here if it cannot be created instead of
# failing later on the log-file redirections.
if [[ -d "$EXPERIMENT_DIR" ]]; then
    echo "Directory $EXPERIMENT_DIR already exists."
else
    echo "Creating EXPERIMENT_DIR: $EXPERIMENT_DIR"
    if ! mkdir -p -- "$EXPERIMENT_DIR"; then
        echo "Error: could not create EXPERIMENT_DIR: $EXPERIMENT_DIR" >&2
        exit 1
    fi
fi

# Run parameters. SEED is exported; the others are plain shell variables
# used for the banner, the directory names and the command lines below.
export SEED=0
max_iterations=12
# Inside double quotes "\\" is a single backslash, so the prompt ends in \boxed{}.
system_prompt="Please reason step by step, and put your final answer within \\boxed{}."

echo "Running test_time_compute.py with different policy models and PRM models"

n=64
g=1
# NOTE(review): within this script B2 only appears in the banner and in the
# output directory names; it is not passed to test_time_compute.py. Confirm
# that this is intended.
B2=2

echo "Running test_time_compute.py with n=$n, g=$g, B2=$B2"

# EXPERIMENT_DIR may end in "/"; drop one trailing slash before joining so the
# derived paths do not contain "//".
experiment_root="${EXPERIMENT_DIR%/}"

# One output directory (and one log file inside it) per run. The directory
# name records the policy size, the PRM size and the g / n / B2 settings.
small_policy_output_dir="$experiment_root/Qwen2.5-1_5B-Skywork-o1-7B-g${g}-n${n}-B2${B2}"
small_policy_log_file="$small_policy_output_dir/log.log"
large_policy_output_dir="$experiment_root/Qwen2.5-7B-Skywork-o1-1_5B-g${g}-n${n}-B2${B2}"
large_policy_log_file="$large_policy_output_dir/log.log"

#######################################
# Make sure an output directory exists, creating it when necessary.
# Arguments: $1 - label used in the progress message (e.g. "small policy")
#            $2 - directory path
# Outputs:   progress message on stdout, error message on stderr
# Returns:   0 on success; exits the script with status 1 if mkdir fails
#######################################
ensure_output_dir() {
    local label=$1
    local dir=$2

    if [[ -d "$dir" ]]; then
        echo "Directory $dir already exists."
        return 0
    fi

    echo "Creating $label output directory: $dir"
    if ! mkdir -p -- "$dir"; then
        # Without the directory the log redirection of the run would fail,
        # so there is no point in continuing.
        echo "Error: could not create $label output directory: $dir" >&2
        exit 1
    fi
}

ensure_output_dir "small policy" "$small_policy_output_dir"
ensure_output_dir "large policy" "$large_policy_output_dir"

# Launch the two runs one after the other. Each run sends both stdout and
# stderr to its own log file. A failed run does not prevent the other one
# from being attempted, but it is reported on stderr and makes the script
# exit with a non-zero status.
failed_runs=0

# Run 1: small policy model, scored with the large PRM.
if ! python scripts/test_time_compute.py "$SMALL_POLICY_MODEL" \
    --n="$n" \
    --num_samples=500 \
    --prm_path="$LARGE_PRM" \
    --output_dir="$small_policy_output_dir" \
    --system_prompt="$system_prompt" \
    --seed="$SEED" \
    --lookahead=0 \
    --num_iterations="$max_iterations" \
    --g="$g" > "$small_policy_log_file" 2>&1; then
    echo "Error: small policy run failed. See $small_policy_log_file" >&2
    failed_runs=$((failed_runs + 1))
fi

# Run 2: large policy model, scored with the small PRM.
# NOTE(review): only this run sets CUDA_VISIBLE_DEVICES=1 and
# NCCL_P2P_DISABLE=1; the first run uses whatever the caller's environment
# provides. Confirm the asymmetry is intentional.
if ! CUDA_VISIBLE_DEVICES=1 NCCL_P2P_DISABLE=1 python scripts/test_time_compute.py "$LARGE_POLICY_MODEL" \
    --n="$n" \
    --num_samples=500 \
    --prm_path="$SMALL_PRM" \
    --output_dir="$large_policy_output_dir" \
    --system_prompt="$system_prompt" \
    --seed="$SEED" \
    --lookahead=0 \
    --num_iterations="$max_iterations" \
    --g="$g" > "$large_policy_log_file" 2>&1; then
    echo "Error: large policy run failed. See $large_policy_log_file" >&2
    failed_runs=$((failed_runs + 1))
fi

# Only claim completion when both runs actually succeeded.
if [[ "$failed_runs" -ne 0 ]]; then
    echo "Test Time Compute finished with $failed_runs failed run(s). Check log files in $EXPERIMENT_DIR" >&2
    exit 1
fi

echo "Test Time Compute completed. Check log files in $EXPERIMENT_DIR"
