#!/bin/bash
#
# Run model training.
#
# Launches src/open_r1/grpo_ts.py through `accelerate launch` with 8 processes,
# using the accelerate config recipes/accelerate_configs/zero3_offload.yaml and
# the training config referenced by CONFIG_PATH.
#
# All paths are relative, so run this script from the directory that contains
# both `recipes/` and `src/`.

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

CONFIG_PATH="recipes/ThinkTime/grpo/config_demo.yaml"

# Fail early with a clear message instead of letting the launcher discover the
# missing config later.
if [[ ! -f "${CONFIG_PATH}" ]]; then
  printf 'error: config file not found: %s\n' "${CONFIG_PATH}" >&2
  exit 1
fi

# Extract the value of the top-level `output_dir:` key from config_demo.yaml
# (second whitespace-separated field of the matching line). The result is empty
# when the key is absent.
# NOTE(review): output_dir is not referenced by the launch command below; it is
# kept for anything that may read it later in this script.
# shellcheck disable=SC2034
output_dir=$(awk '/^output_dir:/ {print $2}' "${CONFIG_PATH}")

# The environment assignments prefix the command, so they apply to this single
# `accelerate launch` invocation only and do not leak into the calling shell.
NCCL_DEBUG=WARN \
TRL_USE_RICH=0 \
VLLM_WORKER_MULTIPROC_METHOD=spawn \
ACCELERATE_LOG_LEVEL=warn \
  accelerate launch \
    --config_file recipes/accelerate_configs/zero3_offload.yaml \
    --num_processes=8 \
    src/open_r1/grpo_ts.py \
    --config "${CONFIG_PATH}"
