#!/bin/bash
#
# Launcher for one GRPO "gold branch" training run.
#
# Exports the settings below into the environment and then hands control to
# ./scripts/train_tiny_zero_grpo_math_gold_branch.sh. That path is relative,
# so this launcher must be started from the directory that contains scripts/.
#
# NOTE(review): how each variable is interpreted is decided by the training
# script, which is not visible here; the grouping comments below are inferred
# from the variable names only -- confirm against that script.

# --- Parallelism (presumably GPU count and rollout tensor-parallel degree) ---
N_GPUS=8
ROLLOUT_TP_SIZE=4

# --- Model and dataset locations ---
BASE_MODEL='Qwen/Qwen2.5-1.5B-Instruct'
DATA_DIR='/home/ubuntu/TinyZero_NuminaMath-CoT'

# --- Sequence length limits (presumably in tokens -- TODO confirm) ---
MAX_PROMPT_LENGTH=4096
MAX_RESPONSE_LENGTH=8192

# --- Run identification ---
PROJECT_NAME='grpo_gold_branch'
EXPERIMENT_NAME='NuminaMath-CoT_binary_search_gold_branch-qwen2.5-1.5b-instruct_kl-clamp_temp-0_6'

# --- Runtime backend selection (read by vLLM, if the trainer uses it) ---
VLLM_ATTENTION_BACKEND='XFORMERS'

# Publish everything to child processes in one step.
export \
  N_GPUS \
  ROLLOUT_TP_SIZE \
  BASE_MODEL \
  DATA_DIR \
  MAX_PROMPT_LENGTH \
  MAX_RESPONSE_LENGTH \
  PROJECT_NAME \
  EXPERIMENT_NAME \
  VLLM_ATTENTION_BACKEND

# Start the training job; its exit status becomes this script's exit status.
bash ./scripts/train_tiny_zero_grpo_math_gold_branch.sh