#!/bin/bash

# Launch configuration for one training run. Each variable below is placed in
# the environment and is therefore visible to the training script started at
# the bottom of this file. How that script interprets each value is not shown
# here -- confirm against ./scripts/train_tiny_zero_grpo_math_1.sh.

# Device / parallelism knobs (presumably GPU count and rollout tensor-parallel
# size -- TODO confirm in the training script).
N_GPUS=8
ROLLOUT_TP_SIZE=4

# Model and data locations. BASE_MODEL points into the checkpoint directory of
# the NuminaMath-CoT_gold_0-1 experiment (actor/global_step_200).
BASE_MODEL=/home/ubuntu/tinyzero_gold/checkpoints/grpo_gold/NuminaMath-CoT_gold_0-1-qwen2.5-1.5b-instruct/actor/global_step_200
DATA_DIR=/home/ubuntu/TinyZero_NuminaMath-CoT_gold

# Length limits (presumably measured in tokens -- TODO confirm).
MAX_PROMPT_LENGTH=4096
MAX_RESPONSE_LENGTH=8192

# Names identifying this run.
PROJECT_NAME=grpo_gold
EXPERIMENT_NAME=NuminaMath-CoT_gold_0-2-qwen2.5-1.5b-instruct

# Attention backend selection read by vLLM (assumed from the variable name).
VLLM_ATTENTION_BACKEND=XFORMERS

# Mark every setting above for export to child processes in one statement.
export N_GPUS ROLLOUT_TP_SIZE \
  BASE_MODEL DATA_DIR \
  MAX_PROMPT_LENGTH MAX_RESPONSE_LENGTH \
  PROJECT_NAME EXPERIMENT_NAME \
  VLLM_ATTENTION_BACKEND

# The path is relative, so this must be invoked from a directory that contains
# scripts/train_tiny_zero_grpo_math_1.sh.
bash ./scripts/train_tiny_zero_grpo_math_1.sh