#!/bin/bash
#
# Launcher for one GRPO training run.
#
# Exports the run configuration into the environment and then hands control to
# ./scripts/train_tiny_zero_grpo_math.sh. How each variable is consumed is
# decided by that script, which is not part of this file.
#
# The training script is referenced by a relative path, so this launcher has to
# be started from a directory in which ./scripts/ resolves.

# --- Hardware layout ---------------------------------------------------------
# N_GPUS equals the number of device indices listed in CUDA_VISIBLE_DEVICES at
# the bottom of this file; keep the two in sync when changing either.
# NOTE(review): ROLLOUT_TP_SIZE is presumably the tensor-parallel degree used
# for rollout generation -- confirm against the training script.
export N_GPUS=4 ROLLOUT_TP_SIZE=4

# --- Model and data ----------------------------------------------------------
export BASE_MODEL='Qwen/Qwen2.5-3B-Instruct'
export DATA_DIR='/home/ubuntu/TinyZero_NuminaMath-CoT'

# --- Sequence budget ---------------------------------------------------------
# 2048 + 14336 = 16384 in total for prompt plus response.
export MAX_PROMPT_LENGTH=2048 MAX_RESPONSE_LENGTH=14336

# --- Run identification ------------------------------------------------------
# NOTE(review): presumably used to label the run in experiment tracking --
# confirm against the training script.
export PROJECT_NAME='grpo_gold'
export EXPERIMENT_NAME='NuminaMath-CoT-qwen2.5-3b-instruct'

# --- Runtime backend ---------------------------------------------------------
# NOTE(review): presumably read by vLLM to select its attention implementation
# -- confirm for the installed vLLM version.
export VLLM_ATTENTION_BACKEND='XFORMERS'

# CUDA_VISIBLE_DEVICES is given as a command-prefix assignment, so it applies
# only to the training process and is not left set in this shell.
CUDA_VISIBLE_DEVICES=4,5,6,7 \
  bash ./scripts/train_tiny_zero_grpo_math.sh