#!/usr/bin/env bash
#
# Launcher: exports the run configuration, then starts ./scripts/train_grpo.sh.
#
# The exported variables are inherited by the child `bash` process. How
# train_grpo.sh interprets each one is not visible from this file; the
# per-variable notes below are inferred from the names and should be confirmed
# against that script.
#
# Every setting except VLLM_ATTENTION_BACKEND may be overridden from the
# calling environment instead of editing this file, e.g.
#
#   N_GPUS=4 BASE_MODEL=/ckpt/model DATA_DIR=/data/gsm8k bash <this-script>
#
# A variable that is unset or empty falls back to the default shown below.
#
# The training script is referenced by a relative path, so run this from the
# directory that contains `scripts/`. The exit status of this launcher is the
# exit status of train_grpo.sh.

# presumably the number of GPUs to use -- confirm in train_grpo.sh
export N_GPUS="${N_GPUS:-8}"

# Placeholder paths: replace them, or override via the environment.
export BASE_MODEL="${BASE_MODEL:-path/to/pruned_llama3_r1_sft}"
export DATA_DIR="${DATA_DIR:-path/to/simplelr_abel_gsm8k_level1}"

# presumably the tensor-parallel size for rollout workers -- confirm
export ROLLOUT_TP_SIZE="${ROLLOUT_TP_SIZE:-2}"

export EXPERIMENT_NAME="${EXPERIMENT_NAME:-llama3_r1_prune_rlvr}"

# Pinned unconditionally (as before) so that a stale value left in the
# caller's environment cannot silently select a different backend.
export VLLM_ATTENTION_BACKEND=XFORMERS

bash ./scripts/train_grpo.sh