#!/usr/bin/env bash
#
# Launch GRPO training for the ContextIF experiment.
#
# Exports the settings below and then runs ./examples/grpo_trainer/run_grpo.sh.
# NOTE(review): that script is presumably what consumes these variables --
# confirm against it. All paths are relative, so run this from the directory
# that contains ./examples and ./dataset.
#
# Every setting may be overridden from the caller's environment; when a
# variable is unset or empty the default shown here is used:
#   CUDA_VISIBLE_DEVICES     comma-separated device list (default: 0,1,2,3,4,5,6,7)
#   N_GPUS                   default: number of entries in CUDA_VISIBLE_DEVICES
#   ROLLOUT_TP_SIZE          default: 1
#   VLLM_ATTENTION_BACKEND   default: XFORMERS
#   DATA_DIR                 default: ./dataset/contextif
#   BASE_MODEL               no default; a warning is printed when it is empty
#   EXPERIMENT_NAME          default: contextif-grpo
#
# Exit status: that of run_grpo.sh (it is the last command executed).

# Print how many comma-separated entries $1 contains (0 for an empty string).
# Runs in a subshell so the IFS / noglob / positional-parameter changes used
# for splitting never leak into the rest of the script.
count_visible_devices() (
  IFS=','
  set -f
  # Intentional unquoted expansion: split the list on commas.
  # shellcheck disable=SC2086
  set -- $1
  printf '%s\n' "$#"
)

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3,4,5,6,7}"
# Derive the GPU count from the device list so the two cannot drift apart.
export N_GPUS="${N_GPUS:-$(count_visible_devices "$CUDA_VISIBLE_DEVICES")}"
export ROLLOUT_TP_SIZE="${ROLLOUT_TP_SIZE:-1}"
export VLLM_ATTENTION_BACKEND="${VLLM_ATTENTION_BACKEND:-XFORMERS}"


export DATA_DIR="${DATA_DIR:-./dataset/contextif}"
export BASE_MODEL="${BASE_MODEL:-}"
export EXPERIMENT_NAME="${EXPERIMENT_NAME:-contextif-grpo}"

# Only warn (do not abort): the downstream script is not visible from here and
# may supply its own fallbacks.
if [ -z "$BASE_MODEL" ]; then
  printf 'warning: BASE_MODEL is empty; set it to the model path or name to train\n' >&2
fi
if [ ! -d "$DATA_DIR" ]; then
  printf 'warning: DATA_DIR does not exist or is not a directory: %s\n' "$DATA_DIR" >&2
fi

echo "Starting ContextIF training with experiment name: $EXPERIMENT_NAME"
echo "Using data from: $DATA_DIR"
bash ./examples/grpo_trainer/run_grpo.sh
