# Weights & Biases settings. A value already present in the environment takes
# precedence over the default written here, so a real API key exported by the
# caller is not overwritten by the placeholder.
export WANDB_BASE_URL="${WANDB_BASE_URL:-https://api.wandb.ai}"
export WANDB_API_KEY="${WANDB_API_KEY:-YOUR_WANDB_API_KEY}"
export WANDB_PROJECT="${WANDB_PROJECT:-PROJECT_NAME}"
# Select SDL's "dummy" audio driver (presumably because the run is headless
# and has no sound device — confirm against the environments being trained).
export SDL_AUDIODRIVER=dummy

# Surface an unconfigured key early instead of after the long install steps.
if [ "$WANDB_API_KEY" = "YOUR_WANDB_API_KEY" ]; then
  printf '%s\n' "warning: WANDB_API_KEY is still the placeholder value; export a real key before running" >&2
fi


# Fetch the TRL sources into packages/trl and overlay the locally patched
# modules on top of the upstream checkout.
# NOTE(review): the clone follows the repository's default branch with no
# pinned revision, so the patched files may drift from upstream over time —
# consider pinning a tag or commit.
mkdir -p packages || exit 1

# Clone directly into the target path (no cd round-trip that could leave the
# script in the wrong directory), and skip the clone when a checkout already
# exists so the script can be re-run.
if [ ! -d packages/trl/.git ]; then
  git clone https://github.com/huggingface/trl packages/trl || exit 1
fi

# Replace the upstream files with the patched versions. Abort on the first
# failed copy so a partially patched tree is never installed.
cp patches/grpo_config.py packages/trl/trl/trainer/grpo_config.py || exit 1
cp patches/grpo_trainer.py packages/trl/trl/trainer/grpo_trainer.py || exit 1
cp patches/vllm_client.py packages/trl/trl/extras/vllm_client.py || exit 1
cp patches/vllm_serve.py packages/trl/trl/scripts/vllm_serve.py || exit 1

# Install the Python requirements, then the TRL checkout in editable mode with
# its "dev" extra. Stop on the first failure rather than continuing with an
# incomplete environment.
pip install -r requirements.txt || exit 1
pip install -e "./packages/trl[dev]" || exit 1

# Extra command-line arguments for grpo.py, taken from the environment.
# Becomes the empty string when CFG is unset or empty.
: "${CFG:=}"

# Run grpo.py through `accelerate launch` with GPUs 0 and 1 visible, wrapped
# in xvfb-run so the process gets an X display (1024x768, 24-bit, GLX and
# RENDER extensions enabled).
# $CFG is intentionally unquoted: an empty value contributes no argument and
# a multi-word value is split into separate arguments.
# shellcheck disable=SC2086
CUDA_VISIBLE_DEVICES=0,1 \
  xvfb-run -a -s "-screen 0 1024x768x24 -ac +extension GLX +render -noreset" \
  accelerate launch \
  --config_file=configs/config_zero2.yaml \
  grpo.py $CFG