#!/usr/bin/env bash
#
# Clones TRL, overlays the files from patches/, installs the dependencies and
# launches train_grpo.py through `accelerate launch`.
#
# Weights & Biases settings. Each variable keeps a value that is already
# present in the caller's environment and falls back to the literal below
# otherwise, so the API key can be supplied without editing this file:
#   WANDB_BASE_URL  - W&B server URL
#   WANDB_API_KEY   - W&B API key
#   WANDB_PROJECT   - W&B project name
export WANDB_BASE_URL="${WANDB_BASE_URL:-https://api.wandb.ai}"
export WANDB_API_KEY="${WANDB_API_KEY:-YOUR_API_KEY}"
export WANDB_PROJECT="${WANDB_PROJECT:-PROJECT_NAME}"

# The fallback key is only a placeholder; say so on stderr instead of letting
# it pass silently as if it were a real credential.
if [ "$WANDB_API_KEY" = "YOUR_API_KEY" ]; then
  printf 'warning: WANDB_API_KEY is still the placeholder value; set it in the environment.\n' >&2
fi


# Fetch the TRL sources into packages/trl.
# The clone targets the destination path directly instead of cd-ing into
# packages/ and back: a failed `cd` would otherwise leave every later step
# running from the wrong working directory.
# An existing checkout is reused so the script can be re-run.
# NOTE(review): the clone is not pinned to a tag or commit, while the files in
# patches/ overwrite whole modules - consider pinning a known-good revision.
mkdir -p packages || exit 1
if [ ! -d packages/trl/.git ]; then
  git clone https://github.com/huggingface/trl packages/trl || {
    printf 'error: failed to clone trl into packages/trl\n' >&2
    exit 1
  }
fi


# Overwrite selected modules of the TRL checkout with the versions kept in
# patches/. Each entry is the destination path relative to packages/trl/trl;
# the source file in patches/ has the same base name.
# Abort on the first failed copy so that an unpatched TRL is never installed.
for patched in \
  trainer/grpo_config.py \
  trainer/grpo_trainer.py \
  extras/vllm_client.py \
  scripts/vllm_serve.py
do
  cp "patches/${patched##*/}" "packages/trl/trl/${patched}" || {
    printf 'error: failed to apply patches/%s\n' "${patched##*/}" >&2
    exit 1
  }
done


# Install the project requirements, then the patched TRL checkout as an
# editable install (-e) with its "dev" extra. Stop if either install fails
# rather than launching training against a broken environment.
pip install -r requirements.txt || {
  printf 'error: pip install -r requirements.txt failed\n' >&2
  exit 1
}
pip install -e "./packages/trl[dev]" || {
  printf 'error: editable install of packages/trl failed\n' >&2
  exit 1
}


# CFG: optional extra arguments for train_grpo.py, taken from the environment.
# Defaults to the empty string when unset or empty.
: "${CFG:=}"

# Run train_grpo.py via `accelerate launch` with GPUs 0-3 visible, using the
# accelerate config at configs/accelerate/config_zero2.yaml.
# $CFG is left unquoted, so the shell word-splits (and glob-expands) it into
# separate arguments; an empty CFG contributes no argument at all.
# shellcheck disable=SC2086
CUDA_VISIBLE_DEVICES=0,1,2,3 \
  accelerate launch \
  --config_file=configs/accelerate/config_zero2.yaml \
  train_grpo.py $CFG