#!/usr/bin/env bash
#
# Learning-rate sweep for PPO training.
#
# Runs lib/train_ppo.py once per learning rate in the list below, with a fixed
# seed, KL coefficient, and number of PPO epochs. Paths are relative, so the
# script must be started from the directory containing lib/ and config/.
#
# Exit status: 0 when every run succeeded, non-zero when at least one failed.
# Only POSIX constructs are used, so it also works when invoked as `sh <file>`.

# Treat a reference to an unset variable as an error instead of silently
# passing an empty value to the trainer.
set -u

# Expose only GPU 0 to the training processes.
export CUDA_VISIBLE_DEVICES=0

SEED=42

# Becomes 1 as soon as any run fails. The sweep deliberately keeps going so
# one bad learning rate does not prevent the others from being tried.
failed=0

for LR in 0.0001 0.0003 0.0005; do
  # NOTE(review): every run receives the same --output_dir. Whether a later
  # run overwrites the results of an earlier one depends on train_ppo.py --
  # confirm, and consider a per-LR subdirectory if it does.
  if ! python lib/train_ppo.py \
    --seed="$SEED" \
    --ppo_config_path='config/tofu/ppo.yaml' \
    --kl_coef=0.1 \
    --lr="$LR" \
    --num_ppo_epochs=10 \
    --output_dir='ppo_outputs/ppo3'; then
    printf 'train_ppo.py failed for lr=%s\n' "$LR" >&2
    failed=1
  fi
done

# Make the script's exit status reflect the whole sweep rather than only the
# last run. A test is used instead of `exit` so that sourcing this file does
# not terminate the calling shell.
[ "$failed" -eq 0 ]