#!/usr/bin/env bash
# Environment and sweep grid for the PPO training runs launched further down.
# Bash is required: the script relies on arrays and [[ ... ]] tests.

# Exported so the training process inherits it.
# NOTE(review): presumably "false" leaves Weights & Biases logging enabled —
# confirm against the training code / library that reads it.
export WANDB_DISABLED=false
# Expose only GPU 0 to CUDA in child processes.
export CUDA_VISIBLE_DEVICES=0

# Treat expansion of an unset variable as an error.
set -u

# Sweep grid: every combination of these values is one training run.
SEEDS=(42 43)
EPOCHS=(8 10)
k_slices=(10)

# Sweep driver: trains every (seed, epoch, k_slice) combination whose output
# directory does not exist yet, one run at a time.
#
# Each pass of the outer loop rescans the grid from the start, launches
# train_ppo.py for the first combination without an output directory, and then
# rescans. A combination counts as finished as soon as its output directory
# exists, so a run that ends without creating that directory is retried on the
# next pass (retries are unlimited).
#
# Optional environment:
#   RETRY_DELAY_SECONDS  pause before retrying a run that left no output
#                        directory (default: 10).
retry_delay="${RETRY_DELAY_SECONDS:-10}"

# Prints the output directory for one configuration (no trailing newline).
# Arguments: $1 - seed, $2 - epoch count, $3 - k_slice
ppo_output_dir() {
  printf 'main_results/seed_%s/models/arxiv/qwen/ppo_400_400_%s_1_%sepoch_lr_1.5e-4_nli' \
    "$1" "$3" "$2"
}

while true; do
  all_done=true

  for seed in "${SEEDS[@]}"; do
    for epoch in "${EPOCHS[@]}"; do
      for k_slice in "${k_slices[@]}"; do

        output_dir="$(ppo_output_dir "$seed" "$epoch" "$k_slice")"

        if [[ -d "$output_dir" ]]; then
          echo ">>> Exists, skip: $output_dir"
          continue
        fi

        all_done=false
        echo "=== Running: seed=${seed}, epoch=${epoch}, k_slice=${k_slice} ==="

        # Capture the exit status instead of discarding it, so failures are
        # visible in the log and can be told apart from successful runs.
        train_status=0
        python train_ppo.py \
          --seed "${seed}" \
          --epoch "${epoch}" \
          --k_slice "${k_slice}" \
          --learning_rate 1.5e-4 \
          --per_device_train_batch_size 8 \
          --dataset "arxiv" \
          --dataset_path "Glow-AI/WaterDrum-Ax" \
          --model_family "qwen" \
          --base_model_path "Qwen/Qwen3-8B" \
          --policy_model_path "main_results/seed_${seed}/models/arxiv/arxiv_original_qwen3_8b_15_1e-4" \
          --ref_model_path "main_results/seed_${seed}/models/arxiv/arxiv_original_qwen3_8b_15_1e-4" \
          --reward_base_model_path "Qwen/Qwen3-0.6B" \
          --reward_model_path "reward_model/arxiv/qwen/classifier" \
          --output_dir "$output_dir" \
          --response_length 200 \
          --class_num 20 \
          --forget_label 19 || train_status=$?

        if (( train_status != 0 )); then
          echo "!!! train_ppo.py exited with status ${train_status}: seed=${seed}, epoch=${epoch}, k_slice=${k_slice}" >&2
        fi

        if [[ ! -d "$output_dir" ]]; then
          # The next scan will pick this configuration again; pause first so a
          # run that fails instantly does not turn into a tight relaunch loop.
          echo "!!! No output directory after run, retrying in ${retry_delay}s: $output_dir" >&2
          sleep "$retry_delay"
        elif (( train_status != 0 )); then
          # The directory-exists check above cannot tell a partial run from a
          # complete one, so make this case visible to the operator.
          echo "!!! Output directory exists despite the failure and will be skipped as complete: $output_dir" >&2
        fi

        # Leave all three for-loops (not the outer while) so the grid is
        # rescanned from the beginning after every launch.
        break 3
      done
    done
  done

  if [[ "$all_done" == true ]]; then
    echo "=== All output_dir folders exist. Exit. ==="
    break
  fi
done
