#!/usr/bin/env bash
#
# Sweep driver: launches tofu_unlearn.py through torchrun once for every
# combination of METHODS x LORA_PAIRS x SUBSETS defined below.
#
# Usage:
#   [NPROC=<n>] <this-script> [extra args appended to every torchrun call]
#
# Exits 1 if at least one run failed.

# -e is deliberately left off: a failing run must not abort the remaining
# runs of the sweep. -u and pipefail stay on.
set -uo pipefail

# GPUs per run (handed to torchrun as --nproc_per_node).
# Defaults to 4; override from the environment, e.g. NPROC=8.
: "${NPROC:=4}"

# Training methods to sweep over (passed as --train_method).
METHODS=(
  selu-no-cpl
  selu-no-pair
  selu-no-cal
  selu-no-ce
)

# LoRA settings, each written as "r:alpha".
LORA_PAIRS=(
  "16:32"
)

# Dataset splits to sweep over (passed as --dataset_split).
SUBSETS=(
  forget05
)

# Labels of the runs that exited non-zero; filled in while the sweep runs.
failed=()

# Run every (method, LoRA pair, subset) combination, one after another.
# A run that exits non-zero is recorded in `failed` and the sweep moves on
# to the next combination.
for method in "${METHODS[@]}"; do
  for lora_pair in "${LORA_PAIRS[@]}"; do
    # Split the "r:alpha" entry on the colon.
    IFS=":" read -r rank alpha <<< "$lora_pair"

    for subset in "${SUBSETS[@]}"; do
      echo ">>> method=$method  r=$rank  alpha=$alpha  subset=$subset"

      # Options fixed by this combination. The script's own arguments
      # ("$@") are appended after them on the command line.
      run_args=(
        --model_id tofu_llama-2-7b
        --model_variant lora
        --lora_rank "$rank"
        --lora_alpha "$alpha"
        --train_method "$method"
        --dataset tofu
        --dataset_split "$subset"
      )

      if torchrun --nproc_per_node="$NPROC" tofu_unlearn.py \
        "${run_args[@]}" "$@"; then
        echo ">>> done: method=$method r=$rank alpha=$alpha subset=$subset"
      else
        echo "!!! FAILED: method=$method r=$rank alpha=$alpha subset=$subset"
        # Label format: <method>:r<rank>:a<alpha>:<subset>
        failed+=("${method}:r${rank}:a${alpha}:${subset}")
      fi
    done
  done
done

# Final report: list every failed run and exit 1, or confirm that the
# whole sweep succeeded.
if ((${#failed[@]} == 0)); then
  echo "All runs completed successfully."
else
  echo "Sweep finished with failures:"
  # printf reuses the format once per array element, giving one bullet
  # line for each failed run.
  printf '  - %s\n' "${failed[@]}"
  exit 1
fi

