# Network to build for this experiment.
# NOTE(review): "wrn_28_10" is presumably a Wide ResNet 28-10 entry in the
# project's model registry — confirm the exact registered name.
model:
  name: "wrn_28_10"
  # NOTE(review): expected to match the label count of dataset.name
  # ("cifar100") — confirm if the dataset is changed.
  num_classes: 100

# Dataset selection and data-loading settings.
dataset:
  name: "cifar100"
  # NOTE(review): absolute, host-specific path — confirm it exists on the
  # machine running this config, or override it per environment.
  data_root: "/home/voz/shared/database/vision"
  # NOTE(review): method.batch_size_forget / method.batch_size_retain are also
  # set in this file; confirm which loaders actually use this value.
  batch_size: 256
  num_workers: 8
  # NOTE(review): presumably enables a held-out validation split — confirm.
  has_val: true

  # Forget-set definition. With type "class_forget" the listed class indices
  # are presumably the classes to unlearn — confirm against the split loader.
  split_protocol:
    type: "class_forget"
    forget_classes: [0, 1, 2, 3, 4]

# NOTE(review): presumably requests reproducible (seeded / deterministic)
# execution; confirm how the training entry point consumes this flag.
deterministic: true

# Unlearning method configuration (SCRUB).
method:
  name: "scrub"

  # ---------- Optimization ----------
  optimizer: "adam"           # "adam" | "adamw" | "sgd"
  lr: 0.0005
  weight_decay: 0.0005
  momentum: 0.9               # used only for SGD
  lr_decay_after: 2           # optional: decay lr by x0.1 after this epoch

  # ---------- Alternating training ----------
  # Tuned for CIFAR-100 with 5 forget classes and 95 retain classes.
  # NOTE(review): the keys are named "*_steps" while these notes count epochs;
  # confirm the unit against the training loop.
  max_steps: 5                # MAX epochs; raised from 2 for 5 forget classes
  min_steps: 4                # MIN epochs; raised from 3 to preserve 95 retain classes
  final_min_steps: 1          # extra MIN epoch for stability with more classes
  alpha: 0.5                  # weight on KL divergence during MIN step
  gamma: 1.0                  # weight on CE loss during MIN step
  clip_grad_norm: 1.0         # clip gradients (set 0 or null to disable)

  # ---------- CRITICAL: Separate batch sizes for forget/retain ----------
  # Forget set: 5 classes = ~2500 samples (5% of data).
  # Retain set: 95 classes = ~47500 samples (95% of data).
  batch_size_forget: 256      # reduced from 512 for the small forget set
  batch_size_retain: 128      # kept the same

  # ---------- Rewind variant (SCRUB+R) ----------
  # When true, the method will select the checkpoint whose forget error (train)
  # is closest to the forget error (validation) of the final model.
  # This helps defend against Membership Inference Attacks.
  rewind: false               # true -> enable rewind selection for privacy (UP application)