# Index of the random seed used for this run.
# NOTE(review): presumably selects from a seed list defined elsewhere — confirm.
seed_index: 0
# Determinism flag; trainer.deterministic interpolates this value through
# ${train.deterministic}.
deterministic: false
# Per-step batch size: 128 for B-32 and B-16, 64 for L-14.
# NOTE(review): combined with trainer.accumulate_grad_batches: 2 this
# presumably gives L-14 an effective batch of 128 — confirm.
batch_size: 64

# Trainer settings.
# NOTE(review): the key names match Lightning Trainer arguments; presumably
# this mapping is forwarded to the Trainer constructor — confirm.
trainer:
  # Epoch count is resolved from the selected dataset config.
  max_epochs: ${dataset.ft_epochs}
  log_every_n_steps: 1
  fast_dev_run: false
  accelerator: 'gpu'
  devices: 1
  precision: 32
  # NOTE(review): -1 presumably means "no step cap", leaving max_epochs to
  # bound training length — confirm against the Trainer documentation.
  max_steps: -1
  num_sanity_val_steps: 3
  gradient_clip_val: 10.0
  val_check_interval: 1.0
  # Resolved from `train.deterministic`.
  deterministic: ${train.deterministic}
  # 1 for B-32 and B-16, 2 for L-14.
  accumulate_grad_batches: 2

# Regularization terms targeting mergeability
regularization:
  # --- R2: moderate-update regularization ---
  # Adds a penalty for large deviations from the pretrained weights.
  # Rationale: weight_l2_distance has a negative coefficient for every
  # merge method.
  #
  # Switch that turns the R2 term on or off.
  enable_moderate_update: false
  # Weight applied to the R2 term.
  lambda_moderate_update: 0.01

  # --- R3: gradient-magnitude regularization ---
  # Encourages gradient magnitudes to stay moderate (not too large).
  # Rationale: gradient_l2_distance has a negative coefficient for every
  # merge method.
  #
  # Switch that turns the R3 term on or off.
  enable_grad_magnitude: true
  # Weight applied to the R3 term.
  lambda_grad_magnitude: 0.1

# Checkpoint restore settings.
restore:
  # Checkpoint file or run path to restore from; null means none is given.
  ckpt_or_run_path: null
  # Restore mode; one of: null, finetune, hotstart, continue.
  mode: finetune

# Metric to monitor and the direction in which it improves.
# NOTE(review): presumably consumed by checkpoint/early-stopping logic
# defined elsewhere — confirm.
monitor:
  metric: acc/val
  mode: max

# Callback objects, each described by a `_target_` class path plus its
# keyword arguments.
# NOTE(review): these targets use the `pytorch_lightning` namespace while
# logging.logger targets `lightning.pytorch`; Lightning advises against mixing
# the two packages — confirm which one the Trainer is imported from.
callbacks:
  # Learning-rate monitor with per-step logging; momentum logging disabled.
  - _target_: pytorch_lightning.callbacks.LearningRateMonitor
    logging_interval: "step"
    log_momentum: false

  # tqdm progress bar configured with refresh_rate 20.
  - _target_: pytorch_lightning.callbacks.progress.tqdm_progress.TQDMProgressBar
    refresh_rate: 20

# Experiment logging settings.
logging:
  # Which artifacts to upload alongside the run.
  upload:
    run_files: true
    source: true

  # Weights & Biases logger, described by its `_target_` class path.
  # NOTE(review): this target uses the `lightning.pytorch` namespace while the
  # callbacks section targets `pytorch_lightning` — confirm the mix is intended.
  logger:
    _target_: lightning.pytorch.loggers.WandbLogger
    project: ${core.project_name}
    # Set to your W&B username or use command line override
    entity: null
    log_model: false
    mode: 'online'
    tags: ${core.tags}

  # Settings for W&B model watching; `log: null` leaves the log target unset.
  wandb_watch:
    log: null
    log_freq: 100
    log_graph: false
