# Hydra defaults list. `_self_` comes first, so per Hydra's composition order
# the entries listed after it (e.g. the wandb group) are merged on top of the
# values defined in this file.
defaults:
  - _self_
  - wandb: private.yaml
  # Swap Hydra's own logging configs for the "disabled" option.
  - override hydra/hydra_logging: disabled
  - override hydra/job_logging: disabled

# Disable Hydra's output files: no config-dump subdirectory is written
# (output_subdir: null) and the run directory is the current directory
# instead of a per-run folder.
hydra:
  output_subdir: null
  run:
    dir: .

name: "iter-dist-cifar100-vit" # change here for cifar10
method: "iter_dist"
backbone:
  # To use ViT-small instead, change the name and remove the img_size and
  # patch_size kwargs below.
  name: "vit_tiny"
  kwargs: # ViT-specific
    img_size: 32         # must match augmentations.crop_size
    patch_size: 4        # recommended for 32x32 (divides img_size); 8 also works
# Keyword arguments for the method selected by the top-level `method` key.
method_kwargs:
  # Options: "gauss", "laplace" or "product_laplace"; the note on
  # p_norm_for_rectified_lp_distribution below also refers to
  # "rectified_lp_distribution".
  target_distribution: "laplace"
  proj_hidden_dim: 2048
  proj_output_dim: 2048
  sim_loss_weight: 25.0
  var_loss_weight: 25.0
  cov_loss_weight: 1.0
  one_d_dist_loss_weight: 1.0
  # Options: "sliced_wasserstein_distance" (for laplace), "cvm_exact_loss",
  # "jarque_bera_loss".
  one_d_dist_loss_choice: "sliced_wasserstein_distance"
  swd_num_projections: 2048
  projection_sampling_mode: "random"
  # Add this method kwarg everywhere we want to log projector metrics.
  add_projector_classifier: true
  probe_l1_penalty_weight: 0.0
  projector_type: "mlp3" # "mlp3" or "mlp3_with_one_more_relu"
  mean_shift_scalar_for_rectified_gauss: 0.0
  # Only matters when target_distribution is "rectified_lp_distribution";
  # otherwise it does not interfere with anything. The default 1-norm is
  # equivalent to "rectified_product_laplace".
  p_norm_for_rectified_lp_distribution: 1.0
  mode_of_sigma: "gen_gauss_var_1" # available: [gen_gauss_var_1, rec_gen_gauss_var_1]
  activation_heatmap:
    enabled: false
    num_images: 1
    sample_batch: 50
    sample_dim: 50

# MLP probe settings (switched off in this config).
mlp_probe:
  enabled: false
  num_layers: 3

# Dataset selection and loader settings.
data:
  dataset: "cifar100" # change here for cifar10
  # Training and validation data are read from the same root directory.
  train_path: "./datasets"
  val_path: "./datasets"
  format: "image_folder"
  num_workers: 4
# Augmentation pipelines. The list holds a single entry whose settings are
# used for `num_crops` crops of size `crop_size`.
augmentations:
  - rrc:
      enabled: true
      crop_min_scale: 0.2
      crop_max_scale: 1.0
    color_jitter:
      enabled: true
      brightness: 0.4
      contrast: 0.4
      saturation: 0.2
      hue: 0.1
      prob: 0.8
    grayscale:
      enabled: true
      prob: 0.2
    # Turned off in this config.
    gaussian_blur:
      enabled: false
      prob: 0.0
    solarization:
      enabled: true
      prob: 0.1
    # Turned off in this config.
    equalization:
      enabled: false
      prob: 0.0
    horizontal_flip:
      enabled: true
      prob: 0.5
    crop_size: 32 # keep in sync with backbone.kwargs.img_size
    num_crops: 2
# Optimizer settings. Floats are written with an explicit decimal point
# (5.0e-4 rather than 5e-4) so that YAML 1.1 loaders such as plain PyYAML also
# read them as numbers instead of strings.
optimizer:
  name: "adamw" # ViT
  batch_size: 256
  lr: 5.0e-4 # ViT
  # NOTE(review): this value was annotated "always 5e-5", which contradicts
  # the configured 5e-3 -- confirm which one is intended.
  classifier_lr: 5.0e-3
  weight_decay: 1.0e-4
  # kwargs:
  #   clip_lr: True
  #   eta: 0.02
  #   exclude_bias_n_norm: True
scheduler:
  name: "warmup_cosine"
# Checkpoints are enabled and use the "trained_models" directory.
checkpoint:
  enabled: true
  dir: "trained_models"
  frequency: 1 # presumably counted in epochs -- confirm against the trainer
# Auto-resume is enabled.
auto_resume:
  enabled: true

# Overrides for the PL (presumably PyTorch Lightning) trainer settings.
max_epochs: 1000
devices: [0]
sync_batchnorm: true
accelerator: "gpu"
strategy: "ddp"
precision: "16-mixed"