# Seed value for the run (presumably used for RNG initialisation — TODO confirm).
seed: 42

# Run name template. The {...} placeholders are plain text at YAML level;
# presumably the training code substitutes them — TODO confirm.
name: 'shannon_hypersphere_k1_{data_dataset_name}_{backbone_name}_{backbone_use_batch_norm}'
# Root path for this experiment's artifacts.
artifacts_root: 'artifacts/stl10/shannon_hypersphere_k1'
# NOTE(review): the group string repeats the backbone name literally; keep it
# in sync with backbone.name if the backbone is changed.
wandb_group: 'stl10-vit_tiny_patch16_224-comparison'

# Backbone selection.
backbone:
  # Other model name noted by the author: convnextv2_pico
  name: "vit_tiny_patch16_224"
  # Either "imagenet" or null.
  pretrained_weights: null
  # Canonical lowercase boolean, consistent with `wandb: true` in this file.
  use_batch_norm: false
  # Empty mapping: no extra keyword arguments are set.
  kwargs: {}
# Dataset settings.
data:
  dataset_name: 'stl10'
  # presumably the number of data-loading worker processes — TODO confirm
  num_workers: 14


# Trainer settings.
# NOTE(review): the keys look like trainer constructor arguments — confirm
# against the code that consumes this section.
trainer:
  max_epochs: 201
  # presumably device indices; the list holds only index 0
  devices: [0]
  accelerator: 'gpu'
  # Quoted like the other string values; still the string bf16-mixed.
  precision: 'bf16-mixed'
  # sync_batchnorm: True
  # Alternate value noted by the author: 4
  accumulate_grad_batches: 1
  # Alternate value noted by the author: 1
  overfit_batches: 0.0

# Optimizer hyperparameters.
optimizer:
  type: "adamw"
  batch_size: 128
  # Floats are written with an explicit mantissa dot: YAML 1.1 loaders (e.g.
  # plain PyYAML) resolve dot-less forms such as `1e-3` as strings, not floats.
  lr: 1.0e-3
  weight_decay: 1.0e-6

# Set to false to disable wandb logging.
wandb: true

# Keyword arguments for the training method.
# NOTE(review): the consuming class is not visible from this file — confirm
# the meaning of each key against it.
method_kwargs:
  # presumably the number of global / local augmented crops per image — TODO confirm
  global_crops: 2
  local_crops: 6
  # NOTE(review): expand / collapse / sigma look like loss-related
  # coefficients; their exact roles cannot be determined here — confirm.
  expand: 1.0
  collapse: 1.0
  sigma: 1.0
  # presumably the projector's dimensionality — TODO confirm
  projector_dim: 1024

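# NOTE(review): the commented-out settings below do not match the active
# schema (e.g. the optimizer is keyed by `name` here but by `type` above) and
# look like leftovers from a different setup — confirm before re-enabling.
# optimizer: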
#   name: "lars"
#   classifier_lr: 0.1
#   weight_decay: 1e-6
#   kwargs:
#     clip_lr: True
#     eta: 0.02
#     exclude_bias_n_norm: True

# method_kwargs:
#   proj_hidden_dim: 4096
#   proj_output_dim: 256
#   pred_hidden_dim: 8192
# momentum:
#   base_tau: 0.99
#   final_tau: 1.0
# scheduler:
#   name: "warmup_cosine"
# checkpoint:
#   enabled: True
#   dir: "artifacts/pretrain"
#   frequency: 1

# Disable Hydra outputs: do not create the config-dump subdirectory, and use
# the current directory as the run directory instead of a per-run folder.
hydra:
  output_subdir: null
  run:
    dir: '.'