# Random seed for the run.
# NOTE(review): presumably seeds all RNGs for reproducibility — confirm in the training script.
seed: 42

# Run name. Previously used name, kept for reference:
#   "vicregl-ade-convnextv2_tiny_adam_lr0.1_no-grad-accumulation"
name: "DebuggingCommonTraining"
# Root directory (relative path) under which artifacts are stored.
artifacts_root: "artifacts/pretrain_lightly/vicregl"

# Backbone network selection.
backbone:
  # Architecture identifier.
  name: "convnextv2_tiny"
  # Accepted values: "imagenet" or null.
  # NOTE(review): null presumably means no pretrained weights are loaded — confirm.
  pretrained_weights: null
  # Extra keyword arguments for the backbone.
  # NOTE(review): presumably forwarded to the backbone constructor — confirm.
  kwargs:
    # The empty string is important for VICRegL.
    # NOTE(review): presumably disables global pooling so that spatial
    # feature maps are kept for the local loss — confirm against the model code.
    global_pool: ""
# Dataset and data-loading settings.
data:
  # Dataset identifier.
  dataset_name: "stl10"
  # NOTE(review): presumably the number of data-loader worker processes — confirm.
  num_workers: 16
  # NOTE(review): presumably the side length in pixels of the square input image — confirm.
  input_size: 224

# Trainer settings.
# NOTE(review): the keys match PyTorch Lightning `Trainer` arguments and are
# presumably forwarded to it — confirm in the training script.
trainer:
  max_epochs: 801
  # Hardware selection: GPU accelerator, device index 0 only.
  accelerator: "gpu"
  devices: [0]
  precision: "bf16-mixed"
  # sync_batchnorm: True
  # Alternative value: 4.
  accumulate_grad_batches: 1
  # Alternative value: 1.
  # NOTE(review): presumably 0.0 keeps overfit-debugging off and 1 overfits
  # on a single batch — confirm against the Trainer documentation.
  overfit_batches: 0.0

# Optimizer hyperparameters.
optimizer:
  # NOTE(review): unclear from this file whether this is per-device or global — confirm.
  batch_size: 64
  # Learning rate.
  lr: 0.001
  # Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
  # PyYAML resolve a bare `1e-6` as the string "1e-6", not as a float.
  weight_decay: 1.0e-6

# Enable Weights & Biases (wandb) logging; set to false to disable it.
wandb: true

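# Disabled alternative optimizer configuration (LARS), kept for reference.
# NOTE: a top-level `optimizer` key is already defined above. If this block is
# re-enabled, merge it into (or replace) that block rather than adding a
# second, duplicate `optimizer` key.
# optimizer:
#   name: "lars"
#   classifier_lr: 0.1
#   weight_decay: 1e-6
#   kwargs:
#     clip_lr: True
#     eta: 0.02
#     exclude_bias_n_norm: True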

# Disabled settings, kept for reference (method, momentum, scheduler, checkpoint).
# method_kwargs:
#   proj_hidden_dim: 4096
#   proj_output_dim: 256
#   pred_hidden_dim: 8192
# momentum:
#   base_tau: 0.99
#   final_tau: 1.0
# scheduler:
#   name: "warmup_cosine"
# checkpoint:
#   enabled: True
#   dir: "artifacts/pretrain"
#   frequency: 1

# Disable Hydra's own output artifacts:
#   - output_subdir: null -> do not create the `.hydra` config-dump subdirectory
#   - run.dir: .          -> use the current directory as the run directory
#                            instead of a per-run output directory
hydra:
  output_subdir: null
  run:
    dir: .