# Top-level model settings: meta-architecture name, device, weights path and
# dtype.
# NOTE(review): this section and its keys are UPPER_CASE, while the other
# top-level sections in this file are lower-case.
MODEL:
  META_ARCHITECTURE: SSLMetaArch
  DEVICE: cuda
  # Empty string: no weights path is set in this file.
  WEIGHTS: ''
  # NOTE(review): float32 here, while `compute_precision.param_dtype` is bf16;
  # presumably the two settings apply to different things - confirm with the
  # code that consumes this config.
  DTYPE: float32
# Precision and sharding settings.
compute_precision:
  # bf16 for `param_dtype`, fp32 for `reduce_dtype`.
  # NOTE(review): what "reduce" covers (presumably gradient reduction) is not
  # visible from this file - confirm.
  param_dtype: bf16
  reduce_dtype: fp32
  # NOTE(review): the value matches the name of a PyTorch FSDP
  # `ShardingStrategy` member; presumably consumed by FSDP - confirm.
  sharding_strategy: SHARD_GRAD_OP
# Settings under the `dino` key: loss weight, head shape and `koleo_*`
# options.
dino:
  loss_weight: 1.0
  global_ignore_diagonal: true
  # Head settings. The prototypes / bottleneck / hidden values here (262144 /
  # 512 / 8192) are larger than the matching `ibot` head values in this file
  # (98304 / 384 / 4096); `head_nlayers` is 3 in both.
  head_n_prototypes: 262144
  head_bottleneck_dim: 512
  head_norm_last_layer: false
  head_nlayers: 3
  head_hidden_dim: 8192
  # `koleo_*` options: weight 0.1, with the distributed variant switched off.
  # NOTE(review): with `koleo_loss_distributed: false`, the two
  # `koleo_distributed_*` keys below are presumably unused - confirm.
  koleo_loss_weight: 0.1
  koleo_loss_distributed: false
  koleo_topk: 1
  koleo_distributed_replicas: 0
  koleo_distributed_loss_group_size: null
  force_weight_norm: false
# Settings under the `ibot` key: loss weight, masking options and head shape.
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  # Two-element list; per the key name, the order is [min, max].
  mask_ratio_min_max: [0.1, 0.5]
  mask_random_circular_shift: false
  force_masking_even_with_zero_weight: false
  # `separate_head` is true and this section carries its own head_* values.
  # NOTE(review): presumably this means the head is not shared with the
  # `dino` head - confirm.
  separate_head: true
  head_n_prototypes: 98304
  head_bottleneck_dim: 384
  head_norm_last_layer: false
  head_nlayers: 3
  head_hidden_dim: 4096
# Settings under the `gram` key; both flags are false in this config.
gram:
  use_loss: false
  compute_stats: false
# Training-run settings: batch size, data path, seeding, workers and a set of
# boolean feature switches.
train:
  batch_size_per_gpu: 16
  # Null: no dataset path is set in this file.
  # NOTE(review): presumably it must be supplied by an override - confirm.
  dataset_path: null
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  # NOTE(review): presumably the number of iterations counted as one epoch;
  # `optim.epochs` is also 1000 in this file - confirm the units.
  OFFICIAL_EPOCH_LENGTH: 1000
  monitor_gradient_norm: false
  # Empty list.
  chunk_schedule: []
  cache_dataset: true
  use_teacher_head: true
  learn_from_teacher_tokens: false
  centering: sinkhorn_knopp
  # Boolean flag. It is distinct from the top-level `checkpointing` mapping,
  # which holds period / max_to_keep / keep_every.
  # NOTE(review): presumably activation checkpointing - confirm.
  checkpointing: true
  checkpointing_full: false
  compile: true
  cudagraphs: false
  # `cell_augmentation` is false.
  # NOTE(review): `cell_augmentation_type` is then presumably unused - confirm.
  cell_augmentation: false
  cell_augmentation_type: hpa
  sharded_eval_checkpoint: true
# Student-network settings: architecture name, patching, regularization,
# feed-forward / norm choices, position-embedding options and fp8 switches.
student:
  arch: vit_7b
  patch_size: 16
  drop_path_rate: 0.4
  layerscale: 1.0e-05
  patch_drop: 0.0
  # Empty string: no pretrained-weights path is set in this file.
  pretrained_weights: ''
  ffn_layer: swiglu64
  ffn_ratio: 3
  # Empty string: no teacher checkpoint to resume from is set in this file.
  resume_from_teacher_chkpt: ''
  # Bias switches: off for qkv, on for proj and ffn.
  qkv_bias: false
  proj_bias: true
  ffn_bias: true
  norm_layer: layernormbf16
  n_storage_tokens: 4
  untie_cls_and_patch_norms: false
  untie_global_and_local_cls_norm: true
  mask_k_bias: true
  # Same value as `teacher.in_chans`.
  in_chans: 3
  # Position-embedding options. `pos_embed_rope_base` is set while the
  # min/max period keys are null.
  # NOTE(review): presumably base and min/max period are alternative ways to
  # specify the frequencies - confirm.
  pos_embed_type: rope
  pos_embed_rope_base: 100
  pos_embed_rope_min_period: null
  pos_embed_rope_max_period: null
  pos_embed_rope_normalize_coords: separate
  # Shift and jitter are null; rescale is set to 2.
  pos_embed_rope_shift_coords: null
  pos_embed_rope_jitter_coords: null
  pos_embed_rope_rescale_coords: 2
  pos_embed_rope_dtype: fp32
  # NOTE(review): `fp8_filter` presumably selects which modules use fp8 by
  # name - confirm.
  fp8_enabled: true
  fp8_filter: blocks
# Teacher settings. Every momentum / temperature key is null here; the
# top-level `schedules` section in this file has `teacher_temp` and `momentum`
# entries with concrete values.
# NOTE(review): presumably `schedules` supersedes these nulls - confirm.
teacher:
  momentum_teacher: null
  final_momentum_teacher: null
  warmup_teacher_temp: null
  teacher_temp: null
  warmup_teacher_temp_epochs: null
  # Same value as `student.in_chans`.
  in_chans: 3
# Settings under the `distillation` key: disabled in this config, and both
# path values are empty strings.
distillation:
  enabled: false
  full_cfg_path: ''
  checkpoint_path: ''
# Settings under the `multidistillation` key: disabled in this config.
multidistillation:
  enabled: false
# Settings under the `hrft` key: disabled in this config, with an empty
# checkpoint path.
hrft:
  enabled: false
  checkpoint_path: ''
# Optimizer settings. Several keys are null here (weight decay, lr, warmup,
# min_lr, schedule truncation, last-layer freeze); the top-level `schedules`
# section in this file has `lr` and `weight_decay` entries with concrete
# values.
# NOTE(review): presumably `schedules` supersedes these nulls - confirm.
optim:
  epochs: 1000
  optimizer: adamw
  weight_decay: null
  weight_decay_end: null
  lr: null
  warmup_epochs: null
  min_lr: null
  schedule_trunc_extra: null
  clip_grad: 30.0
  # Null here; `schedules.lr.freeze_last_layer_epochs` is 5 in this file.
  freeze_last_layer_epochs: null
  # NOTE(review): the name suggests the learning rate is scaled with the
  # square root of (batch size / 1024) - confirm against the implementation.
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  dino_head_wd_multiplier: 1.0
  layerwise_decay: 0.98
  multi_tensor_optim: true
  # Empty string: no dump path is set in this file.
  dump_fsdp_weights_path: ''
  adamw_beta1: 0.9
  adamw_beta2: 0.99
# Crop settings: scale ranges and sizes for global and local crops,
# augmentation switches, and RGB normalization constants.
crops:
  # Two-element scale lists. The lower bound of the global list (0.32)
  # equals the upper bound of the local list.
  global_crops_scale: [0.32, 1.0]
  local_crops_number: 8
  local_crops_scale: [0.05, 0.32]
  # Crop sizes: 256 for global crops, 112 for local crops.
  global_crops_size: 256
  local_crops_size: 112
  localcrops_subset_of_globalcrops: false
  share_color_jitter: false
  horizontal_flips: false
  # Three values each.
  # NOTE(review): presumably one per RGB channel, matching `in_chans: 3`
  # elsewhere in this file - confirm.
  rgb_mean: [0.485, 0.456, 0.406]
  rgb_std: [0.229, 0.224, 0.225]
# Checkpoint-retention settings. This top-level mapping is distinct from the
# boolean `train.checkpointing` flag.
# NOTE(review): the units of `period` and `keep_every` are not stated in this
# file (presumably iterations) - confirm.
checkpointing:
  period: 1000
  max_to_keep: 3
  keep_every: 50000
# Per-quantity schedules. Each entry gives `start`, `peak` and `end` values
# plus `warmup_epochs`.
# NOTE(review): the interpolation shape between these points is not visible
# from this file - confirm against the scheduler implementation.
schedules:
  # Learning rate: starts at 0, peaks at 5.0e-05, and `end` equals `peak`.
  lr:
    start: 0
    peak: 5.0e-05
    end: 5.0e-05
    warmup_epochs: 100
    # Set here; `optim.freeze_last_layer_epochs` is null in this file.
    freeze_last_layer_epochs: 5
  # Weight decay: start == peak == end (0.04) with zero warmup epochs.
  weight_decay:
    start: 0.04
    peak: 0.04
    end: 0.04
    warmup_epochs: 0
  # Teacher temperature: 0.04 at start, 0.07 at peak and end, with
  # `warmup_epochs` of 100.
  teacher_temp:
    start: 0.04
    peak: 0.07
    end: 0.07
    warmup_epochs: 100
  # Momentum: start == peak == end (0.994) with zero warmup epochs.
  momentum:
    start: 0.994
    peak: 0.994
    end: 0.994
    warmup_epochs: 0
