# Run header: this file configures the `stage3` stage of the run named below.
# NOTE(review): `wandb: v4` presumably selects a Weights & Biases profile/config
# by name - confirm against the config loader.
wandb: v4
stage_name: stage3
name: twob14-mae
# Processors applied to this config. NOTE(review): `extractors: from_processor`
# in the trainer's k-NN callback looks like a placeholder that this processor
# fills in - confirm.
processors:
  - kind: stage3_processor
# Shared variables, referenced elsewhere in this file as ${vars.<name>}.
vars:
  # Run identifier handed to every previous_run_initializer in the model
  # section (stage_id: ${vars.stage_id}).
  stage_id: rke3pnaf
  # Read by trainer.max_epochs and trainer.effective_batch_size respectively.
  max_epochs: 20
  batch_size: 512
  # Optimizer template instantiated by the encoder and by each head through
  # `template: ${vars.optim}`.
  optim:
    kind: adamw
    lr: 4.0e-4
    betas: [ 0.9, 0.95 ]
    # Placeholder value: every user of this template in this file overrides it
    # via `template.weight_decay`.
    weight_decay: TEMPLATE
    schedule:
      # Schedule definition is pulled from an external yaml (schedules/wupcos_epoch).
      template: ${yaml:schedules/wupcos_epoch}
      # Overrides the schedule template's `end_epoch` variable.
      # NOTE(review): presumably the epoch at which warmup ends - confirm in
      # schedules/wupcos_epoch.
      template.vars.end_epoch: 4

# Dataset definitions. Each entry instantiates a template from
# datasets/imagenet/ and overrides that template's variables.
datasets:
  # Training data for the trainer.
  # NOTE(review): template name suggests BYOL-style augmentation with extra
  # local views at resolution 98 - confirm in the template file.
  train:
    template: ${yaml:datasets/imagenet/train_byolaug_localviews}
    template.vars.version: imagenet1k
    template.vars.local_resolution: 98
  # Referenced as `train_dataset_key` by the offline k-NN metrics callback.
  train_noaug_10p:
    template: ${yaml:datasets/imagenet/train_noaug}
    template.vars.version: imagenet1k_10percent_simclrv2
  # Referenced as `test_dataset_key` by the offline k-NN metrics callback.
  test:
    template: ${yaml:datasets/imagenet/test_noaug}
    template.vars.version: imagenet1k

# Model definition: a contrastive model made of one encoder and several heads.
model:
  kind: contrastive_model
  encoder:
    # The architecture keys below are intentionally left commented out.
    # NOTE(review): presumably kind/kwargs are restored from the stage2
    # checkpoint because the initializer sets `use_checkpoint_kwargs: true` -
    # confirm before re-enabling or deleting these lines.
#    kind: vit.vit
#    patch_size: 16
#    kwargs: ${select:large:${yaml:models/vit}}
    # Encoder optimizer: the shared ${vars.optim} template with weight decay
    # set to 0.05 and a layerwise lr decay modifier (decay 0.65) added.
    optim:
      template: ${vars.optim}
      template.weight_decay: 0.05
      template.param_group_modifiers:
        - kind: layerwise_lr_decay_modifier
          decay: 0.65
    # NOTE(review): it is not visible here whether `end_percent` refers to a
    # fraction of the ViT blocks or a fraction of training - confirm in
    # vit_block_freezer.
    freezers:
      - kind: vit_block_freezer
        end_percent: 0.25
    # Initialize from the `last` checkpoint of `contrastive_model.encoder`
    # written by stage2 of run ${vars.stage_id}.
    initializers:
      - kind: previous_run_initializer
        stage_id: ${vars.stage_id}
        stage_name: stage2
        model_name: contrastive_model.encoder
        checkpoint: last
        use_checkpoint_kwargs: true
  # Heads of the contrastive model. Each head uses the shared ${vars.optim}
  # template with weight decay 1.0e-5 and is initialized from a stage2 head.
  heads:
    # Head `main`: loaded from the stage2 head `nnclr_temp035_cls` (note: no
    # numeric suffix in the stage2 name).
    main:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      # NOTE(review): presumably the number of nearest neighbours taken from
      # the head's queue - confirm in the head implementation.
      queue_kwargs:
        topk: 20
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls
          checkpoint: last
          use_checkpoint_kwargs: true
    # Head `cls22`: loaded from the stage2 head `nnclr_temp035_cls22`.
    cls22:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      queue_kwargs:
        topk: 20
      # Shared optimizer template with weight decay overridden to 1.0e-5.
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls22
          checkpoint: last
          use_checkpoint_kwargs: true
    # Head `cls21`: loaded from the stage2 head `nnclr_temp035_cls21`.
    cls21:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      queue_kwargs:
        topk: 20
      # Shared optimizer template with weight decay overridden to 1.0e-5.
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls21
          checkpoint: last
          use_checkpoint_kwargs: true
    # Head `cls20`: loaded from the stage2 head `nnclr_temp035_cls20`.
    cls20:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      queue_kwargs:
        topk: 20
      # Shared optimizer template with weight decay overridden to 1.0e-5.
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls20
          checkpoint: last
          use_checkpoint_kwargs: true
    # Head `cls19`: loaded from the stage2 head `nnclr_temp035_cls19`.
    cls19:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      queue_kwargs:
        topk: 20
      # Shared optimizer template with weight decay overridden to 1.0e-5.
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls19
          checkpoint: last
          use_checkpoint_kwargs: true
    # Head `cls18`: loaded from the stage2 head `nnclr_temp035_cls18`.
    cls18:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      queue_kwargs:
        topk: 20
      # Shared optimizer template with weight decay overridden to 1.0e-5.
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls18
          checkpoint: last
          use_checkpoint_kwargs: true
    # Head `cls17`: loaded from the stage2 head `nnclr_temp035_cls17`.
    cls17:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      queue_kwargs:
        topk: 20
      # Shared optimizer template with weight decay overridden to 1.0e-5.
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls17
          checkpoint: last
          use_checkpoint_kwargs: true
    # Head `cls16`: loaded from the stage2 head `nnclr_temp035_cls16`.
    cls16:
      # Architecture keys left commented out.
      # NOTE(review): presumably restored from the checkpoint via
      # `use_checkpoint_kwargs: true` - confirm.
#      kind: ssl.nnclr_head
#      kwargs: ${select:base:${yaml:models/nnclr}}
#      pooling:
#        kind: class_token
      queue_kwargs:
        topk: 20
      # Shared optimizer template with weight decay overridden to 1.0e-5.
      optim:
        template: ${vars.optim}
        template.weight_decay: 1.0e-5
      # Initialize from the `last` stage2 checkpoint of run ${vars.stage_id}.
      initializers:
        - kind: previous_run_initializer
          stage_id: ${vars.stage_id}
          stage_name: stage2
          model_name: contrastive_model.heads.nnclr_temp035_cls16
          checkpoint: last
          use_checkpoint_kwargs: true
# Trainer configuration. Epoch count and batch size come from the `vars`
# section so they are defined in one place.
trainer:
  kind: contrastive_trainer
  precision: bfloat16
  max_epochs: ${vars.max_epochs}
  effective_batch_size: ${vars.batch_size}
  log_every_n_epochs: 1
  callbacks:
    # Store models: a checkpoint callback and an EMA callback on `encoder`
    # (target factor 0.9999), both triggered every 5 epochs.
    - kind: checkpoint_callback
      every_n_epochs: 5
    - kind: ema_callback
      every_n_epochs: 5
      model_paths:
        - encoder
      target_factors:
        - 0.9999
    # Callbacks for resuming: triggered every epoch with the regular
    # save_weights/save_optim outputs disabled and only the save_latest_*
    # outputs enabled.
    # NOTE(review): presumably a single "latest" snapshot is overwritten each
    # epoch - confirm in the callback implementation.
    - kind: checkpoint_callback
      every_n_epochs: 1
      save_weights: false
      save_optim: false
      save_latest_weights: true
      save_latest_optim: true
    - kind: ema_callback
      every_n_epochs: 1
      save_weights: false
      save_latest_weights: true
      model_paths:
        - encoder
      target_factors:
        - 0.9999
    # Offline k-NN metrics every epoch, using the `train_noaug_10p` dataset as
    # the train set and the `test` dataset as the test set.
    # NOTE(review): `extractors: from_processor` looks like a placeholder
    # resolved by the config processor - confirm.
    - kind: offline_knn_metrics_callback
      every_n_epochs: 1
      train_dataset_key: train_noaug_10p
      test_dataset_key: test
      extractors: from_processor