# Run identification and shared variables for this probe configuration.
# NOTE(review): `wandb: v4` presumably names a W&B setup defined elsewhere;
# confirm against the config loader.
wandb: v4
stage_name: probe-dtd
name: ibot-large16
# Values referenced from other stanzas through ${vars.<key>} interpolation.
vars:
  # Referenced by trainer.max_epochs.
  epochs: 50
  # Referenced as the first entry of processors[0].poolings.
  pooling: class_token

# Config processors: a single `probe_processor` entry set up for linear
# probes over two poolings.
# NOTE(review): presumably this processor generates the probe heads that
# `model.heads: from_processor` refers to, using the `dinov2` hyperparameter
# grid; confirm against the processor implementation.
processors:
  - kind: probe_processor
    grid: dinov2
    probe_kind: linear_probe
    # The first pooling comes from vars.pooling; the second is a fixed literal.
    poolings:
      - ${vars.pooling}
      - concat_class_average

# Dataset definitions: both entries use the `dtd` dataset kind and differ in
# split and in the transform list applied through `x_transform_wrapper`.
datasets:
  # Train split: random resized crop to 224, random horizontal flip, then the
  # `kd_imagenet_norm` step.
  train:
    kind: dtd
    split: train
    sample_wrappers:
      - kind: x_transform_wrapper
        transform:
          - kind: kd_random_resized_crop
            size: 224
            # Two-element range passed to the crop transform.
            scale: [0.08, 1.0]
            interpolation: bicubic
          - kind: kd_random_horizontal_flip
          - kind: kd_imagenet_norm
  # Test split: resize to 256, center crop to 224, then the same
  # `kd_imagenet_norm` step; no random transforms are listed here.
  test:
    kind: dtd
    split: test
    sample_wrappers:
      - kind: x_transform_wrapper
        transform:
          - kind: kd_resize
            size: 256
            interpolation: bicubic
          - kind: center_crop
            size: 224
          - kind: kd_imagenet_norm

# Probe model: an encoder initialised from a pretrained weights file, with
# heads taken from the processor stanza (`from_processor`).
model:
  kind: probe_model
  encoder:
    # Explicit architecture keys are left disabled below.
    # NOTE(review): with `use_checkpoint_kwargs: true` the encoder kwargs
    # presumably come from the checkpoint itself; confirm before re-enabling.
    # kind: vit.vit
    # patch_size: 16
    # kwargs: ${select:large:${yaml:models/vit}}
    initializers:
      - kind: pretrained_initializer
        weights_file: ibot_large16_rand.pth
        use_checkpoint_kwargs: true
  heads: from_processor

# Trainer settings: classification trainer in bfloat16 with an effective
# batch size of 256; the epoch count is taken from vars.epochs.
trainer:
  kind: classification_trainer
  precision: bfloat16
  effective_batch_size: 256
  max_epochs: ${vars.epochs}
  log_every_n_epochs: 1
  callbacks:
    # Top-1 accuracy on the `test` dataset, scheduled every epoch.
    # NOTE(review): `to_cpu: true` presumably moves the collected values to
    # CPU before the metric is computed; confirm against the callback.
    - kind: offline_accuracy_callback
      every_n_epochs: 1
      topk: [1]
      dataset_key: test
      to_cpu: true