# Model selection: architecture identifier plus the model variant to use.
model:
  arch: dinov2_feature_extractor
  # NOTE(review): presumably the ViT-g/14 DINOv2 variant — confirm against
  # the model registry that consumes this key.
  model_type: vitg14

# Dataset section: one entry (flickr30k); only an eval-time visual
# processor is configured for it in this file.
datasets:
  flickr30k:
    vis_processor:
      eval:
        name: 'dinov2_image_eval'

# Run settings for the `feature_extraction` task.
run:
  task: feature_extraction

  # dataloading
  num_workers: 8
  batch_size_train: 128
  batch_size_eval: 128

  # evaluation: enabled, over the splits listed in test_splits
  evaluate: true
  test_splits: ["test"]

  # misc
  seed: 42
  output_dir: "output/feature_extraction"  # not used

  # distribution (world_size 1 with distributed disabled)
  device: "cuda"
  world_size: 1
  distributed: false
