version: 2

# Training data sources. Exactly one entry is active (the Flickr30k pairs
# file); the commented-out entries are other dataset paths kept for reference.
train_file:
  # - "/home/storage/classwise_collection/coco/{00000..00100}.tar"  # correct this
  # - "/notebooks/data/cc3m/val/{00000..00001}.tar"
  - '/notebooks/data/flickr30k/flickr30k-train-pairs.json'
  # - '/notebooks/data/coco_wds/mscoco/{00000..00059}.tar'

# Alternative evaluation set (Flickr30k), currently disabled.
# NOTE(review): these file names start with "flickr30-" while the training
# file above is "flickr30k-train-pairs.json" — confirm the spelling on disk
# before re-enabling.
# val_file: '/notebooks/data/flickr30k/flickr30-test-pairs.json'
# test_file: '/notebooks/data/flickr30k/flickr30-test-pairs.json'
# image_root: '/notebooks/data/flickr30k/flickr30k_images/flickr30k_images/'

# Active evaluation set (COCO val2017). val_file and test_file deliberately
# or incidentally point at the same annotation file.
val_file: '/notebooks/data/coco/coco_val.json'
test_file: '/notebooks/data/coco/coco_val.json'
image_root: '/notebooks/data/coco/val2017/'
# Presumably the output location for computed similarity scores — confirm
# against the code that reads this key.
save_sims: './storage/10/albef-sims'

# Input resolution, embedding size, batch sizes and loss-related scalars.
# NOTE(review): the exact meaning of temp, k_test and alpha is defined by the
# model/evaluation code, which is not part of this file — confirm there.
image_res: 256 # 384 was left here by the author, presumably as an alternative resolution
embed_dim: 256
temp: 0.07
batch_size_train: 32
batch_size_test: 64
k_test: 128

alpha: 0.4

# Optimizer settings.
# Exponent-notation floats are written with an explicit decimal point
# ("1.0e-5", not "1e-5"): YAML 1.1 loaders such as PyYAML only resolve a
# scalar as a float when the mantissa contains a ".", and would otherwise
# load "1e-5" as a string. The form used here is a float under both YAML 1.1
# and YAML 1.2 loaders.
optimizer:
  opt: adamW
  lr: 1.0e-5
  weight_decay: 0.02

# Learning-rate schedule settings.
# NOTE(review): the key is spelled "schedular"; it is kept unchanged because
# whatever code reads this file looks it up under that name.
schedular:
  sched: cosine
  lr: 1.0e-5
  epochs: 10
  min_lr: 1.0e-6
  decay_rate: 1
  warmup_lr: 1.0e-5
  warmup_epochs: 1
  cooldown_epochs: 0

# Encoder selection for the text and vision towers.
# NOTE(review): "base" presumably names a model-size variant — confirm
# against the model code.
text_encoder: base
vision_encoder: base
adapter_append: false

model_config:
  # Dotted path identifying the model class; presumably imported dynamically
  # by the loader — verify against the code that consumes this key.
  import_path: models.clip_adjustable.CLIP

# Training-time settings. They do not affect evaluation, but the keys must
# still be present in this file because the code currently requires them.
pretrained_text: true
pretrained_vision: true
freeze_text_encoder: false
freeze_vision_encoder: false
unlock_layernorm: false
limit_num_samples: false
add_adapter: false
freeze_proj: false
unlock_dense: false
unlock_attn: false
unlock_random: false
bitfit: false
# Both lists are empty here, i.e. no entries are listed for either encoder.
always_freeze:
  visual_encoder: []
  text_encoder: []

# The conventional_adapter settings below must match the configuration the
# model was trained with; otherwise the evaluation results will not be
# meaningful.

conventional_adapter:
  insert: true
  reduction_factor: 4

# Disabled option, kept for reference:
# add_cross_adapter: true