# Model backbone identifier and the input image resolution used with it.
# NOTE(review): the resolution presumably has to agree with the "@336px"
# suffix of the backbone name - confirm against the model loader.
backbone: "CLIP-ViT-L/14@336px"
resolution: 336

# Output location and logging cadence.
# output_dir is left null here; presumably the consumer fills it in or
# derives it at run time - TODO confirm.
output_dir: null
# NOTE(review): the unit of print_freq (iterations vs. batches) is not
# visible in this file - confirm with the training loop.
print_freq: 10

# Reproducibility and runtime settings.
seed: 0
# Lowercase `true` is the canonical YAML boolean; capitalised `True` is
# flagged by yamllint's `truthy` rule and is not accepted as a boolean
# by every schema.
deterministic: true
# NOTE(review): presumably a device index - confirm with the launcher.
gpu: 0
num_workers: 8
# NOTE(review): "amp" presumably selects automatic mixed precision -
# confirm against the trainer.
prec: "amp"

# Optimisation schedule and objective.
num_epochs: 10
batch_size: 128
# NOTE(review): equal to batch_size here; presumably a smaller value
# would split each batch into several chunks - confirm with the trainer.
micro_batch_size: 128
lr: 0.01
# Written with an explicit decimal point: YAML 1.1 resolvers (e.g. PyYAML)
# only recognise exponent notation as a float when the mantissa contains
# a ".", so a bare `5e-4` would be loaded as the string "5e-4".
weight_decay: 5.0e-4
momentum: 0.9
# NOTE(review): the meaning of the "CC" loss code is not visible in this
# file - see the loss factory in the consuming project.
loss_type: "CC"
classifier: "CosineClassifier"

# Classifier-head initialisation and evaluation-time options.
# NOTE(review): "text_feat" presumably initialises the head from text
# features - confirm against the consumer.
init_head: "text_feat"
# Lowercase `false` is the canonical YAML boolean; capitalised `False` is
# flagged by yamllint's `truthy` rule.
# NOTE(review): the semantics of `tte` and `expand` are not visible in
# this file - confirm against the consuming code before changing them.
tte: false
expand: 21
