# identifier of this configuration
# NOTE(review): presumably selects the CLIP "concrete task arithmetic" method - confirm against the consumer
name: clip_concrete_task_arithmetic

# batch size per GPU
# if you have multiple GPUs, the total batch size will be `batch_size * num_gpus`
batch_size: 16
# NOTE(review): presumably the number of data-loading worker processes - confirm against the consumer
num_workers: 8

# NOTE(review): presumably the dtype used while merging, with null meaning
# "no override" - the accepted values are not visible here, confirm
merge_dtype: null
# optimizer name and its learning rate
optimizer: adam
# written with an explicit decimal point on purpose: YAML 1.1 loaders such as
# PyYAML resolve a bare `1e-3` as the string "1e-3" instead of the float 0.001
lr: 1.0e-3

# NOTE(review): presumably the coefficient applied to the merged task vectors
# (task arithmetic) - confirm against the consumer
scaling_factor: 0.3

# training schedule
# NOTE(review): `save_interval` is presumably counted in steps, like `max_steps` - confirm
max_steps: 2000
save_interval: 500
# NOTE(review): presumably the initial value of the mask logits and the
# temperature used when sampling the mask - confirm against the consumer
initial_logits: 0
temperature: 0.5

# "discrete" or "continuous"; this is the mask applied for evaluation, not during training
# the performance of the final model is expected to be similar with either choice
eval_mask_type: continuous

# NOTE(review): presumably a path to a previously saved mask to load, with
# null meaning that no mask is loaded - confirm against the consumer
mask_checkpoint: null
# if `clamp_weights` is true, the weights will be clamped to [0, 1]
clamp_weights: false

# arguments of `functional_call`
# NOTE(review): presumably forwarded to `torch.func.functional_call`, which
# accepts keyword arguments with these names - confirm against the consumer
tie_weights: true
strict: false

# directory to cache zero-shot classification heads
cache_dir: outputs
# NOTE(review): presumably skips the training phase when true - confirm against the consumer
skip_training: false
