# Dotted import path of the class this config builds (Hydra-style `_target_` key).
_target_: fusion_bench.method.concrete_subspace.clip_concrete_tsvm.ConcreteTSVMForOpenCLIP
# === Concrete Subspace parameters ===
# Data loader settings.
# NOTE(review): presumably forwarded as keyword arguments to the data loader — confirm.
# `batch_size` is the batch size per GPU:
# if you have multiple GPUs, the total batch size will be `batch_size * num_gpus`.
dataloader_kwargs:
  batch_size: 16
  num_workers: 8
# Optimizer settings: `_target_` names the optimizer class; the remaining keys
# are presumably passed to it as constructor arguments — confirm in the method class.
optimizer:
  _target_: torch.optim.AdamW
  # Written with an explicit mantissa dot so that YAML 1.1 loaders (e.g. plain
  # PyYAML) parse it as a float; a dotless `1e-3` is read as a string there.
  lr: 1.0e-3
  weight_decay: 0.01
  # null is loaded as None.
  fused: null
# null: no learning-rate scheduler is configured here.
lr_scheduler: null
# NOTE(review): presumably the dtype used while merging; null likely keeps the
# models' existing dtype — confirm.
merge_dtype: null
# NOTE(review): presumably the number of training steps and the checkpoint-saving
# interval, both counted in steps — confirm.
max_steps: 2000
save_interval: 500
# NOTE(review): presumably the initial value of the mask logits and the
# temperature used when sampling the mask — confirm.
initial_logits: 0
temperature: 0.5
# "discrete" or "continuous"; this is the mask applied for evaluation, not during training.
# The performance of the final model is expected to be similar.
eval_mask_type: continuous
# NOTE(review): presumably a path to a previously saved mask; null means none — confirm.
mask_checkpoint: null
# if `clamp_weights` is true, the weights will be clamped to [0, 1]
clamp_weights: false
# arguments of `functional_call`
tie_weights: true
strict: false
# directory to cache zero-shot classification heads
cache_dir: outputs
# NOTE(review): presumably skips the training phase when true — confirm.
skip_training: false
# === TSVM parameters ===
# NOTE(review): presumably parameter keys to leave out of the TSVM merge;
# null means nothing is excluded — confirm.
exclude_keys: null
# alpha (also known as the scaling factor) is a float or a list of floats
# examples:
#   alpha: 1
#   alpha: [1, 0.5, 0.25]
alpha: 1
# NOTE(review): presumably also returns the individual per-task models when
# true — confirm.
return_single_task_models: false
