
# Hydra defaults list. `_self_` is listed first, so the keys defined in this
# file are composed before the `router` and `aggregator` group selections.
defaults:
  - _self_
  # Router config group option; choices noted by the author: linear, nn, proj.
  - router: linear
  - aggregator: tsvm

# Class instantiated from this config (Hydra `_target_` convention).
_target_: mass.pl_module.router_task_classifier.RouterTaskClassifier

# Batch keys and the dataset list are interpolated from the composed config.
x_key: ${conventions.x_key}
y_key: ${conventions.y_key}
dataset_names: ${eval_datasets}

# Options: isotropic, tsvm, zeroshot.
base_merging_method: tsvm
# Options: avg, max.
heads_selection_method: max

# Mandatory values. `???` is OmegaConf's missing-value marker, so forgetting to
# supply these raises an error instead of passing a placeholder string through.
image_encoder: ???
classification_heads: ???

oracle_mode: false
# NOTE(review): hard-coded device string; presumably the device used for task
# vectors -- confirm against the module and consider making it overridable.
tv_device: cuda


# Optimizer config; `_target_` names the torch optimizer class to build.
optimizer:
  _target_: torch.optim.AdamW
  # Written with an explicit mantissa so that YAML 1.1 loaders (e.g. plain
  # PyYAML) also read it as a float rather than the string "1e-5".
  lr: 1.0e-5
  weight_decay: 0.1
  # Alternative SGD settings, kept for reference:
  # _target_: torch.optim.SGD
  # lr: 0.1
  # momentum: 0.9
  # weight_decay: 1.0e-4

# Learning-rate scheduler config; `_target_` names the torch scheduler class.
lr_scheduler:
  _target_: torch.optim.lr_scheduler.CosineAnnealingLR
  # NOTE(review): T_max is hard-coded; presumably it should match the number
  # of scheduler steps in a training run -- confirm against the trainer config.
  T_max: 10
