# Class that this config node is instantiated into (Hydra `_target_` key).
_target_: mass.pl_module.we_moe.WeightEnsemblingMoEAlgorithm

# Nested object built from TaskArithmeticMerger and handed to the algorithm.
merger:
  _target_: mass.merger.arithmetic_merger.TaskArithmeticMerger
  optimal_alpha:  # lookup table: integer key -> alpha coefficient
    # NOTE(review): the keys look like benchmark sizes (number of tasks);
    # confirm against TaskArithmeticMerger before adding new entries.
    8: 0.30
    14: 0.20
    20: 0.10

# Dataset list, interpolated from the top-level `eval_datasets` key.
dataset_names: ${eval_datasets}

# Mandatory values with no default: they must be supplied by an override or by
# the calling code. OmegaConf's missing-value marker is exactly three question
# marks; a two-character marker would be loaded as an ordinary string and an
# unset value would go unnoticed until it is used as a model.
zeroshot_model: ???
finetuned_models: ???

# Checkpoint file: `WeMoE.pt` inside the directory interpolated from
# `misc.ckpt_path`.
save_checkpoint_path: ${misc.ckpt_path}/WeMoE.pt

# NOTE(review): presumably `checkpoint` selects loading an existing checkpoint
# and `save_checkpoint` selects writing one to `save_checkpoint_path`; both
# are off by default. Confirm against WeightEnsemblingMoEAlgorithm.
checkpoint: false
save_checkpoint: false
# NOTE(review): presumably the number of hidden layers in the MoE router;
# confirm against the target class.
router_hidden_layers: 2

# Numeric and boolean settings for the algorithm. Their exact semantics are
# defined by the target class, which is not visible from this file.
# NOTE(review): `init_lambda` is presumably the initial merging coefficient
# and `max_steps` the number of optimisation steps; confirm.
init_lambda: 0.3
max_steps: 1000
batch_size: 16
batch_reduce: true
use_grad_accumulate: true
# No model path is set by default.
model_path: null

# Values interpolated from other nodes of the composed config (`nn.encoder`
# and `misc`); they are not defined in this file.
encoder_name: ${nn.encoder.model_name}
ckpt_path: ${misc.ckpt_path}
openclip_cachedir: ${misc.openclip_cachedir}


# Optimizer factory for torch.optim.AdamW. With `_partial_: true` Hydra
# returns a partially-applied constructor instead of an optimizer instance,
# so the remaining arguments can be supplied later by the caller.
optimizer:
  _target_: torch.optim.AdamW
  # Written with an explicit mantissa dot so that every YAML loader reads a
  # float; without the dot, YAML 1.1 loaders read the value as a string.
  lr: 1.0e-4
  _partial_: true  # This prevents immediate instantiation



