name: ??? # this can be clip_weight_ensembling_moe
# the path for loading the model weights, if specified, skip the test-time adaptation training
checkpoint: False
# the path for saving the model weights.
save_checkpoint: False
router_hidden_layers: 2
init_lambda: 0.3
batch_reduce: true
# learning rate
lr: 1e-4
optimizer: adam
# this is overrided by `fabric.devices` if launched from the `fusion_bench` CLI.
devices: 1
batch_size: 16
num_workers: 16
max_steps: 1000
# if true, we will use the gradient accumulation across tasks to save memory
use_grad_accumulate: true
cache_dir: outputs
fast_dev_run: ${fast_dev_run}
