# Merge-related settings. The code that consumes these keys is not part of
# this file, so the notes below are reviewer assumptions, not guarantees.
# NOTE(review): presumably names the model part to decompose - confirm.
decompose: "encoder"
eval_batch_size: 256

# Sequence of regex patterns (one entry). Judging by the key name, parameters
# whose names match are left out - TODO confirm against the loader.
exclude_param_names_regex:
- classifier
# Plain (unquoted) string scalar.
merge_backbone: roberta-base
# Same numeric value as the `prior` key in this file (1.6).
# NOTE(review): unclear from here whether the two must stay in sync.
merge_coeff: 1.6
merge_method: cart
# Same numeric value as the `initial_rank_ratio` key in this file (0.04).
merge_rank_ratio: 0.04

# Task identifiers for this run; the names match GLUE benchmark task names.
# NOTE(review): whether the consumer depends on list order is not visible
# here - keep the order unless confirmed otherwise.
tasks:
- cola
- sst2
- mrpc
- qqp
- mnli
- qnli
- rte

# Remaining hyperparameters. NOTE(review): the `tta_` prefix presumably
# stands for test-time adaptation - confirm with the consuming code.
# Same numeric value as `merge_rank_ratio` in this file (0.04).
initial_rank_ratio: 0.04
# Same numeric value as `merge_coeff` in this file (1.6).
prior: 1.6
# Integer literal; write 10.0 instead if the consumer needs a float - verify.
mask_temp: 10
device: "cuda"
# Keep plain decimal notation: exponent forms such as 1e-4 are read as
# strings by some YAML 1.1 loaders.
lr: 0.0001
tta_batch_size: 8
# 500 divides tta_steps (5000) evenly, i.e. 10 intervals.
# NOTE(review): presumably an evaluation runs every 500 steps - confirm.
tta_eval_interval: 500
tta_steps: 5000
# Canonical lowercase booleans; keep them unquoted so they load as bools.
skip_first_eval: false
do_adaptive_merging: true