# Hydra-style config for the GPT-2 layer-wise AdaMerging method.
# `_target_` is the dotted import path of the class this config describes.
_target_: fusion_bench.method.adamerging.gpt2_layer_wise_adamerging.GPT2LayerWiseAdaMergingAlgorithm
# With Hydra's `instantiate`, `_recursive_: false` keeps nested `_target_` nodes
# (here: `optimizer`) as plain config instead of instantiating them eagerly.
_recursive_: false

# Optimizer spec. No `params` are given in this node, so it cannot be built
# from the config alone; presumably the algorithm instantiates it once the
# trainable merging weights exist -- TODO confirm against the class.
optimizer:
  _target_: torch.optim.Adam
  # NOTE(review): `1e-4` (no decimal point) is read as a float by OmegaConf,
  # but plain YAML 1.1 loaders such as PyYAML read it as the string "1e-4".
  # Keep loading this file through Hydra/OmegaConf.
  lr: 1e-4

# Keyword arguments for data loading; presumably forwarded to
# `torch.utils.data.DataLoader` -- TODO confirm. If so, `num_workers: 0`
# means batches are loaded in the main process.
dataloader_kwargs:
  batch_size: 16
  num_workers: 0

# presumably the initial value assigned to every merging weight -- TODO confirm
init_values: 0.3
# presumably the number of optimization steps used to train the merging
# weights -- TODO confirm
max_steps: 1000
# if `merging_weights_load_path` is specified, the merging weights are loaded
# from that file and the training process is skipped
merging_weights_load_path: null
# presumably the file the merging weights are written to; `null` presumably
# disables saving -- TODO confirm
merging_weights_save_path: null

# Behaviour switches read by the algorithm class; their exact semantics are
# defined there, not in this file.
# NOTE(review): accepted values for `variant` are not visible here -- confirm.
variant: null
# presumably restricts the merging weights to a fixed range when true -- confirm
clamp_weights: false
# presumably shares (ties) weights between related parameters -- confirm
tie_weights: true
# presumably toggles strict key/parameter matching -- confirm
strict: false

# Relative path; presumably resolved against the process working directory
# and used for cached intermediate artifacts -- TODO confirm.
cache_dir: "outputs/cache"
