# dotted path of the class this config is written for
# NOTE(review): presumably the remaining keys are passed to this class as arguments — confirm
_target_: fusion_bench.method.adamerging.llama_adamerging.LayerWiseAdaMergingForLlamaSFT
seed: 0
output_dir: null
# path used to initialize the merging weights
# these weights can be a list of floats, or a string that points to a *.np or *.pt file containing the weights
# if weights are specified, the test-time adaptation training is skipped
init_weights_path: null
sparsity_ratio: null
# average attention modules instead of learning merging weights
# NOTE(review): the key is spelled `average_attntion` (missing "e"); it presumably has to match the
# parameter name expected by the `_target_` class, so check the consumer before renaming it
average_attntion: true
# start_layer_idx is a float (in [0, 1]), an int, or null. If it is null, start at the first layer
start_layer_idx: 0.3
# optimizer name
optimizer: adam
# learning rate
# written with an explicit decimal point so that YAML 1.1 loaders (e.g. PyYAML)
# resolve it as a float instead of the string "1e-4"
lr: 1.0e-4
# NOTE(review): presumably the initial value of the merging weights — confirm against the consumer
init_values: 0.5
# if `clamp_weights` is true, the weights will be clamped to [0, 1]
clamp_weights: false
# NOTE(review): presumably controls whether the merging weights are normalized — confirm against the consumer
normalized_merging_weights: true
# arguments of `functional_call`
# NOTE(review): presumably this covers only `tie_weights` and `strict` — confirm
tie_weights: true
strict: false
max_steps: 1000
# `${fast_dev_run}` is an interpolation; no `fast_dev_run` value is defined in this file
# NOTE(review): presumably resolved from a top-level `fast_dev_run` key of the composed config — confirm
fast_dev_run: ${fast_dev_run}
# interval for saving the merging weights
# NOTE(review): presumably counted in training steps; the save location is not set by these keys — confirm
save_interval: 100
save_merged_model: true
# NOTE(review): presumably forwarded as keyword arguments when the data loader is constructed — confirm
dataloader_kwargs:
  batch_size: 24
  num_workers: 0
  shuffle: true
