# model_dir
# Options: meta-llama/Llama-3.1-8B, Qwen/Qwen2.5-7B
model_name: 'meta-llama/Llama-3.1-8B'
# The `{}` placeholders are presumably filled in by the loading code;
# TODO confirm what each one stands for.
# NOTE(review): absolute, machine-specific path.
hypernetwork_ckpt: '/home/liuxb/code/Multi-EMoE/{}_{}_hypernetwork_{}.pth'
# Options: intfloat/e5-base-v2, facebook/contriever-msmarco
retrieval_model_ckpt: 'intfloat/e5-base-v2'
decompose_model_ckpt: '/home/liuxb/code/Multi-EMoE/Decomposer/falcon3-1b-HotPot'

# dataset
# Options: 'HotPot', 'WikiMhQA', 'musique'
data_name: 'HotPot'
layer_scanning_datasets: '/home/liuxb/code/Multi-EMoE/datasets/causal_tracing.json'
# `{}` is a placeholder, presumably replaced with the dataset name at load
# time; TODO confirm against the loader.
train_dataset: '/home/liuxb/code/Multi-EMoE/datasets/{}_train.json'
test_dataset: '/home/liuxb/code/Multi-EMoE/datasets/{}_test.json'

# model
# Presumably toggles half-precision model weights; TODO confirm against the
# code that reads this flag.
half: false
train_batch_size: 1
valid_batch_size: 1

# train
# Learning rates are written with an explicit mantissa dot (1.0e-4, not 1e-4):
# YAML 1.1 loaders such as PyYAML resolve a bare `1e-4` as the string "1e-4"
# rather than a float.
learning_rate: 1.0e-4
learning_rate_min: 1.0e-6
# Layer at which an expert layer is inserted
single_layer: 9

# parameter
# Use 3584 for a Qwen model, 4096 for Llama
embed_feature: 4096
in_feature: 4096
hid_feature: 1024
num_kv: 1