#!/usr/bin/env bash
#
# Runs src/lgmodeling/att_matching.py on pairs of wt103 GPT-2 fine-tune
# checkpoints. For each variant tag (learnable, rope) the seed0 run is passed
# as --model-a and the seed20 run as --model-b, both at checkpoint best_60000.
#
# Usage: run from the directory that contains src/ (the script path below is
# relative). The body avoids bash-only syntax, so `sh <script>` also works.

# Parent directory of the per-run checkpoint directories.
WEIGHTS_DIR=/root/weights/wt103/gpt2-finetune

#######################################
# Run att_matching.py for one variant, comparing its seed0 and seed20 runs.
# Globals:   WEIGHTS_DIR (read)
# Arguments: $1 - variant tag embedded in the run directory name
#                 (e.g. "learnable" or "rope")
# Returns:   exit status of the python process
#######################################
run_att_matching() {
  : "${1:?usage: run_att_matching <variant>}"
  # CUDA_VISIBLE_DEVICES=0 is set for this one command only, so the process
  # sees just the first GPU without changing the caller's environment.
  CUDA_VISIBLE_DEVICES=0 python src/lgmodeling/att_matching.py \
    --seed 0 --tgt_len 256 --mem_len 256 --eval_tgt_len 256 \
    --model-a "${WEIGHTS_DIR}/finetune-${1}-indice0-heads3-shared1-routed0-topk0-seed0/best_60000" \
    --model-b "${WEIGHTS_DIR}/finetune-${1}-indice0-heads3-shared1-routed0-topk0-seed20/best_60000" \
    --data-path /root/datasets/wt103 --dataset wt103
}

# Both variants are always attempted; a failure of the first must not be
# hidden by a success of the second, so remember the last non-zero status.
att_matching_rc=0

#LEARNABLE
run_att_matching learnable || att_matching_rc=$?
#ROPE
run_att_matching rope || att_matching_rc=$?

# Leave $? reflecting any failure without calling `exit`, so that anything
# following this section still runs.
( exit "${att_matching_rc}" )