# Launches src/lgmodeling/att_matching.py twice: once for the "learnable"
# checkpoint family and once for the "rope" family. Each launch passes the
# seed0 and seed20 "best_60000" checkpoints of that family as --model-a and
# --model-b, with the enwik8 dataset.
# NOTE(review): "learnable"/"rope" presumably name the positional-encoding
# variant used during fine-tuning -- confirm against the training scripts.
#
# Optional environment overrides (defaults reproduce the previously hard-coded
# values, so running with a clean environment behaves as before):
#   ATT_MATCH_GPU      value given to CUDA_VISIBLE_DEVICES (default: 0)
#   ATT_MATCH_WEIGHTS  directory containing the finetune-* run directories
#                      (default: /root/weights/enwik8/gpt2-finetune)
#   ATT_MATCH_DATA     value passed as --data-path
#                      (default: /root/datasets/enwik8)

#######################################
# Run att_matching.py for one checkpoint family.
# Globals:   ATT_MATCH_GPU, ATT_MATCH_WEIGHTS, ATT_MATCH_DATA (read, optional)
# Arguments: $1 - family tag embedded in the run directory name
#                 (e.g. "learnable" or "rope")
# Returns:   exit status of the python process
#######################################
run_att_matching() {
    # Abort early on a missing tag rather than building a bogus checkpoint path.
    : "${1:?usage: run_att_matching <family-tag>}"

    CUDA_VISIBLE_DEVICES="${ATT_MATCH_GPU:-0}" python src/lgmodeling/att_matching.py \
        --seed 0 --tgt_len 256 --mem_len 256 --eval_tgt_len 256 \
        --model-a "${ATT_MATCH_WEIGHTS:-/root/weights/enwik8/gpt2-finetune}/finetune-${1}-indice0-heads8-shared1-routed0-topk0-seed0/best_60000" \
        --model-b "${ATT_MATCH_WEIGHTS:-/root/weights/enwik8/gpt2-finetune}/finetune-${1}-indice0-heads8-shared1-routed0-topk0-seed20/best_60000" \
        --data-path "${ATT_MATCH_DATA:-/root/datasets/enwik8}" --dataset enwik8
}

# Remembers the most recent non-zero status so that a failure in the first
# run is not hidden by a successful second run.
att_match_status=0

#LEARNABLE
run_att_matching learnable || att_match_status=$?
#ROPE
run_att_matching rope || att_match_status=$?

# Both families are always attempted. Use a subshell exit to set $? to the
# recorded status without terminating a shell that sources this file.
( exit "$att_match_status" )