# Launch src/llama/att_matching.py on two fine-tuned checkpoints whose run
# directories differ only in the trailing seed (seed0 vs. seed20), with the
# wt103 data directory and "--dist L2".
# CUDA_VISIBLE_DEVICES=0 is set for this one command only, so the process
# sees just GPU 0.
# NOTE(review): presumably the script compares the two models' attention
# layers under an L2 distance -- confirm against att_matching.py.

# Common prefix of both checkpoint directories, up to (not including) the
# seed number. The leading tilde is expanded here, at assignment time.
ckpt_prefix=~/weights/lmc/llama-wt103/llama-finetune/finetune-indice0,1,2,3,4,5,6,7,8,9,10,11-heads3-mlpTrue-typeNormal-seed

CUDA_VISIBLE_DEVICES=0 python src/llama/att_matching.py \
    --tgt_len 256 \
    --mem_len 0 \
    --eval_tgt_len 256 \
    --model-a "${ckpt_prefix}0/best_60000" \
    --model-b "${ckpt_prefix}20/best_60000" \
    --data-path ~/datasets/wt103 \
    --dataset wt103 \
    --dist L2