# Launch the dictionary-learning training script.
#
# The script path is relative, so run this from a directory where
# sparsecache/dictionary_learning/train.py resolves; `python` is looked up
# on PATH. Every flag below is forwarded verbatim to train.py.
# NOTE(review): whether --model_name_or_path refers to a local directory or
# a remote model identifier is not visible from this file — confirm in train.py.
python sparsecache/dictionary_learning/train.py \
    --model_name_or_path "Qwen2.5-7B-Instruct_1M_wiki3M" \
    --dictionary_size 8192 \
    --sparsity 64 \
    --concat 1 \
    --num_epochs 300 \
    --batch_size 2048 \
    --lr 0.0005
# Currently disabled; append to the command above to turn it on:
#   --use_norm
