# Perplexity baseline ("nf4, no pi"): the model is loaded with --load-in-4bit
# and no --yarn scaling flag is passed.
# Results are written to ppl_results/govreport_nf4_no_pi.csv.
python perplexity_yarn.py \
  --tokenized output/govreport-test-tokenized \
  --dataset-min-tokens 20384 \
  --samples 10 \
  --output-file ppl_results/govreport_nf4_no_pi.csv \
  --min-tokens 256 \
  --max-tokens 19456 \
  -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  --sliding-window 2048 \
  --tokens-step 384 \
  --aggressive-memory \
  --load-in-4bit

# Perplexity run with 4-bit loading plus YaRN ("nf4 with yarn"): same dataset
# and token-range settings as the no-pi run, with --yarn 8 and
# --original-max-position-embeddings 2048 added.
# Results are written to ppl_results/govreport_nf4_yarn.csv.
python perplexity_yarn.py \
  --tokenized output/govreport-test-tokenized \
  --dataset-min-tokens 20384 \
  --samples 10 \
  --output-file ppl_results/govreport_nf4_yarn.csv \
  --min-tokens 256 \
  --max-tokens 19456 \
  -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  --sliding-window 2048 \
  --tokens-step 384 \
  --aggressive-memory \
  --load-in-4bit \
  --yarn 8 \
  --original-max-position-embeddings 2048 \
  --original \
  --custom-model


# AWQ scale search with NTK interpolation enabled (--ntk 8, --ntk-aware-search).
# 4-bit weights, group size 128; the search result is dumped to
# awq_cache/llama2-7b-w4-g128_mine_prentk.pt.
python -m awq.entry \
  --model_path /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  --w_bit 4 \
  --q_group_size 128 \
  --run_awq \
  --dump_awq awq_cache/llama2-7b-w4-g128_mine_prentk.pt \
  --ntk 8 \
  --ntk-aware-search

# AWQ scale search with YaRN interpolation enabled (--yarn 8, --yarn-aware-search).
# 4-bit weights, group size 128; the search result is dumped to
# awq_cache/llama2-7b-w4-g128_mine_preyarn.pt.
python -m awq.entry \
  --model_path /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  --original-max-position-embeddings 2048 \
  --w_bit 4 \
  --q_group_size 128 \
  --run_awq \
  --dump_awq awq_cache/llama2-7b-w4-g128_mine_preyarn.pt \
  --yarn 8 \
  --yarn-aware-search \
  --custom-model


# AWQ scale search for Qwen2.5-7B. No --ntk / --yarn flags are passed, so no
# interpolation-aware search is requested for this model.
# 4-bit weights, group size 128; the search result is dumped to
# awq_cache/Qwen2.5-7B-w4-g128_mine.pt.
#
# NOTE: every continued line must have whitespace before its trailing
# backslash. Backslash-newline is removed by the shell, so without the space
# the next flag is glued onto the previous argument (the model path used to
# become "models/Qwen2.5-7B--w_bit").
python -m awq.entry \
  --model_path models/Qwen2.5-7B \
  --w_bit 4 \
  --q_group_size 128 \
  --run_awq \
  --dump_awq awq_cache/Qwen2.5-7B-w4-g128_mine.pt
