# Search for AWQ scales after NTK interpolation (--ntk 8 with --ntk-aware-search).
#
# Model location: taken from $AWQ_MODEL_PATH when set and non-empty; otherwise
# falls back to the checkpoint path this script was originally written against.
# Quantization flags passed: --w_bit 4, --q_group_size 128.
# Output: --dump_awq target is awq_cache/llama2-7b-w4-g128_mine_prentk.pt
# (relative to the current working directory).
python -m awq.entry \
  --model_path "${AWQ_MODEL_PATH:-/home/yeq6/Research_project/llama/llama-2-7b-chat_hf}" \
  --w_bit 4 --q_group_size 128 \
  --run_awq \
  --dump_awq awq_cache/llama2-7b-w4-g128_mine_prentk.pt \
  --ntk 8 --ntk-aware-search

# Search for AWQ scales after YaRN interpolation (--yarn 8 with --yarn-aware-search).
#
# Model location: taken from $AWQ_MODEL_PATH when set and non-empty; otherwise
# falls back to the checkpoint path this script was originally written against.
# Quantization flags passed: --w_bit 4, --q_group_size 128.
# YaRN-related flags passed: --original-max-position-embeddings 2048,
# --custom-model, --no-use-cache.
# Output: --dump_awq target is awq_cache/llama2-7b-w4-g128_mine_preyarn.pt
# (relative to the current working directory).
python -m awq.entry \
  --model_path "${AWQ_MODEL_PATH:-/home/yeq6/Research_project/llama/llama-2-7b-chat_hf}" \
  --original-max-position-embeddings 2048 \
  --w_bit 4 --q_group_size 128 \
  --run_awq \
  --dump_awq awq_cache/llama2-7b-w4-g128_mine_preyarn.pt \
  --yarn 8 --yarn-aware-search \
  --custom-model --no-use-cache
