#!/bin/bash
#
# Runs perplexity_yarn.py three times on the tokenized GovReport test data in
# output/govreport-test-tokenized-awq, once per AWQ / NTK configuration, and
# writes one CSV per configuration.
#
# Usage: run from the directory that contains perplexity_yarn.py.
#
# Optional environment overrides (the defaults reproduce the original
# hard-coded paths exactly):
#   LLAMA_ROOT       base directory of the LLaMA checkout
#   MODEL_DIR        value passed to -m
#   QUANT_CACHE_DIR  directory holding the AWQ checkpoints for --quant_path
#   RESULTS_DIR      directory the CSV files are written to

# Treat unset variables as errors. `set -e` is deliberately NOT enabled: the
# runs are independent, so one failing must not stop the others.
set -u

readonly LLAMA_ROOT="${LLAMA_ROOT:-/home/yeq6/Research_project/llama}"
readonly MODEL_DIR="${MODEL_DIR:-${LLAMA_ROOT}/llama-2-7b-chat_hf}"
readonly QUANT_CACHE_DIR="${QUANT_CACHE_DIR:-${LLAMA_ROOT}/llm-awq/quant_cache}"
readonly RESULTS_DIR="${RESULTS_DIR:-ppl_results}"

#######################################
# Run one perplexity evaluation with the settings shared by every run.
# Globals:   MODEL_DIR, QUANT_CACHE_DIR, RESULTS_DIR (read)
# Arguments: $1 - output CSV file name (created inside RESULTS_DIR)
#            $2 - AWQ checkpoint file name (inside QUANT_CACHE_DIR)
#            $3.. - extra flags appended verbatim (e.g. --ntk 8)
# Outputs:   a diagnostic on stderr when the run fails
# Returns:   exit status of the python process
#######################################
run_perplexity() {
  local output_csv=$1
  local quant_ckpt=$2
  shift 2

  local status=0
  # Flag order is kept identical to the original command lines so the argv
  # seen by perplexity_yarn.py is unchanged.
  python perplexity_yarn.py \
    --tokenized output/govreport-test-tokenized-awq \
    --dataset-min-tokens 20384 --samples 10 \
    --output-file "${RESULTS_DIR}/${output_csv}" \
    --min-tokens 256 --max-tokens 19456 \
    -m "${MODEL_DIR}" \
    --quant_path "${QUANT_CACHE_DIR}/${quant_ckpt}" \
    --sliding-window 2048 --tokens-step 384 --aggressive-memory --awq \
    "$@" || status=$?

  if ((status != 0)); then
    printf 'perplexity run for %s failed (exit %d)\n' \
      "${output_csv}" "${status}" >&2
  fi
  return "${status}"
}

# NOTE(review): presumably perplexity_yarn.py does not create the output
# directory itself -- confirm. Creating it up front is harmless either way.
mkdir -p -- "${RESULTS_DIR}" ||
  printf 'warning: could not create %s\n' "${RESULTS_DIR}" >&2

failed_runs=0

# NTK scaling (--ntk 8) on the regular AWQ checkpoint.
run_perplexity govreport_awq_int4.csv \
  llama-2-7b-chat-w4-g128-awq-v2.pt \
  --ntk 8 || failed_runs=$((failed_runs + 1))

# Non-interpolated baseline: same checkpoint, no --ntk flag.
run_perplexity govreport_awq_int4_no_pi.csv \
  llama-2-7b-chat-w4-g128-awq-v2.pt ||
  failed_runs=$((failed_runs + 1))

# NTK scaling (--ntk 8) on the "_prentk" AWQ checkpoint.
# NOTE(review): presumably this checkpoint was quantized after NTK
# interpolation had been applied ("post interpolation awq") -- confirm.
run_perplexity govreport_awq_int4_prentk.csv \
  llama-2-7b-chat-w4-g128-awq_prentk-v2.pt \
  --ntk 8 || failed_runs=$((failed_runs + 1))

if ((failed_runs > 0)); then
  printf '%d of 3 perplexity runs failed\n' "${failed_runs}" >&2
fi

# Final command sets the script's exit status: non-zero if any run failed.
((failed_runs == 0))