#!/bin/bash
#
# Runs perplexity_yarn.py over output/govreport-test-tokenized-awq for several
# AWQ quantization caches, writing one CSV per configuration to ppl_results/.
#
# A configuration is a pair:
#   variant       which quant cache to load:
#                   temp_5                 - rescale entire model
#                   embedding_only_temp_5  - only rescale embedding
#   original_ctx  value passed to --original-max-position-embeddings
#
# Must be run from the directory that contains perplexity_yarn.py; the dataset
# and output paths are relative to the current working directory.
#
# Optional environment overrides (defaults are the original hard-coded values):
#   PPL_PYTHON           interpreter to invoke            (default: python)
#   PPL_MODEL_DIR        directory passed to -m
#   PPL_QUANT_CACHE_DIR  directory containing the *.pt quant caches
#
# Every configuration is attempted even if an earlier one fails.
# Exit status: 0 if all runs succeeded, 1 otherwise.

set -u

readonly PYTHON_BIN="${PPL_PYTHON:-python}"
readonly MODEL_DIR="${PPL_MODEL_DIR:-/home/yeq6/Research_project/llama/llama-2-7b-chat_hf}"
readonly QUANT_CACHE_DIR="${PPL_QUANT_CACHE_DIR:-/home/yeq6/Research_project/llama/llm-awq/quant_cache}"
readonly SCRIPT="perplexity_yarn.py"
readonly OUTPUT_DIR="ppl_results"

failures=0

#######################################
# Run one perplexity evaluation.
# Globals:   PYTHON_BIN, SCRIPT, MODEL_DIR, QUANT_CACHE_DIR, OUTPUT_DIR (read)
# Arguments: $1 - variant (e.g. temp_5, embedding_only_temp_5)
#            $2 - original max position embeddings (e.g. 2048, 4096)
# Outputs:   logs the command line to stderr before running it
# Returns:   exit status of the python process
#######################################
run_ppl() {
  local variant=$1
  local original_ctx=$2

  # File naming convention: results for the 2048 setting carry no suffix,
  # any other setting is appended as "_<original_ctx>".
  local suffix=""
  if [[ "$original_ctx" != "2048" ]]; then
    suffix="_${original_ctx}"
  fi

  local output_file="${OUTPUT_DIR}/govreport_awq_int4_yarn_${variant}${suffix}.csv"
  local quant_path="${QUANT_CACHE_DIR}/llama-2-7b-chat-w4-g128_mine_${variant}-v2.pt"

  local -a cmd=(
    "$PYTHON_BIN" "$SCRIPT"
    --tokenized output/govreport-test-tokenized-awq
    --dataset-min-tokens 20384 --samples 10
    --output-file "$output_file"
    --min-tokens 256 --max-tokens 19456 -m "$MODEL_DIR"
    --quant_path "$quant_path"
    --sliding-window 2048 --tokens-step 384 --aggressive-memory --yarn 8
    --original --custom-model
    --original-max-position-embeddings "$original_ctx" --awq
  )

  printf '>>> ' >&2
  printf '%q ' "${cmd[@]}" >&2
  printf '\n' >&2

  "${cmd[@]}"
}

#######################################
# Run one configuration and record (rather than abort on) a failure, so the
# remaining independent configurations still get their turn.
# Globals:   failures (written)
# Arguments: same as run_ppl
#######################################
attempt() {
  local status=0
  run_ppl "$@" || status=$?
  if (( status != 0 )); then
    printf 'error: run failed (exit %d): variant=%s original_ctx=%s\n' \
      "$status" "$1" "$2" >&2
    failures=$((failures + 1))
  fi
}

# Fail fast on setup problems instead of discovering them mid-sweep.
if [[ ! -f "$SCRIPT" ]]; then
  printf 'error: %s not found in %s; run this script from its directory\n' \
    "$SCRIPT" "$PWD" >&2
  exit 1
fi
if ! mkdir -p -- "$OUTPUT_DIR"; then
  printf 'error: cannot create output directory %s\n' "$OUTPUT_DIR" >&2
  exit 1
fi

# rescale entire model with 2048 original context size (disabled)
# attempt temp_5 2048

# only rescale embedding with 2048 original context size
attempt embedding_only_temp_5 2048

# rescale entire model with 4096 original context size
attempt temp_5 4096

# only rescale embedding with 4096 original context size
attempt embedding_only_temp_5 4096

if (( failures > 0 )); then
  printf 'error: %d run(s) failed\n' "$failures" >&2
  exit 1
fi
exit 0



