#!/bin/bash
#
# Launches perplexity_yarn_search.py with a fixed AWQ + YaRN argument set
# (per the banner below: a search for the optimal per-channel scale).
#
# Usage:
#   Run from the directory that contains perplexity_yarn_search.py; the
#   script name and the default dataset / AWQ-cache paths are relative and
#   are resolved against the current working directory.
#
# Optional environment overrides (defaults reproduce the original command):
#   YARN_SEARCH_TOKENIZED  value passed to --tokenized
#   YARN_SEARCH_MODEL      value passed to -m
#   YARN_SEARCH_AWQ_CACHE  value passed to --awq_cache
#
# Exit status:
#   Non-zero as soon as the Python command fails; in that case
#   "Complete search" is NOT printed, so a failed run cannot be mistaken
#   for a finished one.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines.
set -euo pipefail

tokenized_dataset=${YARN_SEARCH_TOKENIZED:-output/govreport-test-tokenized-awq}
model_dir=${YARN_SEARCH_MODEL:-/home/yeq6/Research_project/llama/llama-2-7b-chat_hf}
awq_cache=${YARN_SEARCH_AWQ_CACHE:-llm-awq/awq_cache/llama2-7b-w4-g128_mine.pt}

# Arguments live in an array so each value stays a single word even if a
# path contains spaces, and so no trailing "\" continuation can accidentally
# swallow whatever line follows the command.
search_args=(
  --tokenized "${tokenized_dataset}"
  --dataset-min-tokens 20384
  --samples 10
  -m "${model_dir}"
  --awq_cache "${awq_cache}"
  --sliding-window 2048
  --aggressive-memory
  --yarn 8
  --original
  --custom-model
  --original-max-position-embeddings 2048
  --awq
)

echo "search for optimal scale for individual channel"
python perplexity_yarn_search.py "${search_args[@]}"

echo "Complete search"