#!/bin/bash
  # Launch perplexity_yarn.py with a fixed set of command-line options.
  # The options are collected in an array (one per line, in the order they
  # are handed to the program) and expanded quoted so each element reaches
  # python as exactly one argument. Relative paths are resolved against the
  # directory the script is started from.
  ppl_args=(
    # Tokenized input and sample selection.
    --tokenized output/govreport-test-tokenized-awq
    --dataset-min-tokens 20384
    --samples 10

    # Where results are written.
    --output-file ppl_results/int4_awq_weight_activation_searched_rescale

    # Token range, model path, window and step settings.
    --min-tokens 256
    --max-tokens 14080
    -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf
    --sliding-window 2048
    --tokens-step 384
    --aggressive-memory
    --yarn 8

    # AWQ cache file and model-variant switches.
    --awq_cache llm-awq/awq_cache/llama2-7b-w4-g128_mine.pt
    --original
    --custom-model
    --original-max-position-embeddings 2048
    --awq

    # Free-form label for this run (same text as the output file name).
    --comment int4_awq_weight_activation_searched_rescale

    # Rescaling / search-result / "hardmard" options
    # (flag spelling is the one this command line has always used).
    --rescale_attention_all
    --search_result_path /home/yeq6/Research_project/llama/best_channel_scales.txt
    --use_search_result
    --apply_hardmard
    --hardmard_layers "q_proj,k_proj,v_proj"
  )

  python perplexity_yarn.py "${ppl_args[@]}"