#!/bin/bash

  # python perplexity_yarn.py --tokenized output/govreport-test-tokenized-awq \
  # --dataset-min-tokens 20384 --samples 10 \
  # --output-file ppl_results/baseline_RTN_int4_weight_activation \
  # --min-tokens 256 --max-tokens 16384 -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  # --sliding-window 2048 --tokens-step 384 --aggressive-memory --yarn 8 \
  # --original --custom-model --original-max-position-embeddings 2048 --awq \
  # --comment baseline_RTN_int4_weight_activation_new --naive_quant --quant_activation

  # python perplexity_yarn.py --tokenized output/govreport-test-tokenized-awq \
  # --dataset-min-tokens 20384 --samples 10 \
  # --output-file ppl_results/baseline_RTN_int4_weight_activation_no_pi \
  # --min-tokens 256 --max-tokens 6400 -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  # --sliding-window 2048 --tokens-step 384 --aggressive-memory \
  # --original --awq \
  # --comment baseline_RTN_int4_weight_activation_no_pi_new --naive_quant --no_pi --quant_activation

  python perplexity_yarn.py --tokenized output/govreport-test-tokenized-awq \
  --dataset-min-tokens 20384 --samples 10 \
  --output-file ppl_results/int4_weight_activation_hardmard_qkv \
  --min-tokens 256 --max-tokens 13696 -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  --sliding-window 2048 --tokens-step 384 --aggressive-memory --yarn 8 \
  --original --custom-model --original-max-position-embeddings 2048 --awq \
  --comment int4_weight_activation_hardmard_qkv_new --apply_hardmard --hardmard_layers "q_proj,k_proj,v_proj" --quant_activation

  # python perplexity_yarn.py --tokenized output/govreport-test-tokenized-awq \
  # --dataset-min-tokens 20384 --samples 10 \
  # --output-file ppl_results/int4_weight_activation_hardmard_o \
  # --min-tokens 256 --max-tokens 14080 -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  # --sliding-window 2048 --tokens-step 384 --aggressive-memory --yarn 8 \
  # --original --custom-model --original-max-position-embeddings 2048 --awq \
  # --comment int4_weight_activation_hardmard_o_new --apply_hardmard --hardmard_layers "o_proj" --quant_activation

   python perplexity_yarn.py --tokenized output/govreport-test-tokenized-awq \
  --dataset-min-tokens 20384 --samples 10 \
  --output-file ppl_results/int4_weight_activation_hardmard_up \
  --min-tokens 256 --max-tokens 14080 -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  --sliding-window 2048 --tokens-step 384 --aggressive-memory --yarn 8 \
  --original --custom-model --original-max-position-embeddings 2048 --awq \
  --comment int4_weight_activation_hardmard_up_new --apply_hardmard --hardmard_layers "up_proj" --quant_activation

  python perplexity_yarn.py --tokenized output/govreport-test-tokenized-awq \
  --dataset-min-tokens 20384 --samples 10 \
  --output-file ppl_results/int4_weight_activation_hardmard_gate \
  --min-tokens 256 --max-tokens 14080 -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf \
  --sliding-window 2048 --tokens-step 384 --aggressive-memory --yarn 8 \
  --original --custom-model --original-max-position-embeddings 2048 --awq \
  --comment int4_weight_activation_hardmard_gate_new --apply_hardmard --hardmard_layers "gate_proj" --quant_activation