#!/bin/bash

# first argument is the scale
# Usage: ./ppl_recale_individual_channel_all_attention_new.sh <scale>
# Validate the single required argument up front; without it every run in the
# sweep would be launched with an empty --individual_channel_scale value.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s <scale>\n' "${0##*/}" >&2
  exit 2
fi
scale=$1

# Number of python invocations that exited non-zero (reported at the end).
failed_runs=0

# Sweep --individual_channel over indices 41..127 (inclusive), one
# perplexity_yarn.py invocation per index, all with the same scale.
# NOTE(review): the sweep starts at 41 rather than 0 — presumably resuming an
# earlier partial sweep; confirm before changing the range.
for i in {41..127}; do
  echo "Running with --individual_channel $i --individual_channel_scale $scale on all attention block"

  # Arguments are kept in an array so every value is passed as exactly one
  # word, regardless of its content.
  run_args=(
    --tokenized output/govreport-test-tokenized-awq
    --dataset-min-tokens 20384 --samples 10
    # All channels of one sweep share a single output file keyed by scale.
    --output-file "ppl_results/new_embedding_only/per_head_individual_channel_all_attention_${scale}"
    --min-tokens 256 --max-tokens 7500
    -m /home/yeq6/Research_project/llama/llama-2-7b-chat_hf
    --awq_cache llm-awq/awq_cache/llama2-7b-w4-g128_mine.pt
    --beta_point 1287 --sliding-window 2048 --tokens-step 384
    --aggressive-memory --yarn 8
    --original --custom-model --original-max-position-embeddings 2048
    --awq --rescale_per_head --individual_channel "$i"
    # Braces are required here: a bare "$i_scale_" would be parsed as the
    # (unset) variable "i_scale_", silently dropping the channel index.
    --comment "per_head_all_attention_individual_channel_${i}_scale_${scale}"
    --rescale_attention_all
    --individual_channel_scale "$scale"
  )

  # A failed run is reported on stderr but does not abort the sweep, so the
  # remaining channels are still evaluated.
  if python perplexity_yarn.py "${run_args[@]}"; then
    echo "Completed run with --individual_channel $i --individual_channel_scale $scale"
  else
    status=$?
    failed_runs=$((failed_runs + 1))
    printf 'Run FAILED (exit %d) with --individual_channel %s --individual_channel_scale %s\n' \
      "$status" "$i" "$scale" >&2
  fi
done

if ((failed_runs > 0)); then
  printf 'Warning: %d run(s) failed.\n' "$failed_runs" >&2
fi

echo "All runs completed."