#!/bin/bash

# ---- Model checkpoints ------------------------------------------------------
# Passed as --model to the Llama and Mistral needle runs respectively.
llama_model='your/model/path'   # do not end with "/"
mistral_model='your/model/path' # presumably the same "/" rule applies — confirm

# ---- Run identification -----------------------------------------------------
# GPU index exported as CUDA_VISIBLE_DEVICES for the evaluation commands.
device='0'
# Passed as --version and used as a directory component of the results path.
version='1.0'

# ---- Settings forwarded to both evaluation runs as same-named flags ---------
chunk_size='4096'
kernel_size='15'
alpha='0.32'
kv_warmup_budget='20000'
warmup_layers='16'

# Needle sweep for the Llama model: for each KV budget, run the evaluation and
# then plot the results directory it is expected to have written.
#
# These values are used both as CLI flags and as components of the results
# directory name below; defining them once keeps the two from drifting apart.
# NOTE(review): this loop uses --mode take while the Mistral loop uses
# --mode tao, and the two results-path suffixes differ — confirm both are
# intended.
mode=take
pooling=avg
task_query_len=15
chunk_window_size=8
chunk_sink=4

for kv_budget in 128 256 512 1024; do
    # Build the argument list as an array so every value stays a single word
    # even if it contains spaces (e.g. the model path).
    eval_args=(
        --model "$llama_model"
        --version "$version"
        --mode "$mode"
        --pooling "$pooling"
        --kv_budget "$kv_budget"
        --kv_prune_trigger_size 4096
        --chunk_size "$chunk_size"
        --kernel_size "$kernel_size"
        --task_query_len "$task_query_len"
        --chunk_window_size "$chunk_window_size"
        --chunk_sink "$chunk_sink"
        --alpha "$alpha"
        --kv_warmup_budget "$kv_warmup_budget"
        --warmup_layers "$warmup_layers"
    )

    # Results directory handed to the visualizer. Presumably this mirrors the
    # naming scheme used by eval/needle/main.py — verify against that script.
    eval_path="outputs/Llama-3.1-8B-Instruct/needle/${mode}/${version}/_cs${chunk_size}_ks${kernel_size}_b${kv_budget}_tql${task_query_len}_wl${warmup_layers}_sink${chunk_sink}_cw${chunk_window_size}_alpha${alpha}_${pooling}"

    # Only plot when the evaluation succeeded; otherwise the directory may be
    # missing or hold results from an earlier run. The sweep itself continues.
    if CUDA_VISIBLE_DEVICES="$device" python eval/needle/main.py "${eval_args[@]}"; then
        python eval/needle/visualize.py --eval_path "$eval_path"
    else
        printf 'needle eval (llama) failed for kv_budget=%s; skipping visualization\n' \
            "$kv_budget" >&2
    fi

    # Pause between budgets (kept from the original script).
    sleep 10
done

# Needle sweep for the Mistral model: for each KV budget, run the evaluation
# module and then plot the results directory it is expected to have written.
#
# These values are used both as CLI flags and as components of the results
# directory name below; defining them once keeps the two from drifting apart.
# NOTE(review): this loop uses --mode tao while the Llama loop uses
# --mode take, and the two results-path suffixes differ — confirm both are
# intended.
mode=tao
pooling=avg
task_query_len=15

for kv_budget in 128 256 512 1024; do
  # Build the argument list as an array so every value stays a single word
  # even if it contains spaces (e.g. the model path).
  eval_args=(
    --model "$mistral_model"
    --version "$version"
    --mode "$mode"
    --pooling "$pooling"
    --kv_budget "$kv_budget"
    --kv_prune_trigger_size 4096
    --chunk_size "$chunk_size"
    --kernel_size "$kernel_size"
    --task_query_len "$task_query_len"
    --chunk_window_size 8
    --chunk_sink 4
    --alpha "$alpha"
    --kv_warmup_budget "$kv_warmup_budget"
    --warmup_layers "$warmup_layers"
  )

  # Results directory handed to the visualizer. Presumably this mirrors the
  # naming scheme used by eval.needle.main_mistral — verify against that module.
  eval_path="outputs/Mistral-7B-Instruct-v0.3/needle/${mode}/${version}/_cs${chunk_size}_ks${kernel_size}_b${kv_budget}_tql${task_query_len}_wl${warmup_layers}_wb${kv_warmup_budget}_alpha${alpha}_${pooling}"

  # Only plot when the evaluation succeeded; otherwise the directory may be
  # missing or hold results from an earlier run. The sweep itself continues.
  if CUDA_VISIBLE_DEVICES="$device" python -m eval.needle.main_mistral "${eval_args[@]}"; then
    python eval/needle/visualize.py --eval_path "$eval_path"
  else
    printf 'needle eval (mistral) failed for kv_budget=%s; skipping visualization\n' \
      "$kv_budget" >&2
  fi

  # Pause between budgets (kept from the original script).
  sleep 10
done
