#!/bin/bash

# ---------------------------------------------------------------------------
# Run configuration. Every variable here is read by the sweep loop below.
# ---------------------------------------------------------------------------

# Placeholder checkpoint locations: replace with real paths before running.
llama_model='your/model/path'
mistral_model='your/model/path'

# Exported to each python invocation as CUDA_VISIBLE_DEVICES.
device='0'

# Forwarded as --version and also used as a component of the evaluation path.
version='1.0'

# Space-separated dataset names; the sweep word-splits this string, so the
# names must stay free of whitespace and glob characters.
dataset_list='narrativeqa qasper multifieldqa_en'
dataset_list+=' hotpotqa 2wikimqa musique'
dataset_list+=' gov_report qmsum multi_news'
dataset_list+=' trec triviaqa samsum'
dataset_list+=' lcc repobench-p'

# Values forwarded to --chunk_size, --kv_warmup_budget, --pooling and --alpha.
chunk_size='4096'
kv_warmup_budget='8192'
pooling='avg'
alpha='0.2'

# Initial value only: the sweep loop below reuses kv_budget as its loop
# variable and overwrites it on every iteration.
kv_budget='512'

#######################################
# Run one model over every dataset in dataset_list, then score the outputs.
#
# The evaluation path is assembled from the same `mode` and `warmup_layers`
# values that are passed to the generation run, so the flags and the directory
# being scored cannot drift apart.
# NOTE(review): the directory layout is assumed to match what the generation
# modules write; confirm against eval.longbench.main / main_mistral.
#
# Globals (read): dataset_list, device, version, chunk_size, kv_budget,
#                 kv_warmup_budget, pooling, alpha
# Arguments:
#   $1 - python module to run with `python -m` for generation
#   $2 - model path passed as --model
#   $3 - model directory name under outputs/ used for evaluation
# Outputs:  a warning on stderr for each dataset whose run exits non-zero
# Returns:  exit status of the evaluation command
#######################################
run_longbench_model() {
  local -r entry_module=$1
  local -r model_path=$2
  local -r output_name=$3
  local -r mode="take"
  local -r warmup_layers=16
  local -r eval_path="outputs/${output_name}/longbench/${mode}/${version}/_cs${chunk_size}_b${kv_budget}_wl${warmup_layers}_wlb${kv_warmup_budget}_${pooling}"
  local dataset

  # Word-splitting of the space-separated dataset list is intentional.
  # shellcheck disable=SC2086
  for dataset in $dataset_list; do
    # Best-effort: a failing dataset is reported but does not stop the sweep.
    if ! CUDA_VISIBLE_DEVICES="$device" python -m "$entry_module" \
      --model "$model_path" \
      --mode "$mode" \
      --dataset "$dataset" \
      --longbench_type longbench \
      --version "$version" \
      --chunk_size "$chunk_size" \
      --kv_budget "$kv_budget" \
      --warmup_layers "$warmup_layers" \
      --kv_warmup_budget "$kv_warmup_budget" \
      --pooling "$pooling" \
      --alpha "$alpha"; then
      printf 'warning: %s failed on dataset %s (kv_budget=%s)\n' \
        "$entry_module" "$dataset" "$kv_budget" >&2
    fi
  done

  CUDA_VISIBLE_DEVICES="$device" python -m eval.longbench.evaluate \
    --eval_path "$eval_path"
}

# Sweep the KV budget; for each value run and score Llama, then Mistral.
for kv_budget in 128 256 512 1024; do
  run_longbench_model eval.longbench.main \
    "$llama_model" "Llama-3.1-8B-Instruct"

  run_longbench_model eval.longbench.main_mistral \
    "$mistral_model" "Mistral-7B-Instruct-v0.3"
done