#!/bin/bash
#
# Runs run_longbench.py with the "restkv" method on Llama-3.1-8B-Instruct and
# then scores the generated results with eval.py.
#
# Both Python scripts and the output directory are referenced by relative
# path, so run this script from the directory that contains them.

# Abort on the first failing command, unset variable, or failed pipeline stage
# so eval.py never runs against missing or partial results.
set -euo pipefail

# Settings shared by the generation and evaluation steps. Edit them here so
# the two steps cannot drift apart.
model_path="meta-llama/Llama-3.1-8B-Instruct"
max_capacity_prompts=64
save_dir="output_dirs/longbench_llama3.1"

# Directory handed to eval.py. The "<model>_<max_capacity_prompts>" layout is
# assumed from the original hard-coded path; confirm against run_longbench.py.
# Update the model part by hand when model_path changes.
results_dir="${save_dir}/llama-3.1-8b-instruct_${max_capacity_prompts}"

mkdir -p -- "${save_dir}"

# Arguments are kept in an array so no trailing line continuation can
# accidentally swallow whatever follows the command.
longbench_args=(
  --method "restkv"
  --model_path "${model_path}"
  --max_capacity_prompts "${max_capacity_prompts}"
  --attn_implementation flash_attention_2
  --save_dir "${save_dir}"
  --add_file_name "llama3.1"
  --window_size 32
  --pooling "adaptive"
  --kernel_sizes 5
  --use_wo
  --use_ema
)

# Expose only GPU 0 to the generation run.
CUDA_VISIBLE_DEVICES=0 python3 run_longbench.py "${longbench_args[@]}"

# Score the results produced above.
python3 eval.py \
  --results_dir "${results_dir}"
