#!/bin/bash
#
# Runs run_needle_in_haystack.py for meta-llama/Llama-3.1-8B-Instruct with the
# settings below, then runs scripts/visualize.py on the resulting folder.
#
# Usage: invoke from the directory that contains run_needle_in_haystack.py and
# scripts/ (all paths below are relative to the current directory).
#
# To evaluate another method or max_capacity_prompt, edit the settings below;
# the run label and the results folder are derived from them.

# Abort on the first failing command so that the visualization step never runs
# against missing or stale results when the evaluation itself failed.
set -euo pipefail

readonly method="restkv"
readonly max_capacity_prompt=512
# Label passed as --model_version. The visualization step reads from a results
# folder carrying the same name, so both are built from this single value.
readonly model_version="LLaMA3_${method}_${max_capacity_prompt}"

mkdir -p output_dirs/needle_llama3.1

# NOTE: the last argument of each command must NOT end with a backslash;
# a trailing continuation would swallow whatever line comes next.
CUDA_VISIBLE_DEVICES=0 python3 run_needle_in_haystack.py \
    --s_len 2000 \
    --e_len 32001 \
    --model_provider LLaMA3 \
    --model_name "meta-llama/Llama-3.1-8B-Instruct" \
    --attn_implementation flash_attention_2 \
    --step 400 \
    --method "${method}" \
    --max_capacity_prompt "${max_capacity_prompt}" \
    --model_version "${model_version}"

python3 scripts/visualize.py \
    --folder_path "results_needle/results/${model_version}/" \
    --model_name "Llama-3.1-8B-Instruct"