#!/bin/bash
#
# Launches Benchmark_all.py once for the model in MODEL_PATH over the
# evaluation tasks listed in TASKS, passing a log path under LOG_DIR.
#
# NOTE(review): Benchmark_all.py is resolved relative to the current working
# directory, so run this script from the directory that contains it.

# Stop on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so the benchmark is not started when the
# log directory could not be created.
set -euo pipefail

# Configuration variables
BASE_DIR="/home/yeq6/Research_project/llama"
LOG_DIR="${BASE_DIR}/mass-run"
# Alternative: local chat checkpoint.
# MODEL_PATH="${BASE_DIR}/llama-2-7b-chat_hf"

MODEL_PATH="meta-llama/Llama-2-7b-hf"  # Use base model for all tests

# Not passed by the default command below; kept for the AWQ variants listed
# in the notes at the end of this file (--awq_cache "$AWQ_CACHE").
# shellcheck disable=SC2034
AWQ_CACHE="${BASE_DIR}/llm-awq/awq_cache/llama2-7b-w4-g128_mine.pt"

# Evaluation tasks (handed to --tasks as ONE space-separated argument)
TASKS="wikitext boolq piqa hellaswag winogrande arc_easy arc_challenge openbookqa"

# Shorter task list for quick runs:
# TASKS="wikitext boolq"

# Create log directory if it doesn't exist
mkdir -p -- "$LOG_DIR"

# Set environment variables (inherited by the python process below)
export CUDA_LAUNCH_BLOCKING=1
export TOKENIZERS_PARALLELISM=false
export TORCH_USE_CUDA_DSA=1

# Benchmark arguments are collected in an array so that every value stays a
# single word regardless of its content, and so that flags can be added or
# removed for other variants without touching line continuations.
benchmark_args=(
    -m "$MODEL_PATH"
    --aggressive-memory
    --yarn 8
    --original
    --custom-model
    --original-max-position-embeddings 2048
    --awq
    # The log file is named after the last path component of MODEL_PATH.
    --log_file "${LOG_DIR}/$(basename -- "$MODEL_PATH")"
    --naive_quant
    --tasks "$TASKS"
    --no_pi
)

# Run benchmark
python Benchmark_all.py "${benchmark_args[@]}"

# rtn
# Also test each of the following individually: --no_pi, --quant_activation,
# --apply_hardmard, and --hardmard_layers with each of these values in turn:
# "q_proj,k_proj,v_proj", "o_proj", "up_proj", "gate_proj".

# awq
# Add --awq_cache "$AWQ_CACHE".
# Delete --naive_quant.
# Also test --no_pi and --quant_activation, each of them individually.

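# awq + custom rotation
# Add --awq_cache "$AWQ_CACHE" --rescale_attention_all --rescale_per_head
#     --use_search_result "$SEARCH_RESULT"
#     --search_result_path "$SEARCH_RESULT_PATH"
# (SEARCH_RESULT and SEARCH_RESULT_PATH are not defined in the visible part
# of this script; set them before using these flags.)
# Delete --naive_quant.
# Also test --no_pi and --quant_activation, each of them individually.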
