source scripts/common_setting.sh

# ---------------------------------------------------------------------------
# Job configuration.
#
# The first five arrays are parallel: the entry at index k of each array
# describes job k, so they must all have the same number of entries.
# ---------------------------------------------------------------------------

# Base model for each job ($llama2 is not defined in this file).
base_models=("$llama2" "$llama2" "$llama2" "$llama2")

# Short model name for each job; used to build log/result directory paths.
model_names=("llama2" "llama2" "llama2" "llama2")

# Dataset identifier for each job.
data_paths=(
  "yahma/alpaca-cleaned" "yahma/alpaca-cleaned"
  "yahma/alpaca-cleaned" "yahma/alpaca-cleaned"
)

# Short dataset name for each job; used to build log/result directory paths.
data_names=("alpaca" "alpaca" "alpaca" "alpaca")

# Number of blocks to remove for each job.
num_remove_blocks=(4 8 12 16)

# LoRA rank shared by every job; used to build checkpoint/result paths.
lora_r=8

# GPU ids that jobs are assigned to, one job per GPU per batch.
# NOTE(review): GPU 3 is skipped here -- confirm this is intentional.
gpu_ids=(0 1 2 4)

#######################################
# Run eval_speed.py for the "buddy" checkpoint of one job on one GPU.
#
# Only the "buddy" variant is currently enabled; the other variants
# (shortened, shortgpt, sleb, first, pudding) are kept below as
# commented-out commands.
#
# Globals:
#   lora_r (read) - used to build the PEFT checkpoint and results paths.
# Arguments:
#   $1 - gpu_id:        value exported as CUDA_VISIBLE_DEVICES for python.
#   $2 - base_model:    passed to --base_model.
#   $3 - model_name:    used in the checkpoint, sensitivity and results paths.
#   $4 - data_path:     accepted so callers keep a stable signature; it is
#                       not referenced by any command in this function.
#   $5 - data_name:     used in the checkpoint and results paths.
#   $6 - remove_blocks: passed to --num_remove_blocks and used in the
#                       results path.
# Returns:
#   The exit status of the python command.
#######################################
run_tuning_and_evaluation(){

  local gpu_id=$1
  local base_model=$2
  local model_name=$3
  local data_path=$4
  local data_name=$5
  local remove_blocks=$6
  # Kept local so the checkpoint label does not leak into the caller's scope.
  local tune_ckpt_path

#  # ---------------- shortened ------------------
#  tune_ckpt_path="shortened"
#  block_order_path="utils/sensitivity/${model_name}_output/ppl/block_order.csv"
#
#  echo "tune_ckpt_path: ${tune_ckpt_path}"
#  CUDA_VISIBLE_DEVICES=$gpu_id python eval_speed.py \
#      --name ${tune_ckpt_path} \
#      --base_model $base_model \
#      --peft=tune_log/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/ \
#      --block_order_path ${block_order_path} \
#      --num_remove_blocks ${remove_blocks} \
#      --tasks alpaca,samsum \
#      --cutoff_len 128 \
#      --output_path results/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/
#
#  # ---------------- shortgpt ------------------
#  tune_ckpt_path="shortgpt"
#  block_order_path="utils/sensitivity/${model_name}_output/similarity/block_order.csv"
#
#  echo "tune_ckpt_path: ${tune_ckpt_path}"
#  CUDA_VISIBLE_DEVICES=$gpu_id python eval_speed.py \
#      --name ${tune_ckpt_path} \
#      --base_model $base_model \
#      --peft=tune_log/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/ \
#      --block_order_path ${block_order_path} \
#      --num_remove_blocks ${remove_blocks} \
#      --tasks alpaca,samsum \
#      --cutoff_len 128 \
#      --output_path results/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/
#
#  # ---------------- sleb ------------------
#  tune_ckpt_path="sleb"
#  block_order_path="baselines/SLEB/sleb_results/block_order_${model_name}.csv"
#
#  echo "tune_ckpt_path: ${tune_ckpt_path}"
#  CUDA_VISIBLE_DEVICES=$gpu_id python eval_speed.py \
#      --name ${tune_ckpt_path} \
#      --base_model $base_model \
#      --peft=tune_log/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/ \
#      --block_order_path ${block_order_path} \
#      --num_remove_blocks ${remove_blocks} \
#      --tasks alpaca,samsum \
#      --cutoff_len 128 \
#      --output_path results/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/
#
#  # ---------------- first ------------------
#  tune_ckpt_path="first"
#
#  echo "tune_ckpt_path: ${tune_ckpt_path}"
#  CUDA_VISIBLE_DEVICES=$gpu_id python eval_speed.py \
#      --name ${tune_ckpt_path} \
#      --base_model $base_model \
#      --peft=tune_log/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/ \
#      --num_remove_blocks ${remove_blocks} \
#      --tasks alpaca,samsum \
#      --cutoff_len 128 \
#      --output_path results/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/

  # ---------------- pudding ------------------
#  tune_ckpt_path="pudding"
#  open_path=tune_log/${model_name}/${tune_ckpt_path}_${remove_blocks}/analysis/llama_layer_list_6_advanced_tasks.csv
#
#  echo "tune_ckpt_path: ${tune_ckpt_path}"
#  CUDA_VISIBLE_DEVICES=$gpu_id python eval_speed.py \
#      --name ${tune_ckpt_path} \
#      --base_model $base_model \
#      --peft=tune_log/$model_name/${tune_ckpt_path}_${remove_blocks}/ \
#      --layerset_path ${open_path} \
#      --num_remove_blocks ${remove_blocks} \
#      --tasks alpaca,samsum \
#      --cutoff_len 128 \
#      --output_path results/$model_name/${tune_ckpt_path}_${remove_blocks}_lora${lora_r}/$data_name/

  # ---------------- buddy ------------------
  tune_ckpt_path="buddy"
  # Every expansion is quoted so that a model path or name containing
  # whitespace or glob characters reaches python as a single argument.
  # NOTE(review): unlike --output_path, the --peft directory does not include
  # rm${remove_blocks}; confirm that one checkpoint is shared across all
  # removal counts.
  CUDA_VISIBLE_DEVICES="$gpu_id" python eval_speed.py \
    --name "${tune_ckpt_path}" \
    --base_model "$base_model" \
    --peft="tune_log/${model_name}/${tune_ckpt_path}_${lora_r}/${data_name}/" \
    --num_remove_blocks "${remove_blocks}" \
    --lambda_reg 0.1 \
    --sensitivity_type taylor \
    --sensitivity_path "utils/sensitivity/${model_name}_output/taylor/block_score_all.csv" \
    --tasks alpaca,samsum \
    --cutoff_len 128 \
    --output_path "results/${model_name}/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/${data_name}/"
}

# Dispatch the configured jobs in batches: each batch starts at most one
# background job per entry in gpu_ids, then waits for the whole batch to
# finish before the next batch starts.
num_jobs=${#data_paths[@]}
# The batch stride must equal the number of GPUs; a fixed stride would skip
# jobs (fewer GPUs than the stride) or run jobs twice (more GPUs).
num_gpus=${#gpu_ids[@]}

# A zero stride would never advance the outer loop, so refuse to continue.
if ((num_jobs > 0 && num_gpus == 0)); then
  echo "error: gpu_ids is empty; cannot schedule ${num_jobs} job(s)" >&2
  exit 1
fi

for ((j=0; j<num_jobs; j+=num_gpus)); do
  batch_pids=()
  for i in "${!gpu_ids[@]}"; do
    idx=$((j+i))
    # The last batch may be partial: stop once every job has been launched.
    if ((idx>=num_jobs)); then
      break
    fi

    gpu_id=${gpu_ids[$i]}
    base_model=${base_models[$idx]}
    model_name=${model_names[$idx]}
    data_path=${data_paths[$idx]}
    data_name=${data_names[$idx]}
    remove_blocks=${num_remove_blocks[$idx]}

    run_tuning_and_evaluation "$gpu_id" "$base_model" "$model_name" "$data_path" "$data_name" "$remove_blocks" &
    batch_pids+=("$!")
  done

  # Wait for every job in this batch. Waiting on each PID individually (rather
  # than a bare `wait`) exposes each job's exit status so failures are
  # reported instead of being silently discarded.
  for pid in "${batch_pids[@]}"; do
    if ! wait "$pid"; then
      echo "warning: background job with PID ${pid} exited with a non-zero status" >&2
    fi
  done
done



# ---------------- unpruned ------------------
#tune_ckpt_path="unpruned"
#model_name="llama2"
#echo "tune_ckpt_path: ${tune_ckpt_path}"
#CUDA_VISIBLE_DEVICES=0 python eval_speed.py \
#    --name ${tune_ckpt_path} \
#    --base_model $base_model \
#    --tasks alpaca,samsum \
#    --cutoff_len 128 \
#    --output_path results/$model_name/${tune_ckpt_path}/