source scripts/common_setting.sh

# ---------------------------------------------------------------------------
# Experiment configuration.
#
# The arrays below are parallel: the driver loop at the bottom of this file
# reads entry k of base_models / model_names / data_paths / data_names /
# lora_rs to build run k. The number of runs is the length of data_paths.
# ---------------------------------------------------------------------------

# Checkpoint tag embedded in the tune_log/ and results/ directory names.
tune_ckpt_path='buddy'

# Base checkpoints. $llama2 is not defined in this file; presumably it is set
# by scripts/common_setting.sh -- confirm.
base_models=("$llama2")
# base_models+=("$qwen7b")

# Short model names used to build log/result paths (same order as base_models).
model_names=('llama2')
# model_names+=('qwen7b')

# Fine-tuning dataset identifiers and the short names used for them in paths.
data_paths=('yahma/alpaca-cleaned')
data_names=('alpaca')

# lora_r value for run k, and the GPU slot ids a batch of runs is spread over.
lora_rs=(8 8 8 8 8 8 8 8)
gpu_ids=(0 1 2 3 4 5 6 7)

#######################################
# Evaluate a LoRA-tuned checkpoint with eval_metric.py (tasks wikitext2,ptb)
# at several block-removal levels. One eval_metric.py process is started in
# the background per level, then the function waits for all of them.
# The fine-tuning stage and the zero-shot accuracy stage are kept below as
# commented-out code so they can be re-enabled by hand.
# Globals:
#   tune_ckpt_path (read) - tag used in the tune_log/ and results/ paths and
#                           passed as --name
# Arguments:
#   $1 - gpu_id:     only echoed in the Start/End log lines; the devices
#                    actually used come from the local eval_gpus list
#   $2 - base_model: value for --base_model
#   $3 - model_name: short name used in tune_log/, results/ and sensitivity paths
#   $4 - data_path:  only referenced by the commented-out tuning stage
#   $5 - data_name:  short dataset name used in tune_log/ and results/ paths
#   $6 - lora_r:     value embedded in the checkpoint and result directory names
# Outputs:
#   Progress lines on stdout, plus whatever the eval processes print.
# Returns:
#   Bare `wait` does not report child exit statuses, so failures of the
#   eval processes are not propagated to the caller.
#######################################
run_tuning_and_evaluation() {
  local gpu_id=$1
  local base_model=$2
  local model_name=$3
  local data_path=$4 # only used by the commented-out tuning stage below
  local data_name=$5
  local lora_r=$6

#  echo "base_model: ${base_model}"
#  echo "Lora Config: lora_r=($lora_r),"
#  current_time=$(date "+%Y-%m-%d %H:%M:%S")
#  echo "Start tuning on gpu: $gpu_id, $current_time"
#
##  CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7  accelerate launch --config_file accelerate.yaml finetune.py \
#  CUDA_VISIBLE_DEVICES=$gpu_id python finetune.py \
#     --base_model $base_model \
#     --data_name $data_name \
#     --data_path $data_path \
#     --output_dir tune_log/$model_name/${tune_ckpt_path}_${lora_r}/$data_name/ \
#     --lora_r $lora_r \
#     --cutoff_len 512 \
#     --lambda_reg 0.1 \
#     --sensitivity_type taylor \
#     --sensitivity_path utils/sensitivity/${model_name}_output/taylor/block_score_all.csv \
#     --num_epochs 5 \
#     --learning_rate 1e-4 \
#     --gradient_accumulation_steps 16 \
#     --batch_size 8

 # ---------------- eval acc ------------------
#   num_remove_blocks=(
#    4 8 12 16
#   )
#   eval_gpus=(
#    0 1 2 3
#   )
#
#  echo "base_model: ${base_model}"
#  current_time=$(date "+%Y-%m-%d %H:%M:%S")
#  echo "Start evaluation on gpu: $gpu_id, $current_time"
#
#  for ((k=0; k<${#num_remove_blocks[@]}; k+=1)); do
#    remove_blocks=${num_remove_blocks[$k]}
#    eval_gpu=${eval_gpus[$k]}
#    CUDA_VISIBLE_DEVICES=$eval_gpu python zero_shot_eval.py --model hf \
#        --pretrained ${base_model} \
#        --peft tune_log/$model_name/${tune_ckpt_path}_${lora_r}/$data_name/ \
#        --num_remove_blocks ${remove_blocks} \
#        --tasks openbookqa,piqa,boolq,social_iqa,hellaswag,arc_easy,winogrande,arc_challenge \
#        --device cuda:0 \
#        --batch_size 4 \
#        --output_path results/$model_name/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/$data_name/ &
#  done
#  wait
#  current_time=$(date "+%Y-%m-%d %H:%M:%S")
#  echo "End evaluation on gpu: $gpu_id, $current_time"


# ---------------- eval ppl ------------------
  # Entry k of eval_gpus is the device used for entry k of num_remove_blocks.
  local -a num_remove_blocks=(4 8 12 16)
  # NOTE(review): this list skips GPU 3, while the commented-out accuracy
  # stage above uses 0 1 2 3 -- confirm the 4 is intentional.
  local -a eval_gpus=(0 1 2 4)

  # Paths shared by every eval process of this run.
  local peft_dir="tune_log/${model_name}/${tune_ckpt_path}_${lora_r}/${data_name}/"
  local sensitivity_csv="utils/sensitivity/${model_name}_output/taylor/block_score_all.csv"

  # Declared local so repeated or direct calls do not leak loop state.
  local current_time k remove_blocks eval_gpu

  echo "base_model: ${base_model}"
  current_time=$(date "+%Y-%m-%d %H:%M:%S")
  # NOTE(review): the message reports $gpu_id, but the processes below run on
  # the eval_gpus devices -- confirm which one the log should show.
  echo "Start evaluation on gpu: $gpu_id, $current_time"
  for ((k = 0; k < ${#num_remove_blocks[@]}; k += 1)); do
    remove_blocks=${num_remove_blocks[$k]}
    eval_gpu=${eval_gpus[$k]}
    # Every expansion is quoted so values containing spaces or glob
    # characters reach eval_metric.py as a single argument each.
    CUDA_VISIBLE_DEVICES="$eval_gpu" python eval_metric.py \
      --name "${tune_ckpt_path}" \
      --base_model "$base_model" \
      --peft="${peft_dir}" \
      --num_remove_blocks "${remove_blocks}" \
      --tasks wikitext2,ptb \
      --lambda_reg 0.1 \
      --sensitivity_type taylor \
      --sensitivity_path "${sensitivity_csv}" \
      --batch_size 4 \
      --cutoff_len 256 \
      --output_path "results/${model_name}/${tune_ckpt_path}_rm${remove_blocks}_lora${lora_r}/${data_name}/" &
  done
  # Barrier: block until every background eval process has exited.
  wait

  current_time=$(date "+%Y-%m-%d %H:%M:%S")
  echo "End evaluation on gpu: $gpu_id, $current_time"
}

#######################################
# Launch run_tuning_and_evaluation once for every configuration, in batches.
# Each batch starts at most one background run per entry of gpu_ids and is
# followed by a barrier, so the next batch begins only after the previous
# one has finished.
# Globals:
#   data_paths (read)  - its length is the number of configurations
#   base_models, model_names, data_names, lora_rs (read) - parallel to data_paths
#   gpu_ids (read)     - GPU slot ids; its length is the batch size
# Arguments:
#   None
# Returns:
#   0 when all batches were launched and waited for (child exit statuses are
#   not inspected by the bare `wait`); 1 if gpu_ids is empty while there are
#   configurations to run.
#######################################
launch_all_runs() {
  local total=${#data_paths[@]}
  local batch_size=${#gpu_ids[@]}
  local batch_start slot idx

  # Nothing configured: nothing to do.
  ((total > 0)) || return 0

  # A zero stride would never advance the outer loop.
  if ((batch_size == 0)); then
    echo "gpu_ids is empty; cannot schedule ${total} run(s)" >&2
    return 1
  fi

  # Advance by the number of GPU slots so consecutive batches never overlap
  # and every configuration index is launched exactly once.
  for ((batch_start = 0; batch_start < total; batch_start += batch_size)); do
    for slot in "${!gpu_ids[@]}"; do
      idx=$((batch_start + slot))
      if ((idx >= total)); then
        break
      fi

      run_tuning_and_evaluation \
        "${gpu_ids[$slot]}" \
        "${base_models[$idx]}" \
        "${model_names[$idx]}" \
        "${data_paths[$idx]}" \
        "${data_names[$idx]}" \
        "${lora_rs[$idx]}" &
    done
    wait # Wait for all tuning and evaluation processes of this batch to finish
  done
}

launch_all_runs