#!/bin/bash
# Put the current working directory on Python's module search path.
PYTHONPATH=.
export PYTHONPATH

# Path of the base checkpoint, passed to the Python script as --base_model.
base_model='models--baffo32--decapoda-research-llama-7B-hf/snapshots/aa18b48a1330572a6dd5f5d5619ed19838ca285c'

# Adapter directories to run, each relative to rebuttal/tune_log/qkvupdown/.
declare -a lora_models=(
  'standard_lora/8'
  'vera_r64'
  'vb_lora_r4'
  'share_slice/2_4_8'
  'share_gate/2_4_8'
  'share_kron/2_4_8'
)

# GPU ids to schedule on; each job gets one of these as CUDA_VISIBLE_DEVICES.
declare -a gpu_ids=(0 1 2 3)

#######################################
# Run rebuttal/lora_inter.py for one LoRA adapter on one GPU.
# Globals:   base_model (read) - path passed as --base_model
# Arguments: $1 - adapter directory, relative to rebuttal/tune_log/qkvupdown/
#            $2 - value exported to the job as CUDA_VISIBLE_DEVICES
# Outputs:   a start banner on stdout, followed by the Python script's output
# Returns:   exit status of the python command
#######################################
run_infer(){

  local lora_model=$1
  local gpu_id=$2

  echo "Start inference model={$lora_model} on gpu={$gpu_id}"
  # Every expansion is quoted so that paths containing whitespace or glob
  # characters reach the Python script as a single argument each.
  CUDA_VISIBLE_DEVICES="$gpu_id" python rebuttal/lora_inter.py \
    --base_model "$base_model" \
    --lora_model "rebuttal/tune_log/qkvupdown/$lora_model" \
    --dataset_name yahma/alpaca-cleaned \
    --batch_size 2
}

# Schedule the adapters in batches of at most one job per GPU: entry j+i of
# lora_models runs on entry i of gpu_ids, and the next batch starts only after
# every job of the current batch has finished.
num_models=${#lora_models[@]}
num_gpus=${#gpu_ids[@]}
failed=0

# With no GPUs the batch offset would never advance and the loop below would
# spin forever, so refuse to start instead.
if ((num_models > 0 && num_gpus == 0)); then
  echo "gpu_ids is empty; cannot schedule $num_models model(s)" >&2
  exit 1
fi

for ((j = 0; j < num_models; j += num_gpus)); do
  batch_pids=()
  batch_models=()

  for i in "${!gpu_ids[@]}"; do

      idx=$((j+i))
      if ((idx >= num_models)); then
        break
      fi

      lora_model=${lora_models[$idx]}
      gpu_id=${gpu_ids[$i]}

      run_infer "$lora_model" "$gpu_id" &
      batch_pids+=("$!")
      batch_models+=("$lora_model")
  done

  # Wait on each PID individually: a bare `wait` always returns 0 and would
  # hide failed inference runs. A failure is recorded but does not stop the
  # remaining batches from running.
  for k in "${!batch_pids[@]}"; do
    wait "${batch_pids[$k]}" || {
      status=$?
      echo "Inference failed for model={${batch_models[$k]}} (exit status $status)" >&2
      failed=1
    }
  done
done

# Surface any failure to the caller through the script's exit status.
if ((failed)); then
  exit 1
fi
