#!/bin/bash

# Create the log directory if it is missing, then clear or create the log
# files. Abort early when they cannot be written: every inference job
# redirects its output into these files, so no job could start without them.
if ! mkdir -p scripts; then
    echo "Error: cannot create log directory 'scripts'." >&2
    exit 1
fi
if ! : > scripts/infer_para_k_stdout.log || ! : > scripts/infer_para_k_stderr.log; then
    echo "Error: cannot write log files under 'scripts'." >&2
    exit 1
fi

# Define k values (one inference job is launched per entry)
# Alternative sweep, currently disabled:
# k_values=(5 10 20 40 80 160 500)
k_values=(64 128 256 512 1024 1536 2048 4096)

# Array of available GPU IDs; entry i is paired with k_values[i]
gpu_ids=(0 1 2 3 4 5 6 7)

# Each k value needs its own GPU, so refuse to start with too few GPU IDs.
if (( ${#gpu_ids[@]} < ${#k_values[@]} )); then
    echo "Error: Not enough GPUs specified. Found ${#gpu_ids[@]}, need ${#k_values[@]}." >&2
    exit 1
fi

# Run perplex_llama.py for a single k value on a specific GPU.
# Arguments:
#   $1 - value passed to perplex_llama.py as --k
#   $2 - GPU id placed in CUDA_VISIBLE_DEVICES for the python process
# Outputs:
#   Appends the python process's stdout/stderr to
#   scripts/infer_para_k_stdout.log and scripts/infer_para_k_stderr.log.
#   Prints a completion line to stdout on success, or a failure line to
#   stderr otherwise.
# Returns:
#   0 on success; otherwise the non-zero status of the python command
#   (or of the failed log redirection).
run_inference() {
    local k=$1
    local gpu=$2
    local status=0

    # Capture the status explicitly; otherwise the function would return the
    # status of the final echo and report failed runs as completed.
    CUDA_VISIBLE_DEVICES="$gpu" python perplex_llama.py --k "$k" \
        >> scripts/infer_para_k_stdout.log 2>> scripts/infer_para_k_stderr.log \
        || status=$?

    if (( status != 0 )); then
        echo "k = $k on GPU $gpu failed with exit status $status" >&2
        return "$status"
    fi
    echo "k = $k on GPU $gpu completed"
}

# Run inference for each k value on a separate GPU (k_values[i] is paired with
# gpu_ids[i]), remembering each background job's PID so its exit status can be
# collected afterwards.
pids=()
for i in "${!k_values[@]}"; do
    k=${k_values[$i]}
    gpu=${gpu_ids[$i]}
    run_inference "$k" "$gpu" &
    pids+=("$!")
done

# Wait for all background processes to finish. Each PID is waited on
# individually because a bare `wait` returns 0 regardless of how the jobs
# exited, which would hide failures.
failed=0
for pid in "${pids[@]}"; do
    wait "$pid" || failed=$((failed + 1))
done

if (( failed > 0 )); then
    echo "$failed inference task(s) failed; see scripts/infer_para_k_stderr.log" >&2
    exit 1
fi

echo "All inference tasks completed"