#!/usr/bin/env bash
# Batch driver: runs scripts/vllm_infer.py once for every configured
# dataset / save-dir / model triple defined further down in this file.

# Strict mode: abort on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# GPUs made visible to every child process started by this script.
CUDA_VISIBLE_DEVICES="0,1,2,3"
export CUDA_VISIBLE_DEVICES


# Job configuration: three parallel arrays with one entry per inference run.
# Entry k of DATASETS, SAVE_DIR and MODELS together describe run k, so add a
# new run by appending to all three arrays at once.
#   DATASETS - dataset name passed to --dataset
#   SAVE_DIR - output root; results go to <SAVE_DIR>/<DATASET>/<model name>.jsonl
#   MODELS   - local checkpoint path or Hugging Face model ID
DATASETS=()
SAVE_DIR=()
MODELS=()

# run 0
DATASETS+=("clevr_policy_v0_textual__direct_sft_w_policy__test_full_d-6")
SAVE_DIR+=("<path_to_your_output_dir>")
MODELS+=("<path_to_your_saved_checkpoint_or_huggingface_model_id>")


# Guard: the job arrays are read index-by-index, so refuse to start unless
# DATASETS, SAVE_DIR and MODELS all hold the same number of entries.
if ! (( ${#DATASETS[@]} == ${#SAVE_DIR[@]} && ${#DATASETS[@]} == ${#MODELS[@]} )); then
  printf '%s\n' "ERROR: DATASETS, SAVE_DIR and MODELS must have the same length" >&2
  exit 1
fi

#######################################
# Derive the result-file stem for a model reference.
# Arguments: $1 - local checkpoint path or Hugging Face model ID
# Outputs:   the derived name on stdout
#######################################
resolve_model_name() {
  local model=$1
  if [[ -e "$model" ]]; then
    # Exists on disk: name the run after the directory two levels above it.
    basename -- "$(dirname -- "$(dirname -- "$model")")"
  else
    # Not a local path: treat it as a repo ID and flatten every "/" to "_".
    printf '%s\n' "${model//\//_}"
  fi
}

#######################################
# Run scripts/vllm_infer.py once per configured job.
# Globals:   DATASETS, SAVE_DIR, MODELS (read only; same length, same order)
# Outputs:   one progress line per job on stdout, plus the Python output
# Returns:   non-zero as soon as a job fails when errexit is active
#######################################
run_inference_jobs() {
  local i dataset save_dir model model_name save_path

  for i in "${!DATASETS[@]}"; do
    dataset=${DATASETS[i]}
    # Keep the per-job value in its own scalar. Assigning a string to the
    # array name SAVE_DIR would overwrite SAVE_DIR[0] on every iteration.
    save_dir=${SAVE_DIR[i]}
    model=${MODELS[i]}
    model_name=$(resolve_model_name "$model")
    save_path="$save_dir/$dataset/$model_name.jsonl"

    # NOTE(review): vllm_infer.py is not visible from this script, so it is
    # unknown whether it creates missing parent directories. Creating the
    # directory here is a no-op if it already exists.
    mkdir -p -- "$save_dir/$dataset"

    echo "Running inference for dataset: $dataset → $save_path"
    python scripts/vllm_infer.py \
      --model_name_or_path "$model" \
      --template qwen2_vl \
      --cutoff_len 9216 \
      --max_new_tokens 2048 \
      --temperature 0.0 \
      --top_p 1.0 \
      --batch_size 128 \
      --save_every 10 \
      --save_name "$save_path" \
      --dataset "$dataset"
  done
}

run_inference_jobs