#!/bin/bash
# Default settings; the option parser below overwrites them from the command line.
BATCH_SIZE=2          # forwarded to score2.py as --batch-size
MODEL_PARALLEL=2      # forwarded to score2.py as --model-parallel
INPUTS='./outputs'    # directory containing infer.parquet; the scores file is written here too
MODEL=                # required: stays empty until --model supplies a value

# Print the command-line synopsis and terminate the script.
# Arguments: none
# Outputs:   one usage line on stderr
# Exits:     always, with status 1 (this function never returns)
usage() {
    # Only options the argument parser accepts are listed; bracketed ones
    # have defaults set at the top of the script.
    echo "Usage: $0 --model MODEL [--inputs INPUTS] [--model-parallel MODEL_PARALLEL] [--batch-size BATCH_SIZE]" >&2
    # Must be its own command: placed on the echo line it would be printed
    # as text and the caller would keep running.
    exit 1
}

# Parse command line arguments into MODEL, INPUTS, MODEL_PARALLEL and BATCH_SIZE.
# Globals:   MODEL, INPUTS, MODEL_PARALLEL, BATCH_SIZE (written)
# Arguments: the script's command-line arguments
# Exits:     with status 1 (after calling usage) on an unknown option or
#            when a value-taking option is the last argument
parse_args() {
    while [ "$#" -gt 0 ]; do
        case "$1" in
            --model | --inputs | --model-parallel | --batch-size)
                # `shift 2` fails without shifting anything when only one
                # argument is left, which would make this loop spin forever.
                if [ "$#" -lt 2 ]; then
                    echo "Error: option $1 requires a value" >&2
                    usage
                    exit 1
                fi
                case "$1" in
                    --model) MODEL="$2" ;;
                    --inputs) INPUTS="$2" ;;
                    --model-parallel) MODEL_PARALLEL="$2" ;;
                    --batch-size) BATCH_SIZE="$2" ;;
                esac
                shift 2
                ;;
            # `--` ends option parsing; anything after it is ignored.
            --) break ;;
            # An empty-string argument also ends parsing (kept so existing
            # callers that pass one keep working).
            "") break ;;
            *)
                usage
                # Explicit exit so an unknown option can never re-enter the
                # loop, even if usage returns instead of exiting.
                exit 1
                ;;
        esac
    done
}
parse_args "$@"

# MODEL and INPUTS must both be non-empty; otherwise show the usage message.
[[ -n "$MODEL" && -n "$INPUTS" ]] || usage
# The scores file is written inside the inputs directory.
OUTPUT_FILE="$INPUTS/generation-scores.parquet"

# Skip inference when the output file already exists (currently disabled: the block below is commented out, so scoring always runs)
# if [ -f "$OUTPUT_FILE" ]; then
#     echo "Output file $OUTPUT_FILE already exists. Skipping inference."
#     exit 0
# fi

# Invoke llm_inference/score2.py through `pdm run`.
# Reads infer.parquet from the inputs directory and passes OUTPUT_FILE as the
# destination. The arguments are collected in an array and every expansion is
# quoted so that values containing spaces or glob characters reach score2.py
# as single arguments, and an empty value cannot silently drop an argument.
score_args=(
  --model "$MODEL"
  --inputs "${INPUTS}/infer.parquet"
  --outputs "$OUTPUT_FILE"
  --batch-size "$BATCH_SIZE"
  --model-parallel "$MODEL_PARALLEL"
  --dedup
)
pdm run llm_inference/score2.py "${score_args[@]}"