#!/bin/bash

# Default settings. The command-line parser below overrides them.

# Mandatory inputs: the script refuses to run while either is still empty.
MODEL=
CONFIG_FILE=

# Optional knobs and their built-in defaults.
OUTPUT_DIR='./outputs'
NUM_SAMPLES=1
BATCH_SIZE=32
DRY_RUN=false   # when not "false", the pdm commands are skipped

# Placeholder only; recomputed later from model, config name and timestamp.
EXPERIMENT_NAME=

# Print the command-line synopsis and abort.
#
# Outputs: one usage line on stderr, so it never mixes with regular output
#          that a caller may be capturing.
# Exits:   always terminates the script with status 1.
#
# Options in square brackets are optional: --output-dir, --num-samples and
# --batch-size fall back to the script defaults (./outputs, 1 and 32).
usage() {
    echo "Usage: $0 --model MODEL --config-file CONFIG_FILE [--output-dir OUTPUT_DIR] [--num-samples NUM_SAMPLES] [--batch-size BATCH_SIZE] [--dry-run]" >&2
    exit 1
}

# Parse command line arguments.
#
# parse_args ARG...
#   Walks the given words and stores recognised option values in the global
#   variables MODEL, CONFIG_FILE, OUTPUT_DIR, NUM_SAMPLES, BATCH_SIZE and
#   DRY_RUN.
#
#   - Value options need a following word. If it is missing, an error is
#     printed to stderr and usage is shown. Without this check `shift 2`
#     would fail without shifting anything and the loop would spin forever.
#   - "--" ends option parsing.
#   - An empty-string argument also ends parsing, as it always has.
#   - -h/--help and any unrecognised word show the usage message, which
#     exits the script.
parse_args() {
    while [ $# -gt 0 ]; do
        case "$1" in
            --model | --config-file | --output-dir | --num-samples | --batch-size )
                if [ $# -lt 2 ]; then
                    echo "Error: option $1 requires a value" >&2
                    usage
                fi
                case "$1" in
                    --model )       MODEL="$2" ;;
                    --config-file ) CONFIG_FILE="$2" ;;
                    --output-dir )  OUTPUT_DIR="$2" ;;
                    --num-samples ) NUM_SAMPLES="$2" ;;
                    --batch-size )  BATCH_SIZE="$2" ;;
                esac
                shift 2
                ;;
            --dry-run ) DRY_RUN=true; shift ;;
            -h | --help ) usage ;;
            -- ) shift; break ;;
            '' ) break ;;
            * )
                echo "Error: unknown argument: $1" >&2
                usage
                ;;
        esac
    done
}

parse_args "$@"

# Guard: model, config file and output directory must all be non-empty;
# otherwise show the usage message (which terminates the script).
[[ -n "$MODEL" && -n "$CONFIG_FILE" && -n "$OUTPUT_DIR" ]] || usage

# Derive a unique experiment name:
#   <model with "/" replaced by "_">_<config basename without .yaml>_sample<N>_<timestamp>
TIMESTAMP="$(date "+%Y%m%d-%H%M%S")"
CONFIG_NAME="$(basename "$CONFIG_FILE" ".yaml")"

# Slashes in the model id are replaced so the name is a single path component.
EXPERIMENT_NAME="${MODEL//\//_}"
EXPERIMENT_NAME+="_${CONFIG_NAME}"
EXPERIMENT_NAME+="_sample${NUM_SAMPLES}"
EXPERIMENT_NAME+="_${TIMESTAMP}"

# Stage 1: generation.
#
# run_generate
#   Runs llm_inference/generate.py through `pdm run` with the parsed settings.
#   Globals read: MODEL, CONFIG_FILE, OUTPUT_DIR, EXPERIMENT_NAME,
#                 NUM_SAMPLES, BATCH_SIZE.
#   Returns:      the exit status of the pdm command.
#
#   Every expansion is quoted so that values containing whitespace or glob
#   characters (e.g. an output directory with a space) are passed as exactly
#   one argument each instead of being split or expanded by the shell.
run_generate() {
  pdm run llm_inference/generate.py \
    --model "$MODEL" \
    --config-file "$CONFIG_FILE" \
    --output-dir "$OUTPUT_DIR" \
    --experiment-name "$EXPERIMENT_NAME" \
    --num-samples "$NUM_SAMPLES" \
    --batch-size "$BATCH_SIZE"
}

echo "Running experiment: $EXPERIMENT_NAME"
if [ "$DRY_RUN" = false ]; then
  # The later scoring step reads infer.parquet from this experiment's output
  # directory (presumably written by this stage), so do not continue after
  # a failed generation.
  if ! run_generate; then
    echo "Error: generation failed; aborting before scoring." >&2
    exit 1
  fi
fi

# Stage 2: scoring.
#
# Directory of this experiment: <output dir>/<experiment name>.
GENERATE_OUTPUT_DIR="$OUTPUT_DIR/$EXPERIMENT_NAME"

# run_score
#   Runs llm_inference/score.py through `pdm run`, reading infer.parquet from
#   and writing scores.parquet to GENERATE_OUTPUT_DIR.
#   Globals read: MODEL, GENERATE_OUTPUT_DIR.
#   Returns:      the exit status of the pdm command.
#
#   Paths and the model id are quoted so values with whitespace or glob
#   characters stay single arguments.
#   NOTE(review): the batch size here is fixed at 2 and does not follow the
#   --batch-size option used for generation; confirm this is intentional.
run_score() {
  pdm run llm_inference/score.py \
    --model "$MODEL" \
    --inputs "$GENERATE_OUTPUT_DIR/infer.parquet" \
    --outputs "$GENERATE_OUTPUT_DIR/scores.parquet" \
    --batch-size 2 \
    --dtype fp16 \
    --hidden-states \
    --layers -1
}

echo "Generated data in: $GENERATE_OUTPUT_DIR"
if [ "$DRY_RUN" = false ]; then
  run_score
fi