#!/usr/bin/env bash
# Launcher for domain/difficulty classification.
#
# Every knob below is read from the environment and falls back to the default
# shown, e.g.:
#   WORKERS=8 TEST=5 PROVIDERS="OpenAI Novita" bash <this script>
#
#   PARQUET          input parquet path            (default: ./data/dataset.parquet)
#   WORKERS          value passed as --workers     (default: 40)
#   MODEL            model name; a bare name gets an openai/ prefix later on
#                                                  (default: gpt-5)
#   SEED             value passed as --seed        (default: 123)
#   TEST             value passed as --test; empty runs everything
#                                                  (default: empty)
#   ALLOW_FALLBACKS  1 adds --allow_fallbacks      (default: 1)
#   PROVIDERS        whitespace-separated provider names, one --provider each
#                                                  (default: none)

set -euo pipefail

# Input parquet (defaults to provided dataset path)
PARQUET=${PARQUET:-./data/dataset.parquet}

# Concurrency and model
WORKERS=${WORKERS:-40}
MODEL=${MODEL:-gpt-5}     # will be normalized to openai/<model> if vendor not supplied
SEED=${SEED:-123}
# Honour TEST from the environment like the other knobs; leave it empty (the
# default) to run all. Example: TEST=5
TEST=${TEST:-}

# Optional OpenRouter flags
ALLOW_FALLBACKS=${ALLOW_FALLBACKS:-1}

# PROVIDERS arrives as a plain string (arrays cannot be exported), e.g.
# PROVIDERS="OpenAI Novita". Split it on whitespace with `read` rather than an
# unquoted expansion so characters such as `*` are never glob-expanded.
providers_raw=${PROVIDERS:-}
PROVIDERS=()
if [[ -n "$providers_raw" ]]; then
  # -d '' reads to end of input so newline-separated names are accepted too;
  # read then returns non-zero at EOF, which is expected here.
  read -r -d '' -a PROVIDERS <<< "$providers_raw" || true
fi

# Ensure environment (adjust if needed): activate ./venv when it exists
# relative to the current working directory.
if [[ -f ./venv/bin/activate ]]; then
  # shellcheck source=/dev/null
  source ./venv/bin/activate
fi

# Require the API key to be present in the environment. It is never set here:
# assigning a literal would clobber the caller's real key and put a secret (or
# a useless placeholder) into the script itself.
if [[ -z "${OPENROUTER_API_KEY:-}" || "${OPENROUTER_API_KEY}" == "YOUR_API_KEY_HERE" ]]; then
  echo "ERROR: OPENROUTER_API_KEY is not set (or is still the placeholder); export it before running." >&2
  exit 1
fi
# Make sure the key reaches child processes even if it was set without export.
export OPENROUTER_API_KEY

# Preflight: print the host and the requested parquet, then make sure the file
# is readable from here. When it is not, retry with ./data and ./backup_data
# swapped in the path (path normalization for different Lustre layouts) before
# giving up.
echo "Host: $(hostname)"
echo "Requested parquet: $PARQUET"

if ! [ -r "$PARQUET" ]; then
  echo "Primary parquet path not readable; trying alternate mounts..."
  # Each substitution rewrites only the first match, one per direction.
  ALT1=${PARQUET/\.\/data/\.\/backup_data}
  ALT2=${PARQUET/\.\/backup_data/\.\/data}
  for candidate in "$ALT1" "$ALT2"; do
    # A candidate equal to the request means that substitution did not apply.
    [ "$candidate" = "$PARQUET" ] && continue
    [ -r "$candidate" ] || continue
    echo "Using alternate parquet path: $candidate"
    PARQUET="$candidate"
    break
  done
fi

# Final gate: abort when neither the request nor an alternate is readable.
[ -r "$PARQUET" ] || {
  echo "ERROR: Parquet not readable from this node: $PARQUET" >&2
  echo "Tried alternates: $ALT1 | $ALT2" >&2
  exit 1
}

# OpenRouter model slugs look like <vendor>/<model>; a name without a slash is
# given the openai/ vendor prefix.
if [[ "$MODEL" == */* ]]; then
  MODEL_ARG="$MODEL"
else
  MODEL_ARG="openai/$MODEL"
fi

# The responses file name encodes model, run scope and seed. Slashes in the
# slug become underscores so the model stays a single path component.
model_safe="${MODEL_ARG//\//_}"
run_scope="all"
if [[ -n "${TEST}" ]]; then
  run_scope="test${TEST}"
fi
responses_file="./outputs/classify_${model_safe}_${run_scope}_seed_${SEED}.jsonl"

# Create the output directory up front.
mkdir -p "${responses_file%/*}"

# Collect the optional classifier flags in an array so every flag and value
# stays a separate word when passed on.
EXTRA_ARGS=()
# `[ ... -eq ]` is used on purpose: a non-numeric ALLOW_FALLBACKS is reported
# on stderr and treated as "off" rather than being evaluated arithmetically.
if [ "${ALLOW_FALLBACKS}" -eq 1 ]; then
  EXTRA_ARGS+=(--allow_fallbacks)
fi
# Only iterate when there is something to add: expanding an empty array trips
# `set -u` on bash older than 4.4.
if (( ${#PROVIDERS[@]} > 0 )); then
  for provider in "${PROVIDERS[@]}"; do
    EXTRA_ARGS+=(--provider "$provider")
  done
fi
if [[ -n "${TEST}" ]]; then
  EXTRA_ARGS+=(--test "$TEST")
fi

# Run the classifier. The ${arr[@]+...} form expands to nothing when EXTRA_ARGS
# is empty, instead of aborting with "unbound variable" under `set -u` on
# bash < 4.4 (reachable with ALLOW_FALLBACKS=0, no providers and TEST empty).
python3 ./classify_domain_difficulty.py \
    --parquet "$PARQUET" \
    --workers "$WORKERS" \
    --model   "$MODEL_ARG" \
    --jsonl   "$responses_file" \
    --seed    "$SEED" \
    --template "./prompts/prompt_domain_classification.txt" \
    ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}


