#!/bin/bash

# Complete inference and evaluation pipeline script (local version)
# This script is the bash equivalent of run_inference_evaluation_pipeline.py for convenient local execution

# Strict mode, spelled out with long option names:
#   errexit  - abort as soon as a command fails
#   nounset  - expanding an unset variable is an error
#   pipefail - a pipeline fails if any stage in it fails
set -o errexit
set -o nounset
set -o pipefail

# Locate the directory holding this script, then use its parent as the repo
# root and work from there so the relative paths used below are stable.
SCRIPT_DIR="$(
    cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
REPO_ROOT="$(
    cd "${SCRIPT_DIR}/.." && pwd
)"
cd "${REPO_ROOT}"

#######################################
# Print the command-line help text.
#
# The option list mirrors what the argument parser in this script accepts:
# every value-taking option may be written as "--option VALUE" or
# "--option=VALUE", and -n is a short alias for --n_generations.
# Arguments: none
# Outputs:   writes the help text to stdout
#######################################
print_usage() {
    cat <<'USAGE'
Usage:
    bash scripts/run_inference_evaluation_pipeline.sh [OPTIONS]

Options (all override defaults; values may be given as "--opt VALUE" or "--opt=VALUE"):
    --model_name PATH                       default: (none)
    --dataset_name NAME                     default: (none)
    --dataset_path PATH                     default: (none)
    --n_generations N | -n N                default: 16
    --num_samples N                         default: 10000
    --judge_model NAME                      default: gpt-41-mini-0414-global
    --temperature FLOAT                     default: 0.6
    --top_p FLOAT                           default: 0.95
    --top_k INT                             default: 20
    --max_tokens INT                        default: 8192
    --tensor_parallel_size INT              default: 1
    --gpu_memory_utilization FLOAT          default: 0.95
    --use_system_prompt                     default: false (enable when flag present)
    -h | --help                             show this help

Example:
    bash scripts/run_inference_evaluation_pipeline.sh \
        --model_name ../ckpts/grpo/600 \
        --dataset_name aime25 \
        --dataset_path ../datasets/ours/math/test \
        --n_generations 16 \
        --num_samples 200 \
        --temperature 0.7 \
        --top_p 0.9 \
        --top_k 40 \
        --max_tokens 4096 \
        --use_system_prompt
USAGE
}

# Default parameter values (kept in line with the original script).
# Each one is replaced when the corresponding command-line option is given.

# Model and dataset selection: empty until supplied on the command line
MODEL_NAME=''
DATASET_NAME=''
DATASET_PATH=''

# Amount of work
NUM_SAMPLES='10000'
N_GENERATIONS='16'

# Sampling settings
TEMPERATURE='0.6'
TOP_P='0.95'
TOP_K='20'
MAX_TOKENS='8192'

# Inference engine resources
TENSOR_PARALLEL_SIZE='1'
GPU_MEMORY_UTILIZATION='0.95'

# Evaluation judge
JUDGE_MODEL='gpt-41-mini-0414-global'

# Boolean flag, stored as the literal string true/false
USE_SYSTEM_PROMPT='false'

#######################################
# Parse command-line arguments.
#
# Every value-taking option is accepted as "--option VALUE" or
# "--option=VALUE" (-n / -n=VALUE is an alias for --n_generations).
# A value-taking option given without a value is reported explicitly instead
# of tripping over an unset "$2" under `set -u`.
#
# Globals written: MODEL_NAME, DATASET_NAME, DATASET_PATH, N_GENERATIONS,
#                  NUM_SAMPLES, JUDGE_MODEL, TEMPERATURE, TOP_P, TOP_K,
#                  MAX_TOKENS, TENSOR_PARALLEL_SIZE, GPU_MEMORY_UTILIZATION,
#                  USE_SYSTEM_PROMPT
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for -h/--help; diagnostics on stderr
# Exits:     0 after -h/--help; 1 on an unknown argument or a missing value
#######################################
parse_args() {
    local arg opt val var
    while [[ $# -gt 0 ]]; do
        arg="$1"

        # Options that take no value
        case "$arg" in
            -h|--help) print_usage; exit 0 ;;
            --use_system_prompt) USE_SYSTEM_PROMPT=true; shift; continue ;;
        esac

        # Reduce "--option=VALUE" to its option name; other forms are kept as-is
        case "$arg" in
            --*=*|-n=*) opt="${arg%%=*}" ;;
            *) opt="$arg" ;;
        esac

        # Map the option name to the variable it sets
        case "$opt" in
            --model_name) var=MODEL_NAME ;;
            --dataset_name) var=DATASET_NAME ;;
            --dataset_path) var=DATASET_PATH ;;
            --n_generations|-n) var=N_GENERATIONS ;;
            --num_samples) var=NUM_SAMPLES ;;
            --judge_model) var=JUDGE_MODEL ;;
            --temperature) var=TEMPERATURE ;;
            --top_p) var=TOP_P ;;
            --top_k) var=TOP_K ;;
            --max_tokens) var=MAX_TOKENS ;;
            --tensor_parallel_size) var=TENSOR_PARALLEL_SIZE ;;
            --gpu_memory_utilization) var=GPU_MEMORY_UTILIZATION ;;
            *)
                echo "Unknown argument: $arg" >&2
                echo "" >&2
                print_usage >&2
                exit 1
                ;;
        esac

        if [[ "$arg" == "$opt" ]]; then
            # "--option VALUE": the value is the next argument and must exist
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $opt" >&2
                exit 1
            fi
            val="$2"
            shift 2
        else
            # "--option=VALUE": everything after the first "=" is the value
            val="${arg#*=}"
            shift
        fi

        # Assign to the global named by $var without using eval
        printf -v "$var" '%s' "$val"
    done
}

parse_args "$@"
# All arguments have been handled; leave no positional parameters behind
set --

# Create output directory
#
# Layout: <repo root>/inference_results/<model basename>/<dataset name>/<params>
# The model and dataset names are path components here. If either is empty the
# component collapses and unrelated runs end up sharing one directory, so both
# are rejected up front, before anything is created on disk.
if [[ -z "$MODEL_NAME" ]]; then
    echo "Error: --model_name must be provided (run with --help for usage)" >&2
    exit 1
fi
if [[ -z "$DATASET_NAME" ]]; then
    echo "Error: --dataset_name must be provided (run with --help for usage)" >&2
    exit 1
fi

# Only the last path component of the model path is used as the directory name
MODEL_NAME_CLEAN="$(basename "$MODEL_NAME")"
# Encode the sampling settings in the directory name so runs with different
# settings do not overwrite each other
PARAMS_STRING="temp${TEMPERATURE}_topp${TOP_P}_topk${TOP_K}_maxtok${MAX_TOKENS}_n${N_GENERATIONS}_system${USE_SYSTEM_PROMPT}"
BASE_OUTPUT_DIR="inference_results"
OUTPUT_DIR="${REPO_ROOT}/${BASE_OUTPUT_DIR}/${MODEL_NAME_CLEAN}/${DATASET_NAME}/${PARAMS_STRING}"
mkdir -p "$OUTPUT_DIR"

# Summary banner for the run
echo "=============================="
echo "Starting inference and evaluation pipeline"
echo "=============================="
echo "Model: $MODEL_NAME"
echo "Dataset: $DATASET_NAME"
echo "Output directory: $OUTPUT_DIR"
echo "Judge model: $JUDGE_MODEL"
echo "Use system prompt: $USE_SYSTEM_PROMPT"
echo ""

# Step 1: Initial inference
printf '%s\n' \
    "==============================" \
    "Step 1: Running initial inference" \
    "=============================="

# Build the argument list for utils/inference.py piece by piece. An array
# keeps every value a single word even if it contains spaces.
INFER_ARGS=()
INFER_ARGS+=(--model_name "$MODEL_NAME")
INFER_ARGS+=(--dataset_name "$DATASET_NAME")
INFER_ARGS+=(--dataset_path "$DATASET_PATH")
INFER_ARGS+=(--tensor_parallel_size "$TENSOR_PARALLEL_SIZE")
INFER_ARGS+=(--gpu_memory_utilization "$GPU_MEMORY_UTILIZATION")
INFER_ARGS+=(--temperature "$TEMPERATURE")
INFER_ARGS+=(--top_p "$TOP_P")
INFER_ARGS+=(--top_k "$TOP_K")
INFER_ARGS+=(--n_generations "$N_GENERATIONS")
INFER_ARGS+=(--num_samples "$NUM_SAMPLES")
INFER_ARGS+=(--max_tokens "$MAX_TOKENS")
INFER_ARGS+=(--output_file "$OUTPUT_DIR/initial_inference_results.json")

# --use_system_prompt is a bare flag: pass it only when enabled
case "$USE_SYSTEM_PROMPT" in
    true) INFER_ARGS+=(--use_system_prompt) ;;
esac

# Log a shell-quoted rendering of the command, then run the real array form
FULL_CMD=(python utils/inference.py "${INFER_ARGS[@]}")
printf -v EXEC_CMD '%q ' "${FULL_CMD[@]}"
printf '%s\n' "Executing: ${EXEC_CMD}"
"${FULL_CMD[@]}"

printf '%s\n' "Initial inference complete"
printf '\n'

# Step 2: LLM-as-Judge evaluation
echo "=============================="
echo "Step 2: Evaluating with LLM-as-Judge"
echo "=============================="

# Save current directory, then switch to output directory for evaluation.
# The input file is passed by bare name, so the evaluator has to run from the
# directory that contains it.
CURRENT_DIR="$(pwd)"
cd "$OUTPUT_DIR"
python "$CURRENT_DIR/utils/evaluation.py" llm-judge \
    --input "initial_inference_results.json" \
    --model "$JUDGE_MODEL"

# Find the generated evaluation file: the most recently modified
# evaluation_results_*.json in the output directory (presumably written there
# by evaluation.py). A glob plus the -nt test is used instead of parsing `ls`
# output, so unusual file names are handled and a missing file produces a
# clear error rather than a raw `ls` failure.
EVALUATION_FILE=""
for eval_candidate in evaluation_results_*.json; do
    # An unmatched glob is left as the literal pattern; skip it
    [[ -e "$eval_candidate" ]] || continue
    if [[ -z "$EVALUATION_FILE" || "$eval_candidate" -nt "$EVALUATION_FILE" ]]; then
        EVALUATION_FILE="$eval_candidate"
    fi
done
unset eval_candidate

if [[ -z "$EVALUATION_FILE" ]]; then
    echo "Error: no evaluation_results_*.json file found in $OUTPUT_DIR" >&2
    exit 1
fi

# EVALUATION_FILE is a file name relative to OUTPUT_DIR
echo "LLM-as-Judge evaluation complete, results file: $EVALUATION_FILE"
cd "$CURRENT_DIR"
echo ""
