#!/bin/bash
#
# Runs the retrieval evaluation by invoking src/agents/retriever.py on the
# math_precalculus_5 test set.
#
# Usage: <this script> [MODE] [TOP_K] [THRESHOLD]
#   MODE       retrieval mode: problem, condition or hybrid (default: problem)
#   TOP_K      value forwarded as --k (default: 3)
#   THRESHOLD  value forwarded as --similarity_threshold (default: 0.4)
#
# All paths below are relative, so the script must be started from the
# directory that contains data/ and src/ (presumably the repository root).
#
# Exit status: 0 on success, otherwise the exit status of the retriever.

set -euo pipefail

# Test dataset for retrieval evaluation
readonly EMBEDDING_MODEL="text-embedding-3-large"
readonly QUERY_PATH="data/math_precalculus_5_test.jsonl"

# Configuration
readonly INPUT_DIR="data/math_precalculus_5_test_x-ai/grok-4.1-fast"
readonly MODE="${1:-problem}" # problem, condition, hybrid
readonly TOP_K="${2:-3}"
readonly THRESHOLD="${3:-0.4}"

# Embedding index files. The problem index name is derived from the query file
# and the embedding model so the three settings cannot drift apart; it expands
# to data/math_precalculus_5_test_text-embedding-3-large_idx.npz.
readonly PROBLEM_EMBEDDING_FILE="${QUERY_PATH%.jsonl}_${EMBEDDING_MODEL}_idx.npz"
readonly EXP_EMBEDDING_FILE="${INPUT_DIR}/experience_idx.npz"

# Append the working directory to PYTHONPATH. The ':+' expansion avoids a
# leading empty entry when PYTHONPATH is unset (and keeps 'set -u' happy).
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}"

# Arguments are collected in an array so that no line-continuation backslash
# can accidentally swallow the statement that follows the command.
retriever_args=(
  --input_dir "${INPUT_DIR}"
  --embedding_model "${EMBEDDING_MODEL}"
  --query_path "${QUERY_PATH}"
  --mode "${MODE}"
  --k "${TOP_K}"
  --similarity_threshold "${THRESHOLD}"
  --problem_embedding_path "${PROBLEM_EMBEDDING_FILE}"
  --experience_embedding_path "${EXP_EMBEDDING_FILE}"
)

echo ">>> Running Retrieval Evaluation in mode: ${MODE}"

# Capture the exit status explicitly so a failure is reported and propagated
# instead of being hidden behind the completion banner.
status=0
python3 src/agents/retriever.py "${retriever_args[@]}" || status=$?
if ((status != 0)); then
  echo ">>> Retrieval evaluation failed (exit code ${status})." >&2
  exit "${status}"
fi

echo "=================================================="
echo ">>> All tasks complete."
