#!/bin/bash
# Evaluate generator-fixer workflow on bug fixing benchmarks
# Usage: ./scripts/eval/bugs/run_generator_fixer_flow.sh [options]
#
# Any [options] given on the command line are forwarded unchanged to the
# Python entry point, after the flags this script sets itself.
#
# Environment overrides (each falls back to the default assigned below when
# unset or empty):
#   MODEL, BASE_URL, N_PARALLEL, FIXER_ATTEMPTS_VAL, OUTPUT_DIR, VAL_DATASETS
#
# The Python script is referenced by a relative path, so run this from the
# directory that contains examples/ (presumably the repository root).

# Abort on the first failing command, on use of an unset variable, and on a
# failure in any pipeline stage.
set -euo pipefail

# Default configuration
MODEL="${MODEL:-Qwen/Qwen2.5-Coder-7B-Instruct}"
BASE_URL="${BASE_URL:-http://localhost:30000/v1}"
N_PARALLEL="${N_PARALLEL:-64}"
FIXER_ATTEMPTS_VAL="${FIXER_ATTEMPTS_VAL:-1}"
OUTPUT_DIR="${OUTPUT_DIR:-logs/eval/bugs}"

# Default validation datasets (can be overridden via VAL_DATASETS env var)
# Format: 'alias=dataset:split' or 'dataset:split'
# Whitespace-separated entries are passed as separate arguments.
VAL_DATASETS="${VAL_DATASETS:-bugbench_qwen7b_sampled:test}"

# Split VAL_DATASETS on whitespace into an array. Unlike an unquoted
# expansion, `read -a` performs no pathname expansion, so entries containing
# glob characters (*, ?, [) reach Python untouched. `-d ''` reads through
# embedded newlines; read then returns non-zero at end of input even though
# the array was filled, hence the `|| true`.
val_datasets=()
IFS=$' \t\n' read -r -d '' -a val_datasets <<<"${VAL_DATASETS}" || true

echo "=============================================="
echo "Generator-Fixer Flow Evaluation"
echo "=============================================="
echo "Model: ${MODEL}"
echo "Base URL: ${BASE_URL}"
echo "Parallel tasks: ${N_PARALLEL}"
echo "Fixer attempts (val): ${FIXER_ATTEMPTS_VAL}"
echo "Validation datasets: ${VAL_DATASETS}"
echo "Output directory: ${OUTPUT_DIR}"
echo "=============================================="

# The ${arr[@]+...} guard keeps an empty array (whitespace-only VAL_DATASETS)
# from tripping `set -u` on Bash versions older than 4.4.
# If Python exits non-zero, `set -e` stops the script here with that status,
# so the completion message below is only printed on success.
python examples/bugs_refactor/run_generator_fixer_flow.py \
    --val_datasets ${val_datasets[@]+"${val_datasets[@]}"} \
    --model "${MODEL}" \
    --base_url "${BASE_URL}" \
    --n_parallel "${N_PARALLEL}" \
    --eval_pregenerated_only \
    --include_failed_test_output \
    --fixer_attempts_val "${FIXER_ATTEMPTS_VAL}" \
    --save_results \
    --output_dir "${OUTPUT_DIR}" \
    "$@"

echo ""
echo "✅ Evaluation complete!"
echo "Results saved to: ${OUTPUT_DIR}"
