#!/bin/bash

# GEC Verification Runner - Option C (Mock + Real Mode)
# Supports both mock evidence generation and real LLM inference

# Strict mode: abort on the first failing command, treat expansion of an unset
# variable as an error, and let a pipeline fail when any of its stages fails.
set -o errexit -o nounset -o pipefail

# Default configuration
#
# Required inputs. They start out empty and are filled in from the command
# line (--base, --qrels, --outdir, --qids-file).
BASE_DIR=
QRELS=
OUTDIR=
QIDS_FILE=

# Optional settings and the flag that overrides each one.
MODEL='mistralai/Mistral-7B-Instruct-v0.3'   # --model     HuggingFace model ID
PACKS='1-12,13-40,41-100'                    # --packs     portfolio pack ranges
WRRF_K=60                                    # --wrrf-k    WRRF fusion parameter
CUTOFF=200                                   # --cutoff    document cutoff
ALPHA_WB=0.10                                # --alpha-wb  BM25 weight
ALPHA_WD=0.70                                # --alpha-wd  dense weight
ALPHA_WG=0.20                                # --alpha-wg  GES weight

# Mode switches, kept as the strings "true"/"false".
MOCK_GES=true                                # --mock-ges / --real-ges
VERBOSE=false                                # --verbose

# usage
#   Write the command-line help text to stdout.
#   The "(default: ...)" values are taken from MODEL, PACKS, WRRF_K, CUTOFF,
#   ALPHA_WB, ALPHA_WD and ALPHA_WG as they are set at the time of the call.
#   The doubled backslashes in the examples are deliberate: inside an unquoted
#   here-document "\\" is printed as a single "\".
usage() {
    local script="$0"
    cat <<HELP_TEXT
Usage: ${script} [options]

Required:
    --base DIR          Base directory containing BEIR data (corpus.jsonl, queries.jsonl)
    --qrels FILE        Qrels file for evaluation
    --outdir DIR        Output directory for results
    --qids-file FILE    File containing query IDs to process

Optional:
    --model MODEL       HuggingFace model ID (default: ${MODEL})
    --packs RANGES      Portfolio pack ranges (default: ${PACKS})
    --wrrf-k K          WRRF fusion parameter (default: ${WRRF_K})
    --cutoff N          Document cutoff (default: ${CUTOFF})
    --alpha-wb FLOAT    BM25 weight (default: ${ALPHA_WB})
    --alpha-wd FLOAT    Dense weight (default: ${ALPHA_WD}) 
    --alpha-wg FLOAT    GES weight (default: ${ALPHA_WG})
    --real-ges          Use real LLM inference (default: mock)
    --mock-ges          Use mock evidence generation (default)
    --verbose           Enable verbose output

Examples:
    # Mock mode (verification)
    ${script} --base /path/to/fiqa_beir --qrels /path/to/qrels.tsv \\
       --outdir results/fiqa_mock --qids-file qids100.txt

    # Real mode (full reproduction)  
    ${script} --base /path/to/fiqa_beir --qrels /path/to/qrels.tsv \\
       --outdir results/fiqa_real --qids-file qids100.txt --real-ges
HELP_TEXT
}

# Parse arguments
#
# need_value OPTION REMAINING
#   Guard for options that take a value. REMAINING is the caller's "$#"; when
#   fewer than two words are left, OPTION was given without its value. Report
#   that on stderr and exit 1, rather than letting the "$2" expansion abort
#   the script with a bare "unbound variable" message under `set -u`.
need_value() {
    if (( $2 < 2 )); then
        echo "Error: Option $1 requires a value" >&2
        usage >&2
        exit 1
    fi
}

# parse_args ARGS...
#   Walk the command-line words and store each recognised option in its global
#   configuration variable. Later occurrences of an option override earlier
#   ones. --help/-h prints the usage text and exits 0; an unknown option or a
#   missing option value prints an error plus the usage text on stderr and
#   exits 1.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --base)      need_value "$1" "$#"; BASE_DIR="$2";  shift 2 ;;
            --qrels)     need_value "$1" "$#"; QRELS="$2";     shift 2 ;;
            --outdir)    need_value "$1" "$#"; OUTDIR="$2";    shift 2 ;;
            --qids-file) need_value "$1" "$#"; QIDS_FILE="$2"; shift 2 ;;
            --model)     need_value "$1" "$#"; MODEL="$2";     shift 2 ;;
            --packs)     need_value "$1" "$#"; PACKS="$2";     shift 2 ;;
            --wrrf-k)    need_value "$1" "$#"; WRRF_K="$2";    shift 2 ;;
            --cutoff)    need_value "$1" "$#"; CUTOFF="$2";    shift 2 ;;
            --alpha-wb)  need_value "$1" "$#"; ALPHA_WB="$2";  shift 2 ;;
            --alpha-wd)  need_value "$1" "$#"; ALPHA_WD="$2";  shift 2 ;;
            --alpha-wg)  need_value "$1" "$#"; ALPHA_WG="$2";  shift 2 ;;
            --real-ges)  MOCK_GES=false; shift ;;
            --mock-ges)  MOCK_GES=true; shift ;;
            --verbose)   VERBOSE=true; shift ;;
            --help|-h)   usage; exit 0 ;;
            *)
                echo "Unknown option: $1" >&2
                usage >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Validate required arguments
# --base, --qrels, --outdir and --qids-file must all be non-empty; the first
# empty one triggers the error, the usage text, and exit status 1.
for required_value in "$BASE_DIR" "$QRELS" "$OUTDIR" "$QIDS_FILE"; do
    if [[ -z "$required_value" ]]; then
        echo "Error: Missing required arguments"
        usage
        exit 1
    fi
done

# Validate paths
# The three inputs must already exist on disk. OUTDIR is not checked here
# because it is created just below.
for input_path in "$BASE_DIR" "$QRELS" "$QIDS_FILE"; do
    if [[ -e "$input_path" ]]; then
        continue
    fi
    echo "Error: Path does not exist: $input_path"
    exit 1
done

# Create output directory
# -p: create missing parents and succeed if the directory is already there.
mkdir -p "$OUTDIR"

# log MESSAGE...
#   Print a progress line "[HH:MM:SS] MESSAGE..." on stdout, but only when the
#   global VERBOSE is the string "true". In any other case nothing is printed
#   and the function returns 0.
log() {
    [[ "$VERBOSE" == "true" ]] || return 0
    printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}

# Run banner. Always printed, whether or not --verbose was given.
if [[ "$MOCK_GES" == "true" ]]; then
    mode_label="MOCK (verification)"
else
    mode_label="REAL (full reproduction)"
fi

printf '%s\n' "=== GEC Verification Pipeline ==="
printf '%s\n' "Mode: ${mode_label}"
printf '%s\n' "Base: ${BASE_DIR}"
printf '%s\n' "Output: ${OUTDIR}"
# The count is the number of newline characters in the query-ID file.
printf '%s\n' "Queries: $(wc -l < "$QIDS_FILE") queries"
printf '\n'

# Step 1: Generate baseline runs
#
# Writes bm25.trec, bge.trec and union.trec under $OUTDIR.
# Each python3 invocation spans several lines, so every continued line ends in
# exactly one backslash directly before the newline. A doubled backslash would
# be passed to python3 as a literal "\" argument and end the command there,
# leaving the following line to be executed as a separate command.
log "Step 1: Generating baseline retrieval runs"

# BM25 baseline
python3 scripts/make_dense_run.py \
    --base "$BASE_DIR" --qids-file "$QIDS_FILE" \
    --method bm25 --cutoff "$CUTOFF" \
    --output "$OUTDIR/bm25.trec"

# BGE dense baseline
python3 scripts/make_dense_run.py \
    --base "$BASE_DIR" --qids-file "$QIDS_FILE" \
    --method bge --cutoff "$CUTOFF" \
    --output "$OUTDIR/bge.trec"

# Create union pool
python3 scripts/fuse_union.py \
    "$OUTDIR/bm25.trec" "$OUTDIR/bge.trec" \
    --output "$OUTDIR/union.trec" --cutoff "$CUTOFF"

log "Step 1 complete: baseline runs generated"

# Step 2: Portfolio synthesis (mock or real)
#
# Depending on MOCK_GES, one of two generators writes a portfolio JSONL file
# under $OUTDIR. That file is then converted to $OUTDIR/multi_ges.trec by the
# same converter call in both modes, so the conversion appears only once.
# Continued lines end in a single backslash; a doubled one would terminate the
# command early and run the next line as a command of its own.
log "Step 2: Portfolio synthesis and Multi-GES extraction"

if [[ "$MOCK_GES" == "true" ]]; then
    # Mock mode - use deterministic evidence generator
    # NOTE(review): unlike the other helpers this script is not referenced
    # under scripts/ - confirm the path is intentional.
    portfolio_jsonl="$OUTDIR/portfolio_mock.jsonl"
    python3 mock_ges.py \
        --base "$BASE_DIR" --qids-file "$QIDS_FILE" \
        --pool-file "$OUTDIR/union.trec" \
        --packs "$PACKS" --output "$portfolio_jsonl" \
        --seed 42
else
    # Real mode - run actual LLM synthesis
    portfolio_jsonl="$OUTDIR/portfolio_real.jsonl"
    python3 scripts/run_local_genrerank_portfolio.py \
        --base "$BASE_DIR" --qids-file "$QIDS_FILE" \
        --pool-file "$OUTDIR/union.trec" \
        --model "$MODEL" --packs "$PACKS" \
        --output "$portfolio_jsonl"
fi

# Convert the portfolio results (mock or real) to TREC format
python3 scripts/convert_portfolio_jsonl_to_trec.py \
    "$portfolio_jsonl" \
    --pool-file "$OUTDIR/union.trec" --restrict-to-pool \
    --output "$OUTDIR/multi_ges.trec"

log "Step 2 complete: Multi-GES scores generated"

# Step 3: Fusion and calibration
#
# Reads bm25.trec, bge.trec and multi_ges.trec from $OUTDIR and writes
# gate_features.csv, gec_wrrf.trec and gpoe_headsafe.trec there.
# Continued lines end in a single backslash; a doubled one would terminate the
# command early and run the next line as a command of its own.
log "Step 3: GEC-WRRF and gPoE calibration"

# Generate gate features for WRRF learning
python3 scripts/make_gate_features_plus.py \
    "$OUTDIR/bm25.trec" "$OUTDIR/bge.trec" "$OUTDIR/multi_ges.trec" \
    --qrels "$QRELS" --qids-file "$QIDS_FILE" \
    --output "$OUTDIR/gate_features.csv"

# WRRF learned fusion
python3 scripts/rescore_as_wrrf.py \
    "$OUTDIR/bm25.trec" "$OUTDIR/bge.trec" "$OUTDIR/multi_ges.trec" \
    --features "$OUTDIR/gate_features.csv" --k "$WRRF_K" \
    --alpha-wb "$ALPHA_WB" --alpha-wd "$ALPHA_WD" --alpha-wg "$ALPHA_WG" \
    --output "$OUTDIR/gec_wrrf.trec"

# gPoE-HeadSafe guarded fusion
# The guard string is hard-coded here and is not configurable from the
# command line.
python3 scripts/poe_selective_mul_guarded.py \
    --base-run "$OUTDIR/bge.trec" --ges-run "$OUTDIR/multi_ges.trec" \
    --guards "H=4,L=1.10,J=60,C=10,TAU=0.25" \
    --output "$OUTDIR/gpoe_headsafe.trec"

log "Step 3 complete: calibrated fusion runs generated"

# Step 4: Oracle analysis
#
# Passes the earlier runs as "path:label" pairs to two analysis scripts, which
# are asked to write oracle.trec, oracle_analysis.json and pra_analysis.json
# under $OUTDIR.
# Continued lines end in a single backslash; a doubled one would terminate the
# command early and run the next line as a command of its own.
log "Step 4: Oracle Upper Bound and reachability analysis"

# Compute Oracle Upper Bound across all fusion components
python3 scripts/oracle_upper_bound.py \
    --runs "$OUTDIR/bm25.trec:BM25" "$OUTDIR/bge.trec:BGE" \
           "$OUTDIR/multi_ges.trec:Multi-GES" "$OUTDIR/gec_wrrf.trec:GEC-WRRF" \
    --qrels "$QRELS" --output "$OUTDIR/oracle.trec" \
    --analysis "$OUTDIR/oracle_analysis.json"

# PoE Reachability Audit
# Uses the same literal guard string as the guarded-fusion call in Step 3.
python3 scripts/poe_reachability_audit.py \
    --qrels "$QRELS" \
    --runs "$OUTDIR/bm25.trec:BM25" "$OUTDIR/bge.trec:BGE" \
           "$OUTDIR/multi_ges.trec:Multi-GES" \
    --guards "H=4,L=1.10,J=60,C=10,TAU=0.25" \
    --k 10 --cutoff "$CUTOFF" \
    --output "$OUTDIR/pra_analysis.json"

log "Step 4 complete: Oracle and reachability analysis done"

# Step 5: Evaluation
#
# Runs the evaluator once per *.trec file found in $OUTDIR, writing
# eval_<name>.json next to it, then asks the consolidation script for
# $OUTDIR/eval_summary.json.
# Continued lines end in a single backslash; a doubled one would terminate the
# command early and run the next line as a command of its own.
log "Step 5: Performance evaluation"

# Evaluate all runs
for run_file in "$OUTDIR"/*.trec; do
    # When nothing matches, the glob is left as a literal pattern; the -f test
    # filters that out, along with anything that is not a regular file.
    if [[ -f "$run_file" ]]; then
        run_name=$(basename "$run_file" .trec)
        python3 scripts/eval_trec_runs.py \
            --qrels "$QRELS" --run "$run_file" \
            --metrics "MRR@10,nDCG@10,Recall@50" \
            --output "$OUTDIR/eval_${run_name}.json"
    fi
done

# Generate consolidated evaluation report
python3 scripts/consolidate_eval_results.py \
    --eval-dir "$OUTDIR" --output "$OUTDIR/eval_summary.json"

# Closing summary: where the results went and which files matter most.
printf '%s\n' \
    "" \
    "=== Pipeline Complete ===" \
    "Results written to: $OUTDIR" \
    "Key outputs:" \
    "  - Baseline runs: bm25.trec, bge.trec" \
    "  - GEC runs: gec_wrrf.trec, gpoe_headsafe.trec" \
    "  - Oracle analysis: oracle_analysis.json, pra_analysis.json" \
    "  - Evaluations: eval_summary.json"

# Remind the user when the numbers come from mock evidence. `case` is used
# because a non-matching case still leaves a zero exit status.
case "$MOCK_GES" in
    true)
        printf '%s\n' \
            "" \
            "NOTE: This run used MOCK evidence generation for verification." \
            "For full reproduction, rerun with --real-ges flag."
        ;;
esac