#!/bin/bash
# test_enhanced_token_pipeline.sh
# Test the complete enhanced token analysis pipeline

echo "🧪 Testing Enhanced Token Analysis Pipeline"
echo "============================================"

# Step 1: Check prerequisites
#
# Optional first argument: path to the cluster results JSON to test against.
# When omitted, the default file below is used, so existing invocations
# without arguments behave exactly as before.
echo "📋 Step 1: Checking prerequisites..."

# Both pipeline scripts are looked up relative to the current directory.
for required_script in scripts/evaluate_responses.py scripts/reformat_cluster_results.py; do
    if [ ! -f "$required_script" ]; then
        echo "❌ $required_script not found"
        exit 1
    fi
done

# Check for cluster results
# CLUSTER_FILES stays an array because later steps read "${CLUSTER_FILES[0]}".
DEFAULT_CLUSTER_FILE="llm-inference/results/qwen-3-32B_results_20250617_093913.json"
CLUSTER_FILES=("${1:-$DEFAULT_CLUSTER_FILE}")
if [ ! -f "${CLUSTER_FILES[0]}" ]; then
    # Name the exact path that was checked: only this one file is tested,
    # not the whole results directory.
    echo "❌ Cluster results file not found: ${CLUSTER_FILES[0]}"
    echo "   Pass the path to a cluster results JSON as the first argument to use a different file."
    exit 1
fi

echo "✅ Prerequisites check passed"
echo
#
## Step 2 (currently disabled): Apply the enhancement patch
# NOTE(review): every command of this step is commented out, so the script
# goes straight from Step 1 to Step 3. Presumably evaluate_responses.py no
# longer needs patching — confirm before re-enabling or deleting this block.
#echo "🔧 Step 2: Applying enhancement patch to evaluate_responses.py..."
#
#if python patch_evaluate_responses.py; then
#    echo "✅ Successfully patched evaluate_responses.py"
#else
#    echo "❌ Failed to patch evaluate_responses.py"
#    exit 1
#fi
# This separator is still live, so an extra blank line is printed here even
# though the step itself does nothing.
echo

# Step 3: Run the enhanced reformat script (with token analysis) on the
# first cluster results file.
echo "🧠 Step 3: Testing enhanced reformat with token analysis..."

TEST_FILE="${CLUSTER_FILES[0]}"
test_file_name=$(basename "$TEST_FILE")
echo "Using test file: $test_file_name"

# Guard clause: abort the whole pipeline test as soon as reformatting fails.
if ! python -m scripts.reformat_cluster_results "$TEST_FILE" --verbose; then
    echo "❌ Enhanced reformat script failed"
    exit 1
fi
echo "✅ Enhanced reformat script works"

# Check for token file
# The expected path is the test file's path with its last extension replaced
# by "_tokens.json".
TOKEN_FILE="${TEST_FILE%.*}_tokens.json"
if [ ! -f "$TOKEN_FILE" ]; then
    echo "❌ Consolidated token file not created"
    exit 1
fi
echo "✅ Consolidated token file created: $(basename "$TOKEN_FILE")"

# Show sample token data; a jq failure is reported but is not fatal.
echo "📊 Sample token data:"
if ! jq -r '.batch_statistics | to_entries[] | "   \(.key): \(.value)"' "$TOKEN_FILE" 2>/dev/null; then
    echo "   Could not parse token data"
fi
echo

# Step 4: Test enhanced evaluation script
echo "📊 Step 4: Testing enhanced evaluation script..."

# Count the *.txt files under datasets/ that are newer than the test file.
# find's errors (e.g. a missing datasets/ directory) are deliberately
# silenced: in that case the count is simply 0 and only a warning is shown.
RESPONSE_COUNT=$(find datasets/ -name "*.txt" -newer "$TEST_FILE" 2>/dev/null | wc -l)
# BSD/macOS wc pads its output with spaces; arithmetic expansion normalizes
# the value to a bare integer so the message below prints cleanly.
RESPONSE_COUNT=$((RESPONSE_COUNT))
if [ "$RESPONSE_COUNT" -eq 0 ]; then
    echo "⚠️ No response files found for evaluation"
    echo "   This might be normal if all responses already existed"
else
    echo "Found $RESPONSE_COUNT response files to evaluate"
fi

# Run the evaluation module (expected to pick up the consolidated token data).
echo "Running enhanced evaluation..."
# Guard clause: a failing evaluation aborts the pipeline test immediately.
if ! python -m scripts.evaluate_responses; then
    echo "❌ Enhanced evaluation script failed"
    exit 1
fi
echo "✅ Enhanced evaluation script works"
echo

# Step 5: Check evaluation results
#
# Sets EVAL_FILE, TOKEN_COUNT and TOTAL_COUNT, which later steps read.
echo "📈 Step 5: Checking evaluation results..."

# Find the latest evaluation file.
# A glob plus the -nt test replaces `ls -t | head -1`: parsing ls output is
# fragile with unusual file names, and the glob needs no external process.
EVAL_FILE=""
for eval_candidate in evaluation_data/evaluation_results_*.json; do
    # An unmatched glob is left as the literal pattern; -f filters that out.
    [ -f "$eval_candidate" ] || continue
    if [ -z "$EVAL_FILE" ] || [ "$eval_candidate" -nt "$EVAL_FILE" ]; then
        EVAL_FILE="$eval_candidate"
    fi
done

if [ -z "$EVAL_FILE" ]; then
    echo "❌ No evaluation results file found"
    exit 1
fi

echo "✅ Evaluation results file: $(basename "$EVAL_FILE")"

# Check if token data is included
TOKEN_COUNT=$(jq '[.[] | select(.token_usage != null)] | length' "$EVAL_FILE" 2>/dev/null)
TOTAL_COUNT=$(jq 'length' "$EVAL_FILE" 2>/dev/null)
# Fall back to 0 when jq is missing, fails, or prints nothing. An empty
# results file makes jq exit 0 with no output, which would otherwise break
# the integer comparison below.
case "$TOKEN_COUNT" in '' | *[!0-9]*) TOKEN_COUNT=0 ;; esac
case "$TOTAL_COUNT" in '' | *[!0-9]*) TOTAL_COUNT=0 ;; esac

echo "📊 Evaluation summary:"
echo "   Total evaluations: $TOTAL_COUNT"
echo "   With token data: $TOKEN_COUNT"

if [ "$TOKEN_COUNT" -gt 0 ]; then
    echo "✅ Token data successfully integrated into evaluation results"

    # Show sample token data from the first evaluation entry (best effort:
    # jq errors are silenced and do not fail the test).
    echo "📋 Sample token data from evaluation:"
    jq -r '.[0].token_usage | if . != null then "   Source: \(.source // "unknown")\n   Output tokens: \(.output_tokens // "N/A")\n   Reasoning tokens: \(.reasoning_tokens // "N/A")" else "   No token data" end' "$EVAL_FILE" 2>/dev/null
else
    echo "⚠️ No token data found in evaluation results"
fi
echo

# Step 6: Test visualization compatibility
#
# Loads the evaluation results into a pandas DataFrame and runs the
# project's prepare_token_data on it. Failure is only a warning; it does not
# abort the script.
echo "🎨 Step 6: Testing visualization compatibility..."

# The results path is passed to Python as an argument (sys.argv[1]) instead
# of being spliced into the program text, so a quote or backslash in the
# path cannot break or alter the Python code.
if python -c "
from scripts.visualization.core.token_visualization_utils import prepare_token_data
import pandas as pd
import json
import sys

# Load evaluation results
with open(sys.argv[1], 'r') as f:
    data = json.load(f)

df = pd.DataFrame(data)
token_df = prepare_token_data(df)

print(f'Successfully prepared token data: {len(token_df)} rows')
print(f'Responses with token data: {(token_df[\"total_tokens\"] > 0).sum()}')
" "$EVAL_FILE"; then
    echo "✅ Token data is compatible with visualization system"
else
    echo "⚠️ Token visualization compatibility test failed (this might be normal)"
fi
echo

# Final summary
#
# print_pipeline_summary: write the end-of-run report to stdout.
# Globals read: TOKEN_COUNT, TOTAL_COUNT, EVAL_FILE.
# The "Token data integration" line reflects whether any token data was
# actually found, so it can no longer contradict the "Token data in results"
# line further down.
print_pipeline_summary() {
    local token_count="${TOKEN_COUNT:-0}"
    # Treat anything that is not a plain non-negative integer as 0 so the
    # integer comparisons below never error out.
    case "$token_count" in '' | *[!0-9]*) token_count=0 ;; esac

    echo "🎉 PIPELINE TEST SUMMARY"
    echo "========================"
    echo "✅ Enhanced reformat script: Working"
    echo "✅ Consolidated token files: Created"
    echo "✅ Enhanced evaluation script: Working"
    if [ "$token_count" -gt 0 ]; then
        echo "✅ Token data integration: Working"
    else
        echo "⚠️ Token data integration: No token data in results"
    fi
    echo "✅ Evaluation results: Generated"

    if [ "$token_count" -gt 0 ]; then
        echo "✅ Token data in results: $token_count/$TOTAL_COUNT responses"
    else
        echo "⚠️ Token data in results: None found"
    fi

    echo
    echo "💡 Next steps:"
    echo "   1. Use enhanced reformat script for all cluster results"
    echo "   2. Run evaluation as normal - token data automatically included"
    echo "   3. Generate visualizations with token analysis"
    echo
    echo "🔧 Key files:"
    echo "   - Enhanced reformat: scripts/reformat_cluster_results.py"
    echo "   - Enhanced evaluation: scripts/evaluate_responses.py"
    echo "   - Consolidated tokens: llm-inference/results/*_tokens.json"
    echo "   - Evaluation results: $EVAL_FILE"
}

print_pipeline_summary