#!/usr/bin/env julia

"""
Comprehensive Benchmark Analysis with Method Comparison Table

This script analyzes benchmark results and creates detailed comparison tables that track:
- VI Theoretical recommendations
- IB-MDP (CEED) Top1 and Top2 feature recommendations  
- Match analysis between different methods
- Cross-iteration consistency analysis
- Agreement patterns and statistical validation

Generates:
- Method comparison table: Iter | VI Theo | IB-MDP Top1 | T1 Match | IB-MDP Top2 | T2 Match | VI Sim | Sim Match
- Agreement patterns CSV with cross-method consistency metrics
- Comprehensive iteration-by-iteration analysis
"""

using DataFrames, CSV, Statistics
using Printf

# Startup banner: an 80-column rule above and below the title, then a status line.
let rule = repeat("=", 80)
    println(rule)
    println("🔍 ENHANCED BENCHMARK ANALYSIS WITH METHOD COMPARISON")
    println(rule)
end
println("📊 Creating detailed method comparison table...")
println()

"""
    parse_feature_list(feature_str) -> Vector{Int}

Extract every run of decimal digits from `feature_str` and return the resulting
integers in ascending order (duplicates are kept).

`missing`, `""`, `"None"` and `"missing"` yield an empty vector. Any other value is
first converted with `string`, so both `"[3,4]"` and `"feature_3,feature_4"`
produce `[3, 4]`.
"""
function parse_feature_list(feature_str)::Vector{Int}
    ismissing(feature_str) && return Int[]

    text = string(feature_str)
    text in ("", "None", "missing") && return Int[]

    # Drop square brackets and double quotes before scanning, so digit runs that were
    # separated only by those characters are read as a single number.
    stripped = replace(text, r"[\[\]\"]" => "")

    return sort!(Int[parse(Int, m.match) for m in eachmatch(r"\d+", stripped)])
end

"""
    calculate_match(features1, features2) -> Int

Return `1` when both feature lists are non-empty and contain the same set of
features (order and repetition are ignored); otherwise return `0`.
"""
function calculate_match(features1::Vector{Int}, features2::Vector{Int})::Int
    # An empty recommendation never counts as a match, even against another empty one.
    if isempty(features1) || isempty(features2)
        return 0
    end
    return issetequal(features1, features2) ? 1 : 0
end

"""
    get_feature_count(features) -> Int

Return the number of entries in `features` (used for the count columns of the table).
"""
get_feature_count(features::Vector{Int})::Int = length(features)

"""
    load_iteration_results(iteration_num::Int)

Read the four result CSV files stored under `benchmark/results/iteration_<n>`.

Returns a named tuple with fields `iteration`, `complete_comparison`, `ceed_actions`,
`top_action_sets` and `vi_policy` (each table a `DataFrame`). Returns `nothing`,
after logging a warning, when the directory does not exist or when reading any of
the files throws.
"""
function load_iteration_results(iteration_num::Int)
    base_path = "benchmark/results/iteration_$iteration_num"

    if !isdir(base_path)
        @warn "Iteration $iteration_num directory not found: $base_path"
        return nothing
    end

    # Small local reader so each file name is written once.
    read_table(file_name) = CSV.read("$base_path/$file_name", DataFrame)

    try
        # Fields are evaluated top to bottom, so the files are read in this order.
        return (
            iteration = iteration_num,
            complete_comparison = read_table("complete_comparison.csv"),
            ceed_actions = read_table("CEED_actions.csv"),
            top_action_sets = read_table("top_action_sets.csv"),
            vi_policy = read_table("VI_policy_comparison.csv"),
        )
    catch e
        # Best effort: a broken iteration is reported and skipped rather than aborting.
        @warn "Failed to load iteration $iteration_num: $e"
        return nothing
    end
end

# Discover which iteration result directories exist on disk.
# NOTE: only iteration_1 through iteration_20 are probed; higher-numbered runs are
# not picked up.
available_iterations = filter(i -> isdir("benchmark/results/iteration_$i"), 1:20)

if isempty(available_iterations)
    error("No iteration results found in benchmark/results/ directory. Please run the benchmark first.")
end

println("📂 Found results for iterations: $(join(available_iterations, ", "))")
println()

# Load every discovered iteration. load_iteration_results returns `nothing` (and logs
# a warning) for an iteration it cannot read; those are dropped here.
all_results = filter(!isnothing, map(load_iteration_results, available_iterations))

n_iterations = length(all_results)

# Everything below divides by n_iterations and reads columns that only exist when at
# least one row was produced, so stop with a clear message if nothing could be loaded.
if n_iterations == 0
    error("Iteration directories were found but none could be loaded. Check the warnings above.")
end

println("✅ Successfully loaded $n_iterations iterations")
println()

# === METHOD COMPARISON TABLE GENERATION ===
println("="^80)
println("📋 CREATING DETAILED METHOD COMPARISON TABLE")
println("="^80)

# One named tuple per loaded iteration; turned into a DataFrame after the loop.
comparison_table = []

# CEED recommendations are taken from the first row of top_action_sets whose
# Threshold equals this value (the original author describes 0.1 as the most
# conservative setting).
ceed_threshold = 0.1

for results in all_results
    iter_num = results.iteration

    println("Processing iteration $iter_num...")

    # VI recommendations (theoretical and similarity-based) come from the first row
    # of the VI policy table; an empty table means "no recommendation".
    if nrow(results.vi_policy) > 0
        vi_theo_str = results.vi_policy[1, :VI_theo]
        vi_sim_str = results.vi_policy[1, :VI_sim]
    else
        vi_theo_str = ""
        vi_sim_str = ""
    end

    vi_theo_features = parse_feature_list(vi_theo_str)
    vi_sim_features = parse_feature_list(vi_sim_str)

    # IB-MDP (CEED) recommendations; stay empty when no row matches the threshold.
    ceed_top1_features = Int[]
    ceed_top2_features = Int[]

    top_sets = results.top_action_sets
    if nrow(top_sets) > 0
        # Tolerant, missing-safe comparison: a `missing` or non-numeric Threshold is
        # simply not a match instead of raising inside the predicate, and a value that
        # differs from 0.1 only by floating-point noise is still found.
        row_idx = findfirst(
            t -> t isa Real && isapprox(t, ceed_threshold; atol = 1e-9),
            top_sets.Threshold,
        )

        if row_idx !== nothing
            ceed_top1_features = parse_feature_list(top_sets[row_idx, :Top_1_Action_Set])

            # The Top_2 column is optional.
            if "Top_2_Action_Set" in names(top_sets)
                ceed_top2_features = parse_feature_list(top_sets[row_idx, :Top_2_Action_Set])
            end
        end
    end

    # Match flags (1/0): VI Theo vs CEED Top1, VI Theo vs CEED Top2, VI Sim vs CEED Top1.
    t1_match = calculate_match(vi_theo_features, ceed_top1_features)
    t2_match = calculate_match(vi_theo_features, ceed_top2_features)
    sim_match = calculate_match(vi_sim_features, ceed_top1_features)

    # Format feature lists for display: "{a, b}" or "" when empty.
    vi_theo_display = isempty(vi_theo_features) ? "" : "{$(join(vi_theo_features, ", "))}"
    ceed_top1_display = isempty(ceed_top1_features) ? "" : "{$(join(ceed_top1_features, ", "))}"
    ceed_top2_display = isempty(ceed_top2_features) ? "" : "{$(join(ceed_top2_features, ", "))}"
    vi_sim_display = isempty(vi_sim_features) ? "" : "{$(join(vi_sim_features, ", "))}"

    # Note: the VI columns store feature COUNTS, the CEED columns store the sets.
    push!(comparison_table, (
        Iter = iter_num,
        VI_Theo = get_feature_count(vi_theo_features),
        IB_MDP_Top1_Features = ceed_top1_display,
        T1_Match = t1_match,
        IB_MDP_Top2_Features = ceed_top2_display,
        T2_Match = t2_match,
        VI_Sim = get_feature_count(vi_sim_features),
        Sim_Match = sim_match
    ))

    println("  VI Theo: $vi_theo_display ($(length(vi_theo_features)) features)")
    println("  CEED Top1: $ceed_top1_display")
    println("  CEED Top2: $ceed_top2_display")
    println("  VI Sim: $vi_sim_display ($(length(vi_sim_features)) features)")
    println("  Matches - T1: $t1_match, T2: $t2_match, Sim: $sim_match")
    println()
end

# Convert the accumulated per-iteration rows to a DataFrame
comparison_df = DataFrame(comparison_table)

# Display the comparison table
println("="^80)
println("📊 METHOD COMPARISON TABLE")
println("="^80)
println()

# Markdown-style table. The two feature columns are padded to 20 characters, the
# width of their header text, so that header, separator and data rows line up.
println("| Iter | VI Theo | IB-MDP Top1 Features | T1 Match | IB-MDP Top2 Features | T2 Match | VI Sim | Sim Match |")
# Separator cell width = header text width + 2 (one space on each side).
println("|" * join(("-"^w for w in (6, 9, 22, 10, 22, 10, 8, 11)), "|") * "|")

for row in eachrow(comparison_df)
    @printf("| %4d | %7d | %-20s | %8d | %-20s | %8d | %6d | %9d |\n",
        row.Iter, row.VI_Theo, row.IB_MDP_Top1_Features, row.T1_Match,
        row.IB_MDP_Top2_Features, row.T2_Match, row.VI_Sim, row.Sim_Match)
end

println()

# Save the comparison table
CSV.write("benchmark/results/method_comparison_table.csv", comparison_df)
println("💾 Method comparison table saved to: benchmark/results/method_comparison_table.csv")
println()

# === DETAILED ANALYSIS STATISTICS ===
println("="^80)
println("📈 DETAILED ANALYSIS STATISTICS")
println("="^80)

# Match columns hold 1/0 flags, so their sums are the number of matching iterations.
println("🔸 Overall Match Statistics:")
total_t1_matches = sum(comparison_df.T1_Match)
total_t2_matches = sum(comparison_df.T2_Match)
total_sim_matches = sum(comparison_df.Sim_Match)

println("  T1 Matches (VI Theo vs CEED Top1): $total_t1_matches/$n_iterations ($(@sprintf("%.1f", total_t1_matches/n_iterations*100))%)")
println("  T2 Matches (VI Theo vs CEED Top2): $total_t2_matches/$n_iterations ($(@sprintf("%.1f", total_t2_matches/n_iterations*100))%)")
println("  Sim Matches (VI Sim vs CEED Top1): $total_sim_matches/$n_iterations ($(@sprintf("%.1f", total_sim_matches/n_iterations*100))%)")

println()
println("🔸 Feature Count Analysis:")
# VI_Theo / VI_Sim store the NUMBER of recommended features per iteration.
vi_theo_counts = comparison_df.VI_Theo
vi_sim_counts = comparison_df.VI_Sim

println("  VI Theoretical feature counts: $(join(vi_theo_counts, ", "))")
println("  VI Similarity feature counts: $(join(vi_sim_counts, ", "))")

# "Consistency" is only meaningful with more than one iteration.
if length(vi_theo_counts) > 1
    println("  VI Theo consistency: $(length(unique(vi_theo_counts))) unique values")
end
if length(vi_sim_counts) > 1
    println("  VI Sim consistency: $(length(unique(vi_sim_counts))) unique values")
end

println()
println("🔸 CEED Feature Recommendation Analysis:")
# Distinct recommended feature sets. The empty string is the placeholder for "no
# recommendation" and is excluded, so the reported count matches the list below it.
top1_features = filter(!=(""), unique(comparison_df.IB_MDP_Top1_Features))
top2_features = filter(!=(""), unique(comparison_df.IB_MDP_Top2_Features))

println("  Unique CEED Top1 recommendations: $(length(top1_features))")
for feat in top1_features
    n_hits = count(==(feat), comparison_df.IB_MDP_Top1_Features)
    println("    $feat: $n_hits/$n_iterations iterations")
end

println("  Unique CEED Top2 recommendations: $(length(top2_features))")
for feat in top2_features
    n_hits = count(==(feat), comparison_df.IB_MDP_Top2_Features)
    println("    $feat: $n_hits/$n_iterations iterations")
end

# === CONSISTENCY ANALYSIS ===
println()
println("="^80)
println("🎯 CROSS-METHOD CONSISTENCY ANALYSIS")
println("="^80)

println("🔸 Agreement Patterns:")
agreement_patterns = DataFrame(
    Pattern = String[],
    Description = String[],
    Count = Int[],
    Percentage = Float64[]
)

# Per-iteration boolean masks over the 1/0 match columns.
t1_hit = comparison_df.T1_Match .== 1
t2_hit = comparison_df.T2_Match .== 1
sim_hit = comparison_df.Sim_Match .== 1

# Perfect agreement: VI Theo and VI Sim both match CEED Top1.
perfect_agreement = count(t1_hit .& sim_hit)
push!(agreement_patterns, ("Perfect", "All methods agree", perfect_agreement, perfect_agreement/n_iterations*100))

# VI methods agree but differ from CEED.
# comparison_df.VI_Theo / VI_Sim only hold feature COUNTS, so comparing them would
# report agreement whenever both methods pick the same number of features (or both
# pick none). The actual feature sets are re-read from the loaded results instead;
# comparison_df has one row per entry of all_results, in the same order.
vi_sets_agree = Bool[
    nrow(res.vi_policy) > 0 &&
    calculate_match(
        parse_feature_list(res.vi_policy[1, :VI_theo]),
        parse_feature_list(res.vi_policy[1, :VI_sim]),
    ) == 1
    for res in all_results
]
vi_agreement = count(vi_sets_agree .& .!t1_hit)
push!(agreement_patterns, ("VI_Agreement", "VI methods agree, differ from CEED", vi_agreement, vi_agreement/n_iterations*100))

# CEED variants provide alternatives: VI Theo misses Top1 but matches Top2.
ceed_alternatives = count(.!t1_hit .& t2_hit)
push!(agreement_patterns, ("CEED_Alt", "VI Theo matches CEED Top2", ceed_alternatives, ceed_alternatives/n_iterations*100))

# Complete disagreement: none of the three match flags is set.
no_agreement = count(.!t1_hit .& .!t2_hit .& .!sim_hit)
push!(agreement_patterns, ("Disagreement", "No method agreement", no_agreement, no_agreement/n_iterations*100))

# The patterns are not mutually exclusive, so the counts need not sum to n_iterations.
for row in eachrow(agreement_patterns)
    println("  $(row.Pattern): $(row.Count)/$n_iterations ($(@sprintf("%.1f", row.Percentage))%) - $(row.Description)")
end

# Save agreement patterns
CSV.write("benchmark/results/agreement_patterns.csv", agreement_patterns)
println()
println("💾 Agreement patterns saved to: benchmark/results/agreement_patterns.csv")

# === FINAL ENHANCED SUMMARY ===
println()
println("="^80)
println("✅ ENHANCED BENCHMARK ANALYSIS COMPLETE")
println("="^80)

# Number of distinct CEED Top1 feature sets. The empty string marks an iteration
# without a recommendation and is not counted as a strategy.
distinct_top1_strategies = count(!=(""), unique(comparison_df.IB_MDP_Top1_Features))

println("📊 Key Findings:")
println("  • Analyzed $n_iterations iterations with comprehensive method comparison")
println("  • Generated detailed match analysis table")
println("  • VI Theoretical-CEED Top1 agreement: $(@sprintf("%.1f", total_t1_matches/n_iterations*100))%")
println("  • VI Similarity-CEED Top1 agreement: $(@sprintf("%.1f", total_sim_matches/n_iterations*100))%")
println("  • Cross-method analysis reveals $distinct_top1_strategies distinct CEED strategies")

println()
println("📁 Generated Files:")
println("  • benchmark/results/method_comparison_table.csv - Detailed method comparison")
println("  • benchmark/results/agreement_patterns.csv - Cross-method agreement analysis")

println()
println("🎯 The analysis reveals the relationship between different experimental design approaches:")
println("   - Value Iteration (theoretical and similarity-based)")
println("   - CEED ensemble recommendations (Top1 and Top2)")
println("   - Match patterns showing algorithmic consistency")
println()
println("="^80)