#!/usr/bin/env python3
"""Example usage of the ImpossibleBench analysis DataLoader."""

import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from analysis.data_loader import DataLoader
import pandas as pd

def _print_rate_table(df, title, group_cols, value_col, rate_name, max_rows=None):
    """Print per-group sample counts and the mean of ``value_col``.

    Args:
        df: DataFrame of evaluation results.
        title: Heading printed before the table.
        group_cols: Column name, or list of column names, to group by.
        value_col: Column whose non-null count and mean are reported.
        rate_name: Label used for the mean column in the printed table.
        max_rows: If given, print only the first ``max_rows`` groups.
    """
    print(title)

    # A missing column would raise KeyError inside groupby and abort the rest
    # of the walkthrough (including the CSV export), so report it and move on.
    needed = [group_cols] if isinstance(group_cols, str) else list(group_cols)
    needed.append(value_col)
    missing = [col for col in needed if col not in df.columns]
    if missing:
        print(f"  (skipped, missing columns: {missing})")
        return

    table = df.groupby(group_cols)[value_col].agg(['count', 'mean']).round(3)
    table.columns = ['num_samples', rate_name]
    print(table if max_rows is None else table.head(max_rows))


def main():
    """Demonstrate DataLoader usage.

    Loads eval logs from whichever of the hard-coded example folders exist,
    prints summary statistics and pass-rate tables, shows ``filter_by``, and
    finally writes the full DataFrame to
    ``impossiblebench_analysis_results.csv`` in the current working
    directory. Returns early, after printing a hint, when nothing was loaded.
    """
    # Create DataLoader
    loader = DataLoader(n_workers=4)

    # Load evaluation results from one or more log folders.
    # NOTE: these are relative paths, so they are resolved against the
    # current working directory, not against this script's location.
    log_folders = [
        "../logs/livecodebench_sep4bigrun_testonly4",
        "../logs/swebench_sep5bigrun-neutral-canmodify",
    ]

    # Check which folders exist and load from them
    for folder in log_folders:
        if os.path.exists(folder):
            print(f"Loading from {folder}")
            loader.load_folder(folder)
        else:
            print(f"Folder not found: {folder}")

    # Convert to DataFrame
    df = loader.to_df()

    if df.empty:
        print("No data loaded. Make sure eval files are available in the log folders.")
        return

    print(f"\nLoaded {len(df)} evaluation results")
    print(f"Columns: {list(df.columns)}")

    # Summary statistics
    print("\nSummary Statistics:")
    summary = loader.get_summary()
    for key, value in summary.items():
        if isinstance(value, float):
            print(f"  {key}: {value:.3f}")
        else:
            print(f"  {key}: {value}")

    # Pass rates by model and variant (first 10 groups only, to keep it short)
    _print_rate_table(
        df,
        "\nPass Rates by Model and Variant:",
        ['model', 'variant'],
        'passed',
        'pass_rate',
        max_rows=10,
    )

    # Overall pass rates by variant
    _print_rate_table(
        df, "\nOverall Pass Rates by Variant:", 'variant', 'passed', 'pass_rate'
    )

    # Agent type performance
    _print_rate_table(
        df, "\nPass Rates by Agent Type:", 'agent_type', 'passed', 'pass_rate'
    )

    # First pass rates (where available); silently omitted otherwise
    if 'first_pass' in df.columns:
        _print_rate_table(
            df,
            "\nFirst Pass Success Rates:",
            'variant',
            'first_pass',
            'first_pass_rate',
        )

    # Filter examples
    print("\nFiltering Examples:")

    # Get only conflicting variant results
    conflicting_loader = loader.filter_by(variant='conflicting')
    conflicting_df = conflicting_loader.to_df()
    print(f"Conflicting variant results: {len(conflicting_df)} samples")

    # Get results for a specific model
    gpt5_loader = loader.filter_by(model='openai/gpt-5')
    gpt5_df = gpt5_loader.to_df()
    print(f"GPT-5 results: {len(gpt5_df)} samples")

    # Demonstrate saving to CSV
    output_file = "impossiblebench_analysis_results.csv"
    df.to_csv(output_file, index=False)
    print(f"\nResults saved to: {output_file}")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()