#!/usr/bin/env python3
"""
Example script to run analysis on the included Seekbench dataset
"""

import os
import sys
import subprocess
from pathlib import Path

def _run_analysis_step(label, cmd):
    """Run one analysis command and report whether it succeeded.

    Args:
        label: Short step name used in the status messages (e.g. ``"O1"``).
        cmd: Argument list handed to ``subprocess.run``.

    Returns:
        True if the command exited with status 0, False if it exited with a
        non-zero status or could not be launched at all.
    """
    try:
        # Output is captured so that only the stderr of a failing step is shown.
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"✗ {label} analysis failed: {e}")
        print(f"Error output: {e.stderr}")
        return False
    except OSError as e:
        # The interpreter itself could not be started (missing, not executable).
        print(f"✗ {label} analysis failed: {e}")
        return False

    print(f"✓ {label} analysis completed successfully")
    return True


def run_analysis_on_seekbench():
    """Run the O1, O2 and O3 analysis scripts on the included Seekbench dataset.

    All paths are relative to the current working directory: the dataset is
    read from ``data/annotated_human_final_cleaned.jsonl`` and results are
    written below ``analysis_results_seekbench``. The steps run in order and
    the first failing step stops the run.

    Returns:
        True if every analysis step succeeded, False if the dataset is missing
        or any step failed.
    """
    # Check if the dataset exists
    dataset_path = Path("data/annotated_human_final_cleaned.jsonl")
    if not dataset_path.exists():
        print(f"Error: Dataset not found at {dataset_path}")
        print("Please ensure you're running this script from the search_evaluation_toolkit directory")
        return False

    print("Running analysis on Seekbench annotated dataset...")
    print(f"Dataset: {dataset_path}")

    # Create output directory
    output_dir = Path("analysis_results_seekbench")
    output_dir.mkdir(exist_ok=True)

    # Launch the child scripts with the interpreter running this script, so
    # they see the same environment/virtualenv; a bare "python" may be missing
    # from PATH or resolve to a different installation.
    python = sys.executable or "python"

    # (step label, progress heading, command line) for each analysis.
    steps = [
        (
            "O1",
            "\n1. Running RQI and SEI Analysis (O1)...",
            [
                python, "analysis/O1_RL_training_impact_on_reasoning_search.py",
                "--input", str(dataset_path),
                "--outdir", str(output_dir),
                "--require_clear_for_sufficient",
            ],
        ),
        (
            "O2",
            "\n2. Running Recovery Dynamics Analysis (O2)...",
            [
                python, "analysis/O2_Recovery_dynamics.py",
                str(dataset_path),
                str(output_dir / "recovery_figs"),
            ],
        ),
        (
            "O3",
            "\n3. Running Evidence Strength Analysis (O3)...",
            [
                python, "analysis/O3_Evidence_strength_per_turn.py",
                "--input", str(dataset_path),
                "--outdir", str(output_dir / "evidence_figs"),
            ],
        ),
    ]

    for label, heading, cmd in steps:
        print(heading)
        if not _run_analysis_step(label, cmd):
            return False

    print("\n🎉 All analyses completed successfully!")
    print(f"Results saved to: {output_dir}")
    print("\nGenerated files:")

    # List generated files (sorted so the listing is stable between runs)
    for root, dirs, files in os.walk(output_dir):
        dirs.sort()
        for file in sorted(files):
            if file.endswith(('.png', '.csv', '.txt')):
                rel_path = os.path.relpath(os.path.join(root, file), output_dir)
                print(f"  - {rel_path}")

    return True

def main():
    """Check the working directory, run the Seekbench analyses, report the outcome.

    Prints a banner, verifies that an ``analysis`` directory exists in the
    current working directory, then calls ``run_analysis_on_seekbench``.

    Raises:
        SystemExit: With status 1 when started from the wrong directory or
            when the analysis run reports failure.
    """
    separator = "=" * 60
    print("Search Evaluation Toolkit - Seekbench Analysis Example")
    print(separator)

    # Check if we're in the right directory
    if not Path("analysis").exists():
        print("Error: Please run this script from the search_evaluation_toolkit directory")
        print("Current directory:", os.getcwd())
        # Exit non-zero so shells and CI see this as a failure, matching the
        # analysis-failure path below.
        sys.exit(1)

    # Run the analysis
    success = run_analysis_on_seekbench()

    print("\n" + separator)
    if not success:
        print("Analysis failed. Please check the error messages above.")
        sys.exit(1)

    print("Analysis complete! Check the generated figures and reports.")
    print("You can now explore the results to understand agent behavior patterns.")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
