#!/usr/bin/env python3
"""Test script for the DataLoader."""

import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from analysis.data_loader import DataLoader

def test_data_loader():
    """Test the DataLoader with actual log files."""
    
    # Create DataLoader instance
    loader = DataLoader(n_workers=2)
    
    # Load from logs folder (contains .eval files)
    logs_path = "../logs"
    if os.path.exists(logs_path):
        # Find subdirectories with eval files and load from multiple
        loaded_dirs = 0
        max_dirs = 3  # Load from up to 3 directories for variety
        
        for subdir in sorted(os.listdir(logs_path)):
            if loaded_dirs >= max_dirs:
                break
                
            subdir_path = os.path.join(logs_path, subdir)
            if os.path.isdir(subdir_path):
                eval_files = [f for f in os.listdir(subdir_path) if f.endswith('.eval')]
                if eval_files:
                    print(f"Loading from {subdir_path} ({len(eval_files)} eval files)")
                    loader.load_folder(subdir_path)
                    loaded_dirs += 1
        
        if loaded_dirs == 0:
            print("No eval files found in logs subdirectories")
        
        # Get summary
        summary = loader.get_summary()
        print("Summary:")
        for key, value in summary.items():
            print(f"  {key}: {value}")
        
        # Convert to DataFrame
        df = loader.to_df()
        print(f"\nDataFrame shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")
        
        if not df.empty:
            print(f"\nSample data:")
            print(df[['model', 'dataset', 'variant', 'agent_type', 'passed', 'score']].head())
            
            print(f"\nDataset breakdown:")
            print(df['dataset'].value_counts())
            
            print(f"\nVariant breakdown:")
            print(df['variant'].value_counts())
            
            print(f"\nAgent type breakdown:")
            print(df['agent_type'].value_counts())
            
            print(f"\nModel breakdown:")
            print(df['model'].value_counts())
            
            print(f"\nPass rates by dataset:")
            pass_rates = df.groupby('dataset')['passed'].mean()
            print(pass_rates)
    else:
        print(f"logs folder not found at {logs_path}")

if __name__ == "__main__":
    test_data_loader()