"""
Master script to run ABBR value experiments on all datasets and combine results.

This script runs the ABBR vs consistency comparison experiment across all available datasets,
combines the results, and saves comprehensive output files.

Usage:
    python run_all_experiments.py [--seeds NUM_SEEDS] [--threshold THRESHOLD] [--support SUPPORT]
                                  [--max-conditions MAX_CONDITIONS] [--output-dir OUTPUT_DIR]
"""

import argparse
import pandas as pd
import os
import sys
from datetime import datetime
from typing import Dict, List

# Change to parent directory so data paths work correctly.
# NOTE: this runs as soon as the module is loaded and affects the whole
# process: every relative path used afterwards (including a relative
# ``output_dir`` given to run_all_datasets_experiment) is resolved against
# the parent of this script's directory.
script_dir = os.path.dirname(os.path.abspath(__file__))  # directory containing this script
parent_dir = os.path.dirname(script_dir)  # one level above the script directory
os.chdir(parent_dir)

from abbr_experiment_config import ExperimentConfig, AVAILABLE_DATASETS
from abbr_multi_seed_experiment import run_multi_seed_experiment, print_summary_results

def _format_stat(stats, key: str, spec: str = ".3f") -> str:
    """Format ``stats[key]`` with ``spec``; return 'N/A' if it is missing or not formattable."""
    try:
        return format(stats[key], spec)
    except (KeyError, TypeError, ValueError):
        return "N/A"


def _numeric_stat_values(all_stats, key: str) -> list:
    """Collect ``stats[key]`` as floats across datasets, skipping missing/non-numeric entries."""
    values = []
    for stats in all_stats:
        try:
            values.append(float(stats[key]))
        except (KeyError, TypeError, ValueError):
            continue
    return values


def _format_mean(values, spec: str = ".3f") -> str:
    """Format the arithmetic mean of ``values``; return 'N/A' when there is nothing to average."""
    if not values:
        return "N/A"
    return format(sum(values) / len(values), spec)


def run_all_datasets_experiment(
    num_seeds: int = 100,
    confidence_threshold: float = 0.9,
    min_rule_support: float = 0.1,
    max_conditions: int = 3,
    output_dir: str = "combined_results"
) -> None:
    """
    Run ABBR experiments on all available datasets and combine results.

    For every entry in ``AVAILABLE_DATASETS`` the multi-seed experiment is run;
    a dataset whose run raises is reported and skipped. The per-seed results of
    the remaining datasets are concatenated into one detailed CSV, the
    per-dataset summary statistics into a summary CSV, and a plain-text report
    (UTF-8) is written next to them. All three file names share one timestamp.

    Summary statistics that are missing or not numeric are rendered as 'N/A'
    and left out of the cross-dataset averages instead of aborting the run
    after the experiments have already been computed.

    Args:
        num_seeds: Number of random seeds to test per dataset
        confidence_threshold: Confidence threshold for binary predictions
        min_rule_support: Minimum support required for rules
        max_conditions: Maximum number of conditions per rule
        output_dir: Directory to save combined results. A relative path is
            placed under ``abbr_value/`` (relative to the current working
            directory); an absolute path is used as given.

    Returns:
        None. Results are written to ``output_dir`` and progress is printed.
    """

    # Create output directory
    # If output_dir is relative, put it in the abbr_value directory
    if not os.path.isabs(output_dir):
        output_dir = os.path.join('abbr_value', output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # Get current timestamp for filenames
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Lists to store results from all datasets
    all_detailed_results = []
    all_summary_stats = []

    print("🚀 Starting comprehensive ABBR experiment")
    print(f"📊 Datasets: {list(AVAILABLE_DATASETS.keys())}")
    print(f"🎯 Seeds per dataset: {num_seeds}")
    print(f"📈 Confidence threshold: {confidence_threshold}")
    print(f"📏 Min rule support: {min_rule_support}")
    print(f"🔧 Max conditions per rule: {max_conditions}")
    print(f"💾 Output directory: {output_dir}")
    print("=" * 100)

    # Run experiment for each dataset
    for dataset_name, dataset_class in AVAILABLE_DATASETS.items():
        print(f"\n🔄 Processing {dataset_name.upper()} dataset...")

        try:
            # Configure experiment
            config = ExperimentConfig(
                dataset_class=dataset_class,
                confidence_threshold=confidence_threshold,
                min_rule_support=min_rule_support,
                max_conditions_per_rule=max_conditions,
                num_seeds=num_seeds,
                save_detailed_results=False,  # We'll handle saving ourselves
                save_summary_results=False
            )

            # Run experiment
            results_df, summary_stats = run_multi_seed_experiment(config)

            # Add dataset name to results
            results_df['dataset'] = dataset_name
            summary_stats['dataset_name'] = dataset_name
        except Exception as e:
            # Deliberate best-effort boundary: one failing dataset must not
            # abort the remaining ones.
            print(f"❌ Error with {dataset_name}: {e}")
            continue

        # Store results only once the run has fully succeeded, so a dataset is
        # never both reported as failed and included in the combined output.
        all_detailed_results.append(results_df)
        all_summary_stats.append(summary_stats)

        # Print summary for this dataset (tolerant of missing statistics)
        print(f"✅ {dataset_name} completed:")
        print(f"   Successful seeds: {summary_stats.get('successful_seeds', 'N/A')}/{summary_stats.get('total_seeds', 'N/A')}")
        print(f"   ABBR generalization gap: {_format_stat(summary_stats, 'abbr_rule_avg_generalization_gap')}")
        print(f"   Consistency generalization gap: {_format_stat(summary_stats, 'consistency_rule_avg_generalization_gap')}")
        print(f"   ABBR better: {_format_stat(summary_stats, 'fraction_abbr_better', '.1%')}")

    if not all_detailed_results:
        print("❌ No successful experiments!")
        return

    # Combine all detailed results
    print(f"\n📊 Combining results from {len(all_detailed_results)} datasets...")
    combined_detailed_df = pd.concat(all_detailed_results, ignore_index=True)

    # Create combined summary DataFrame
    summary_columns = [
        'dataset_name', 'successful_seeds', 'total_seeds',
        'abbr_rule_avg_generalization_gap', 'abbr_rule_std_generalization_gap',
        'consistency_rule_avg_generalization_gap', 'consistency_rule_std_generalization_gap',
        'avg_generalization_gap_difference', 'std_generalization_gap_difference',
        'fraction_abbr_better', 'avg_rules_generated', 'avg_rules_with_metrics'
    ]

    combined_summary_df = pd.DataFrame([
        {col: stats.get(col, 'N/A') for col in summary_columns}
        for stats in all_summary_stats
    ])

    # Generate output filenames
    detailed_filename = f"abbr_all_datasets_detailed_{timestamp}.csv"
    summary_filename = f"abbr_all_datasets_summary_{timestamp}.csv"
    report_filename = f"abbr_all_datasets_report_{timestamp}.txt"

    # Save combined results
    detailed_path = os.path.join(output_dir, detailed_filename)
    summary_path = os.path.join(output_dir, summary_filename)
    report_path = os.path.join(output_dir, report_filename)

    combined_detailed_df.to_csv(detailed_path, index=False)
    combined_summary_df.to_csv(summary_path, index=False)

    # Calculate cross-dataset statistics once; they feed both the report and
    # the console summary.
    gap_differences = _numeric_stat_values(all_summary_stats, 'avg_generalization_gap_difference')
    abbr_better_fractions = _numeric_stat_values(all_summary_stats, 'fraction_abbr_better')
    abbr_wins = sum(1 for d in gap_differences if d > 0)

    # Generate comprehensive text report.
    # Explicit UTF-8: the report contains "±", which must not depend on the
    # platform's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("ABBR Value Experiment - Comprehensive Report\n")
        f.write("=" * 80 + "\n\n")
        f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total datasets tested: {len(all_summary_stats)}\n")
        f.write(f"Seeds per dataset: {num_seeds}\n")
        f.write(f"Confidence threshold: {confidence_threshold}\n")
        f.write(f"Min rule support: {min_rule_support}\n")
        f.write(f"Max conditions per rule: {max_conditions}\n\n")

        f.write("DATASET SUMMARIES:\n")
        f.write("-" * 50 + "\n")

        for stats in all_summary_stats:
            f.write(f"\n{stats['dataset_name'].upper()}:\n")
            f.write(f"  Success rate: {stats.get('successful_seeds', 'N/A')}/{stats.get('total_seeds', 'N/A')}\n")
            f.write(f"  ABBR generalization gap: {_format_stat(stats, 'abbr_rule_avg_generalization_gap')} ± {_format_stat(stats, 'abbr_rule_std_generalization_gap')}\n")
            f.write(f"  Consistency generalization gap: {_format_stat(stats, 'consistency_rule_avg_generalization_gap')} ± {_format_stat(stats, 'consistency_rule_std_generalization_gap')}\n")
            f.write(f"  Gap difference (Cons - ABBR): {_format_stat(stats, 'avg_generalization_gap_difference')} ± {_format_stat(stats, 'std_generalization_gap_difference')}\n")
            f.write(f"  Fraction ABBR better: {_format_stat(stats, 'fraction_abbr_better')}\n")
            f.write(f"  Avg rules generated: {_format_stat(stats, 'avg_rules_generated', '.1f')}\n")

        # Overall analysis
        f.write("\nOVERALL ANALYSIS:\n")
        f.write("-" * 50 + "\n")

        f.write(f"Average gap difference across datasets: {_format_mean(gap_differences)}\n")
        f.write(f"Average fraction ABBR better: {_format_mean(abbr_better_fractions)}\n")
        f.write(f"Datasets where ABBR is better on average: {abbr_wins}/{len(gap_differences)}\n")

        f.write("\nFILES GENERATED:\n")
        f.write(f"- Detailed results: {detailed_filename}\n")
        f.write(f"- Summary results: {summary_filename}\n")
        f.write(f"- This report: {report_filename}\n")

    # Print final summary
    print("\n🎉 EXPERIMENT COMPLETED!")
    print(f"📁 Results saved to: {output_dir}")
    print("📄 Files generated:")
    print(f"   📊 Detailed results: {detailed_filename}")
    print(f"   📈 Summary results: {summary_filename}")
    print(f"   📝 Report: {report_filename}")

    print("\n📊 QUICK SUMMARY:")
    print(f"   Datasets tested: {len(all_summary_stats)}")
    print(f"   Average gap difference: {_format_mean(gap_differences)}")
    print(f"   Average ABBR better rate: {_format_mean(abbr_better_fractions, '.1%')}")
    print(f"   Datasets where ABBR wins: {abbr_wins}/{len(gap_differences)}")

def main():
    """Parse the command-line options and launch the all-datasets experiment."""
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Run ABBR value experiments on all datasets and combine results",
    )

    # One row per option: (flag, value type, default, help text).
    # Row order is the order in which the options are listed by --help.
    option_table = (
        ('--seeds', int, 100, 'Number of random seeds to test per dataset'),
        ('--threshold', float, 0.9, 'Confidence threshold for binary predictions'),
        ('--support', float, 0.1, 'Minimum support (coverage) required for rules'),
        ('--max-conditions', int, 3, 'Maximum number of conditions per rule'),
        ('--output-dir', str, 'combined_results', 'Directory to save combined results'),
    )
    for flag, value_type, default_value, help_text in option_table:
        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)

    options = cli.parse_args()

    # Map the CLI names onto the experiment function's keyword arguments.
    experiment_kwargs = {
        'num_seeds': options.seeds,
        'confidence_threshold': options.threshold,
        'min_rule_support': options.support,
        'max_conditions': options.max_conditions,
        'output_dir': options.output_dir,
    }
    run_all_datasets_experiment(**experiment_kwargs)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main() 