#!/usr/bin/env python3
"""
generate_latex_results.py

A standalone script that generates LaTeX tables and newcommands from
previously generated CSV results files about tree pruning experiments.

This script expects the following files in the 'results/' directory:
- prune_unpruned_all_results.csv
- prune_unpruned_k_adj_improvements.csv
- prune_unpruned_k_exch_improvements.csv
"""

import os
import pandas as pd
import sys

def generate_latex_table(all_results_file):
    """
    Generate a LaTeX table showing errors for different parameter values for each dataset.

    Reads the CSV at ``all_results_file`` and writes the table to
    'results/prune_unpruned_error_rates_table.tex' (the 'results' directory
    must already exist; this function does not create it).

    - Shows only datasets with at least one result strictly below the
      heuristic error count
    - Sorts datasets alphabetically
    - Formats error numbers as integers; a parameter setting without a
      usable result is rendered as '--'

    Columns read from the CSV: dataset, heuristic_pruned, heuristic_errors,
    parameter, value, result_status, improved_errors.

    Returns:
        True if the table was written, False if the input file was not found.
    """
    # Read the full results data
    try:
        df = pd.read_csv(all_results_file)
    except FileNotFoundError:
        print(f"Error: File {all_results_file} not found")
        return False

    # Column keys in the order they appear in the rendered table
    column_keys = (
        'Dataset',
        'Heuristic Pruned',
        'Heuristic Errors',
        'k_adj = 1 Errors',
        'k_adj = 2 Errors',
        'k_exch = 1 Errors',
        'k_exch = 2 Errors',
    )

    table_rows = []
    for dataset in sorted(df['dataset'].unique()):  # alphabetical order
        dataset_df = df[df['dataset'] == dataset]

        # Heuristic values are taken from the dataset's first row (they are
        # expected to be identical on all rows of the same dataset).
        first_row = dataset_df.iloc[0]
        heuristic_errors = first_row['heuristic_errors']
        cells = {
            'Dataset': dataset,
            'Heuristic Pruned': int(first_row['heuristic_pruned']),
            'Heuristic Errors': int(heuristic_errors),
            'k_adj = 1 Errors': '--',
            'k_adj = 2 Errors': '--',
            'k_exch = 1 Errors': '--',
            'k_exch = 2 Errors': '--',
        }

        # Track if this dataset shows any improvement over the heuristic
        has_improvement = False
        for _, row in dataset_df.iterrows():
            # Skip rows that carry no usable result; their cell stays '--'
            if row['result_status'] == 'missing' or pd.isna(row['improved_errors']):
                continue

            param_key = f"{row['parameter']} = {int(row['value'])} Errors"
            cells[param_key] = int(row['improved_errors'])

            if row['improved_errors'] < heuristic_errors:
                has_improvement = True

        # Only keep datasets with at least one improvement
        if has_improvement:
            table_rows.append(cells)

    # Generate LaTeX table code
    latex_code = [
        "% generated by eval-prune-unpruned-trees-convert-to-latex.py",
        "\\begin{table}[t]",
        "\\caption{Error numbers obtainable when combining pruning with local search. Dashes indicate timeouts.}",
        "\\label{tab:prune_unpruned_error_rates_table}",
        "\\centering",
        "\\begin{tabular}{lcccccc}",
        "\\toprule",
        # "\\ " is a LaTeX control space; the backslash must be escaped in a
        # regular Python string ("\ " is an invalid escape sequence).
        "Dataset & Pruned Nodes & Heur.\\ Errors & $\\kadj = 1$ & $\\kadj = 2$ & $\\kexch = 1$ & $\\kexch = 2$ \\\\",
        "\\midrule",
    ]

    # Render the rows straight from the dicts. Going through a DataFrame is
    # unnecessary and could coerce the integer cells to floats.
    for cells in table_rows:
        latex_code.append(" & ".join(str(cells[key]) for key in column_keys) + " \\\\")

    # Close the table
    latex_code.extend([
        "\\bottomrule",
        "\\end{tabular}",
        "\\end{table}",
    ])

    # Write to file
    with open('results/prune_unpruned_error_rates_table.tex', 'w', encoding='utf-8') as f:
        f.write('\n'.join(latex_code))

    print("LaTeX table written to 'results/prune_unpruned_error_rates_table.tex'")
    return True


def generate_latex_commands(k_adj_improvements_file, k_exch_improvements_file):
    """
    Generate simplified LaTeX newcommands with statistics about the best improvements for each parameter.

    For each instance, the row with the largest 'percent_reduction' is taken
    (first such row on ties), regardless of the parameter value. For each of
    the two inputs, the number of such instances and the mean and median of
    their 'percent_reduction' (rounded to 2 decimals) are emitted, followed
    by the number of distinct instances across both inputs.

    Rows whose 'instance' or 'percent_reduction' is missing are ignored; if
    nothing usable remains for an input, its commands are emitted as 0.

    The commands are written to 'results/prune_unpruned_statistics_commands.tex'
    (the 'results' directory must already exist).

    Columns read from each CSV: instance, percent_reduction.

    Returns:
        True if the commands were written, False if an input file was not found.
    """
    try:
        k_adj_df = pd.read_csv(k_adj_improvements_file)
        k_exch_df = pd.read_csv(k_exch_improvements_file)
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return False

    def best_per_instance(df):
        """Return one row per instance: the one with the largest percent_reduction."""
        if df.empty:
            return df
        # NaN values cannot be ranked; dropping them up front avoids looking
        # up a "best" row that does not exist.
        usable = df.dropna(subset=['instance', 'percent_reduction'])
        if usable.empty:
            return usable
        # sort=False keeps instances in order of first appearance; idxmax
        # returns the first row label attaining the maximum in each group.
        best_labels = usable.groupby('instance', sort=False)['percent_reduction'].idxmax()
        return usable.loc[best_labels.tolist()]

    def summary_commands(prefix, best):
        """Build the count/mean/median newcommands for one parameter prefix."""
        if best.empty:
            count, mean_reduction, median_reduction = 0, 0, 0
        else:
            reductions = best['percent_reduction']
            count = len(best)
            mean_reduction = round(reductions.mean(), 2)
            median_reduction = round(reductions.median(), 2)
        return [
            f"\\newcommand{{\\{prefix}ImprovableCount}}{{{count}}}",
            f"\\newcommand{{\\{prefix}ErrorReductionMean}}{{{mean_reduction}\\%}}",
            f"\\newcommand{{\\{prefix}ErrorReductionMedian}}{{{median_reduction}\\%}}",
        ]

    # Initialize LaTeX newcommands
    latex_commands = [
        "% LaTeX commands for tree pruning statistics",
        "% Generated automatically from improvements data",
        "% Using best improvements for each instance regardless of parameter value",
    ]

    # Instances that show an improvement with either method
    all_instances = set()
    for prefix, df in (('kadj', k_adj_df), ('kexch', k_exch_df)):
        best = best_per_instance(df)
        latex_commands.extend(summary_commands(prefix, best))
        if not best.empty:
            all_instances.update(best['instance'])

    total_improvable = len(all_instances)
    latex_commands.append(f"\\newcommand{{\\totalImprovableInstances}}{{{total_improvable}}}")

    # Write to file
    with open('results/prune_unpruned_statistics_commands.tex', 'w', encoding='utf-8') as f:
        f.write('\n'.join(latex_commands))

    print("LaTeX commands written to 'results/prune_unpruned_statistics_commands.tex'")
    return True


def main():
    """
    Check that the CSV inputs exist, then build both LaTeX output files.

    Creates the 'results' directory when it does not exist yet. Returns 0
    when both generators report success and 1 otherwise (including the case
    where a required input file is missing).
    """
    # Make sure the output directory is there before anything is written
    results_dir = 'results'
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
        print("Created 'results' directory")

    # Input files produced by the main analysis script
    all_results_file = 'results/prune_unpruned_all_results.csv'
    k_adj_summary_file = 'results/prune_unpruned_k_adj_improvements.csv'
    k_exch_summary_file = 'results/prune_unpruned_k_exch_improvements.csv'

    # Bail out early, listing every input that is absent
    required_files = (all_results_file, k_adj_summary_file, k_exch_summary_file)
    missing_files = [path for path in required_files if not os.path.exists(path)]
    if missing_files:
        print("Error: The following required files are missing:")
        for path in missing_files:
            print(f"  - {path}")
        print("\nPlease run the main analysis script first to generate these files.")
        return 1

    # Both generators always run, even if the first one reports failure
    print("Generating LaTeX table from all results data...")
    table_success = generate_latex_table(all_results_file)

    print("Generating LaTeX commands from summary data...")
    commands_success = generate_latex_commands(k_adj_summary_file, k_exch_summary_file)

    if not (table_success and commands_success):
        print("\nLaTeX generation completed with errors.")
        return 1

    print("\nLaTeX generation completed successfully!")
    return 0

if __name__ == "__main__":
    # Propagate main()'s status (0 = success, 1 = failure) as the process
    # exit code so shells and build scripts can detect a failed run.
    sys.exit(main())
