import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
import matplotlib as mpl

# Matplotlib settings for LaTeX output: text is rendered through LaTeX in a
# serif (Computer Modern Roman) face, with explicit point sizes for labels,
# legend and tick text.
_latex_rc_overrides = {
    # Text rendering / font selection
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Computer Modern Roman"],
    # Font sizes (points)
    "axes.labelsize": 10,
    "font.size": 10,
    "legend.fontsize": 8,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
}
plt.rcParams.update(_latex_rc_overrides)

# Load the consolidated results table.
data = pd.read_csv('results/consolidated_results.csv')

# Keep only the rows whose tree-type column is marked 'unpruned'.
is_unpruned = data['pruned/unpruned tree'].eq('unpruned')
unpruned_data = data.loc[is_unpruned]

# Distinct instance names among the unpruned rows; one plot is drawn per entry.
unique_instances = unpruned_data['instancename'].unique()

# Parameter configurations to plot, one curve per (k_exch, k_adj) pair.
# Each entry carries the filter values plus a LaTeX math-mode legend label.
# %-formatting is used for the label because the template is full of literal
# LaTeX braces, which an f-string or str.format would require doubling.
_LABEL_TEMPLATE = r'$k_{\textsf{ex}}=%d, k_{\textsf{ad}}=%d$'

configurations = [
    {
        'k_exch': k_exch,
        'k_adj': k_adj,
        'label': _LABEL_TEMPLATE % (k_exch, k_adj),
    }
    for k_exch, k_adj in [(0, 0), (0, 1), (0, 2), (1, 0), (2, 0)]
]

# Ensure the output directories exist: the root plus one sub-directory per
# output format (PGF and PDF). Existing directories are left untouched.
for out_dir in ('plots', 'plots/pgf', 'plots/pdf'):
    os.makedirs(out_dir, exist_ok=True)

# Visual style per plotted series. `colors` and `markers` are indexed in
# parallel with `configurations`, so entry i styles configuration i.
_series_styles = [
    ('#1f77b4', 'o'),  # blue, circle
    ('#ff7f0e', 's'),  # orange, square
    ('#2ca02c', '^'),  # green, triangle up
    ('#d62728', 'D'),  # red, diamond
    ('#9467bd', 'v'),  # purple, triangle down
]
colors = [color for color, _ in _series_styles]
markers = [marker for _, marker in _series_styles]

# Create one plot for each instance (dataset)

# LaTeX text-mode special characters and their escaped forms. Because
# "text.usetex" is enabled, a raw instance name such as "my_dataset" or
# "50%_noise" placed in a title would otherwise make the LaTeX run fail (or
# silently drop text), so titles are escaped before being handed to matplotlib.
_LATEX_ESCAPES = {
    '\\': r'\textbackslash{}',
    '{': r'\{',
    '}': r'\}',
    '$': r'\$',
    '&': r'\&',
    '#': r'\#',
    '%': r'\%',
    '_': r'\_',
    '^': r'\textasciicircum{}',
    '~': r'\textasciitilde{}',
}


def _latex_escape(text):
    """Return *text* with LaTeX special characters escaped for text mode.

    The mapping is applied character by character in a single pass, so the
    backslashes and braces introduced by one replacement are never escaped
    a second time.
    """
    return ''.join(_LATEX_ESCAPES.get(ch, ch) for ch in str(text))


# File stems already written in this run. Sanitising maps every
# non-alphanumeric character to "_", so distinct instances (e.g. "a-b" and
# "a_b") can collide; tracking the stems prevents one silently overwriting
# the other.
used_file_stems = set()

for instance in unique_instances:
    # Rows belonging to the current instance
    instance_data = unpruned_data[unpruned_data['instancename'] == instance]

    # Collect the non-empty series first, so that no figure is created for an
    # instance that has nothing to plot.
    series = []
    for i, config in enumerate(configurations):
        config_data = instance_data[
            (instance_data['k_exch'] == config['k_exch'])
            & (instance_data['k_adj'] == config['k_adj'])
        ]
        if not config_data.empty:
            # Sort by k_repl so the connecting line runs left to right.
            series.append((i, config['label'], config_data.sort_values('k_repl')))

    if not series:
        continue

    # Explicit figure/axes objects instead of pyplot's implicit "current
    # figure", sized for inclusion in a LaTeX document.
    fig, ax = plt.subplots(figsize=(5, 3.5))

    for i, label, config_data in series:
        ax.plot(config_data['k_repl'], config_data['min_errors'],
                marker=markers[i], linestyle='-', color=colors[i],
                linewidth=1.5, markersize=4, label=label)

    # Title and axis labels (rendered by LaTeX, hence the escaped name)
    ax.set_title(f'Dataset: {_latex_escape(instance)}', fontsize=11)
    ax.set_xlabel(r'$k_{re}$', fontsize=10)
    ax.set_ylabel(r'Min.\ \# errors achievable', fontsize=10)
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.legend(fontsize=8, framealpha=0.9)

    # Pad the x-range by half a unit on each side, never going below zero.
    # pandas' min()/max() skip NaN, unlike the built-ins, whose result on a
    # column containing NaN depends on element order.
    k_repl = instance_data['k_repl']
    ax.set_xlim(max(0, k_repl.min() - 0.5), k_repl.max() + 0.5)

    fig.tight_layout()

    # Convert the instance name to a safe file stem. str() guards against
    # non-string instance names (e.g. numeric IDs), which cannot be iterated.
    base_stem = ''.join(c if c.isalnum() else '_' for c in str(instance))
    safe_instance_name = base_stem
    suffix = 2
    while safe_instance_name in used_file_stems:
        safe_instance_name = f'{base_stem}_{suffix}'
        suffix += 1
    used_file_stems.add(safe_instance_name)

    # Save in both vector formats
    fig.savefig(f'plots/pdf/{safe_instance_name}.pdf', bbox_inches='tight')
    fig.savefig(f'plots/pgf/{safe_instance_name}.pgf', bbox_inches='tight')

    plt.close(fig)

print("Created vector graphics plots for unpruned tree instances in PDF and PGF formats.")
