import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os
import sys
# Directory component of the path this script was invoked with; it is used
# below to locate the input data and to place the output figures.
script_location = sys.argv[0]
path = os.path.dirname(script_location)
# Also append the relative entry '.' to the module search path.
sys.path.append('.')

# Global Matplotlib text styling: serif fonts with large sizes.
base_font_size = 40    # base text, axis labels, titles and tick labels
legend_font_size = 30  # legend entries

# Serif faces that are likely to be available on most systems.
serif_candidates = ["DejaVu Serif", "Liberation Serif", "Bitstream Vera Serif", "Nimbus Roman", "Times New Roman"]

font_settings = {
    "font.family": "serif",
    "font.serif": serif_candidates,
    "mathtext.fontset": "dejavuserif",
    "text.usetex": False,  # render text with Matplotlib itself, not LaTeX
    "font.size": base_font_size,
    "legend.fontsize": legend_font_size,
}
# Axis labels, titles and both tick-label sets share the base size.
for size_key in ("axes.labelsize", "axes.titlesize", "xtick.labelsize", "ytick.labelsize"):
    font_settings[size_key] = base_font_size

plt.rcParams.update(font_settings)

# Read the data file. Its first line is a header such as
# "Variables= col_a col_b ..."; every later non-blank line is one
# whitespace-separated record.
data_path = os.path.join(path, 'variance-multi-dimension.dat')
with open(data_path, 'r') as f:
    lines = f.readlines()

# An empty file has no header line at all; report that explicitly instead of
# failing with a bare IndexError on lines[0].
if not lines:
    raise ValueError(f"Data file '{data_path}' is empty: expected a 'Variables=' header line.")

# Drop the "Variables=" tag, then treat double quotes and commas as separators
# so that headers written as  Variables= "a", "b"  yield the same column names
# as the plain whitespace-separated form.
header_line = lines[0].strip().replace("Variables=", "")
header = header_line.replace('"', ' ').replace(',', ' ').split()

# Keep only the records whose field count matches the header.
data_lines = []
for line in lines[1:]:
    values = line.split()
    if not values:
        # Blank line: nothing to parse.
        continue
    if len(values) != len(header):
        print(f"Warning: Line with {len(values)} values doesn't match header with {len(header)} columns. Skipping.")
        continue
    data_lines.append(values)

# Build the table and convert every column from text to float.
df = pd.DataFrame(data_lines, columns=header)
df = df.astype(float)

# Per-row ratio of the estimated noise (E_noise) to the real noise
# (E_noise_real); a value of 1 means the two are equal.
estimated_noise = df['E_noise']
real_noise = df['E_noise_real']
df['noise_ratio'] = estimated_noise.div(real_noise)

# Bucket the rows by their (i_func, dim_input) pair.
group_keys = ['i_func', 'dim_input']
grouped = df.groupby(group_keys)

# Create a 16x16-inch figure; the plt.* calls below draw on it.
plt.figure(figsize=(16, 16))

# Color is chosen by i_func and marker by dim_input. Both lists are indexed
# modulo their length, so values beyond the list size wrap around.
colors = ['blue', 'red', 'green', 'purple']
markers = ['o', 's', '^', 'd']

# Draw one curve per (i_func, dim_input) group. The groupby object is iterated
# directly because no running index is needed.
for (i_func, dim_input), group in grouped:
    color = colors[int(i_func) % len(colors)]
    marker = markers[int(dim_input) % len(markers)]

    # Order the points by sample count so the connecting line runs left to right.
    group = group.sort_values('num_samples')

    # Ratio of estimated to real noise against sample count, log scale on both axes.
    # NOTE(review): the label offsets i_func by 24 -- presumably an equation
    # number in an accompanying document; confirm before changing.
    plt.loglog(
        group['num_samples'],
        group['noise_ratio'],
        marker=marker,
        linestyle='-',
        color=color,
        linewidth=3.5,  # thick line for visibility at the large font sizes
        markersize=15,
        label=f'Eq. {24+int(i_func)}, $n_x$={int(dim_input)}'
    )

# Work on the current axes and figure through the object-oriented API.
axes = plt.gca()

# Dashed reference line at ratio 1.0, where estimated and real noise are equal.
axes.axhline(y=1.0, color='black', linestyle='--', alpha=0.5, linewidth=3.0, label='Ideal')

# Axis captions.
axes.set_xlabel('Number of Samples (log scale)')
axes.set_ylabel('Ratio of Estimated to Real Noise \n (log scale)')

# Restrict the visible y-range to [0.1, 100.0].
axes.set_ylim(0.1, 100.0)

# Collapse repeated labels so each appears once in the legend; for a repeated
# label the last handle seen is kept.
legend_entries = {}
for handle, label in zip(*axes.get_legend_handles_labels()):
    legend_entries[label] = handle

# Anchor the legend's upper-left corner above the axes and lay the entries
# out in three columns.
axes.legend(
    legend_entries.values(),
    legend_entries.keys(),
    loc='upper left',
    bbox_to_anchor=(-0.3, 1.5),
    ncol=3,
)

# Run the automatic layout first, then lower the top of the axes area to 70%
# of the figure height to leave room for the legend above the plot.
figure = plt.gcf()
figure.tight_layout()
figure.subplots_adjust(top=0.7)

# Write the figure as PNG and PDF into the directory held in `path`.
output_path = os.path.join(path, 'noise_ratio_comparison')
# NOTE(review): the PNG is written at dpi=30 while the PDF uses dpi=300.
# Confirm the low PNG resolution is intended and not a typo for 300.
for suffix, dots_per_inch in (('.png', 30), ('.pdf', 300)):
    plt.savefig(output_path + suffix, dpi=dots_per_inch)
print(f"Figure saved to {output_path}")

# Display the figure.
plt.show()
