import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Benchmark inputs: accuracies come from the results table, while the
# F1-scores and every standard deviation are simulated values.
method = 'GBM'

# One label per (dataset, split) pair, grouped by dataset.
dataset_splits = [
    '{}-{}'.format(dataset_name, split_name)
    for dataset_name in ('Cora', 'CiteSeer', 'PubMed')
    for split_name in ('Public', 'Full-supervised', 'Fixed')
]

# One row per entry of dataset_splits (same order):
# (accuracy, simulated F1, simulated accuracy std, simulated F1 std)
_metric_rows = [
    (0.8130, 0.80, 0.02, 0.025),   # Cora-Public
    (0.8880, 0.87, 0.015, 0.02),   # Cora-Full-supervised
    (0.8582, 0.84, 0.018, 0.022),  # Cora-Fixed
    (0.6480, 0.63, 0.025, 0.03),   # CiteSeer-Public
    (0.7700, 0.75, 0.02, 0.025),   # CiteSeer-Full-supervised
    (0.7541, 0.73, 0.022, 0.027),  # CiteSeer-Fixed
    (0.8420, 0.82, 0.017, 0.02),   # PubMed-Public
    (0.8640, 0.85, 0.016, 0.018),  # PubMed-Full-supervised
    (0.8654, 0.84, 0.015, 0.017),  # PubMed-Fixed
]
# Transpose the rows into one list per metric.
accuracy, f1, accuracy_std, f1_std = (list(column) for column in zip(*_metric_rows))

# Visual encoding: color identifies the dataset, marker shape the split.
dataset_colors = dict(Cora='tab:blue', CiteSeer='tab:orange', PubMed='tab:green')
split_markers = dict(
    Public='o',  # Circle
    Full='s',    # Square
    Fixed='^',   # Triangle
)

# Break each label on '-' and keep the first two tokens: the dataset name and
# the first word of the split (so 'Full-supervised' becomes 'Full', which is
# the key used in split_markers).
_label_tokens = [label.split('-') for label in dataset_splits]
datasets = [tokens[0] for tokens in _label_tokens]
splits = [tokens[1] for tokens in _label_tokens]

# Plotting configuration
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 and removed in
# later releases in favour of 'seaborn-v0_8'. Use whichever name this
# installation provides (preferring the new one) so the script does not fail on
# recent matplotlib versions; if neither exists, the default style is kept.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Applied after the style sheet so these values override the style's defaults.
plt.rcParams.update({
    'font.size': 12,
    'axes.labelsize': 14,
    'axes.titlesize': 16,
    'xtick.labelsize': 10,
    'ytick.labelsize': 12,
    'legend.fontsize': 10,
    'figure.figsize': (10, 8),
    'axes.grid': True,
    'grid.alpha': 0.5
})

# Two stacked panels sharing one x axis: accuracy on top, F1-score below.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10, 8))

# One x position per dataset/split entry.
x = np.arange(len(dataset_splits))

# Appearance shared by every plotted point (markers only, no connecting line).
_point_style = dict(linestyle='none', capsize=5, markersize=8, markeredgecolor='black')

# Each point is drawn with its own errorbar call so it can carry its own
# dataset color and split marker.
_panels = ((ax1, accuracy, accuracy_std), (ax2, f1, f1_std))
for _axis, _values, _errors in _panels:
    for _pos, _value, _error, _dataset, _split in zip(x, _values, _errors, datasets, splits):
        _axis.errorbar(_pos, _value, yerr=_error,
                       color=dataset_colors[_dataset],
                       marker=split_markers[_split],
                       **_point_style)

# Create custom legend
# Entries are derived from dataset_colors and split_markers so the legend
# cannot drift out of sync with the colors and shapes actually plotted.

# Marker keys whose legend text differs from the key itself.
_split_legend_labels = {'Full': 'Full-supervised'}


def _legend_handle(label, marker, facecolor):
    """Return a marker-only proxy artist for a legend entry.

    The proxy's line is drawn in white, so only the black-edged marker with
    the given face color shows up next to ``label``.
    """
    return Line2D([0], [0], marker=marker, color='w', markerfacecolor=facecolor,
                  label=label, markersize=10, markeredgecolor='black')


legend_elements = [
    # Dataset legend: colored, filled circles (color encodes the dataset)
    _legend_handle(_name, 'o', _color)
    for _name, _color in dataset_colors.items()
] + [
    # Split legend: gray markers to emphasize shape (shape encodes the split)
    _legend_handle(_split_legend_labels.get(_key, _key), _marker, 'gray')
    for _key, _marker in split_markers.items()
]

# Customize plots
# Titles are built from `method` so renaming the method in the data section
# updates the figure as well (the name used to be hard-coded here).
ax1.set_ylabel('Accuracy')
ax1.set_title('Accuracy of {} Across Datasets and Splits'.format(method))
ax1.set_ylim(0.6, 0.95)

# The x axis is shared, so tick labels are only configured on the bottom panel.
ax2.set_xlabel('Dataset and Split')
ax2.set_ylabel('F1-Score')
ax2.set_title('F1-Score of {} Across Datasets and Splits'.format(method))
ax2.set_xticks(x)
ax2.set_xticklabels(dataset_splits, rotation=45, ha='right')
ax2.set_ylim(0.55, 0.9)

# Both panels get the same legend, anchored just outside the right edge of
# the axes, and the same semi-transparent grid.
for _panel in (ax1, ax2):
    _panel.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc='upper left', title='Datasets and Splits')
    _panel.grid(True, alpha=0.5)

# Adjust layout
fig.tight_layout()

# Save plot in both formats. bbox_inches='tight' makes the saved area include
# the legends, which are anchored outside the axes.
_png_path = 'gbm_datasets.png'
_pdf_path = 'gbm_datasets_colors_splits_shapes_fixed_legend.pdf'
fig.savefig(_png_path, dpi=300, bbox_inches='tight')
fig.savefig(_pdf_path, format='pdf', dpi=300, bbox_inches='tight')

# Close this figure explicitly rather than whichever one is "current".
plt.close(fig)

# Report every file that was written (previously only the PDF was mentioned).
print("Visualization saved as '{}' and '{}'".format(_png_path, _pdf_path))