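# plot_icml_comparison.py
# Plots the success rate together with the raw normalized (unweighted) scores.
# Outputs: Normalized_Score_Summary.csv, Success_Rate_Summary.csv, ICML_Performance_Success_Combined.png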

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# ==============================
# 1. Config Style (ICML/Academic Standard)
# ==============================
# All global matplotlib settings are applied in one place.
plt.rcParams.update({
    # Serif typeface, with Times New Roman as the only listed serif font
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
    'axes.unicode_minus': False,
    # Bold text, axis labels and titles for paper-quality figures
    'font.weight': 'bold',
    'axes.labelweight': 'bold',
    'axes.titleweight': 'bold',
})

# ==============================
# 2. Load Data
# ==============================
res_file = "综合得分分析结果.xlsx"
meta_file = "原始数据.xlsx"

# Results workbook: normalized scores and completion values, one sheet each
norm_s1, comp_s2 = (
    pd.read_excel(res_file, sheet_name=sheet)
    for sheet in ("Normalized_Sheet1", "Completion_Sheet2")
)
# Metadata workbook: provides the "Tasks" and "Complexity" columns used below
meta_info = pd.read_excel(meta_file, sheet_name="Sheet3")

# Lookup table: task name -> complexity label
complexity_by_task = meta_info.set_index("Tasks")["Complexity"]
complexity_map = complexity_by_task.to_dict()

# ==============================
# 3. Process Data
# ==============================
# Every column of the normalized sheet except the task identifier is a model.
model_cols = [name for name in norm_s1.columns if name != "Tasks"]

# 3.1 Binary success table: 1.0 where completion equals exactly 1.0, else 0.0
success_df = comp_s2.copy()
for model in model_cols:
    fully_completed = success_df[model].eq(1.0)
    success_df[model] = fully_completed.astype(float)

# 3.2 Attach the complexity label of each task to both tables
for table in (norm_s1, success_df):
    table["Complexity"] = table["Tasks"].map(complexity_map)

# 3.3 Calculate Averages by Model and Complexity
def get_summary(df, cols):
    """Average each model's values per complexity level and overall.

    Args:
        df: Wide table with "Tasks" and "Complexity" columns plus one
            value column per entry in ``cols``.
        cols: Names of the model columns to summarise.

    Returns:
        DataFrame indexed by model name. Its columns are whichever of
        "Low", "Medium", "High" and "Overall" are available, in that
        order; any other complexity level is left out. "Overall" is the
        mean over all of a model's rows, independent of complexity.
    """
    # Long format: one row per (task, model) pair
    long_form = df.melt(
        id_vars=["Tasks", "Complexity"],
        value_vars=cols,
        var_name="Model",
        value_name="Value",
    )

    # Mean per model and complexity level, with levels spread into columns
    per_level = long_form.groupby(["Model", "Complexity"])["Value"].mean().unstack()
    # Mean per model across every task
    overall = long_form.groupby("Model")["Value"].mean()
    combined = per_level.assign(Overall=overall)

    # Fixed presentation order, skipping levels that do not occur
    preferred_order = ("Low", "Medium", "High", "Overall")
    present = [level for level in preferred_order if level in combined.columns]
    return combined[present]

# Per-model averages of the normalized scores and of the binary success flags
norm_summary = get_summary(norm_s1, model_cols)
success_summary = get_summary(success_df, model_cols)

# Define Model Order (Ours last)
# The proposed method's column is recognised by the substring "Ours" or "R3".
# str() keeps the substring test from failing on non-string column headers.
ours_candidates = [m for m in model_cols if "Ours" in str(m) or "R3" in str(m)]
if not ours_candidates:
    # Fail with an actionable message instead of a bare IndexError
    raise ValueError(
        'No model column contains "Ours" or "R3"; cannot decide which model to place last. '
        f"Available model columns: {list(model_cols)}"
    )
# When several columns match, the first one is treated as "Ours"
ours_name = ours_candidates[0]
models_order = [m for m in model_cols if m != ours_name] + [ours_name]

# Reorder the summary rows so that they follow the plotting order
norm_summary = norm_summary.reindex(models_order)
success_summary = success_summary.reindex(models_order)

# ==============================
# 4. Plotting (ICML Format: Combined One Chart)
# ==============================
# Single plot, overlaying Score (Bar) and Success Rate (Marker)
fig, ax = plt.subplots(1, 1, figsize=(10, 6))

# X-axis groups are the complexity levels plus the overall average
complexities = ["Low", "Medium", "High", "Overall"]
complexities_labels = ["Low", "Med.", "High", "Overall"]
x = np.arange(len(complexities))

# The bars of one group share 85% of the unit distance between group centres;
# offsets are the bar centres relative to the group centre.
n_models = len(models_order)
width = 0.85 / n_models
half_span = width * (n_models - 1) / 2
offsets = np.linspace(-half_span, half_span, n_models)

# Create color palette
# NOTE(review): `cm` is not referenced in the visible part of this script;
# the import is kept in case other code relies on it.
import matplotlib.cm as cm
# Alternative palettes tried previously:
# "#0B559F", "#2A7AB9", "#539DCC", "#888EDC", "#BAD6EA", "#CE4459"
# "#A9A9A9", "#708090", "#888EDC", "#2F4F4F", "#556B2F", "#CD853F"
baseline_colors = ["#0B559F", "#2A7AB9", "#539DCC", "#888EDC", "#BAD6EA"]
ours_color = "#AF132B"

# The last entry of models_order is "Ours", so the last colour is always the
# highlight colour, however many baselines are plotted.
n_baselines = n_models - 1
if n_baselines <= len(baseline_colors):
    model_colors = baseline_colors[:n_baselines] + [ours_color]
else:
    # More baselines than hand-picked colours: fall back to a generated palette
    model_colors = list(sns.color_palette("husl", n_models))
    model_colors[-1] = ours_color  # Maintain Ours color

# Draw one bar series (normalized score) and one marker series (success rate)
# per model, shifted sideways so the models sit next to each other in a group.
for idx, model_name in enumerate(models_order):
    # Values of this model for every x-axis group, in x-axis order
    model_scores = norm_summary.loc[model_name, complexities].values
    model_success = success_summary.loc[model_name, complexities].values
    positions = x + offsets[idx]

    # Model colour, with a dark-grey fallback if the palette is too short
    if idx < len(model_colors):
        color = model_colors[idx]
    else:
        color = "#333333"

    # 1. Bars (Normalized Score): slightly transparent; the bar carries the
    #    legend entry for the model.
    bars = ax.bar(
        positions, model_scores, width,
        label=model_name, color=color, edgecolor='white',
        linewidth=0.5, alpha=0.85, zorder=2,
    )

    # 2. Diamonds (Success Rate): filled with the bar colour and outlined in
    #    black, so each marker is visually tied to its bar. No legend entry.
    ax.scatter(
        positions, model_success,
        s=120, marker='D', facecolors=color, edgecolors='black',
        linewidth=1.5, zorder=5, label='_nolegend_',
    )

    # 3. Value labels: printed above every bar taller than 0.01, for all models
    for bar in bars:
        h = bar.get_height()
        if h > 0.01:
            label_x = bar.get_x() + bar.get_width()/2
            ax.text(label_x, h + 0.02, f'{h:.2f}',
                    ha='center', va='bottom', fontsize=10,
                    fontweight='bold', color=color, zorder=6)


# Y axis: label, tick size, fixed 0-1 scale with a little headroom above 1.0,
# and dashed horizontal grid lines. No title or x-axis label is set.
y_ticks = np.arange(0, 1.1, 0.2)  # explicit ticks from 0 to 1.0
ax.set_ylabel("Normalized Performance", fontsize=16)
ax.tick_params(axis='y', labelsize=14)
ax.set_ylim(0, 1.1)
ax.set_yticks(y_ticks)
ax.yaxis.grid(True, linestyle='--', alpha=0.6, zorder=0)

# X axis: one tick per complexity group, using the shortened labels
ax.set_xticks(x)
ax.set_xticklabels(complexities_labels, rotation=0, ha='center', fontsize=16)

# Dashed vertical dividers halfway between neighbouring complexity groups
for boundary in x[:-1] + 0.5:
    ax.axvline(boundary, color='black', linestyle='--', alpha=0.5, linewidth=1)

sns.despine(ax=ax)

# Legend: model entries collected from the bars, shown below the chart
handles, labs = ax.get_legend_handles_labels()
labs = [label.replace("Agent(R3-DSAgent)", "Ours(R3-DSAgent)") for label in labs]

# Two extra entries explain what the bar and the diamond marker encode
from matplotlib.lines import Line2D
bar_key = Line2D([0], [0], color='gray', lw=5, alpha=0.85,
                 label='Bar: Normalized Score')
diamond_key = Line2D([0], [0], marker='D', color='w',
                     label='Diamond: Success Rate',
                     markerfacecolor='gray', markeredgecolor='black',
                     markersize=10)
shape_handles = [bar_key, diamond_key]

final_handles = [*handles, *shape_handles]
final_labels = [*labs, 'Bar: Normalized Score', 'Diamond: Success Rate']

# Figure-level legend anchored at the bottom centre in four columns
# (with six models plus the two keys that is 8 items, i.e. 2 rows)
fig.legend(
    final_handles, final_labels,
    loc='lower center', bbox_to_anchor=(0.5, 0.04),
    ncol=4, fontsize=12, frameon=False,
)

# Lay out the axes first, then reserve the bottom strip for the legend
plt.tight_layout()
plt.subplots_adjust(bottom=0.21)

# Write the figure to disk
output_img = "ICML_Performance_Success_Combined.png"
plt.savefig(output_img, dpi=600, bbox_inches='tight')
print(f"✅ ICML格式图表已生成: {output_img}")

# Save CSV for record: one file per summary table
for summary_table, csv_path in (
    (norm_summary, "Normalized_Score_Summary.csv"),
    (success_summary, "Success_Rate_Summary.csv"),
):
    summary_table.to_csv(csv_path)
print("✅ 统计摘要已保存至 CSV 文件。")
