import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# ==============================
# 1. 读取数据
# ==============================
file_path = "原始数据.xlsx"
sheet1_raw = pd.read_excel(file_path, sheet_name="Sheet1")
sheet2_success = pd.read_excel(file_path, sheet_name="Sheet2")
meta_info = pd.read_excel(file_path, sheet_name="Sheet3")  # 包含 Gold, Median, Metric Type

# 清理列名
for df in [sheet1_raw, sheet2_success]:
    df.rename(columns={df.columns[0]: "Tasks"}, inplace=True)
    df.dropna(how="all", inplace=True)

# 获取有效任务
valid_tasks = set(meta_info["Tasks"].astype(str))

def clean_df(df):
    return df[
        df["Tasks"].notna() &
        df["Tasks"].astype(str).isin(valid_tasks)
    ].reset_index(drop=True)

s1_clean = clean_df(sheet1_raw)
s2_clean = clean_df(sheet2_success)

model_cols = [col for col in s1_clean.columns if col != "Tasks"]

# ==============================
# 2. 标准化 Sheet1（基于金牌/中位数）
# ==============================
df_long = s1_clean.melt(id_vars=["Tasks"], value_vars=model_cols, var_name="Model", value_name="RawValue")
df_meta = meta_info[["Tasks", "Gold Score", "median_threshold", "Metric Type"]].copy()
df_merged = df_long.merge(df_meta, on="Tasks", how="inner")

def compute_kaggle_like_score(row):
    x = row["RawValue"]
    gold = row["Gold Score"]
    median = row["median_threshold"]
    metric_type = row["Metric Type"]
    if metric_type == "Max":
        if x >= gold:
            return 1.0
        elif x >= median:
            return 0.5 + 0.5 * (x - median) / max(gold - median, 1e-12)
        else:
            return 0.5 * (x / max(median, 1e-12)) if median > 0 else 0.0
    else:  # Min
        if x <= gold:
            return 1.0
        elif x <= median:
            return 0.5 + 0.5 * (median - x) / max(median - gold, 1e-12)
        else:
            return 0.5 * (median / max(x, 1e-12))

df_merged["NormScore"] = df_merged.apply(compute_kaggle_like_score, axis=1)
score1_wide = df_merged.pivot(index="Tasks", columns="Model", values="NormScore").reindex(meta_info["Tasks"])
score1_filled = score1_wide.fillna(0.0)

# ==============================
# 3. 处理 Sheet2（完成度）
# ==============================
score2_wide = s2_clean.set_index("Tasks")[model_cols].reindex(meta_info["Tasks"])
score2_filled = score2_wide.fillna(0.0)  # 未完成 = 0

# ==============================
# 4. 计算综合得分：0.6 * Score1 + 0.4 * Score2
# ==============================
final_score = 0.6 * score1_filled + 0.4 * score2_filled

# ==============================
# 5. 汇总各模型平均表现（按复杂度）
# ==============================
complexity_map = meta_info.set_index("Tasks")["Complexity"].to_dict()
final_score["Complexity"] = final_score.index.map(complexity_map)

summary_records = []
for model in model_cols:
    scores = final_score[model]
    overall = scores.mean()
    low = scores[final_score["Complexity"] == "Low"].mean()
    med = scores[final_score["Complexity"] == "Medium"].mean()
    high = scores[final_score["Complexity"] == "High"].mean()
    summary_records.append({
        "Model": model,
        "Avg_Low": low,
        "Avg_Medium": med,
        "Avg_High": high,
        "Avg_Overall": overall
    })

summary_df = pd.DataFrame(summary_records).set_index("Model")

print("📊 综合得分汇总（0.6*标准化指标 + 0.4*完成度）:")
print(summary_df.round(4))

# ==============================
# 6. 可视化：雷达图（按任务，拆分为两张图）
# ==============================
# 任务名称映射表
task_short_names = {
    "detecting-insults-in-social-commentary":        "DISC",
    "new-york-city-taxi-fare-prediction":            "NYCTaxi",
    "text-normalization-challenge-english-language": "TextNorm",
    "plant-pathology-2020-fgvc7":                    "PlantPat",
    "denoising-dirty-documents":                     "DocDn",
    "mlsp-2013-birds":                               "Birds",
    "google-quest-challenge":                        "GQuest",
    "lmsys-chatbot-arena":                           "Arena",
    "us-patent-phrase-to-phrase-matching":           "PatentPP",
    "petfinder-pawpularity-score":                   "PawPop",
    "tensorflow-speech-recognition-challenge":       "TF-Speech",
    "tgs-salt-identification-challenge":             "SaltID",
    "ventilator-pressure-prediction":                "VentPres",
    "stanford-covid-vaccine":                        "COVID-Vax",
    "predict-volcanic-eruptions-ingv-oe":             "Volcano",
    "nfl-player-contact-detection":                  "NFL-CD",
    "bms-molecular-translation":                     "MolTrans"
}

# 准备绘图数据
plot_df = final_score.copy()
if "Complexity" in plot_df.columns:
    plot_df = plot_df.drop(columns=["Complexity"])

# 按行归一化（每行最大值为1）
row_max = plot_df.max(axis=1)
row_max[row_max == 0] = 1.0
plot_df = plot_df.div(row_max, axis=0)

# 映射索引名为简称
plot_df.index = plot_df.index.map(lambda x: task_short_names.get(x, x[:15] + '..' if len(x)>15 else x))

# 分割任务（前8个 和 剩余的）
all_tasks = plot_df.index.tolist()
# 确保至少有分组
if len(all_tasks) > 8:
    task_groups = [all_tasks[:8], all_tasks[8:]]
else:
    task_groups = [all_tasks]

# 设置全局字体
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['Verdana', 'Arial', 'SimHei'] 

plt.rcParams.update({
    "font.family": "serif",
    "font.serif": ["Times New Roman"],
    "axes.unicode_minus": False
})

# 1. 颜色设置：参考图 1 的丰富配色
palette = [
    '#5B9BD5', '#70AD47', "#EBFC00C6", '#ED7D31', '#4472C4', 
    '#7030A0', '#FFD966', '#002060', '#C00000', '#A5A5A5'
]
# palette = ["#0B559F", "#2A7AB9", "#539DCC", "#888EDC", "#BAD6EA", "#AF132B"]
# palette = sns.color_palette("tab10").as_hex()

# 2. 创建画布：调整 figsize 以适应紧凑布局
fig, axes = plt.subplots(1, 2, figsize=(16, 8), subplot_kw=dict(polar=True))
axes = axes.flatten()

# 识别需要高亮的 Ours
highlight_model = next((m for m in model_cols if "Ours" in m or "R3" in m), None)

legend_handles = []
legend_labels = []

for i, tasks_subset in enumerate(task_groups):
    ax = axes[i]
    if not tasks_subset:
        ax.set_visible(False)
        continue

    sub_df = plot_df.loc[tasks_subset]
    categories = sub_df.index.tolist()
    N = len(categories)
    angles = [n / float(N) * 2 * np.pi for n in range(N)]
    angles += angles[:1]
    
    # 调整任务名离图的距离 (pad 调小)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories, fontsize=18, fontweight='bold', color='black')
    ax.tick_params(axis='x', pad=15) 
    
    # 设置径向刻度
    ax.set_rlabel_position(0)
    ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0])
    ax.set_yticklabels(["0.2", "0.4", "0.6", "0.8", "1.0"], color="#333333", size=10, fontweight='bold')
    ax.set_ylim(0, 1.1)
    
    for idx, model in enumerate(model_cols):
        values = sub_df[model].values.flatten().tolist()
        values += values[:1]
        
        is_highlight = (model == highlight_model)
        color = "#D62728" if is_highlight else palette[idx % len(palette)]
        
        # 线条样式
        lw = 2.5 if is_highlight else 1.5
        zorder = 10 if is_highlight else 5
        # Increased alpha for darker shading similar to example image
        alpha_fill = 0.10 if is_highlight else 0.15
        
        line, = ax.plot(angles, values, linewidth=lw, color=color, zorder=zorder, label=model)
        # 填充的阴影色彩暗一些
        ax.fill(angles, values, alpha=alpha_fill, color=color, zorder=zorder)
        
        # 3. 添加数值标注 (参考图 1)
        for j, val in enumerate(values[:-1]):
            # 过滤掉过小的值，避免中心堆叠
            if val < 0.1: continue 
            ax.text(angles[j], val + 0.02, f"{val:.2f}", 
                    size=8, color='black', ha='center', va='bottom', 
                    zorder=zorder+1, alpha=0.8)

        if i == 0:
            legend_handles.append(line)
            legend_labels.append(model)
            
    # 增强网格线清晰度 - Inner rings made clearer/darker
    ax.grid(True, color="#4d4d4d", linestyle='--', linewidth=1.0, alpha=0.9)
    # 特别增强同心圆网格 (y轴网格)
    ax.yaxis.grid(True, color="#4d4d4d", linestyle='--', linewidth=1.0, alpha=0.9)
    # x轴网格 (放射线) 可以稍微淡一点以免喧宾夺主，这里统一设置或者分开设置
    ax.xaxis.grid(True, color="#B0B0B0", linestyle=':', linewidth=0.8, alpha=0.7)

# ==========================================
# 布局优化：将图例放上方，缩小左右间距
# ==========================================

# 设置图例在上方，ncol=3 或根据模型数平分 (如 6 个模型设为 3)
n_cols = 3 if len(model_cols) <= 6 else 4
fig.legend(legend_handles, legend_labels, 
           loc='upper center', 
           bbox_to_anchor=(0.5, 0.98), 
           ncol=n_cols, 
           frameon=True, 
           fontsize=18, 
           prop={'weight': 'bold', 'size': 18})

# wspace 控制左右两个图的间距
# top 留出空间给图例
plt.subplots_adjust(top=0.80, bottom=0.08, left=0.05, right=0.95, wspace=0.01)

save_name = "Task_Radar_Style_V2.png"
plt.savefig(save_name, dpi=600, bbox_inches='tight')
plt.show()







# ==============================
# 7. 保存结果
# ==============================
with pd.ExcelWriter("综合得分分析结果.xlsx") as writer:
    final_score.to_excel(writer, sheet_name="Final_Score")
    score1_filled.to_excel(writer, sheet_name="Normalized_Sheet1")
    score2_filled.to_excel(writer, sheet_name="Completion_Sheet2")
    summary_df.to_excel(writer, sheet_name="Summary_By_Model")

print("\n✅ 综合得分分析完成！结果已保存到 '综合得分分析结果.xlsx'")

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.weight'] = 'bold'
plt.rcParams['axes.labelweight'] = 'bold'

# ==============================
# 2. 读取数据
# ==============================
input_file = "综合得分分析结果.xlsx"
# 读取汇总表
df = pd.read_excel(input_file, sheet_name="Summary_By_Model")

# 准备绘图数据
# 我们展示 Avg_Low, Avg_Medium, Avg_High, Avg_Overall
plot_data = df.melt(id_vars="Model", 
                    value_vars=["Avg_Low", "Avg_Medium", "Avg_High", "Avg_Overall"],
                    var_name="Complexity", 
                    value_name="Score")

# 简化 Complexity 名称用于显示
plot_data["Complexity"] = plot_data["Complexity"].str.replace("Avg_", "")

# 为了美观，将“我们的模型”排在最右侧或特定位置
ours_name = [m for m in df["Model"] if "Ours" in m or "R3" in m][0]
models_order = [m for m in df["Model"] if m != ours_name] + [ours_name]

# ==============================
# 3. 绘图
# ==============================
fig, ax = plt.subplots(figsize=(12, 7))

# 自定义调色板
# 给 Ours 一个醒目的颜色，其他模型使用灰度或冷色调
colors = []
palette_base = sns.color_palette("muted", len(models_order))
color_map = {m: palette_base[i] for i, m in enumerate(models_order)}
color_map[ours_name] = "#D62728" # 红色突出

# 绘制分组柱状图
width = 0.18
x = np.arange(len(models_order))

# 低、中、高复杂度及整体平均的偏移
offset = [-1.5*width, -0.5*width, 0.5*width, 1.5*width]
complexities = ["Low", "Medium", "High", "Overall"]
labels = ["Low Complexity", "Medium Complexity", "High Complexity", "Overall Average"]
bar_colors = ["#A9A9A9", "#708090", "#2F4F4F", "#CD853F"] # 灰度阶梯 + 琥珀色代表整体

for i, comp in enumerate(complexities):
    scores = []
    for m in models_order:
        score = df[df["Model"] == m][f"Avg_{comp}"].values[0]
        scores.append(score)
    
    # 也可以按复杂度上色
    bars = ax.bar(x + offset[i], scores, width, label=labels[i], 
                  color=bar_colors[i], edgecolor='black', linewidth=1, alpha=0.9)
    
    # 在柱状图上方添加数值标签
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=9, fontweight='bold')

# ==============================
# 4. 修饰
# ==============================
ax.set_ylabel("Comprehensive Score", fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(models_order, fontsize=12, fontweight='bold', rotation=15)
ax.set_ylim(0, 1.15) # 给上方留点空间放标签

# 网格线
ax.yaxis.grid(True, linestyle='--', alpha=0.6)
ax.set_axisbelow(True)

# 图例 - 放在上方
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.12), ncol=4, 
          fontsize=12, frameon=False)

# 移除顶部和右侧边框
sns.despine()

plt.tight_layout()

# 保存结果
output_name = "Summary_Score_by_Complexity.png"
plt.savefig(output_name, dpi=600, bbox_inches='tight')
print(f"✅ 汇总图表已生成: {output_name}")
plt.show()

