import pandas as pd
import numpy as np

# 读取 Excel 文件中的三个 sheet
sheet1 = pd.read_excel("原始数据 - 副本.xlsx", sheet_name="Sheet1")
sheet3 = pd.read_excel("原始数据 - 副本.xlsx", sheet_name="Sheet3")

# 清理 sheet3：确保任务名一致，设置索引便于查找
sheet3 = sheet3.rename(columns={"Tasks": "Task/Model"})
sheet3.set_index("Task/Model", inplace=True)

# 提取模型列（排除 Task/Model 列）
model_columns = [col for col in sheet1.columns if col != "Task/Model"]

# 初始化统计字典
total_tasks = {model: 0 for model in model_columns}
medal_tasks = {model: 0 for model in model_columns}

# 遍历每一行（每个任务）
for _, row in sheet1.iterrows():
    task = row["Task/Model"]
    if task not in sheet3.index:
        continue  # 跳过没有 bronze score 的任务
    
    bronze_score = sheet3.loc[task, "Bronze Score"]
    metric_type = sheet3.loc[task, "Metric Type"]
    
    # 判断每个模型
    for model in model_columns:
        score = row[model]
        # 跳过 NULL 或 NaN
        if pd.isna(score):
            continue
        
        total_tasks[model] += 1
        
        # 判断是否达标
        if metric_type == "Max":
            if score >= bronze_score:
                medal_tasks[model] += 1
        elif metric_type == "Min":
            if score <= bronze_score:
                medal_tasks[model] += 1
        else:
            raise ValueError(f"Unknown metric type: {metric_type}")

# 计算奖牌获取率
medal_rates = {}
for model in model_columns:
    if total_tasks[model] == 0:
        rate = 0.0
    else:
        rate = medal_tasks[model] / total_tasks[model]
    medal_rates[model] = rate

# 转换为 DataFrame 并输出
result_df = pd.DataFrame({
    "Model": list(medal_rates.keys()),
    # "Medal Acquisition Rate": list(medal_rates.values()),
    "Medal Tasks": [medal_tasks[m] for m in medal_rates.keys()],
    "Total Succ Tasks": [total_tasks[m] for m in medal_rates.keys()]
})

# 按奖牌率降序排序
result_df = result_df.sort_values(by="Medal Acquisition Rate", ascending=False).reset_index(drop=True)

print(result_df)