# import pandas as pd

# # Read the data, skipping the first row
# df_original = pd.read_csv('document/basic_understanding/all_scores.csv', skiprows=1)
# df_examiner = pd.read_csv('document/basic_understanding/all_scores_examiner_gemini.csv', skiprows=1)

# # Models common to both score files
# common_models = ['GPT-4o', 'Gemini-1.5-Pro', 'Claude-3.5-Sonnet']
# difficulties = ['easy', 'medium', 'hard']

# # List that collects the result rows
# results = []

# # Compute the relative change for each difficulty and model
# for difficulty in difficulties:
#     for model in common_models:
#         # Look up the original score
#         original_score = df_original[
#             (df_original['Difficulty'] == difficulty) & 
#             (df_original['Model'] == model)
#         ]['Objective Score'].values[0]
        
#         # Look up the examiner score
#         examiner_score = df_examiner[
#             (df_examiner['Difficulty'] == difficulty) & 
#             (df_examiner['Model'] == model)
#         ]['Objective Score'].values[0]
        
#         # Compute the relative change
#         relative_change = (examiner_score - original_score) / original_score
        
#         results.append({
#             'Difficulty': difficulty,
#             'Model': model,
#             'Relative Change': relative_change
#         })

# # Convert to a DataFrame and save
# df_results = pd.DataFrame(results)
# df_results.to_csv('document/basic_understanding/examiner_relative_gemini.csv', index=False)

# # Print the results for verification
# print(df_results)


import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Plot the per-examiner relative score changes side by side: one subplot per
# examiner CSV, one bar group per evaluated model, one bar per difficulty.
# Each CSV is expected to provide 'Difficulty', 'Model' and 'Relative Change'
# columns.
df_gpt = pd.read_csv('document/basic_understanding/examiner_relative_gpt.csv')
df_gemini = pd.read_csv('document/basic_understanding/examiner_relative_gemini.csv')
df_claude = pd.read_csv('document/basic_understanding/examiner_relative_claude.csv')

fig, axes = plt.subplots(1, 3, figsize=(18, 6))
difficulties = ['easy', 'medium', 'hard']
models = ['GPT-4o', 'Gemini-1.5-Pro', 'Claude-3.5-Sonnet']
colors = ['#2ecc71', '#3498db', '#e74c3c']
titles = ['GPT-4 as Examiner', 'Gemini as Examiner', 'Claude as Examiner']
width = 0.25

# Left edge positions of the bar groups; identical for every subplot.
x = np.arange(len(models))

for idx, (df, title) in enumerate(zip([df_gpt, df_gemini, df_claude], titles)):
    ax = axes[idx]

    for i, difficulty in enumerate(difficulties):
        rows = df[df['Difficulty'] == difficulty]
        # Look the values up by model name instead of trusting CSV row order,
        # so every bar sits above its own tick label. A model missing from
        # the CSV becomes NaN (an empty slot) rather than shifting the
        # remaining bars under the wrong labels.
        data = rows.set_index('Model')['Relative Change'].reindex(models)
        ax.bar(x + i * width, data.to_numpy(), width, label=difficulty,
               color=colors[i], alpha=0.7)

    ax.set_title(title, pad=15, fontsize=12)
    # Three bars per group: offsetting by one bar width centres the tick
    # under the middle bar.
    ax.set_xticks(x + width)
    ax.set_xticklabels(models, rotation=45, ha='right')
    ax.grid(True, linestyle='--', alpha=0.3)
    if idx == 0:
        # The y-label and legend are drawn on the first subplot only.
        ax.set_ylabel('Relative Change')
        ax.legend()

    # Zero baseline to separate positive from negative changes.
    ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)

plt.tight_layout()

plt.savefig('document/basic_understanding/examiner_comparison.png',
            bbox_inches='tight', dpi=300)
plt.show()