# import pandas as pd
# import numpy as np

# df = pd.read_csv('document/basic_understanding/all_scores.csv', skiprows=1)

# target_models = ['GPT-4o', 'Gemini-1.5-Pro', 'Claude-3.5-Sonnet']
# difficulties = ['easy', 'medium', 'hard']

# results = []

# for difficulty in difficulties:
#     scores = []
#     for model in target_models:
#         score = df[(df['Difficulty'] == difficulty) & 
#                   (df['Model'] == model)]['Objective Score'].values[0]
#         scores.append(score)
    
#     variance = np.var(scores)
#     results.append({
#         'Difficulty': difficulty,
#         'Score Variance': variance,
#         'Models': ', '.join(target_models)
#     })

# all_scores = []
# for model in target_models:
#     for difficulty in difficulties:
#         score = df[(df['Difficulty'] == difficulty) & 
#                   (df['Model'] == model)]['Objective Score'].values[0]
#         all_scores.append(score)

# average_variance = np.var(all_scores)
# results.append({
#     'Difficulty': 'average',
#     'Score Variance': average_variance,
#     'Models': ', '.join(target_models)
# })


# result_df = pd.DataFrame(results)
# result_df.to_csv('document/basic_understanding/model_variance.csv', index=False)

# print("\nresult:")
# print(result_df)

# import pandas as pd
# import matplotlib.pyplot as plt
# import numpy as np

# df_original = pd.read_csv('document/basic_understanding/model_variance.csv')
# df_gpt = pd.read_csv('document/basic_understanding/model_variance_gpt.csv')
# df_gemini = pd.read_csv('document/basic_understanding/model_variance_gemini.csv')
# df_claude = pd.read_csv('document/basic_understanding/model_variance_claude.csv')

# fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# difficulties = ['easy', 'medium', 'hard']
# width = 0.2
# colors = ['#2ecc71', '#3498db', '#e74c3c', '#9b59b6']
# labels = ['Original', 'GPT-4 Examiner', 'Gemini Examiner', 'Claude Examiner']

# for idx, difficulty in enumerate(difficulties):
#     variances = [
#         df_original[df_original['Difficulty'] == difficulty]['Score Variance'].values[0],
#         df_gpt[df_gpt['Difficulty'] == difficulty]['Score Variance'].values[0],
#         df_gemini[df_gemini['Difficulty'] == difficulty]['Score Variance'].values[0],
#         df_claude[df_claude['Difficulty'] == difficulty]['Score Variance'].values[0]
#     ]
    
#     x = np.arange(1)
#     for i in range(len(variances)):
#         axes[idx].bar(x + i*width, variances[i], width, 
#                      label=labels[i], color=colors[i], alpha=0.7)
    
#     axes[idx].set_title(f'{difficulty.capitalize()} Difficulty', pad=15, fontsize=12)
#     axes[idx].set_xticks([])
#     axes[idx].grid(True, linestyle='--', alpha=0.3)
#     if idx == 0:
#         axes[idx].set_ylabel('Score Variance')
#         axes[idx].legend(bbox_to_anchor=(0.5, -0.15), ncol=2)

# plt.tight_layout()
# plt.subplots_adjust(bottom=0.25)

# plt.savefig('document/basic_understanding/variance_comparison.png', 
#             bbox_inches='tight', dpi=300)
# plt.show()


from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Draw a horizontal bar chart comparing the "average" score variance stored in
# four variance CSV files (one per examiner configuration) and save it as a PDF.

plt.rcParams['font.family'] = 'Avenir'


def _average_variance(frame, source):
    """Return the 'Score Variance' value of the 'average' row of *frame*.

    Args:
        frame: DataFrame with 'Difficulty' and 'Score Variance' columns.
        source: Name of the file *frame* was read from; used only in the
            error message.

    Returns:
        The 'Score Variance' of the first row whose 'Difficulty' is 'average'.

    Raises:
        ValueError: If *frame* has no row whose 'Difficulty' is 'average'.
    """
    matches = frame.loc[frame['Difficulty'] == 'average', 'Score Variance']
    if matches.empty:
        raise ValueError(f"{source}: no row with Difficulty == 'average'")
    return matches.iloc[0]


df_original = pd.read_csv('document/basic_understanding/model_variance.csv')
df_gpt = pd.read_csv('document/basic_understanding/model_variance_gpt.csv')
df_gemini = pd.read_csv('document/basic_understanding/model_variance_gemini.csv')
df_claude = pd.read_csv('document/basic_understanding/model_variance_claude.csv')

# Bar label -> average score variance. Insertion order is the order in which
# the bars are drawn (first entry at y=0).
averages = {
    'Multi-Examiner': _average_variance(df_original, 'model_variance.csv'),
    'GPT-4o': _average_variance(df_gpt, 'model_variance_gpt.csv'),
    'Gemini-1.5-Pro': _average_variance(df_gemini, 'model_variance_gemini.csv'),
    'Claude-3.5-Sonnet': _average_variance(df_claude, 'model_variance_claude.csv'),
}

plt.figure(figsize=(10, 4))  # Wide, short figure
colors = ['#ffa69e', '#faf3dd', '#b8f2e6', '#aed9e0']  # One color per bar
bars = plt.barh(range(len(averages)), list(averages.values()), color=colors, alpha=0.8, height=0.7)

# Title and axis labels are currently disabled; the bars are labelled inline
# by the loop below.
# plt.title('Average Score Variance by Different Examiners', pad=20, fontsize=14)
# plt.xlabel('Score Variance', fontsize=18, fontweight='bold')
# plt.ylabel('Examiners', fontsize=12)
plt.yticks([])  # Remove y-axis ticks
plt.xticks(fontsize=16)  # Increase x-axis tick font size
# Start the x-axis at 0 and leave 10% headroom beyond the longest bar.
plt.xlim(0, max(averages.values()) * 1.1)
plt.grid(True, linestyle='--', alpha=0.3)

# Write each model name inside its bar, right-aligned just short of the bar's
# end (the 0.001 offset is in x-axis data units).
for bar, model in zip(bars, averages):
    plt.text(bar.get_width() - 0.001, bar.get_y() + bar.get_height() / 2, model,
             ha='right', va='center', fontsize=18, color='black', fontweight='bold')

plt.tight_layout()

# Create the output directory first so savefig does not fail when it is missing.
output_path = Path('figure/variance.pdf')
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path,
            bbox_inches='tight', dpi=500)
plt.show()
