import pandas as pd
import numpy as np

# Averages per-category reliability metrics into one row per model.
#
# Reads the per-(model, category) metrics CSV, drops rows with no solutions,
# keeps only models that have exactly one remaining row for every expected
# category, averages the metric columns per model and writes the result
# sorted by RE in descending order.

# Categories a model must cover (one row each) to be included in the output.
expected_categories = [
    'attribute_comparison', 'attribute_recognition', 'dynamic_temporal',
    'hallucination_detection', 'object_counting', 'object_localization',
    'object_presence', 'scene_understanding', 'spatial_relationship', 'visual_grounding'
]

# Metric columns that are averaged across categories for each model.
numeric_cols = ['total_questions', 'RE', 'VE_mean', 'miss_all', 'hit_any',
                'degenerate_rg', 'theta_map', 'r_map', 'g_map', 'A_adj_map', 'A_adj_min', 'A_adj_max']

# Column order of the output CSV.
column_order = ['model', 'RE', 'VE_mean', 'total_questions', 'miss_all', 'hit_any',
                'theta_map', 'r_map', 'g_map', 'A_adj_map', 'A_adj_min', 'A_adj_max', 'degenerate_rg']


def _find_complete_models(df_filtered, categories):
    """Return the sorted models having exactly one row per expected category.

    A plain row count is not enough: a model with one category duplicated and
    another one missing still has ``len(categories)`` rows.  A model therefore
    qualifies only when its row count AND its number of distinct expected
    categories both equal the number of expected categories.
    """
    n_expected = len(set(categories))
    rows_per_model = df_filtered.groupby('model')['category'].size()
    expected_per_model = (
        df_filtered[df_filtered['category'].isin(categories)]
        .groupby('model')['category']
        .nunique()
        # Models without any expected category vanish from the groupby above.
        .reindex(rows_per_model.index, fill_value=0)
    )
    is_complete = (rows_per_model == n_expected) & (expected_per_model == n_expected)
    return rows_per_model.index[is_complete].tolist()


def _average_metrics(df_complete):
    """Return per-model means (3 decimals) of the available metric columns."""
    present = [col for col in numeric_cols if col in df_complete.columns]
    skipped = [col for col in numeric_cols if col not in df_complete.columns]
    if skipped:
        print(f"Warning: metric columns not found in input, skipped: {skipped}")

    averages = df_complete.groupby('model')[present].mean().round(3).reset_index()

    # Reorder columns for better readability; only include columns that exist.
    averages = averages[[col for col in column_order if col in averages.columns]]

    # Sort by RE (RotationalEval) in descending order when it is available.
    if 'RE' in averages.columns:
        averages = averages.sort_values('RE', ascending=False)
    return averages


def main(input_file='reliable_metrics.csv', output_file='model_averaged_metrics.csv'):
    """Build the per-model averaged metrics CSV.

    Args:
        input_file: CSV with at least the columns ``model``, ``category`` and
            ``num_solutions`` plus any of the columns in ``numeric_cols``.
        output_file: Path of the CSV to write.

    Returns:
        The DataFrame that was written to ``output_file``.

    Raises:
        KeyError: If ``input_file`` lacks ``model``, ``category`` or
            ``num_solutions``.
    """
    print(f"Loading {input_file}...")
    df = pd.read_csv(input_file)

    missing = [col for col in ('model', 'category', 'num_solutions') if col not in df.columns]
    if missing:
        raise KeyError(f"{input_file} is missing required columns: {missing}")

    print(f"Total rows: {len(df)}")
    print(f"Unique models: {df['model'].nunique()}")
    print(f"Unique categories: {df['category'].nunique()}")
    print(f"Categories: {sorted(df['category'].unique())}")

    print(f"\nExpected {len(expected_categories)} categories: {expected_categories}")

    # Filter out rows with num_solutions = 0
    df_filtered = df[df['num_solutions'] > 0].copy()
    print(f"\nAfter filtering num_solutions > 0: {len(df_filtered)} rows")

    complete_models = _find_complete_models(df_filtered, expected_categories)

    print(f"\nModels with complete {len(expected_categories)} categories: {len(complete_models)}")
    print("Complete models:")
    for model in sorted(complete_models):
        print(f"  {model}")

    # Filter for complete models only
    df_complete = df_filtered[df_filtered['model'].isin(complete_models)].copy()

    print(f"\nCalculating averages for {len(complete_models)} models...")
    model_averages = _average_metrics(df_complete)

    model_averages.to_csv(output_file, index=False)

    print(f"\nResults saved to: {output_file}")
    print(f"Total models included: {len(model_averages)}")
    return model_averages


if __name__ == "__main__":
    main()
