import pandas as pd
import numpy as np
import os
import json
from config.config import BASE_DIR
def run_coverage_accuracy_best_N(base_path):
    """Pick the best N for every (model, emb_model) pair and save it as CSV.

    Reads ``results/integrals/integrals_by_model_and_embedding.csv`` under
    ``base_path``, discards rows whose ``normalized_adjusted`` or ``N`` is
    missing, and keeps, for each (``model``, ``emb_model``) group, the row
    with the highest ``normalized_adjusted``. The selection is written to
    ``results/integrals/best_N_and_integrals.csv`` under ``base_path``.

    Args:
        base_path: Directory that contains the ``results/integrals`` folder.

    Returns:
        pandas.DataFrame: One row per (``model``, ``emb_model``) group with
        the columns ``model``, ``emb_model``, ``best_N`` and
        ``normalized_adjusted`` -- the same table that is written to disk.
    """
    # Load original integrals table
    integrals_path = os.path.join(base_path, "results/integrals/integrals_by_model_and_embedding.csv")
    df = pd.read_csv(integrals_path)

    # Filter out rows that cannot be ranked. The integer cast goes through
    # assign(), which builds a new frame instead of writing into the result
    # of dropna(); that in-place write can raise SettingWithCopyWarning.
    df = df.dropna(subset=['normalized_adjusted', 'N'])
    df = df.assign(N=df['N'].astype(int))

    # Row label of the highest normalized_adjusted within each group
    # (idxmax keeps the first occurrence when several rows tie).
    best_rows = df.groupby(['model', 'emb_model'])['normalized_adjusted'].idxmax()
    best_per_model = (
        df.loc[best_rows, ['model', 'emb_model', 'N', 'normalized_adjusted']]
        .reset_index(drop=True)
        .rename(columns={'N': 'best_N'})
    )

    # Save to CSV
    output_path = os.path.join(base_path, "results/integrals/best_N_and_integrals.csv")
    best_per_model.to_csv(output_path, index=False)
    print(f"Saved best N and adjusted integral data to {output_path}")

    # Hand the table back so callers do not have to re-read the file.
    return best_per_model


if __name__ == "__main__":
    # Script entry point: run the selection with BASE_DIR as the base path.
    run_coverage_accuracy_best_N(BASE_DIR)