# percentage_with_top_prob.py deepseek_math

import json
import numpy as np
import pandas as pd
import sys
from collections import defaultdict
from utils import *

import numpy as np

def _collect_top_probs(items):
    """Return every usable ``top_prob`` value found in *items* as a float array."""
    values = []
    for item in items:
        # ``or ()`` also covers a key that is present but set to None.
        for token in item.get('token_entropy_info') or ():
            if 'top_prob' in token and token['top_prob'] is not None:
                values.append(token['top_prob'])
    return np.asarray(values, dtype=float)


def summarize_top_prob_all_tokens(data_by_model, thresholds=range(50, 100, 5)):
    """Summarize the ``top_prob`` values of all tokens, grouped by model.

    Args:
        data_by_model: Mapping of model name to an iterable of items. Each
            item is read with ``item.get('token_entropy_info')``; every entry
            of that sequence that carries a non-None ``'top_prob'`` value
            contributes one probability.
        thresholds: Iterable of percentages. For each value ``t`` the summary
            gets a key ``"≥t"`` holding the fraction of collected
            probabilities that are ``>= t / 100``.

    Returns:
        dict mapping each model name to a summary dict with the keys
        ``'mean'``, ``'std'`` and one ``"≥t"`` key per threshold. When a model
        has no usable probabilities, every statistic is NaN (set explicitly,
        so NumPy does not emit "mean of empty slice" warnings).
    """
    # Materialize once: a one-shot iterable (e.g. a generator) would otherwise
    # be exhausted after the first model.
    thresholds = list(thresholds)

    stats = {}
    for model, items in data_by_model.items():
        top_probs = _collect_top_probs(items)

        if top_probs.size == 0:
            # Nothing to aggregate; report NaN everywhere without warnings.
            summary = {'mean': np.nan, 'std': np.nan}
            for thresh in thresholds:
                summary[f"≥{thresh}"] = np.nan
        else:
            summary = {
                'mean': np.mean(top_probs),
                'std': np.std(top_probs),
            }
            for thresh in thresholds:
                # Mean of a boolean mask == fraction of tokens at/above the cut.
                summary[f"≥{thresh}"] = np.mean(top_probs >= (thresh / 100))

        stats[model] = summary
    return stats
       

if __name__ == "__main__":
    # CLI: <generator_name> is required; [response_type] is optional and
    # defaults to 'all'.
    if len(sys.argv) < 2:
        print("Usage: python percentage_with_top_prob.py <generator_name> [response_type]")
        sys.exit(1)

    generator = sys.argv[1]
    response_type = sys.argv[2] if len(sys.argv) > 2 else 'all'

    # get_file_map / extract_json are provided by the star import from utils.
    file_map = get_file_map(generator)
    print(f"✅ Generator: {generator}")

    data_by_model = extract_json(file_map, response_type)

    # range(75, 90, 5) -> the 75, 80 and 85 percent cut-offs.
    thresholds = range(75, 90, 5)
    results = summarize_top_prob_all_tokens(data_by_model, thresholds=thresholds)

    index_keys = [f"≥{t}" for t in thresholds]

    df = pd.DataFrame(results).T  # models → row
    df = df[index_keys]           # keep only the threshold columns (drops mean/std)
    df = df.T                     # thresholds → row, models → column

    def _format_cell(value):
        """Render floats with four decimals; pass any other value through."""
        return f"{value:.4f}" if isinstance(value, float) else value

    # DataFrame.applymap is deprecated in favour of DataFrame.map on recent
    # pandas; look the method up on the class so older versions still work.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap

    print("\n📊 Top Token Probability Summary (Transposed)")
    print(elementwise(df, _format_cell))
