import json
import glob
from collections import defaultdict

def get_results(paths):
    """Collect per-prompt scores and conversations from several result files.

    Each file is parsed as JSON and must provide
    ``data['metadata']['example_level_metadata']`` (a sequence of entries
    carrying ``'prompt_id'`` and ``'score'`` keys) and ``data['convos']``,
    which is indexed in parallel with that sequence.

    Args:
        paths: Iterable of paths to JSON result files.

    Returns:
        A ``(results, convos, ids)`` tuple of ``defaultdict(list)`` objects:

        * ``results`` maps prompt ID -> scores, one per occurrence, in the
          order the files were read.
        * ``convos`` maps prompt ID -> conversations, in that same order.
        * ``ids`` maps an example's position within a file -> the prompt IDs
          found at that position, one per file.

    Raises:
        AssertionError: If some position holds different prompt IDs in
            different files (i.e. the files are not aligned).
    """
    results = defaultdict(list)
    convos = defaultdict(list)
    ids = defaultdict(list)
    for path in paths:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # The file is fully parsed at this point, so it can already be closed.
        for i, x in enumerate(data['metadata']['example_level_metadata']):
            results[x['prompt_id']].append(x['score'])
            convos[x['prompt_id']].append(data['convos'][i])
            ids[i].append(x['prompt_id'])

    for position, prompt_ids in ids.items():
        # Raised explicitly (rather than via `assert`) so the check still runs
        # under `python -O`, while keeping the same exception type.
        if len(set(prompt_ids)) != 1:
            raise AssertionError(
                f"Example index {position} maps to multiple prompt IDs: "
                f"{prompt_ids}"
            )

    return results, convos, ids

    

# glob.glob() returns matches in arbitrary (filesystem-dependent) order; sort
# them so every per-prompt list in `results` / `convos` follows the run suffix
# ([0-2]) reproducibly across machines and invocations.
paths = sorted(
    glob.glob("outputs/o4-mini/healthbench_hard_o4-mini_v1_300_[0-2]_allresults.json")
)
results, convos, ids = get_results(paths)

def idx(idx):
    """Print what was loaded for the example at position ``idx``.

    Shows the position itself, the prompt IDs recorded for it in the
    module-level ``ids`` mapping, and the ``convos`` and ``results`` entries
    stored under the first of those prompt IDs, followed by a blank line.
    """
    print(f"idx: {idx}")

    prompt_ids = ids[idx]
    print(f"id: {prompt_ids}")

    # The first recorded prompt ID is the key into the other two mappings.
    key = prompt_ids[0]
    print(f"convos: {convos[key]}")
    print(f"results: {results[key]}")
    print()

# Drop into the interactive debugger once everything above has been loaded,
# so the data and the idx() helper can be explored by hand.
import pdb

pdb.set_trace()

