import os
import json


def read_evals(eval_files, number):
    """Aggregate evaluation records from JSON files and report the pass rate.

    Every file is parsed as JSON and its contents are appended to one
    combined list, so each file is expected to hold a JSON array of records.
    A record counts as successful when its ``'success'`` entry is truthy;
    all remaining records are counted as errors.

    Args:
        eval_files: Iterable of paths to the JSON result files to combine.
        number: Label printed after "Prompt" in the summary line.

    Returns:
        The fraction of successful records as a float in [0, 1], or 0.0
        when there are no records at all (no files, or only empty files).
    """
    data = []
    for path in eval_files:
        with open(path, 'r') as f:
            data += json.load(f)

    total = len(data)
    num_succ = sum(1 for record in data if record['success'])
    num_error = total - num_succ
    # Guard the division: a prompt with no result files must not crash the
    # whole report with ZeroDivisionError.
    pass_rate = num_succ / total if total else 0.0
    print(f"Prompt {number}: {num_succ} successful, {num_error} errors, {total} total, {pass_rate*100:.2f}% pass rate")
    return pass_rate


def read_defs():
    """Report the pass rate over the "def" result files in the current directory.

    Collects every entry of the current working directory whose name starts
    with ``eval_tkt_def_results_`` and passes them to ``read_evals`` under
    the label ``'def'``, which prints the summary line.

    Returns:
        The pass rate returned by ``read_evals`` for those files.
    """
    prefix = "eval_tkt_def_results_"
    def_files = [name for name in os.listdir('./') if name.startswith(prefix)]
    return read_evals(def_files, 'def')


def get_env_files(num_prompts=20):
    """Print per-prompt pass rates and top-k averages for the training evals.

    For each prompt index in ``range(num_prompts)``, the files in the current
    working directory whose names start with
    ``eval_terminal_train_<index>_results_`` are passed to ``read_evals``,
    which prints that prompt's summary and returns its pass rate. The rates
    are then sorted in descending order and the best single rate, the mean of
    the best 5, the mean of the best 10 and the mean of all rates are printed.

    Args:
        num_prompts: Number of prompt indices to scan, starting at 0.
            Defaults to 20.

    Raises:
        ValueError: If ``num_prompts`` is less than 1.
    """
    if num_prompts < 1:
        raise ValueError("num_prompts must be at least 1")

    files = os.listdir('./')
    pass_rates = []
    for prompt in range(num_prompts):
        prefix = f"eval_terminal_train_{prompt}_results_"
        prompt_files = [name for name in files if name.startswith(prefix)]
        pass_rates.append(read_evals(prompt_files, prompt))

    # Best prompts first, so a leading slice is the top-k.
    pass_rates.sort(reverse=True)

    def mean_top(k):
        # Average over the rates actually taken, so k larger than the number
        # of prompts does not deflate the result.
        top = pass_rates[:k]
        return sum(top) / len(top)

    # pass rate @ top 1
    print(f"Total pass rate @top 1: {pass_rates[0]*100:.2f}%")

    # pass rate @ top 5
    print(f"Total pass rate @top 5: {mean_top(5)*100:.2f}%")

    # pass rate @ top 10
    print(f"Total pass rate @top 10: {mean_top(10)*100:.2f}%")

    # pass rate over every scanned prompt
    print(f"Total pass rate: {mean_top(num_prompts)*100:.2f}%")


# Script entry: runs whenever this module is executed or imported, printing
# the per-prompt and top-k pass-rate summary for the current directory.
get_env_files()
# Alternative report over the "eval_tkt_def_results_" files; uncomment to run.
# read_defs()
