import os
import json


def read_evals(eval_files, number):
    """Aggregate evaluation records from JSON files and print a summary line.

    Args:
        eval_files: Iterable of file paths. Each file's parsed JSON content is
            appended to one combined list with ``+=``; every resulting item
            is indexed with the ``'success'`` key.
        number: Label interpolated after ``"Prompt "`` in the printed summary.

    Returns:
        The fraction of records whose ``'success'`` value is truthy, or
        ``0.0`` when there are no records at all.
    """
    data = []
    for file in eval_files:
        # JSON text is UTF-8 by specification; do not rely on the locale default.
        with open(file, 'r', encoding='utf-8') as f:
            data += json.load(f)

    total = len(data)
    num_succ = sum(1 for d in data if d['success'])
    num_error = total - num_succ
    # Guard the division: an empty file list (or files holding empty arrays)
    # would otherwise raise ZeroDivisionError.
    pass_rate = num_succ / total if total else 0.0
    print(f"Prompt {number}: {num_succ} successful, {num_error} errors, {total} total, {pass_rate*100:.2f}% pass rate")
    return pass_rate


def read_defs():
    """Print the pass-rate summary for the ``eval_tkt_def_results_*`` files.

    Lists the current working directory, keeps the entries whose names start
    with ``eval_tkt_def_results_`` and passes them to ``read_evals`` under the
    label ``'def'``. Returns ``None``.
    """
    prefix = "eval_tkt_def_results_"
    def_files = [name for name in os.listdir('./') if name.startswith(prefix)]
    if not def_files:
        # Nothing to aggregate; report it instead of failing on an empty data set.
        print(f"No files starting with '{prefix}' found.")
        return
    read_evals(def_files, 'def')


def get_env_files(directory='./', num_prompts=20):
    """Print per-prompt pass rates and top-k averages over the prompts.

    For each prompt index in ``range(num_prompts)`` the files in ``directory``
    whose names start with ``eval_tkctxt_1_{prompt}_results_`` or
    ``eval_tkctx{prompt}_results_`` are passed to ``read_evals``. The returned
    pass rates are sorted in descending order and summarised as top-1, top-5,
    top-10 and overall averages.

    Args:
        directory: Directory to scan for result files. Defaults to the
            current working directory.
        num_prompts: Number of prompt indices to look for. Defaults to 20.

    Returns:
        None. All results are printed.
    """
    files = os.listdir(directory)
    pass_rates = []
    for prompt in range(num_prompts):
        prefixes = (
            f"eval_tkctxt_1_{prompt}_results_",
            f"eval_tkctx{prompt}_results_",
        )
        prompt_files = [
            os.path.join(directory, name)
            for name in files
            if name.startswith(prefixes)
        ]
        if not prompt_files:
            # No results for this prompt index: skip it rather than
            # aggregating an empty data set.
            continue
        pass_rates.append(read_evals(prompt_files, prompt))

    if not pass_rates:
        print("No evaluation result files found.")
        return

    pass_rates.sort(reverse=True)

    # pass rate @ top 1
    print(f"Total pass rate @top 1: {pass_rates[0]*100:.2f}%")

    # pass rate @ top 5 and @ top 10; average over the rates actually
    # available so fewer than k evaluated prompts do not deflate the mean.
    for k in (5, 10):
        top = pass_rates[:k]
        print(f"Total pass rate @top {k}: {sum(top)/len(top)*100:.2f}%")

    # pass rate over every evaluated prompt
    print(f"Total pass rate: {sum(pass_rates)/len(pass_rates)*100:.2f}%")


# Script entry: these calls execute at module top level, so they run whenever
# the file is executed or imported. The per-prompt report is printed first,
# followed by the 'def' report.
get_env_files()
read_defs()


"""

src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_economics_and_market_research.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_education_and_e-learning.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_entertainment_and_media.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_environmental_science.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_finance_and_banking.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_geology_and_geophysics.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_government_and_public_administration.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_graphic_design_and_animation.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_history_and_archival_science.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_hospitality_and_tourism.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_human_resources_and_recruitment.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_journalism_and_digital_media.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_law_and_legal_analytics.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_logistics_and_supply_chain_management.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_machine_learning.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_manufacturing_and_automation.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_meteorology_and_climate_science.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_music_production_and_sound_engineering.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_pharmaceuticals_and_drug_development.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_psychology_and_neuroscience.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_real_estate.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_social_media_and_digital_marketing.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_sports_science_and_analytics.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results_video_game_development.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctxt_1_0_results.json
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctx1_results_construction_and_architecture.json 
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctx1_results_cybersecurity.json
src/eval/controlled_docker_env/evals/toolkit_eval_context/eval_tkctx1_results_data_science.json

"""