import math

from measure_coverage_patch import main as measure_coverage_patch, save_div
from measure_coverage_patch_multi import main as measure_coverage_patch_multi

# Dataset that every evaluated run is measured against.
DATASET = "./datasets/swt_bench_lite_aug1_bm25_diff_27k_cl100k"

# Each entry is (evaluation directory, row label[, LIBRO inference file]).
# The third element is only read for directories containing "=07".
APPROACHES = [
    ("gpt-4-1106-preview__swt_bench_lite_aug1_bm25_27k_cl100k__seed=0,temperature=0__test/mode_custom", "GPT4 \\zsp"),
    # ("gpt-4-1106-preview__swt_bench_lite_aug1_bm25_27k_cl100k__seed=1,temperature=07__test/mode_custom", "GPT4 \\libro", "gpt-4-1106-preview__libro_gpt-4-1106-preview__swt_bench_lite_aug1__test__test.jsonl"),
    ("swe-agent-demo3__swt_bench_lite__test/mode_vanilla", "GPT4 \\swea"),
    ("claude-3-haiku-20240307__swt_bench_lite_aug1_bm25_27k_cl100k__seed=0,temperature=0__test/mode_custom", "Haiku \\zsp"),
    # ("claude-3-haiku-20240307__swt_bench_lite_aug1_bm25_27k_cl100k__seed=1,temperature=07__test/mode_custom", "Haiku\\libro",
    #  "claude-3-haiku-20240307__libro-claude__temperature=0__test.jsonl"),
    ("swe-agent-demo3-haiku__swt_bench_lite__test/mode_vanilla", "Haiku \\swea"),
    ("Mixtral-8x22B-Instruct-v0.1__swt_bench_lite_aug1_bm25_27k_cl100k__temperature=0__test/mode_custom", "Mixtral \\zsp"),
    # ("Mixtral-8x22B-Instruct-v0.1__swt_bench_lite_aug1_bm25_27k_cl100k__temperature=07001__test/mode_custom", "Mixtral\\libro",
    #  "Mixtral-8x22B-Instruct-v0.1__libro_mixtral__swt_bench_lite_aug1__test__temperature=0,max_tokens=100__test.jsonl"),
    ("swe-agent-demo3-mixtral__swt_bench_lite__test/mode_vanilla", "Mixtral \\swea"),
]


def _format_cell(value):
    """Render one table cell: a ratio as a percentage with one decimal.

    String values (the "NA" fallback passed to save_div below) are emitted
    unchanged; multiplying them by 100 and applying ``:.1f`` would raise.
    """
    if isinstance(value, str):
        return value
    return f"{value*100:.1f}"


# Print one " & "-separated row per approach, terminated by " \\ " (this
# looks like a LaTeX table row). Columns, all relative to the total number
# of results: applied, initially failing/erroring, good cases, and
# initially neither failing nor erroring.
for approach in APPROACHES:
    eval_dir = approach[0]
    name = approach[1]

    if "=07" in eval_dir:
        # Multi-seed runs need the matching inference output as well.
        libro_dir = approach[2]
        ress = measure_coverage_patch_multi(
            f"evaluation_output/{eval_dir}",
            dataset=DATASET,
            split="test",
            seeds="1,2,3,4,5",
            setting="IDEAL" if "paf" in name else "LIBRO",
            libro_inference_results=f"inference_output/{libro_dir}",
        )
    else:
        ress = measure_coverage_patch(
            f"evaluation_output/{eval_dir}",
            dataset=DATASET,
            split="test",
            fuzzy=False,
        )

    no_all_cases = len(ress)
    # A result counts as applied when its "message" entry is missing or None.
    applied_cases = [res for res in ress if res.get("message", None) is None]
    no_applied_cases = len(applied_cases)

    ftx = sum(res["fails_initially"] or res["error_initially"] for res in applied_cases)
    good_cases = sum(res["good_case"] for res in applied_cases)
    ptp = sum(not (res["fails_initially"] or res["error_initially"]) for res in applied_cases)

    # Every column goes through save_div so that an empty result list does
    # not raise ZeroDivisionError.
    # NOTE(review): assumes save_div returns its third argument when the
    # denominator is zero — confirm against measure_coverage_patch.
    columns = [
        save_div(no_applied_cases, no_all_cases, "NA"),
        save_div(ftx, no_all_cases, "NA"),
        save_div(good_cases, no_all_cases, "NA"),
        save_div(ptp, no_all_cases, "NA"),
    ]

    print(
        name + " & " + " & ".join(map(_format_cell, columns)),
        end=" \\\\ \n"
    )
