import math
from collections import defaultdict

from datasets import load_from_disk

from measure_coverage_patch import main as measure_coverage_patch, save_div, BLACKLIST
from measure_coverage_patch_multi import main as measure_coverage_patch_multi

# Path of the on-disk dataset whose "test" split lists the instances to report on.
dataset = "./datasets/swt_bench_lite_aug1_bm25_diff_27k_cl100k"

# Number of non-blacklisted test instances per repo key; this is the
# denominator of the per-repo percentages printed for each approach.
num_instances_per_repo = defaultdict(int)
for example in load_from_disk(dataset)["test"]:
    instance_id = example["instance_id"]
    if instance_id in BLACKLIST:
        continue
    # The repo key is whatever precedes the first underscore of the instance id.
    num_instances_per_repo[instance_id.split("_")[0]] += 1

# Sorted unique repo keys; this fixes the column order of the CSV output.
repos = sorted(num_instances_per_repo)

# CSV header: an empty first cell (the approach-name column) followed by the
# repo keys. Concatenated explicitly so print() does not insert its default
# separator (a space) after the leading comma.
print("," + ",".join(repos))

# Each entry is (evaluation output directory, display name) with an optional
# third element naming the LIBRO inference results file. Entries that carry
# the third element are evaluated with the multi-seed coverage measurement.
for eval_dir, name, *libro_files in [
    # ("gpt-4-1106-preview__swt_bench_lite_aug1_bm25_diff_27k_cl100k__seed=0,temperature=0__test/mode_vanillafuzzy", "ZeroShot"),
    ("gpt-4-1106-preview__swt_bench_lite_aug1_bm25_27k_cl100k__seed=0,temperature=0__test/mode_custom", "ZeroShotPlus"),
    ("gpt-4-1106-preview__swt_bench_lite_aug1_bm25_27k_cl100k__seed=1,temperature=07__test/mode_custom", "LIBRO", "gpt-4-1106-preview__libro_gpt-4-1106-preview__swt_bench_lite_aug1__test__test.jsonl"),
    ("acr_swt_bench_lite/mode_vanilla", "AutoCodeRover"),
    ("swe-agent-demo3__swt_bench_lite__test/mode_vanilla", "SWE-Agent"),
]:
    if libro_files:
        # Multi-seed evaluation; needs the inference results file given as
        # the third tuple element.
        ress = measure_coverage_patch_multi(
            f"evaluation_output/{eval_dir}",
            dataset=dataset,
            split="test",
            seeds="1,2,3,4,5",
            setting="IDEAL" if "ideal" in name else "LIBRO",
            libro_inference_results=f"inference_output/{libro_files[0]}",
        )
    else:
        ress = measure_coverage_patch(
            f"evaluation_output/{eval_dir}",
            dataset=dataset,
            split="test",
            fuzzy=False,
        )

    # Group the per-instance results by repo key, derived from the instance id
    # the same way as for the header (text before the first underscore).
    by_repo = defaultdict(list)
    for res in ress:
        by_repo[res["instance_id"].split("_")[0]].append(res)

    # One percentage per header column: the sum of "good_case" over the repo's
    # results (missing key counts as 0) relative to the number of instances
    # counted for that repo. Repos without any result yield 0.
    percentages = [
        100
        * sum(res.get("good_case", 0) for res in by_repo.get(repo, []))
        / num_instances_per_repo[repo]
        for repo in repos
    ]

    # Emit the row in one piece so it has exactly as many cells as the header
    # (no dangling trailing comma).
    print(",".join([name, *map(str, percentages)]))



