import itertools
from collections import defaultdict

from datasets import load_from_disk, load_dataset

from measure_coverage_patch import main as measure_coverage_patch, save_div, BLACKLIST
from measure_coverage_patch_multi import main as measure_coverage_patch_multi

# Source of the benchmark instances: the "test" split of the SWE-bench dataset,
# fetched by name via `load_dataset`.
# Alternative kept for reference (a dataset stored on local disk):
#   load_from_disk("./datasets/swt_bench_lite_aug1_bm25_diff_27k_cl100k")["test"]
dataset = load_dataset("princeton-nlp/SWE-bench")["test"]

# Build the set of blacklisted instance ids, one id per line of the file.
# NOTE: this rebinding replaces the BLACKLIST name imported from
# measure_coverage_patch above; only the ids listed in the file are used here.
# The `with` block guarantees the file handle is closed once reading finishes
# (the bare `open(...)` in a for-loop left it open until garbage collection).
with open("blacklisted_cases_full.txt") as blacklist_file:
    # Strip surrounding whitespace (including the trailing newline) per line.
    BLACKLIST = {line.strip() for line in blacklist_file}

# Collect the ids of all dataset records that are not blacklisted, preserving
# the dataset's own ordering.
instances = []
for record in dataset:
    instance_id = record["instance_id"]
    if instance_id in BLACKLIST:
        continue  # skip blacklisted cases
    instances.append(instance_id)

# Emit one surviving instance id per line on stdout.
for instance in instances:
    print(instance)