import json
import os.path
import re
import subprocess
from typing import Optional

from datasets import load_dataset, Dataset, DatasetDict, load_from_disk

import secret

# Load the pre-built dataset from a local directory. The export loop below
# iterates this object to get split names and indexes it by split, so it is
# presumably a DatasetDict-like mapping (confirm against how it was saved).
dataset = load_from_disk("./datasets/swt_bench_lite_aug1_bm25_27k_cl100k")
# Alternative source, currently disabled: load by dataset name via load_dataset.
# dataset = load_dataset("princeton-nlp/SWE-bench_Lite_oracle")
# Directory that receives one JSONL file of prediction records per split.
output_dir = "./inference_output"

# Export the reference patch of every example as a "golden" prediction record,
# writing one JSONL file (one JSON object per line) per dataset split.
splits = {}  # not populated in this section; kept since it is a module-level name

# The output directory may not exist yet (e.g. on a fresh checkout); without
# this, the open() call below fails with FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)

for split in dataset:
    # Encoding is pinned explicitly instead of relying on the locale default.
    with open(
        f"{output_dir}/swt_lite_golden_{split}.jsonl", "w", encoding="utf-8"
    ) as f:
        for example in dataset[split]:
            # Record layout:
            # ['instance_id', 'model_name_or_path', 'text_inputs', 'full_output', 'model_patch']
            # The example's own patch is used both as the raw output and as
            # the extracted patch, since no model is actually being run here.
            prediction = {
                "instance_id": example["instance_id"],
                "model_name_or_path": "golden",
                "text_inputs": example["text"],
                "full_output": example["patch"],
                "model_patch": example["patch"],
            }
            f.write(json.dumps(prediction) + "\n")


