"""
Generating two datasets where the golden / swe-agent patch is part of issue for SWE-agent
"""
import json
import os.path
import re
import subprocess
from typing import Optional
import os.path
import re
import subprocess
from datasets import load_dataset, Dataset, DatasetDict, load_from_disk

from unidiff import PatchSet
# Present the model with the patch generated by SWE-agent
# and ask the model to generate test cases for that patch.

# Load the SWE-bench Lite dataset; its "test" and "dev" splits are read below.
dataset = load_dataset("princeton-nlp/SWE-bench_Lite")
# Directory of SWE-agent trajectory files, looked up as "<instance_id>.traj"
# by load_swe_agent_patch. The path is relative to the working directory.
swe_bench_result_dir = "results/experiments-swe-bench/20240402_sweagent_gpt4/trajs"

def load_swe_agent_patch(instance_id: str, result_dir: Optional[str] = None) -> Optional[str]:
    """Return the patch SWE-agent submitted for ``instance_id``, if any.

    The trajectory is read from ``<result_dir>/<instance_id>.traj``, a JSON
    file whose ``"info"`` object may carry a ``"submission"`` entry.

    Args:
        instance_id: Instance identifier; used as the trajectory file stem.
        result_dir: Directory containing the ``.traj`` files. Defaults to the
            module-level ``swe_bench_result_dir`` when omitted.

    Returns:
        The value stored under ``info["submission"]``, or ``None`` when the
        trajectory file does not exist or holds no submission.

    Raises:
        KeyError: If the trajectory JSON has no ``"info"`` entry.
        json.JSONDecodeError: If the trajectory file is not valid JSON.
    """
    if result_dir is None:
        result_dir = swe_bench_result_dir
    traj_file = os.path.join(result_dir, f"{instance_id}.traj")
    try:
        # Explicit UTF-8 so decoding does not depend on the platform's
        # default locale encoding.
        with open(traj_file, encoding="utf-8") as f:
            traj = json.load(f)
    except FileNotFoundError:
        # No trajectory was recorded for this instance.
        return None
    return traj["info"].get("submission")

# Dataset 1: for every example in the "test" and "dev" splits, append the
# example's own "patch" field (the golden patch) to its problem statement,
# then save the resulting DatasetDict to disk.
splits = {}
for split in ["test", "dev"]:
    # `**example` copies every original column; only "problem_statement" is
    # overridden, with the issue text followed by the patch under a header.
    new_examples = [
        {
            **example,
            "problem_statement": example["problem_statement"] + "\n# PROPOSED PATCH\n" + example["patch"],
        }
        for example in dataset[split]
    ]
    splits[split] = Dataset.from_list(new_examples)
ds = DatasetDict(splits)
ds.save_to_disk("./datasets/swe_bench_with_golden_patch")


# Dataset 2: for every example in the "test" split, append the patch that
# SWE-agent submitted for that instance to its problem statement, then save
# the resulting DatasetDict to disk.
splits = {}
for split in ["test"]:
    new_examples = []
    for example in dataset[split]:
        patch = load_swe_agent_patch(example["instance_id"])
        if patch is None:
            # No trajectory file or no recorded submission for this instance:
            # leave it out of the dataset rather than emit it without a patch.
            continue

        # `**example` copies every original column; only "problem_statement"
        # is overridden, with the issue text followed by the agent's patch.
        new_examples.append(
            {
                **example,
                "problem_statement": example["problem_statement"] + "\n# PROPOSED PATCH\n" + patch,
            }
        )
    splits[split] = Dataset.from_list(new_examples)
ds = DatasetDict(splits)
ds.save_to_disk("./datasets/swe_bench_with_agent_patch")
