import os
import json
import pandas as pd
import random
import copy

# Seed the module-level RNG so the random.sample() draws made in main() are
# reproducible from run to run.
random.seed(0)
def main(
    *,
    lmd_path="FoR-LMD.json",
    forest_path="FoREST-C-SPLIT.json",
    output_path="LLM_editing_benchmark_new.json",
    sample_size=500,
):
    """Build the LLM-editing benchmark file from two source datasets.

    Draws ``sample_size`` random samples from each input file, converts them
    to a common record layout and writes ``{"data": [...]}`` to
    ``output_path`` (LMD+FoR records first, FoREST records second).

    Args:
        lmd_path: JSON file with a top-level ``"data"`` list of LMD+FoR
            samples. Samples whose ``"reference_obj1"`` or
            ``"reference_obj2"`` equals ``"ambiguous"`` are excluded before
            sampling.
        forest_path: JSON file with a top-level ``"data"`` list of FoREST
            samples.
        output_path: Destination of the generated benchmark JSON.
        sample_size: Number of samples drawn from each dataset.

    Returns:
        None. The result is written to ``output_path``.

    Raises:
        ValueError: From ``random.sample`` if a dataset holds fewer than
            ``sample_size`` eligible samples.
        KeyError: If an input file or sample lacks an expected field.
        OSError: If an input file cannot be read or the output cannot be
            written.
    """
    # LMD+FoR prompts (no orientation in the prompt). Samples are only read,
    # never mutated, so they are filtered without copying them.
    lmd_pool = [
        sample
        for sample in _load_samples(lmd_path)
        if sample["reference_obj1"] != "ambiguous"
        and sample["reference_obj2"] != "ambiguous"
    ]
    benchmark = [
        _lmd_entry(sample)
        for sample in random.sample(lmd_pool, k=sample_size)
    ]

    # FoREST "clear" prompts. This draw must stay after the LMD one so a
    # seeded RNG keeps producing the same selection for both datasets.
    forest_pool = _load_samples(forest_path)
    benchmark.extend(
        _forest_entry(sample)
        for sample in random.sample(forest_pool, k=sample_size)
    )

    # The context manager guarantees the output is flushed and closed; the
    # handle used to be created inline and was never closed explicitly.
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump({"data": benchmark}, file, indent=1)


def _load_samples(path):
    """Return the ``"data"`` list stored in the JSON file at ``path``."""
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)["data"]


def _lmd_entry(sample):
    """Convert one LMD+FoR sample to the benchmark record layout."""
    return {
        "prompt": sample["prompt"],
        "obj1": sample["obj1"],
        "obj2": sample["obj2"],
        "obj2_dir": None,
        "rel1": sample["rel_obj1"],
        "rel2": sample["rel_obj2"],
        "ref_obj1": sample["reference_obj1"],
        "ref_obj2": sample["reference_obj2"],
        "currentObj": None,
        "llm_parsed_prompt": None,
        "source": "LMD_FoR",
    }


def _forest_entry(sample):
    """Convert one FoREST sample to the benchmark record layout."""
    return {
        "prompt": sample["context"],
        "obj1": sample["obj1"],
        "obj2": sample["obj2"],
        "obj2_dir": sample["obj2_dir"],
        "rel1": sample["relation"],
        "rel2": None,
        # Only the first entry of the sample's "label" list is kept.
        "ref_obj1": sample["label"][0],
        "ref_obj2": None,
        "currentObj": None,
        "llm_parsed_prompt": None,
        "source": "FoREST",
    }


# Generate the benchmark file only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()