import os
import zipfile
from tqdm import tqdm
import time
import re
from typing import List, Tuple
import json
import subprocess
from pathlib import Path
import sys
from time import sleep
import shutil
import multiprocessing
from concurrent.futures import ThreadPoolExecutor, as_completed


def load_json(in_file):
    """Read the UTF-8 encoded JSON document at *in_file* and return the parsed value."""
    with open(in_file, encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed


def save_json(data, out_file):
    """Serialize *data* as indented JSON into *out_file*.

    Missing parent directories are created first. The file is written as
    UTF-8 with a two-space indent, and non-ASCII characters are emitted
    verbatim rather than as ``\\uXXXX`` escapes.
    """
    target = Path(out_file)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        json.dump(data, handle, ensure_ascii=False, indent=2)


def load_jsonl(in_file):
    """Read a UTF-8 JSON Lines file and return its records as a list.

    Each non-blank line is parsed as one JSON value. Blank or
    whitespace-only lines (for example a trailing empty line left by an
    editor) are skipped instead of raising ``json.JSONDecodeError``.

    Args:
        in_file: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one parsed value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(in_file, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


def save_jsonl(datas, out_file, mode="w"):
    """Write every item of *datas* to *out_file* as one JSON value per line.

    Missing parent directories are created first. *mode* is passed straight
    to the file open call, so ``"a"`` appends to an existing file while the
    default ``"w"`` overwrites it. Non-ASCII characters are written verbatim.
    """
    target = Path(out_file)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open(mode, encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in datas
        )


def fork(orig_file, forked_file, keys):
    """Write a slimmed-down copy of a JSON dataset that keeps only *keys*.

    The JSON document at *orig_file* is loaded and iterated record by record
    (with a tqdm progress bar). For each record a new dict holding just the
    requested *keys* is built, and the resulting list is saved to
    *forked_file*. Each record is indexed with ``record[key]``, so a record
    missing one of the keys raises (``KeyError`` when records are dicts).
    """
    records = load_json(orig_file)
    projected = [
        {key: record[key] for key in keys}
        for record in tqdm(records)
    ]
    save_json(projected, forked_file)


def merge(forked_file, orig_file, new_file):
    """Overlay forked records onto the original records and save the result.

    Records from *orig_file* and *forked_file* are paired by position. For
    each pair, every key/value of the forked record is copied into the
    original record, overwriting existing keys. The merged records are
    written to *new_file*.

    If the two files hold a different number of records, only the leading
    pairs are merged and the unpaired trailing records are left out of the
    output. A ``UserWarning`` is emitted in that case so the truncation does
    not go unnoticed.

    Args:
        forked_file: JSON file with the records whose fields take precedence.
        orig_file: JSON file with the base records.
        new_file: Destination path for the merged JSON list.
    """
    # Local import so the file's top-level import block stays untouched.
    import warnings

    datas = load_json(orig_file)
    forked_datas = load_json(forked_file)

    # zip() stops at the shorter input; report the mismatch instead of
    # dropping records silently.
    if len(datas) != len(forked_datas):
        warnings.warn(
            f"merge: record count mismatch ({len(datas)} in {orig_file!r} vs "
            f"{len(forked_datas)} in {forked_file!r}); unpaired trailing "
            "records are dropped from the output",
            stacklevel=2,
        )

    # Give tqdm the number of pairs so it can render a real progress bar;
    # zip() objects have no len().
    pair_count = min(len(datas), len(forked_datas))

    new_datas = []
    for data, forked_data in tqdm(zip(datas, forked_datas), total=pair_count):
        data.update(forked_data)
        new_datas.append(data)

    save_json(new_datas, new_file)


if __name__ == "__main__":
    # Companion fork step, kept disabled for reference: it writes the same
    # "_db-forked.json" file that the merge call below reads.
    # orig_file = "src/generate_fullstack_tests/WebGen-Bench_test-db-backend.json"
    # forked_file = "src/generate_fullstack_tests/WebGen-Bench_test-db-backend_db-forked.json"
    # keys = ["id", "instruction", "data_structures"]
    # fork(orig_file, forked_file, keys)

    # Overlay the forked records onto the "backend2" dataset and write the
    # merged result to the "backend" dataset path.
    merge(
        forked_file="src/generate_fullstack_tests/WebGen-Bench_test-db-backend_db-forked.json",
        orig_file="src/generate_fullstack_tests/WebGen-Bench_test-db-backend2.json",
        new_file="src/generate_fullstack_tests/WebGen-Bench_test-db-backend.json",
    )