import os
import json
import re

def load_jsonl(in_file):
    """Load a JSON Lines file into a list.

    Each non-blank line of ``in_file`` (read as UTF-8) is decoded as one
    JSON document. Blank or whitespace-only lines, such as a trailing
    empty line at the end of the file, are skipped instead of being fed
    to the JSON decoder.

    Args:
        in_file: Path of the JSON Lines file to read.

    Returns:
        A list with one decoded value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    with open(in_file, "r", encoding="utf-8") as f:
        # ``line.strip()`` is empty for blank lines, which json.loads rejects.
        return [json.loads(line) for line in f if line.strip()]


def load_json(in_file):
    """Read ``in_file`` as UTF-8 text and return the decoded JSON document."""
    with open(in_file, mode="r", encoding="utf-8") as handle:
        # json.load(fp) is defined as json.loads(fp.read()); spelled out here.
        return json.loads(handle.read())


def save_json(data, out_file):
    """Write ``data`` to ``out_file`` as JSON indented by four spaces.

    The target is opened in write mode with UTF-8 encoding, so any
    existing content is replaced.
    """
    with open(out_file, mode="w", encoding="utf-8") as sink:
        json.dump(data, fp=sink, indent=4)


# Matches the first fenced ```json ... ``` block. The tag is matched
# case-insensitively so that ```JSON fences are recognised as well.
_JSON_BLOCK_RE = re.compile(r"```json(.*?)```", re.S | re.I)


def parse_json_response(raw_text: str):
    """
    Extract the JSON payload from the model's raw response.

    Parsing is attempted in this order:
      1. The content of the first fenced ```json ... ``` block, or the
         whole text when no such block is present.
      2. The span from the first "{" to the last "}" in the raw text.

    Args:
        raw_text: The raw response text. Anything that is not a ``str``
            (for example ``None``) is treated as unparseable.

    Returns:
        The decoded JSON value. Step 1 may yield any JSON type (object,
        list, scalar); step 2 can only succeed on an object. Returns {}
        if nothing could be parsed.
    """
    if not isinstance(raw_text, str):
        # The regex search below would raise TypeError on non-string input.
        return {}

    # 1) Prefer the fenced ```json ... ``` block
    match = _JSON_BLOCK_RE.search(raw_text)
    json_src = match.group(1).strip() if match else raw_text

    try:
        return json.loads(json_src)
    except json.JSONDecodeError:
        pass  # fall through to next attempt

    # 2) Fallback: try the widest {...} span found anywhere in the text
    brace_match = re.search(r"\{.*\}", raw_text, re.S)
    if brace_match is None:
        return {}
    try:
        return json.loads(brace_match.group())
    except (json.JSONDecodeError, RecursionError):
        # Best effort: RecursionError (pathologically deep nesting) was
        # already swallowed by the previous broad handler; keep doing so.
        return {}


def find_have_api(test_file, out_file, logs_dir):
    """Collect the apps whose info-gathering result lists API endpoints.

    For every entry of ``test_file`` the function looks for
    ``<logs_dir>/info_gathering/<id>/info_result.json``. When that file
    exists, the JSON found in its ``"summary"`` field is merged into the
    top level of the result and the file is rewritten in place. Results
    with a non-empty ``"api_endpoints"`` value are then collected (without
    their ``"history"`` key, with ``"id"`` set) and written to ``out_file``.

    Args:
        test_file: Path of a JSON file holding the test entries; each
            entry must provide an ``"id"`` key usable as a directory name.
        out_file: Path where the collected list is written as JSON.
        logs_dir: Directory that contains the ``info_gathering`` folder.

    Returns:
        None. The outcome is the rewritten ``info_result.json`` files and
        the new ``out_file``.
    """
    datas = load_json(test_file)
    have_api_datas = []
    root_dir = os.path.join(logs_dir, "info_gathering")

    for data in datas:
        info_file = os.path.join(root_dir, data["id"], "info_result.json")
        if not os.path.isfile(info_file):
            continue
        info_data = load_json(info_file)

        # A missing, null or non-text summary has nothing to parse.
        summary = info_data.get("summary")
        json_data = parse_json_response(summary) if isinstance(summary, str) else {}
        # The parser may return a list or scalar; only a mapping can be
        # merged into the result, anything else would make update() raise.
        if isinstance(json_data, dict):
            info_data.update(json_data)
        save_json(info_data, info_file)

        # Truthiness also covers a null "api_endpoints", where len() raises.
        if info_data.get("api_endpoints"):
            # "history" is dropped only from the collected copy; the file
            # on disk was already saved with it above.
            info_data.pop("history", None)
            info_data["id"] = data["id"]
            have_api_datas.append(info_data)

    save_json(have_api_datas, out_file)
    

if __name__ == "__main__":
    # Imported here because argument parsing is only needed for script use.
    import argparse

    # The defaults reproduce the previously hard-coded paths, so running the
    # script without arguments behaves as before.
    parser = argparse.ArgumentParser(
        description="Collect apps whose info-gathering result lists API endpoints."
    )
    parser.add_argument(
        "--test-file",
        default="src/generate_fullstack_tests/WebGen-Bench_test-db-backend.json",
        help="JSON file with the test entries",
    )
    parser.add_argument(
        "--logs-dir",
        default="/WebGen-Agent/service_logs/WebGenAgentV3_WebGen-Bench_Qwen3-Coder-480B-A35B-Instruct_iter20_select_best",
        help="directory that contains the info_gathering folder",
    )
    parser.add_argument(
        "--out-file",
        default=None,
        help="output JSON path (default: have_api_apps.json inside the logs directory)",
    )
    args = parser.parse_args()

    test_file = args.test_file
    logs_dir = args.logs_dir
    out_file = args.out_file or os.path.join(logs_dir, "have_api_apps.json")

    find_have_api(test_file, out_file, logs_dir)