import os
import sys
import argparse
import json
import subprocess
import multiprocessing
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
from tqdm import tqdm
import shutil
import re
from pathlib import Path

# Add the parent directory to sys.path to enable imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils import (
    start_docker_containers, 
    stop_docker_containers,
    create_docker_compose_file,
    convert_windows_path_to_linux,
    llm_generation,
)

# ----------  paths  ---------- #
# JSONL file with the generated samples; read by main() via load_jsonl.
# Relative, so it resolves against the current working directory.
GEN_PATH = "src/generate_fullstack_tests/test_backend_model-gen.jsonl"

# JSON file that main() writes the verified samples to via dump_json.
OUT_PATH = "src/generate_fullstack_tests/test_backend_model-verify.json"

# Model identifier forwarded to llm_generation(model=...) by llm_call.
MODEL_ID = "Qwen3-Coder-480B-A35B-Instruct-FP8"

# ----------  I/O helpers  ---------- #
def load_json(path):
    """Read the UTF-8 encoded file at *path* and return its parsed JSON content."""
    with open(path, encoding="utf-8") as handle:
        text = handle.read()
    return json.loads(text)

def dump_json(obj, path):
    """Serialize *obj* as pretty-printed (2-space indent) UTF-8 JSON at *path*.

    Missing parent directories of *path* are created first. Non-ASCII
    characters are written as-is rather than escaped.
    """
    parent_dir = Path(path).parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    with open(path, mode="w", encoding="utf-8") as handle:
        json.dump(obj, handle, indent=2, ensure_ascii=False)

def load_jsonl(path: str):
    """Return a list with one parsed JSON value per non-blank line of the file.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped instead of being handed to ``json.loads``, which would raise.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]

def dump_jsonl(objs, path: str):
    """Write an iterable of JSON-serializable objects to a JSONL file.

    Each object becomes one line of UTF-8 JSON (non-ASCII characters are kept
    unescaped). Missing parent directories of *path* are created first, the
    same way ``dump_json`` does.
    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        for obj in objs:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

# ----------  prompt builder  ---------- #
# Prompt sent to the model for each sample. It is filled with str.format using
# the fields {instruction} and {structures}; the doubled braces "{{" / "}}"
# are format escapes that render as literal braces in the JSON example.
VERIFY_TEMPLATE = """You are a senior database architect.

Your tasks:
1. Carefully examine whether every *requirement* in the website instruction below has a matching data structure.
   - If new tables are needed, add them at the end of the "model_adjusted_data_structures" list.
   - Some requirements may have more than one possible implementation; make the description as general as possible to cover all valid implementations.
   - Note that not all requirements need to be satisfied by database structures; some may be purely front-end features.
   - Note that some requirements may be satisfied by multiple tables working together, or by processing logic rather than data structures.
2. Check that **every table** in the proposed list is absolutely required to satisfy a requirement.  
   - If some tables are not absolutely necessary, mark them for removal. For example, user account tables may not be needed if the website does not require user registration or login.
3. Produce:
   a) A detailed step-by-step analysis that maps requirements to data structures and explains any changes.  
   b) A JSON object inside a ```json code fence with the exact format  
      {{
        "db_adjustment_suggestions": [
          "... textual remarks ...",
          "... more remarks ...",
          ...
        ],
        "model_adjusted_data_structures": [
          "...first description or empty string if unchanged...",
          "...second description or empty string if unchanged...",
          ...
        ]
      }}

Remember:
- Keep the order of tables; if a table is removed put the string "(removed)" in that slot in model_adjusted_data_structures.
- If a table description is unchanged, put an empty string "" in that slot in model_adjusted_data_structures.
- The JSON block must be valid and must appear at the end of the answer.

----------------------
WEBSITE INSTRUCTION:
{instruction}

----------------------
PROPOSED DATA STRUCTURES:
{structures}
"""

def build_validation_prompt(instr: str, structures: list[str]) -> str:
    """Fill VERIFY_TEMPLATE with the instruction and a bulleted structure list.

    Each entry of *structures* becomes one "- ..." line; the lines are joined
    with newlines and substituted for the template's ``{structures}`` field,
    while *instr* fills ``{instruction}``.
    """
    bullets = [f"- {entry}" for entry in structures]
    return VERIFY_TEMPLATE.format(
        instruction=instr,
        structures="\n".join(bullets),
    )

# ----------  parsing the model answer  ---------- #
# Captures the body of a ```json ... ``` fence; re.S lets "." span newlines.
JSON_FENCE_RE = re.compile(r"```json(.*?)```", re.S)

def parse_verify_response(txt: str):
    """Extract the suggestions and adjusted structures from a model answer.

    The prompt asks for the JSON block at the end of the answer, so the LAST
    ```json fence is parsed; earlier fences (e.g. examples inside the analysis)
    are ignored.

    Args:
        txt: Raw model answer.

    Returns:
        A ``(suggestions, adjusted)`` pair of lists taken from the keys
        ``db_adjustment_suggestions`` and ``model_adjusted_data_structures``.
        ``([], [])`` is returned when the answer is empty, has no fence, the
        fence is not valid JSON, or the JSON is not an object. A key that is
        missing or whose value is not a list yields ``[]`` for that slot.
    """
    if not txt:
        return [], []

    fenced_blocks = JSON_FENCE_RE.findall(txt)
    if not fenced_blocks:
        return [], []  # failed

    try:
        parsed = json.loads(fenced_blocks[-1].strip())
    except json.JSONDecodeError:
        return [], []

    # Valid JSON that is not an object (list, string, number) has no keys to read.
    if not isinstance(parsed, dict):
        return [], []

    sug = parsed.get("db_adjustment_suggestions", [])
    adj = parsed.get("model_adjusted_data_structures", [])
    # Guard against null / scalar values so callers always receive lists.
    if not isinstance(sug, list):
        sug = []
    if not isinstance(adj, list):
        adj = []
    return sug, adj

# ----------  LLM call wrapper (assumes llm_generation is imported) ---------- #
def llm_call(prompt: str):
    """Send *prompt* as a single user message and return the reply's content.

    The request goes through ``llm_generation`` with ``model=MODEL_ID``. The
    value under the reply's "content" key is returned, or "" when that key is
    absent. NOTE(review): assumes ``llm_generation`` returns a dict-like
    object exposing ``.get`` -- confirm against utils.
    """
    messages = [{"role": "user", "content": prompt}]
    reply = llm_generation(messages, model=MODEL_ID)
    return reply.get("content", "")

# ----------  per-sample worker  ---------- #
def verify_sample(sample):
    """Run the LLM verification for one sample and attach the parsed result.

    Reads the "instruction" and "data_structures" entries of *sample*
    (defaulting to "" and [] when absent), builds the validation prompt,
    queries the model, and parses its answer. The two parsed lists are stored
    on *sample* in place under "db_adjustment_suggestions" and
    "model_adjusted_data_structures"; the same object is returned.
    """
    prompt = build_validation_prompt(
        sample.get("instruction", ""),
        sample.get("data_structures", []),
    )
    model_reply = llm_call(prompt)

    (
        sample["db_adjustment_suggestions"],
        sample["model_adjusted_data_structures"],
    ) = parse_verify_response(model_reply)
    return sample

# ----------  main ---------- #
def main():
    """Verify every generated sample with the LLM and write the results.

    Loads the samples from ``GEN_PATH``, runs ``verify_sample`` on each of them
    in a thread pool, and dumps the resulting list to ``OUT_PATH`` in the same
    order as the input file. A sample whose verification raises is still
    written, with both result fields set to empty lists, and a warning is
    printed.
    """
    data = load_jsonl(GEN_PATH)

    max_workers = min(32, multiprocessing.cpu_count() * 2)
    # One slot per input row. Futures complete in arbitrary order, so results
    # are stored by input index; this keeps the input order even when "id" is
    # missing, duplicated, or not sortable.
    verified = [None] * len(data)
    with ThreadPoolExecutor(max_workers=max_workers) as exe:
        futs = {exe.submit(verify_sample, item): idx for idx, item in enumerate(data)}
        for fut in tqdm(as_completed(futs), total=len(futs), desc="Verifying"):
            idx = futs[fut]
            try:
                verified[idx] = fut.result()
            except Exception as e:  # per-sample boundary: one failure must not abort the run
                bad = data[idx]
                bad["db_adjustment_suggestions"] = []
                bad["model_adjusted_data_structures"] = []
                verified[idx] = bad
                print(f"[WARN] verification failed for id={bad.get('id')}: {e}")

    dump_json(verified, OUT_PATH)
    print(f"Verification file written to {OUT_PATH}")

# Run the verification pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()