#!/usr/bin/env python3
import argparse
import json
import subprocess
import sys
import tempfile
from pathlib import Path

# Directory one level above the folder that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
LIB_DIR = REPO_ROOT / "lib"
# Put both directories at the front of the module search path. LIB_DIR is
# inserted last, so it ends up ahead of REPO_ROOT in the lookup order.
# NOTE(review): presumably needed so the `agent` import below resolves --
# confirm which of the two directories actually provides that package.
sys.path.insert(0, str(REPO_ROOT))
sys.path.insert(0, str(LIB_DIR))

from agent import log_parsers


def load_specs(json_path: Path) -> list[dict]:
    """Read the task list from a UTF-8 JSON file.

    Args:
        json_path: Location of the JSON document to load.

    Returns:
        The decoded list, unchanged, once every element is known to be a dict.

    Raises:
        ValueError: if the JSON root is not a list, or any element of the
            list is not an object.
    """
    raw_text = json_path.read_text(encoding="utf-8")
    tasks = json.loads(raw_text)
    if not isinstance(tasks, list):
        raise ValueError("JSON root must be a list of tasks.")
    if not all(isinstance(task, dict) for task in tasks):
        raise ValueError("Each task must be an object.")
    return tasks


def get_parser(parser_name: str):
    """Resolve a log parser by name.

    The name is looked up first in ``log_parsers.NAME_TO_PARSER`` and then,
    as a fallback, as a module-level attribute of ``log_parsers``.

    Args:
        parser_name: Registry key or attribute name of the parser.

    Returns:
        The callable found under ``parser_name``.

    Raises:
        ValueError: if nothing callable is found under that name.
    """
    parser = log_parsers.NAME_TO_PARSER.get(parser_name)
    if parser is None:
        parser = getattr(log_parsers, parser_name, None)
    # The getattr fallback can match any module attribute (constants, the
    # registry dict itself, ...). The result is later invoked as
    # ``parser(output)``, so reject anything that cannot be called now instead
    # of failing with a TypeError after the container has already run.
    if not callable(parser):
        raise ValueError(f"Unknown log parser: {parser_name}")
    return parser


def write_patch(temp_dir: Path, name: str, content: str) -> Path:
    """Store ``content`` as the UTF-8 file ``temp_dir / name``.

    Args:
        temp_dir: Existing directory that receives the file.
        name: File name to create (or overwrite) inside ``temp_dir``.
        content: Text to write.

    Returns:
        The path of the written file.
    """
    target = temp_dir.joinpath(name)
    with target.open("w", encoding="utf-8") as handle:
        handle.write(content)
    return target


def run_in_container(
    image: str,
    workdir: str,
    patch_dir: Path,
    patch_name: str,
    test_patch_name: str,
    test_cmds: list[str],
) -> tuple[int, str]:
    """Apply two patches inside a throwaway container and run the test commands.

    A bash script is assembled that cleans untracked files, applies both
    patches from the read-only ``/patches`` mount and then runs every entry of
    ``test_cmds`` in order. The script starts with ``set -e``, so it stops at
    the first command that exits non-zero.

    Args:
        image: Docker image reference to run.
        workdir: Working directory inside the container (passed to ``-w``).
        patch_dir: Host directory mounted read-only at ``/patches``.
        patch_name: File name of the source patch inside ``patch_dir``.
        test_patch_name: File name of the test patch inside ``patch_dir``.
        test_cmds: Shell command lines appended to the script.

    Returns:
        ``(exit_code, output)`` where ``output`` is the captured stdout
        followed by the captured stderr of the ``docker run`` process.
    """
    script = "\n".join(
        [
            "set -e",
            "git clean -fdx",
            f"git apply /patches/{patch_name}",
            f"git apply /patches/{test_patch_name}",
            *test_cmds,
        ]
    )

    docker_cmd = [
        "docker",
        "run",
        "--rm",
        "-v",
        f"{patch_dir}:/patches:ro",
        "-w",
        workdir,
        image,
        "/bin/bash",
        "-lc",
        script,
    ]

    # Test logs can contain arbitrary bytes. Decode explicitly as UTF-8 and
    # replace undecodable bytes so a stray byte cannot abort the evaluation
    # with UnicodeDecodeError (the locale default with strict errors could).
    result = subprocess.run(
        docker_cmd,
        check=False,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    output = (result.stdout or "") + (result.stderr or "")
    return result.returncode, output


def evaluate_instance(spec: dict, image: str, patch_override: dict | None) -> dict:
    instance_id = spec.get("instance_id")
    if not instance_id:
        raise ValueError("Task missing instance_id.")

    repo = spec.get("repo")
    if not repo or "/" not in repo:
        raise ValueError(f"Task {instance_id} missing repo.")

    install_config = spec.get("install_config", {})
    test_cmds = install_config.get("test_cmd", [])
    if not isinstance(test_cmds, list) or not test_cmds:
        raise ValueError(f"Task {instance_id} missing install_config.test_cmd.")

    parser_name = install_config.get("log_parser")
    if not parser_name:
        raise ValueError(f"Task {instance_id} missing install_config.log_parser.")
    parser = get_parser(parser_name)

    patch = spec.get("patch", "")
    if patch_override and "patch" in patch_override:
        patch = patch_override.get("patch", "")
    test_patch = spec.get("test_patch", "")
    if not patch or not test_patch:
        raise ValueError(f"Task {instance_id} missing patch/test_patch.")

    workdir = f"/{repo.split('/')[1]}"

    with tempfile.TemporaryDirectory(prefix="eval_patches_") as tmp:
        patch_dir = Path(tmp)
        patch_path = write_patch(patch_dir, "patch.diff", patch)
        test_patch_path = write_patch(patch_dir, "test_patch.diff", test_patch)
        exit_code, output = run_in_container(
            image=image,
            workdir=workdir,
            patch_dir=patch_dir,
            patch_name=patch_path.name,
            test_patch_name=test_patch_path.name,
            test_cmds=test_cmds,
        )

    log_path = Path(f"{instance_id}_log.txt")
    log_path.write_text(output, encoding="utf-8")

    parsed = parser(output)
    passed = sorted(k for k, v in parsed.items() if v == "PASSED")
    failed = sorted(k for k, v in parsed.items() if v == "FAILED")

    expected_passed = sorted(
        spec.get("PASS_TO_PASS", []) + spec.get("FAIL_TO_PASS", [])
    )

    result = {
        "instance_id": instance_id,
        "exit_code": exit_code,
        "passed_match": passed == expected_passed,
        "passed_expected": expected_passed,
        "passed_actual": passed,
        "failed_actual": failed,
        "log_length": len(output),
        "log_path": str(log_path),
    }
    return result


def _load_patch_overrides(patches_path: Path) -> dict:
    """Read the patch-override JSON and index its entries by ``instance_id``.

    Raises:
        ValueError: if the file is not valid JSON, its root is not a list, an
            entry is not an object, or an entry lacks ``instance_id``.
    """
    patches_data = json.loads(patches_path.read_text(encoding="utf-8"))
    if not isinstance(patches_data, list):
        raise ValueError("Patches JSON root must be a list.")
    overrides = {}
    for item in patches_data:
        if not isinstance(item, dict):
            raise ValueError("Each patches entry must be an object.")
        pid = item.get("instance_id")
        if not pid:
            raise ValueError("Patches entry missing instance_id.")
        overrides[pid] = item
    return overrides


def main() -> int:
    """Command-line entry point: evaluate every task from ``--json``.

    For each task the image name is ``<tag-prefix><instance_id>``, prefixed
    with ``<image-registry>/`` when a registry is given. A per-task status
    line is printed to stdout; problems are reported on stderr.

    Returns:
        0 when every task matched its expected passing tests, 1 when an input
        file is missing or malformed, a task errored, or a task mismatched.
    """
    parser = argparse.ArgumentParser(description="Evaluate instances via docker.")
    parser.add_argument("--json", required=True, help="Path to JSON file with tasks.")
    parser.add_argument(
        "--patches",
        default="",
        help="Optional JSON with instance_id and patch overrides.",
    )
    parser.add_argument(
        "--image-registry",
        default="",
        help="Optional registry/repo prefix for instance images.",
    )
    parser.add_argument(
        "--tag-prefix",
        default="",
        help="Optional prefix for instance image tags.",
    )
    args = parser.parse_args()

    json_path = Path(args.json)
    if not json_path.is_file():
        print(f"JSON file not found: {json_path}", file=sys.stderr)
        return 1

    # Report malformed input the same way as the other validation failures
    # instead of letting it escape as a traceback. json.JSONDecodeError is a
    # ValueError subclass, so this covers both syntax and shape errors.
    try:
        specs = load_specs(json_path)
    except ValueError as exc:
        print(f"Invalid tasks JSON {json_path}: {exc}", file=sys.stderr)
        return 1

    patch_overrides = {}
    if args.patches:
        patches_path = Path(args.patches)
        if not patches_path.is_file():
            print(f"Patches JSON not found: {patches_path}", file=sys.stderr)
            return 1
        try:
            patch_overrides = _load_patch_overrides(patches_path)
        except json.JSONDecodeError as exc:
            print(f"Patches JSON is not valid JSON: {exc}", file=sys.stderr)
            return 1
        except ValueError as exc:
            print(exc, file=sys.stderr)
            return 1
    all_ok = True

    for spec in specs:
        instance_id = spec.get("instance_id")
        if not instance_id:
            print("Task missing instance_id.", file=sys.stderr)
            return 1
        tag = f"{args.tag_prefix}{instance_id}"
        if args.image_registry:
            tag = f"{args.image_registry}/{tag}"

        # One broken task must not stop the remaining ones, so any failure is
        # reported and the loop moves on.
        try:
            result = evaluate_instance(spec, tag, patch_overrides.get(instance_id))
        except Exception as exc:
            all_ok = False
            print(f"{instance_id}: error: {exc}", file=sys.stderr)
            continue

        ok = result["passed_match"]
        status = "OK" if ok else "MISMATCH"
        print(f"{instance_id}: {status}")
        if not ok:
            all_ok = False
            # passed_match is exactly "passed_actual == passed_expected", so a
            # mismatch always means the two lists differ; print them directly.
            print("  passed_expected:", result["passed_expected"])
            print("  passed_actual:", result["passed_actual"])
            print("  failed_actual:", result["failed_actual"])

    return 0 if all_ok else 1


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    sys.exit(main())
