import re
import tempfile
import subprocess
from pathlib import Path
from common.task import CoqTask
from common.verdict import SolverReply


def solve_coq(
    *,
    task: CoqTask,
    timeout_ms: int,
    # A small reconstruction budget is enough: compile_coq_file treats "ATP found a
    # proof but Coq reconstruction failed" as a success (see the note there).
    hammer_reconstr_limit: int = 1,
) -> SolverReply:
    """
    Check the LLM-generated specification with CoqHammer.

    The generated spec is spliced into the task's Coq file, the Hammer limits are
    set, and the result is compiled with ``coqc`` inside a throwaway directory.

    Args:
        task: Task providing ``llm_solution``, ``full_content`` and ``filepath``.
        timeout_ms: Budget in milliseconds; converted to whole seconds for the
            CoqHammer ``ATPLimit`` and also passed on to ``compile_coq_file``.
        hammer_reconstr_limit: Value written as the CoqHammer ``ReconstrLimit``.

    Returns:
        ``SolverReply(verdict="success")`` when compilation is reported as
        successful, otherwise ``verdict="unknown"`` (also for an empty answer).
    """
    candidate_spec = task.llm_solution
    if candidate_spec is None or candidate_spec == "":
        return SolverReply(verdict="unknown", error_message="Empty LLM answer")

    spliced_source = substitute_generated_spec(
        full_content=task.full_content, generated_spec=candidate_spec
    )
    atp_limit_seconds = int(timeout_ms / 1000)
    coq_source = update_hammer_settings(
        spliced_source, atp_limit_seconds, hammer_reconstr_limit
    )

    with tempfile.TemporaryDirectory() as workdir:
        coq_file = Path(workdir) / task.filepath.name
        coq_file.write_text(coq_source)
        compiled, _output = compile_coq_file(coq_file, timeout_ms)

    # A failed compilation is deliberately reported as "unknown", never as a
    # refutation: CoqHammer does not generate counterexamples.
    verdict = "success" if compiled is True else "unknown"
    return SolverReply(verdict=verdict)


def substitute_generated_spec(*, full_content: str, generated_spec: str) -> str:
    """
    Replace every generated-spec region of a Coq file with ``generated_spec``.

    A region is everything from a ``(* start generated_spec *)`` marker up to the
    nearest following ``(* end generated_spec *)`` marker (it may span lines).
    Both markers are kept and the new spec is placed between them on its own
    lines.

    Args:
        full_content: Text of the Coq file containing the marker comments.
        generated_spec: Text to insert verbatim between the markers.

    Returns:
        The content with each marked region replaced; unchanged if no region
        is found.
    """
    pattern = r"\(\* start generated_spec \*\).*?\(\* end generated_spec \*\)"
    replacement = (
        f"(* start generated_spec *)\n{generated_spec}\n(* end generated_spec *)"
    )

    # Use a callable replacement so the spec is inserted literally. A plain
    # replacement string is treated as a template by re.sub, so backslashes in
    # Coq code (e.g. the `/\` and `\/` connectives followed by a letter or digit)
    # would be interpreted as escapes or group references and either raise
    # re.error or silently corrupt the spec.
    return re.sub(pattern, lambda _match: replacement, full_content, flags=re.DOTALL)


def update_hammer_settings(full_content, atp_limit: int, reconstr_limit: int) -> str:
    """
    Set the CoqHammer ``ATPLimit`` and ``ReconstrLimit`` directives in a Coq file.

    For each option: if the text already mentions the directive, numeric
    occurrences of it are rewritten to the new value; otherwise the directive is
    inserted after an anchor line. The anchor for ``ATPLimit`` is
    ``From Hammer Require Import Hammer.`` and the anchor for ``ReconstrLimit``
    is the ``Set Hammer ATPLimit <n>.`` directive. When the relevant pattern
    does not match, that step leaves the text unchanged.

    Args:
        full_content: Text of the Coq file.
        atp_limit: Value to write for ``Set Hammer ATPLimit``.
        reconstr_limit: Value to write for ``Set Hammer ReconstrLimit``.

    Returns:
        The updated file text.
    """
    # Step 1: ATPLimit -- overwrite in place, or add it below the Hammer import.
    if "Set Hammer ATPLimit" in full_content:
        atp_pattern = r"Set Hammer ATPLimit \d+\."
        atp_repl = f"Set Hammer ATPLimit {atp_limit}."
    else:
        atp_pattern = r"(From Hammer Require Import Hammer\.)"
        atp_repl = f"\\1\n\nSet Hammer ATPLimit {atp_limit}."
    with_atp = re.sub(atp_pattern, atp_repl, full_content)

    # Step 2: ReconstrLimit -- overwrite in place, or add it right after ATPLimit.
    if "Set Hammer ReconstrLimit" in with_atp:
        reconstr_pattern = r"Set Hammer ReconstrLimit \d+\."
        reconstr_repl = f"Set Hammer ReconstrLimit {reconstr_limit}."
    else:
        reconstr_pattern = r"(Set Hammer ATPLimit \d+\.)"
        reconstr_repl = f"\\1\nSet Hammer ReconstrLimit {reconstr_limit}."
    return re.sub(reconstr_pattern, reconstr_repl, with_atp)


def compile_coq_file(filepath: Path, timeout_ms: int) -> tuple[bool, str]:
    """
    Compile a Coq file with ``coqc`` and report whether a proof was found.

    ``coqc`` is run on ``filepath.name`` with ``filepath.parent`` as the working
    directory.

    NOTE!: ATP-ONLY SUCCESS is DEFINED as a SUCCESS. See the paper for details.
    In short, even though the Coq proof reconstruction might have failed, the
    ATP still found a proof that the LLM's specification is correct. This is
    detected by the "Hammer failed: proof reconstruction failed." message in
    the compiler output.

    Args:
        filepath: Path of the ``.v`` file to compile.
        timeout_ms: Time budget in milliseconds; the subprocess is given this
            many seconds plus a 2-second buffer.

    Returns:
        ``(success, output)``. ``success`` is True when ``coqc`` exits with
        status 0 or the output shows an ATP-only success. ``output`` is the
        combined stdout/stderr, or a description of the timeout/error.
        Failures to launch ``coqc`` are reported as ``(False, message)``
        rather than raised.
    """
    reconstr_failure_marker = "Hammer failed: proof reconstruction failed."

    def _as_text(stream) -> str:
        # TimeoutExpired carries raw bytes even though text=True was requested;
        # decode leniently so odd bytes in partial output cannot raise here.
        if isinstance(stream, bytes):
            return stream.decode(errors="replace")
        return stream

    try:
        result = subprocess.run(
            ["coqc", filepath.name],
            capture_output=True,
            text=True,
            # ATPLimit is expected to be set in the file itself, so this outer
            # timeout is only a backstop; the extra 2 seconds are a buffer.
            timeout=int((timeout_ms / 1000.0) + 2),
            cwd=filepath.parent,
        )
    except subprocess.TimeoutExpired as e:
        partial_output = ""
        if e.stdout:
            partial_output += f"STDOUT:\n{_as_text(e.stdout)}\n"
        if e.stderr:
            partial_output += f"STDERR:\n{_as_text(e.stderr)}\n"
        # The ATP may already have succeeded before the timeout hit.
        atp_only_success = reconstr_failure_marker in partial_output
        return atp_only_success, f"Compilation timeout\n{partial_output}"
    except FileNotFoundError:
        return False, "FileNotFoundErr - coqc not found"
    except Exception as e:
        # The process never produced output we could inspect, so there is no
        # ATP-only success to report; this is a plain failure.
        return False, f"Compilation error: {e}"

    out = result.stdout + result.stderr
    atp_only_success = reconstr_failure_marker in out
    success = (result.returncode == 0) or atp_only_success
    return success, out
