import os
import os.path as osp
import json
import re
import shutil
import logging
from datetime import datetime
from typing import List, Dict, Any, Tuple, Optional

try:
    # When executed as part of a package (recommended)
    from .llm_client import chat, extract_json_block
    from .perform_review import (
        perform_review as run_review,
        load_paper,
        LAST_REVIEW_DETAILS,
        get_last_review_details,
    )

    # from .semantic_scholar import collect_bibtex  # Disabled: using LLM for reference generation
    from .tools.figure_safety import scan_and_fix_graphics
    from .local_review_config import *  # type: ignore

except ImportError:  # fallback for script-style execution
    from llm_client import chat, extract_json_block
    from perform_review import (
        perform_review as run_review,
        load_paper,
        LAST_REVIEW_DETAILS,
        get_last_review_details,
    )

    # from semantic_scholar import collect_bibtex  # Disabled: using LLM for reference generation
    from tools.figure_safety import scan_and_fix_graphics
    import local_review_config as lrc  # type: ignore

    # Set environment variables from local config for fallback case
    for var_name in [
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_VERSION",
        "AZURE_OPENAI_MODEL",
        "DEFAULT_TEMPERATURE",
    ]:
        if hasattr(lrc, var_name):
            os.environ[var_name] = str(getattr(lrc, var_name))

# ---------- Prompt templates (detailed) ----------
# System prompt for idea generation. It is passed to the model as-is (no
# str.format call), and fixes both the hard constraints and the JSON fields
# (Name, Title, Motivation, RelatedWork, ExperimentPlan) each idea must carry.
SYSTEM_IDEA_GEN = (
    "You are an expert ML researcher. Given a broad seed, produce multiple research ideas.\n"
    "Hard constraints (do not restate to the user):\n"
    "- Focus on computationally efficient methods suitable for standard hardware.\n"
    "- Every experiment plan must specify data generation, evaluation metrics, and expected outcomes.\n"
    "- Figures will be generated from Python code that implements the proposed methods.\n"
    "Return strict JSON for each idea with fields: Name, Title, Motivation, RelatedWork (bullets),\n"
    "ExperimentPlan (detailed steps)."
)

# User prompt for idea generation. Filled with str.format:
#   {seed} - seed text taken from the caller's seed mapping
#   {n}    - number of ideas requested in this call (one page)
USER_IDEA_GEN = (
    "Seed: {seed}.\n"
    "Propose {n} distinct, high-quality ML research ideas following the schema.\n"
    "Return a JSON list only."
)

# System prompt for turning one idea into a full paper package (title,
# abstract, sections, results, figures, citation_queries).
# It contains literal braces ("{filename, caption, code, section}"), so it must
# be sent as-is and never passed through str.format.
# NOTE(review): this prompt asks for 15-25 citation_queries while
# USER_PAPER_PKG asks for 20-30 - confirm which range is intended.
SYSTEM_PAPER_PKG = (
    "From one idea, produce a complete research package for a paper.\n"
    "Return strict JSON with: title, abstract, sections (3-7), results (tables),\n"
    "figures (list of {filename, caption, code, section}) and citation_queries (list).\n"
    "Each figure must include a 'section' field that is one of: Introduction, Method, Experiments, Results, Discussion.\n"
    "Provide 15-25 diverse citation_queries spanning foundational and recent (last 3 years) work; avoid near-duplicates.\n"
    "The code must be self-contained Python that generates the figure file when run.\n"
    "Write in standard academic style and DO NOT include meta commentary about code generation,\n"
    "implementation details, or process (e.g., do not say 'Figures are generated by the provided self-contained code'\n"
    "or that figures are merely illustrative).\n"
    "\n"
    "Writing density and completeness (do not change the JSON schema):\n"
    "- Each paragraph 3–6 sentences; avoid one-line sections; no TODOs/placeholders.\n"
    "- Target lengths per section: Introduction 4–6 paragraphs; Related Work 3–4; Background includes a short 'Problem Setting'; Method 3–5; Experiments 3–4 with at least two ablations/sensitivity analyses; Results 3–4; Discussion 2–3; Limitations 2–3.\n"
    "- For the Method section, include appropriate theoretical analysis and mathematical formulations.\n"
    "- Reference figures within relevant sections in prose; do not create a standalone 'Figures' section in the text.\n"
    "- If a section is too short, expand formalism, comparisons, or analysis rather than adding new top-level sections."
)

# User prompt for the paper package. Filled with str.format:
#   {idea_json} - the idea serialized as JSON
# "The density targets above" refers to the targets listed in SYSTEM_PAPER_PKG.
USER_PAPER_PKG = (
    "Idea: {idea_json}.\n"
    "Create a package with deep content. The sections should read like a real paper\n"
    "(Introduction, Related Work, Method, Experiments, Results, Discussion, Limitations).\n"
    "Include a dedicated 'Related Work' section placed immediately after the 'Introduction'.\n"
    "Include at least 2 figures, each tagged with a 'section' (choose from Introduction, Method, Experiments, Results, Discussion).\n"
    "Place figures conceptually within their sections; do not add a separate 'Figures' section.\n"
    "For the Method section, include appropriate theoretical analysis and mathematical formulations to strengthen the foundation.\n"
    "Return only JSON with 20-30 citation_queries (diverse, non-redundant).\n"
    "Enforce the density targets above (3–6 sentences per paragraph; section paragraph counts as specified). If any section is thin, expand analysis/ablations/comparisons. Keep the JSON schema exactly the same; do not add or rename fields."
)

# System prompt for requesting extra figures for an existing idea. Sent as-is;
# asks for a JSON list whose items carry filename, caption, code and section.
SYSTEM_MORE_FIGS = (
    "You generate additional figures for the same idea. Return strict JSON list of figures,\n"
    "each with filename, caption, code that saves the image file, and a 'section' field\n"
    "(choose from Introduction, Method, Experiments, Results, Discussion).\n"
    "Write standard academic captions only; do NOT include meta commentary about code generation\n"
    "or that figures are merely illustrative.\n"
    "Captions must include the data generation setting(s), metric(s), and the main trend or takeaway. Ensure each figure will be referenced in the corresponding section’s prose."
)

# User prompt for extra figures. Filled with str.format:
#   {idea_json} - the idea serialized as JSON
#   {k}         - number of additional figures requested
USER_MORE_FIGS = (
    "Idea: {idea_json}.\n"
    "Based on the experiment plan, produce {k} more figures showing complementary views\n"
    "of the results (ablations, sensitivity curves, error distributions). Tag each figure with a 'section'\n"
    "chosen from Introduction, Method, Experiments, Results, Discussion."
)

# ---------- Core helpers ----------


def _setup_logging(out_dir: Optional[str] = None) -> logging.Logger:
    """Configure a console logger and, if out_dir provided, a file logger.
    Idempotent across repeated calls.
    """
    logger = logging.getLogger("minimal")
    logger.setLevel(logging.INFO)
    logger.propagate = False  # avoid duplicate logs via root logger

    # Console handler
    if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
        sh = logging.StreamHandler()
        sh.setLevel(logging.INFO)
        fmt = logging.Formatter("[%(asctime)s] %(levelname)s - %(message)s", "%H:%M:%S")
        sh.setFormatter(fmt)
        logger.addHandler(sh)

    # File handler
    if out_dir:
        log_path = osp.join(out_dir, "log.txt")
        needs_file = True
        for h in logger.handlers:
            if (
                isinstance(h, logging.FileHandler)
                and getattr(h, "_log_path", None) == log_path
            ):
                needs_file = False
                break
        if needs_file:
            os.makedirs(out_dir, exist_ok=True)
            fh = logging.FileHandler(log_path, encoding="utf-8")
            fh.setLevel(logging.INFO)
            fmt = logging.Formatter(
                "[%(asctime)s] %(levelname)s - %(message)s", "%Y-%m-%d %H:%M:%S"
            )
            fh.setFormatter(fmt)
            # mark to avoid duplicates on subsequent calls
            setattr(fh, "_log_path", log_path)
            logger.addHandler(fh)

    return logger


# (research style) Avoid using JSON Schema structured outputs; rely on
# free-form replies and robust JSON extraction instead.


def _safe_extract_json_list(text: str):
    """Return the JSON list carried by an LLM reply, or None if there is none.

    The reply may be a bare JSON list, or an object wrapping the list under one
    of the keys "ideas", "items", "results" or "list" (checked in that order).
    """
    parsed = extract_json_block(text)
    if isinstance(parsed, list):
        return parsed
    if not isinstance(parsed, dict):
        return None
    # common shape: {"ideas": [...]}, {"items": [...]}, etc.
    wrapped = (parsed.get(name) for name in ("ideas", "items", "results", "list"))
    return next((value for value in wrapped if isinstance(value, list)), None)


def generate_ideas(
    seed: Dict[str, Any], n: int, ideas_per_call: Optional[int] = None
) -> List[Dict[str, Any]]:
    """Generate up to n ideas, paging requests to keep prompts small.

    Page size defaults to env RESEARCH_IDEAS_PER_CALL (or 8) and is clamped to
    the range [1, n]. Ideas are deduplicated by (case-insensitive) Title/Name.
    Reply entries that are not JSON objects, or that carry no title, are
    dropped instead of raising.

    Args:
        seed: Mapping whose "seed" entry holds the seed text for the prompt.
        n: Maximum number of ideas to return.
        ideas_per_call: Optional page size; takes precedence over the
            environment variable.

    Returns:
        At most n idea dicts in the order they were received. The list may be
        shorter than n when replies keep repeating or cannot be parsed.
    """
    logger = logging.getLogger("minimal")
    env_page = os.environ.get("RESEARCH_IDEAS_PER_CALL")
    try:
        env_page_val = int(env_page) if env_page else None
    except ValueError:
        env_page_val = None
    page = max(1, min(n, ideas_per_call or env_page_val or 8))
    # Loop-invariant cap used by the "no progress" guard below.
    max_attempts = max(3, (n + page - 1) // page + 2)

    def _title_of(item: Any) -> str:
        # LLM output is not guaranteed to follow the schema: tolerate non-dict
        # entries and non-string titles instead of raising AttributeError.
        if not isinstance(item, dict):
            return ""
        return str(item.get("Title") or item.get("Name") or "").strip()

    logger.info(f"Generating ideas (n={n}, page={page}) from seed…")
    all_ideas: List[Dict[str, Any]] = []
    used_titles = set()
    attempts = 0
    while len(all_ideas) < n:
        k = min(page, n - len(all_ideas))
        attempts += 1
        # Build per-page user content; include previously used titles to discourage repeats
        prev_titles = [t for t in map(_title_of, all_ideas[-50:]) if t]
        note = ""
        if prev_titles:
            joined = "; ".join(prev_titles)
            note = f"\nAvoid repeating these titles: {joined}"
        messages = [
            {"role": "system", "content": SYSTEM_IDEA_GEN},
            {
                "role": "user",
                "content": USER_IDEA_GEN.format(
                    seed=seed.get("seed", ""),
                    n=k,
                )
                + note,
            },
        ]
        text = chat(messages)
        ideas = _safe_extract_json_list(text) or []
        if not ideas:
            logger.warning("Idea JSON parse failed; retrying this page once…")
            text = chat(messages)
            ideas = _safe_extract_json_list(text) or []

        added = 0
        for it in ideas:
            title = _title_of(it)
            key = title.lower()
            if title and key not in used_titles:
                all_ideas.append(it)
                used_titles.add(key)
                added += 1
            if len(all_ideas) >= n:
                break
        logger.info(
            f"Page {attempts}: received {len(ideas)}, added {added}, total {len(all_ideas)}/{n}"
        )
        # Basic guard to avoid infinite loops if the model keeps repeating
        if added == 0 and attempts >= max_attempts:
            logger.warning(
                "Stopping early due to repeated duplicates or parse failures."
            )
            break

    logger.info(f"Generated {len(all_ideas)} ideas.")
    return all_ideas


def request_paper_package(idea: Dict[str, Any]) -> Dict[str, Any]:
    """Ask the LLM for a full paper package for one idea and return it.

    The reply is parsed as free-form text. If it does not yield a JSON object,
    the same request is sent one more time.

    Raises:
        RuntimeError: if neither reply contains a JSON object.
    """
    log = logging.getLogger("minimal")
    title = idea.get("Title") or idea.get("Name") or "(untitled)"
    log.info(f"Requesting paper package for: {title}")

    user_content = USER_PAPER_PKG.format(idea_json=json.dumps(idea))
    messages = [
        {"role": "system", "content": SYSTEM_PAPER_PKG},
        {"role": "user", "content": user_content},
    ]

    # research style: request free-form and parse; two tries at most
    pkg = None
    for is_retry in (False, True):
        if is_retry:
            log.warning("Package JSON parse failed; retrying once…")
        pkg = extract_json_block(chat(messages))
        if isinstance(pkg, dict):
            break
    else:
        raise RuntimeError("LLM did not return a valid paper package")

    fig_count = len(pkg.get("figures") or [])
    section_count = len(pkg.get("sections") or {})
    log.info(
        f"Received paper package with {section_count} sections and {fig_count} figures."
    )
    return pkg


def request_more_figures(idea: Dict[str, Any], k: int = 2) -> List[Dict[str, Any]]:
    """Ask the LLM for k additional figure specs for an idea.

    The reply may be a bare JSON list or an object of the form
    ``{"figures": [...]}``; both shapes are accepted on the first request and
    on the single retry. Always returns a list (empty when both replies are
    unusable), so callers can iterate the result safely.

    Args:
        idea: The idea dict; it is serialized to JSON for the prompt.
        k: Number of extra figures to request.

    Returns:
        The list of figure entries extracted from the reply.
    """
    logger = logging.getLogger("minimal")
    logger.info(f"Requesting {k} additional figures…")
    messages = [
        {"role": "system", "content": SYSTEM_MORE_FIGS},
        {
            "role": "user",
            "content": USER_MORE_FIGS.format(idea_json=json.dumps(idea), k=k),
        },
    ]

    def _figures_from(reply: Any) -> Optional[List[Dict[str, Any]]]:
        # Unwrap {"figures": [...]} and reject anything that is not a list.
        parsed = extract_json_block(reply)
        if isinstance(parsed, dict):
            parsed = parsed.get("figures")
        return parsed if isinstance(parsed, list) else None

    figs = _figures_from(chat(messages))
    if figs is None:
        logger.warning("Extra-figures JSON parse failed; retrying once…")
        # Apply the same unwrapping to the retry; previously a dict reply was
        # returned as-is here and broke downstream iteration.
        figs = _figures_from(chat(messages)) or []
    logger.info(f"Received {len(figs)} additional figures.")
    return figs


def _strip_code_fences(code: str) -> str:
    """Remove triple backtick fences if present and return inner code."""
    code = code.strip()
    if code.startswith("```"):
        # remove leading fence
        first = code.find("\n")
        if first != -1:
            code = code[first + 1 :]
    if code.endswith("```"):
        code = code[:-3]
    return code


def _normalize_code(code: str) -> str:
    """Best-effort cleanup for LLM code snippets.
    - Strip code fences
    - Convert literal \n and \t to actual newlines/tabs when present
    - Normalize line endings
    """
    code = _strip_code_fences(code)
    # If the code seems to contain many literal \n, replace them
    if "\\n" in code and code.count("\\n") >= 2:
        code = code.replace("\\n", "\n")
    if "\\t" in code:
        code = code.replace("\\t", "\t")
    code = code.replace("\r\n", "\n").replace("\r", "\n").strip("\n") + "\n"
    return code


def run_code(code: str, out_dir: str, fn_hint: str = "figure.png"):
    """Run LLM-generated figure code inside out_dir, repairing it on failure.

    The code is normalized, written to ``_tmp_fig.py`` in out_dir and executed
    with the interpreter running this process. If the script exits non-zero or
    exceeds the time limit, the LLM is asked up to two times to repair it; each
    repair is written to ``_tmp_fig_repair_<attempt>.py`` and executed the same
    way. If nothing succeeds, a 1x1 PNG placeholder is written to
    ``out_dir/fn_hint`` so that a later LaTeX build still finds a file.

    Args:
        code: Python source that is expected to save the figure.
        out_dir: Working directory for the script; created if missing.
        fn_hint: Filename the figure should be saved under (used in the repair
            prompt and for the placeholder).

    Returns:
        None. Failures are logged rather than raised.
    """
    import subprocess
    import sys

    logger = logging.getLogger("minimal")
    code = _normalize_code(code or "")
    timeout_s = 120
    os.makedirs(out_dir, exist_ok=True)

    def _as_text(stream: Any) -> str:
        # TimeoutExpired may carry bytes, str or None for captured output.
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return stream or ""

    def _run(path_basename: str) -> subprocess.CompletedProcess:
        # Use the current interpreter so the script sees the same environment
        # (installed packages, Python version) as this process.
        cmd = [sys.executable or "python", path_basename]
        try:
            return subprocess.run(
                cmd,
                cwd=out_dir,
                check=False,
                timeout=timeout_s,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                errors="replace",
            )
        except subprocess.TimeoutExpired as exc:
            # Treat a timeout like any other failed run so the repair and
            # placeholder paths still execute instead of aborting the caller.
            return subprocess.CompletedProcess(
                cmd,
                returncode=-1,
                stdout=_as_text(exc.stdout),
                stderr=_as_text(exc.stderr)
                + f"\nTimed out after {timeout_s} seconds.",
            )

    # Write and run the original code
    path = osp.join(out_dir, "_tmp_fig.py")
    with open(path, "w", encoding="utf-8") as f:
        f.write(code)
    logger.info(f"Running figure code -> {osp.basename(path)} (hint: {fn_hint})")
    res = _run(osp.basename(path))
    if res.returncode == 0:
        logger.info("Figure code finished.")
        return

    # If failed, attempt LLM-based auto-repair up to 2 times
    err_text = (res.stdout or "") + "\n" + (res.stderr or "")
    logger.warning("Figure code failed (rc=%s). Attempting LLM repair…", res.returncode)

    # Resolve the LLM client once; it does not change between attempts.
    try:
        from .llm_client import chat  # prefer package import
    except Exception:
        try:
            from llm_client import chat  # fallback
        except Exception:
            chat = None  # type: ignore

    if chat is None:
        logger.warning("LLM client unavailable; skipping repair attempts.")
    else:
        sys_msg = (
            "You fix Python plotting code so it runs without errors and saves the figure.\n"
            "Requirements:\n"
            f"- Save the figure to exactly this filename: {fn_hint}.\n"
            "- Use only standard libs plus numpy/matplotlib; avoid external data/files.\n"
            "- Do not print explanations or comments; return ONLY the Python code."
        )
        for attempt in range(1, 3):
            # err_text always holds the output of the most recent failed run.
            user_msg = (
                "Original code:\n" + code + "\n\n" "Error output/traceback:\n" + err_text
            )
            try:
                reply = chat(
                    [
                        {"role": "system", "content": sys_msg},
                        {"role": "user", "content": user_msg},
                    ]
                )
            except Exception:
                logger.exception("LLM repair attempt %d failed to call chat().", attempt)
                break

            new_code = _normalize_code(str(reply or ""))
            repair_path = osp.join(out_dir, f"_tmp_fig_repair_{attempt}.py")
            with open(repair_path, "w", encoding="utf-8") as f:
                f.write(new_code)
            logger.info("Trying repaired figure code (attempt %d).", attempt)
            res2 = _run(osp.basename(repair_path))
            if res2.returncode == 0:
                logger.info("Repaired figure code succeeded on attempt %d.", attempt)
                return
            logger.warning(
                "Repaired attempt %d failed (rc=%s).", attempt, res2.returncode
            )
            err_text = (res2.stdout or "") + "\n" + (res2.stderr or "")

    # Fall back: write a tiny placeholder to avoid LaTeX failure
    logger.error("All repair attempts failed; writing 1x1 PNG placeholder.")
    try:
        with open(osp.join(out_dir, fn_hint), "wb") as fx:
            fx.write(
                b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc````\x00\x00\x00\x04\x00\x01\x0b\xe7\x02\x9e\x00\x00\x00\x00IEND\xaeB`\x82"
            )
    except Exception:
        logger.exception("Failed to write placeholder image; continuing.")


def render_figures(
    fig_list: List[Dict[str, Any]], out_dir: str
) -> List[Tuple[str, str, Optional[str]]]:
    """Render every figure spec into out_dir and return what was rendered.

    Each spec is a dict with optional "filename", "caption", "section" and
    "code" entries. Missing or empty filename/caption values fall back to
    ``figure_<i>.png`` / ``Figure <i>``. Entries that are not dicts are skipped
    with a warning, and only the first spec per filename is rendered. After
    running the code, ``.png`` outputs are checked for the PNG signature and
    replaced by a 1x1 placeholder when missing or invalid.

    Args:
        fig_list: Figure specs (may be None or empty).
        out_dir: Directory the figure code runs in and writes to.

    Returns:
        A list of ``(filename, caption, section_or_None)`` tuples in render
        order, for LaTeX insertion.
    """
    logger = logging.getLogger("minimal")
    figs: List[Tuple[str, str, Optional[str]]] = []
    seen: set[str] = set()
    for i, fd in enumerate(fig_list or [], 1):
        if not isinstance(fd, dict):
            logger.warning(
                "Skipping malformed figure entry %d (%s).", i, type(fd).__name__
            )
            continue
        # `or` (rather than a .get default) also covers explicit null/empty
        # values, which would otherwise surface as a None filename downstream.
        fn = str(fd.get("filename") or f"figure_{i}.png")
        cp = str(fd.get("caption") or f"Figure {i}")
        sec = str(fd.get("section") or "").strip() or None
        code = fd.get("code") or ""
        # Skip duplicate filenames: only render and include the first occurrence
        if fn in seen:
            logger.info(f"Skipping duplicate figure filename already rendered: {fn}")
            continue
        logger.info(f"Rendering figure {i}: {fn}")
        # Mark as seen immediately to avoid re-rendering later entries with same filename
        seen.add(fn)
        run_code(code, out_dir, fn_hint=fn)
        # Best-effort: ensure PNG files are valid to avoid pdfTeX PNG read errors
        try:
            path = osp.join(out_dir, fn)
            if fn.lower().endswith(".png"):
                ok = False
                if osp.exists(path) and osp.getsize(path) >= 8:
                    with open(path, "rb") as fx:
                        sig = fx.read(8)
                        ok = sig == b"\x89PNG\r\n\x1a\n"
                if not ok:
                    # overwrite with 1x1 valid PNG placeholder
                    with open(path, "wb") as fx:
                        fx.write(
                            b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc````\x00\x00\x00\x04\x00\x01\x0b\xe7\x02\x9e\x00\x00\x00\x00IEND\xaeB`\x82"
                        )
        except Exception:
            logger.exception("Failed to validate/repair figure PNG; continuing.")
        # Record this figure for LaTeX insertion
        figs.append((fn, cp, sec))
    return figs


def _strip_filecontents_references(tex: str) -> str:
    """Remove any filecontents environment that writes references.bib to avoid
    overwriting our generated BibTeX file.
    """
    pattern = (
        r"\\begin\{filecontents\}\{\s*references\.bib\s*\}.*?\\end\{filecontents\}"
    )
    return re.sub(pattern, "", tex, flags=re.S)


def _latex_sanitize(text: str) -> str:
    """Escape common LaTeX-special characters in plain text to reduce compile errors.
    We intentionally keep backslashes and braces untouched to avoid breaking LaTeX macros
    if the model provides any. This targets plain prose (sections, abstracts, captions).
    """
    if not isinstance(text, str):
        text = str(text)
    # Replace most common troublemakers outside math
    replacements = {
        "&": r"\&",
        "%": r"\%",
        "#": r"\#",
        "_": r"\_",
    }
    # Fast path if nothing to replace
    if not any(ch in text for ch in replacements):
        return text
    out = []
    for ch in text:
        out.append(replacements.get(ch, ch))
    return "".join(out)


def _unicode_to_tex(s: str) -> str:
    """
    Convert common Unicode symbols to LaTeX-friendly forms for pdfLaTeX.
    Mathy symbols are wrapped with \ensuremath{...} so they work in text or math.
    Keep replacements conservative to avoid overfitting content.
    """
    if not isinstance(s, str):
        s = str(s)
    replacements = [
        # Dashes and punctuation
        ("\u2013", "--"),  # en dash
        ("\u2014", "---"),  # em dash
        ("\u2212", "-"),  # minus sign to hyphen
        ("\u2026", r"\ldots{}"),  # ellipsis
        ("\u00a0", "~"),  # nbsp to tie
        ("\u2018", "`"),  # left single quote
        ("\u2019", "'"),  # right single quote
        ("\u201c", "``"),  # left double quote
        ("\u201d", "''"),  # right double quote
        # Times and arithmetic
        ("\u00d7", r"\ensuremath{\times}"),
        ("\u00f7", r"\ensuremath{\div}"),
        ("\u00b1", r"\ensuremath{\pm}"),
        ("\u2213", r"\ensuremath{\mp}"),
        ("\u00b7", r"\ensuremath{\cdot}"),
        ("\u00b5", r"\ensuremath{\mu}"),  # micro sign -> mu
        # Relations and operators
        ("\u2248", r"\ensuremath{\approx}"),
        ("\u223c", r"\ensuremath{\sim}"),
        ("\u2260", r"\ensuremath{\neq}"),
        ("\u2261", r"\ensuremath{\equiv}"),
        ("\u2264", r"\ensuremath{\le}"),
        ("\u2265", r"\ensuremath{\ge}"),
        ("\u221d", r"\ensuremath{\propto}"),
        ("\u221e", r"\ensuremath{\infty}"),
        ("\u2211", r"\ensuremath{\sum}"),
        ("\u2208", r"\ensuremath{\in}"),
        ("\u2209", r"\ensuremath{\notin}"),
        ("\u220b", r"\ensuremath{\ni}"),
        ("\u222a", r"\ensuremath{\cup}"),
        ("\u2229", r"\ensuremath{\cap}"),
        ("\u2282", r"\ensuremath{\subset}"),
        ("\u2286", r"\ensuremath{\subseteq}"),
        ("\u2283", r"\ensuremath{\supset}"),
        ("\u2287", r"\ensuremath{\supseteq}"),
        ("\u2205", r"\ensuremath{\emptyset}"),
        ("\u2218", r"\ensuremath{\circ}"),
        # Calculus / vectors
        ("\u2202", r"\ensuremath{\partial}"),
        ("\u2207", r"\ensuremath{\nabla}"),
        # Arrows
        ("\u2192", r"\ensuremath{\to}"),
        ("\u2190", r"\ensuremath{\leftarrow}"),
        ("\u2191", r"\ensuremath{\uparrow}"),
        ("\u2193", r"\ensuremath{\downarrow}"),
        ("\u2194", r"\ensuremath{\leftrightarrow}"),
        ("\u21d2", r"\ensuremath{\Rightarrow}"),
        ("\u21d0", r"\ensuremath{\Leftarrow}"),
        ("\u21d4", r"\ensuremath{\Leftrightarrow}"),
        # Degree and primes
        ("\u00b0", r"\ensuremath{^\circ}"),
        ("\u2032", r"\ensuremath{^{\prime}}"),
        ("\u2033", r"\ensuremath{^{\prime\prime}}"),
        ("\u2034", r"\ensuremath{^{\prime\prime\prime}}"),
        # Blackboard sets
        ("\u211d", r"\ensuremath{\mathbb{R}}"),
        ("\u2115", r"\ensuremath{\mathbb{N}}"),
        ("\u2124", r"\ensuremath{\mathbb{Z}}"),
        ("\u211a", r"\ensuremath{\mathbb{Q}}"),
        ("\u2102", r"\ensuremath{\mathbb{C}}"),
        ("\u2119", r"\ensuremath{\mathbb{P}}"),
        # Greek lowercase
        ("\u03b1", r"\ensuremath{\alpha}"),
        ("\u03b2", r"\ensuremath{\beta}"),
        ("\u03b3", r"\ensuremath{\gamma}"),
        ("\u03b4", r"\ensuremath{\delta}"),
        ("\u03b5", r"\ensuremath{\epsilon}"),
        ("\u03b6", r"\ensuremath{\zeta}"),
        ("\u03b7", r"\ensuremath{\eta}"),
        ("\u03b8", r"\ensuremath{\theta}"),
        ("\u03b9", r"\ensuremath{\iota}"),
        ("\u03ba", r"\ensuremath{\kappa}"),
        ("\u03bb", r"\ensuremath{\lambda}"),
        ("\u03bc", r"\ensuremath{\mu}"),
        ("\u03bd", r"\ensuremath{\nu}"),
        ("\u03be", r"\ensuremath{\xi}"),
        ("\u03bf", "o"),  # plain o
        ("\u03c0", r"\ensuremath{\pi}"),
        ("\u03c1", r"\ensuremath{\rho}"),
        ("\u03c2", r"\ensuremath{\varsigma}"),
        ("\u03c3", r"\ensuremath{\sigma}"),
        ("\u03c4", r"\ensuremath{\tau}"),
        ("\u03c5", r"\ensuremath{\upsilon}"),
        ("\u03c6", r"\ensuremath{\phi}"),
        ("\u03c7", r"\ensuremath{\chi}"),
        ("\u03c8", r"\ensuremath{\psi}"),
        ("\u03c9", r"\ensuremath{\omega}"),
        # Greek lowercase variants
        ("\u03d1", r"\ensuremath{\vartheta}"),
        ("\u03d5", r"\ensuremath{\varphi}"),
        ("\u03d6", r"\ensuremath{\varpi}"),
        ("\u03f0", r"\ensuremath{\varkappa}"),
        ("\u03f1", r"\ensuremath{\varrho}"),
        ("\u03f5", r"\ensuremath{\varepsilon}"),
        # Greek uppercase
        ("\u0393", r"\ensuremath{\Gamma}"),
        ("\u0394", r"\ensuremath{\Delta}"),
        ("\u0398", r"\ensuremath{\Theta}"),
        ("\u039b", r"\ensuremath{\Lambda}"),
        ("\u039e", r"\ensuremath{\Xi}"),
        ("\u03a0", r"\ensuremath{\Pi}"),
        ("\u03a3", r"\ensuremath{\Sigma}"),
        ("\u03a5", r"\ensuremath{\Upsilon}"),
        ("\u03a6", r"\ensuremath{\Phi}"),
        ("\u03a7", "X"),  # capital chi -> Latin X (no \Chi macro)
        ("\u03a8", r"\ensuremath{\Psi}"),
        ("\u03a9", r"\ensuremath{\Omega}"),
        # Script/special letters
        ("\u2113", r"\ensuremath{\ell}"),  # ℓ -> \ell
        ("\U0001d4c1", r"\ensuremath{\ell}"),  # 𝓁 -> \ell
    ]
    out = s
    for u, t in replacements:
        out = out.replace(u, t)
    # Handle common combining diacritics (compose to LaTeX accents)
    import re

    # Use callable replacements so backslashes are preserved as literals in output
    combining_map = [
        ("\u0302", "hat"),  # combining circumflex
        ("\u0303", "tilde"),  # combining tilde
        ("\u0301", "acute"),  # combining acute accent
        ("\u0300", "grave"),  # combining grave accent
        ("\u0308", "ddot"),  # combining diaeresis
        ("\u0307", "dot"),  # combining dot above
        ("\u0306", "breve"),  # combining breve
        ("\u030c", "check"),  # combining caron
        ("\u030a", "mathring"),  # combining ring above
    ]
    for uni, macro in combining_map:
        pattern = rf"([A-Za-z]){uni}"
        out = re.sub(
            pattern,
            lambda m, macro=macro: f"\\ensuremath{{\\{macro}{{{m.group(1)}}}}}",
            out,
        )
    return out


def _auto_math_wrap(s: str) -> str:
    """Wrap common math tokens in \ensuremath{...} to avoid Missing $ errors.
    Conservative rules: subscripts like w_j, superscripts like v^2 or v^T, and norms ||w||_1.
    """
    if not isinstance(s, str):
        s = str(s)

    # Avoid operating inside existing ensuremath blocks by temporarily shielding them
    def shield_ensuremath(text: str) -> Tuple[str, Dict[str, str]]:
        out = []
        mapping: Dict[str, str] = {}
        i = 0
        token_idx = 0
        while i < len(text):
            if text.startswith("\\ensuremath{", i):
                j = i + len("\\ensuremath{")
                depth = 1
                while j < len(text) and depth > 0:
                    if text[j] == "{":
                        depth += 1
                    elif text[j] == "}":
                        depth -= 1
                    j += 1
                segment = text[i:j]
                key = f"<<MATH{token_idx}>>"
                mapping[key] = segment
                out.append(key)
                token_idx += 1
                i = j
            else:
                out.append(text[i])
                i += 1
        return ("".join(out), mapping)

    def unshield(text: str, mapping: Dict[str, str]) -> str:
        for k, v in mapping.items():
            text = text.replace(k, v)
        return text

    txt, mp = shield_ensuremath(s)
    # Norms: ||x||_p -> \lVert x \rVert_{p}
    txt = re.sub(
        r"\|\|([^|]+)\|\|_([0-9]+)", r"\\ensuremath{\\lVert \1 \\rVert_{\2}}", txt
    )
    # Transpose: v^T -> v^{\top}
    txt = re.sub(r"\b([A-Za-z])\s*\^\s*T\b", r"\\ensuremath{\1^{\\top}}", txt)
    # Generic letter^letter: a^d -> a^{d}
    txt = re.sub(r"\b([A-Za-z])\s*\^\s*([A-Za-z])\b", r"\\ensuremath{\1^{\2}}", txt)
    # Simple superscripts: a^2 -> a^{2}
    txt = re.sub(r"\b([A-Za-z])\s*\^\s*([0-9]+)\b", r"\\ensuremath{\1^{\2}}", txt)
    # Subscripts: a_b -> a_{b}
    txt = re.sub(r"\b([A-Za-z])_([A-Za-z0-9]+)\b", r"\\ensuremath{\1_{\2}}", txt)
    # ASCII tilde used as similarity: ~ -> \sim
    txt = txt.replace("~", r"\\ensuremath{\\sim}")
    return unshield(txt, mp)


def _latex_sanitize_outside_math(text: str) -> str:
    """Escape special chars outside math (\ensuremath{...}).
    Also escape stray carets to avoid Missing $ errors.
    """
    if not isinstance(text, str):
        text = str(text)

    # Shield ensuremath blocks
    def shield_ensuremath(text: str) -> Tuple[str, Dict[str, str]]:
        out = []
        mapping: Dict[str, str] = {}
        i = 0
        token_idx = 0
        while i < len(text):
            if text.startswith("\\ensuremath{", i):
                j = i + len("\\ensuremath{")
                depth = 1
                while j < len(text) and depth > 0:
                    if text[j] == "{":
                        depth += 1
                    elif text[j] == "}":
                        depth -= 1
                    j += 1
                segment = text[i:j]
                key = f"<<MATH{token_idx}>>"
                mapping[key] = segment
                out.append(key)
                token_idx += 1
                i = j
            else:
                out.append(text[i])
                i += 1
        return ("".join(out), mapping)

    def unshield(text: str, mapping: Dict[str, str]) -> str:
        for k, v in mapping.items():
            text = text.replace(k, v)
        return text

    txt, mp = shield_ensuremath(text)
    # Escape special LaTeX characters outside math
    txt = re.sub(r"(?<!\\)&", r"\\&", txt)
    txt = re.sub(r"(?<!\\)%", r"\\%", txt)
    txt = re.sub(r"(?<!\\)#", r"\\#", txt)
    txt = re.sub(r"(?<!\\)_", r"\\_", txt)
    # Escape stray carets outside math
    txt = re.sub(r"\^", r"\\^{}", txt)
    return unshield(txt, mp)


def _parse_bibtex_entries(response: str) -> List[str]:
    """Split raw LLM output into individual BibTeX entry strings.

    A line starting with ``@`` always opens a new entry. When that line also
    contains the opening ``{``, brace depth is tracked and the entry is closed
    as soon as the braces balance, so trailing chatter or Markdown fences are
    not glued onto the preceding entry. Otherwise the entry runs until the
    next ``@`` line or the end of the text. Lines are stripped and blank
    lines are dropped; text outside any entry is ignored.
    """
    entries: List[str] = []
    current: List[str] = []
    depth = 0
    track_braces = False

    for raw_line in response.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("@"):
            # A new header terminates whatever entry was still open.
            if current:
                entries.append("\n".join(current))
            current = [line]
            depth = 0
            track_braces = "{" in line
        elif current:
            current.append(line)
        else:
            continue  # prose or code fence outside any entry
        depth += line.count("{") - line.count("}")
        if track_braces and depth <= 0:
            entries.append("\n".join(current))
            current = []

    if current:
        entries.append("\n".join(current))
    return entries


def generate_references_with_llm(
    title: str, abstract: str, sections: Dict[str, str], num_refs: int = 20
) -> List[str]:
    """Generate relevant academic references using LLM based on paper content.

    The prompt is built from the title, the abstract and the first 1000
    characters of each of the Introduction / Related Work / Method /
    Experiments / Results sections that are present. The LLM is called up to
    three times; after each failed attempt the temperature is raised by 0.1
    (capped at 1.0).

    Args:
        title: Paper title
        abstract: Paper abstract
        sections: Dictionary of section names to content
        num_refs: Number of references to generate

    Returns:
        List of BibTeX formatted reference strings (at most ``num_refs``), or
        an empty list when every attempt failed.

    Raises:
        RuntimeError: If the Azure OpenAI settings in ``local_review_config``
            are missing or empty.
    """
    # Get API credentials from local_review_config
    try:
        # First try package import (when run as part of package)
        from .local_review_config import (
            AZURE_OPENAI_API_KEY,
            AZURE_OPENAI_ENDPOINT,
            AZURE_OPENAI_API_VERSION,
            AZURE_OPENAI_MODEL,
            DEFAULT_TEMPERATURE,
        )

        rk = AZURE_OPENAI_API_KEY
        rept = AZURE_OPENAI_ENDPOINT
        rav = AZURE_OPENAI_API_VERSION
        rm = AZURE_OPENAI_MODEL
        temp = DEFAULT_TEMPERATURE
    except (ImportError, AttributeError):
        # Fallback to direct import (when run as script)
        import local_review_config as lrc

        rk = getattr(lrc, "AZURE_OPENAI_API_KEY", None)
        rept = getattr(lrc, "AZURE_OPENAI_ENDPOINT", None)
        rav = getattr(lrc, "AZURE_OPENAI_API_VERSION", None)
        rm = getattr(lrc, "AZURE_OPENAI_MODEL", None)
        temp = getattr(lrc, "DEFAULT_TEMPERATURE", None)

    # Validate that all required config is present
    if not rk or not rept or not rav or not rm or temp is None:
        raise RuntimeError(
            f"Missing Azure OpenAI configuration. Found: rk={bool(rk)}, rept={bool(rept)}, rav={bool(rav)}, rm={bool(rm)}, temp={temp}"
        )

    # Extract key content for reference generation
    content_summary = f"Title: {title}\n\nAbstract: {abstract}\n\n"

    # Add key sections content (limit to avoid token limits)
    key_sections = ["Introduction", "Related Work", "Method", "Experiments", "Results"]
    for section_name in key_sections:
        if section_name in sections:
            section_content = sections[section_name][:1000]  # Limit content length
            content_summary += f"{section_name}: {section_content}\n\n"

    # Create prompt for LLM
    system_prompt = """You are an expert academic researcher. Based on the paper content provided, generate relevant academic references in BibTeX format. Focus on:
1. Foundational papers in the field
2. Recent papers (last 3-5 years) related to the topic
3. Key papers that would be cited in a real academic paper on this topic
4. Mix of conference papers, journal articles, and arXiv preprints

Generate references that are actually relevant to the paper's content and methodology."""

    user_prompt = f"""Based on this paper content, generate {num_refs} relevant academic references in BibTeX format:

{content_summary}

Please generate {num_refs} BibTeX entries that would be appropriate citations for this paper. Include a mix of:
- Foundational papers in the relevant field
- Recent papers (2020-2025) on similar topics
- Key papers that address the methods or problems discussed

Each reference should be in proper BibTeX format with appropriate fields (author, title, booktitle/journal, year, etc.).

Return only the BibTeX entries, one per line, without any additional text or formatting."""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # NOTE(review): only the temperature is passed to chat(); the credentials
    # validated above are presumably picked up by chat() from the environment
    # or its own config -- confirm against llm_client.

    max_retries = 3  # Initial attempt + 2 retries
    logger = logging.getLogger("minimal")

    for attempt in range(max_retries):
        is_last = attempt == max_retries - 1
        try:
            response = chat(messages, temperature=temp)
            entries = _parse_bibtex_entries(response) if response else []
        except Exception as e:
            # Best-effort step: any client/parsing failure just triggers a retry.
            if is_last:
                logger.error(
                    f"All {max_retries} BibTeX generation attempts failed: {e}"
                )
            else:
                logger.warning(
                    f"BibTeX generation attempt {attempt + 1} failed: {e}. Retrying..."
                )
                # Adjust temperature for retry
                temp = min(temp + 0.1, 1.0)
            continue

        if entries:
            return entries[:num_refs]  # Limit to requested number

        # Empty response or nothing that looks like BibTeX.
        if is_last:
            logger.error(
                f"All {max_retries} BibTeX parsing attempts failed: no valid entries found"
            )
        else:
            logger.warning(
                f"BibTeX parsing attempt {attempt + 1} failed: no valid entries found. Retrying..."
            )
            # Adjust temperature for retry to get more varied responses
            temp = min(temp + 0.1, 1.0)

    # Fallback: return empty list if all attempts failed
    return []


def _write_references_bib(references: List[str], latex_dir: str) -> None:
    """Write ``references.bib`` into *latex_dir* from LLM-generated entries.

    Entries that look like BibTeX (start with ``@``) have their field text
    sanitized while the ``@type{key,`` header is kept verbatim; anything else
    is wrapped as a minimal ``@misc`` entry with a ``note`` field. Blank
    entries are skipped.

    Args:
        references: Reference strings; ``None`` or empty writes an empty file.
        latex_dir: Directory that receives ``references.bib``.
    """

    def _sanitize_bibtex_text(text: str) -> str:
        # Map unicode to TeX macros first
        s = _unicode_to_tex(text or "")
        # Escape common special chars not already escaped
        s = re.sub(r"(?<!\\)&", r"\\&", s)
        s = re.sub(r"(?<!\\)%", r"\\%", s)
        s = re.sub(r"(?<!\\)#", r"\\#", s)
        # Underscore is common in titles/urls; escape when not already escaped
        s = re.sub(r"(?<!\\)_", r"\\_", s)
        return s

    # Matches the "@type{citekey," header of a regular entry. The cite key is
    # an identifier, not TeX text: escaping "_" in it yields keys such as
    # "smith\_2020" that LaTeX cannot use in \bibcite.
    header_re = re.compile(r"@\s*\w+\s*[{(]\s*[^\s,{}()=]*\s*,")

    lines: List[str] = []
    for i, r in enumerate(references or [], 1):
        r = r.strip()
        if not r:
            continue
        if r.startswith("@"):
            header = header_re.match(r)
            if header:
                # Sanitize only the fields; leave entry type and key as-is.
                sanitized = header.group(0) + _sanitize_bibtex_text(
                    r[header.end() :]
                )
            else:
                # No recognizable key (e.g. @string/@comment): sanitize all.
                sanitized = _sanitize_bibtex_text(r)
            lines.append(sanitized if sanitized.endswith("\n") else sanitized + "\n")
        else:
            key = f"ref{i}"
            # escape braces in note
            safe_note = _sanitize_bibtex_text(r).replace("{", "\\{").replace("}", "\\}")
            lines.append(f"@misc{{{key},\n  note = {{{safe_note}}}\n}}\n")
    path = os.path.join(latex_dir, "references.bib")
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)
    logging.getLogger("minimal").info(f"Wrote {len(lines)} BibTeX entries -> {path}")


def _coerce_sections(sections: Any) -> Dict[str, str]:
    """Convert various section formats into a dict of {Title: Text}.

    Accepted shapes:
      * falsy value -> ``{}``
      * dict -> keys stringified; list values joined with blank lines
      * list -> dict items use ``title``/``name``/``heading`` for the title
        (default ``"Section N"``) and the first non-empty of ``text``,
        ``content``, ``body``, ``section``, ``value`` (falling back to
        ``paragraphs``) for the text; other items become ``"Section N"``
      * anything else -> a single ``"Content"`` section

    ``None`` values become empty text rather than the literal string "None".
    """

    def as_text(value: Any) -> str:
        """Render a section body: None -> "", list -> paragraphs, else str()."""
        if value is None:
            return ""
        if isinstance(value, list):
            return "\n\n".join(str(x) for x in value)
        return str(value)

    if not sections:
        return {}

    if isinstance(sections, dict):
        return {str(k): as_text(v) for k, v in sections.items()}

    if isinstance(sections, list):
        body_keys = ("text", "content", "body", "section", "value")
        out: Dict[str, str] = {}
        for idx, item in enumerate(sections, 1):
            if not isinstance(item, dict):
                out[f"Section {idx}"] = "" if item is None else str(item)
                continue
            title = (
                item.get("title")
                or item.get("name")
                or item.get("heading")
                or f"Section {idx}"
            )
            # First truthy body-like field wins; empty string when none is set.
            body = next((item[k] for k in body_keys if item.get(k)), "")
            # Backward-compatible fallback: if no body fields, accept paragraphs list
            if (not body or str(body).strip() == "") and "paragraphs" in item:
                paras = item.get("paragraphs")
                if isinstance(paras, (list, str)):
                    body = paras
            out[str(title)] = as_text(body)
        return out

    # Fallback to a single unnamed section
    return {"Content": str(sections)}


def _ensure_related_work_order(sec_map: Dict[str, str]) -> Dict[str, str]:
    """Guarantee a 'Related Work' section, placed directly after 'Introduction'.

    Section names are compared case-insensitively after stripping whitespace.
    - Empty input is returned unchanged.
    - Without an Introduction, a missing Related Work is appended at the end
      (an existing one is left where it is).
    - With an Introduction, the first Related Work section is moved right
      after it under the key 'Related Work'; if there is none, an empty
      placeholder is inserted there.
    All other sections keep their original relative order.
    """
    if not sec_map:
        return sec_map

    def canon(name: str) -> str:
        return name.strip().lower()

    titles = list(sec_map)
    intro_key = next((t for t in titles if canon(t) == "introduction"), None)
    rw_key = next((t for t in titles if canon(t) == "related work"), None)

    if intro_key is None:
        # Nothing to anchor on: only make sure the section exists.
        if rw_key is None:
            return {**sec_map, "Related Work": ""}
        return sec_map

    rw_text = "" if rw_key is None else sec_map[rw_key]
    ordered: Dict[str, str] = {}
    for t in titles:
        if t == rw_key:
            # Re-emitted right after the Introduction below.
            continue
        ordered[t] = sec_map[t]
        if t == intro_key:
            ordered["Related Work"] = rw_text
    return ordered


def fill_template(
    template_dir: str,
    out_dir: str,
    title: str,
    abstract: str,
    sections: Any,
    figs: List[Tuple[str, str, Optional[str]]],
    refs: List[str],
):
    """Copy the LaTeX template and fill it with the generated paper content.

    ``<out_dir>/latex`` is recreated from *template_dir*. The main ``.tex``
    file (``template.tex``, else the first ``.tex`` found, else a freshly
    written minimal document) keeps its preamble minus any title, author and
    bibliography commands; everything between ``\\begin{document}`` and
    ``\\end{document}`` is replaced by title, abstract, sections, figures and
    BibTeX commands. ``references.bib`` is written next to it.

    Args:
        template_dir: Directory holding the LaTeX template.
        out_dir: Output directory; the ``latex`` sub-directory is replaced.
        title: Paper title.
        abstract: Paper abstract.
        sections: Section content in any shape ``_coerce_sections`` accepts.
        figs: ``(filename, caption, section)`` triples; ``None`` is treated as
            no figures. Figures are placed in the matching canonical section
            (default Results) or appended at the end if it is not rendered.
        refs: Reference strings passed to ``_write_references_bib``.
    """
    logger = logging.getLogger("minimal")
    figs = figs or []
    latex_dir = osp.join(out_dir, "latex")
    if osp.isdir(latex_dir):
        shutil.rmtree(latex_dir)
    shutil.copytree(template_dir, latex_dir)

    # find main tex
    main_tex = osp.join(latex_dir, "template.tex")
    if not osp.exists(main_tex):
        cands = [f for f in os.listdir(latex_dir) if f.endswith(".tex")]
        if cands:
            main_tex = osp.join(latex_dir, cands[0])
        else:
            with open(main_tex, "w", encoding="utf-8") as f:
                f.write(
                    "\\documentclass{article}\n\\begin{document}\n\\end{document}\n"
                )

    with open(main_tex, "r", encoding="utf-8") as f:
        tex = f.read()
    # If the template embeds references via filecontents, strip it to avoid
    # conflicts with our generated references.bib
    tex = _strip_filecontents_references(tex)

    # Find document environment boundaries without regex to avoid escape pitfalls
    start_tag = "\\begin{document}"
    end_tag = "\\end{document}"
    s_idx = tex.find(start_tag)
    e_idx = tex.rfind(end_tag)
    if s_idx != -1 and e_idx != -1 and e_idx > s_idx:
        preamble = tex[:s_idx]
        post = tex[e_idx:]
    else:
        # No usable document environment: treat the whole file as preamble.
        preamble = tex
        post = end_tag + "\n"
    # Strip any existing title/author/bibliography commands in preamble to avoid duplicates
    preamble = re.sub(r"\\title\{.*?\}\s*", "", preamble, flags=re.S)
    preamble = re.sub(r"\\author\{.*?\}\s*", "", preamble, flags=re.S)
    preamble = re.sub(r"\\bibliographystyle\{.*?\}\s*", "", preamble)
    preamble = re.sub(r"\\bibliography\{.*?\}\s*", "", preamble)

    def to_tex(raw: Optional[str]) -> str:
        """Run prose through the unicode -> math-wrap -> escape chain."""
        return _latex_sanitize_outside_math(
            _auto_math_wrap(_unicode_to_tex(raw or ""))
        )

    def figure_block(fn: str, cap: str) -> List[str]:
        """Return the LaTeX lines for one figure stored one level above latex/."""
        return [
            "\\begin{figure}[ht]",
            "\\centering",
            f"\\includegraphics[width=0.9\\linewidth]{{../{fn}}}",
            f"\\caption{{{to_tex(cap)}}}",
            "\\end{figure}",
        ]

    # Prepare sections (math-aware sanitizing happens in to_tex)
    sec_map = _coerce_sections(sections)
    sec_map = _ensure_related_work_order(sec_map)

    # Inject our title/author into the preamble before \begin{document}
    pre = (
        preamble
        + f"\\title{{{to_tex(title)}}}\n"
        + "\\author{AI Scientist}\n"
        + start_tag
        + "\n"
    )

    body = [
        "\\maketitle",
        "",
        "\\begin{abstract}",
        to_tex(abstract),
        "\\end{abstract}",
    ]

    # Prepare figure buckets by section (default to Results if unspecified)
    canonical_lower = {"introduction", "method", "experiments", "results", "discussion"}

    def norm_sec(s: Optional[str]) -> str:
        s_low = (s or "").strip().lower()
        return s_low.title() if s_low in canonical_lower else "Results"

    fig_by_section: Dict[str, List[Tuple[str, str]]] = {}
    for fn, cap, sec in figs:
        fig_by_section.setdefault(norm_sec(sec), []).append((fn, cap))

    for name, txt in sec_map.items():
        body += [f"\\section{{{to_tex(name)}}}", to_tex(txt)]
        # Only insert figures for canonical sections; skip non-canonical
        # ones (e.g., Related Work, Limitations), which norm_sec would
        # otherwise fold into Results.
        if (name or "").strip().lower() in canonical_lower:
            for fn, cap in fig_by_section.get(norm_sec(name), []):
                body += figure_block(fn, cap)

    # Any figures targeting a canonical section we didn't render (edge case)
    # are appended at the end.
    used_keys = {
        norm_sec(k) for k in sec_map if (k or "").strip().lower() in canonical_lower
    }
    for key, bucket in fig_by_section.items():
        if key not in used_keys:
            for fn, cap in bucket:
                body += figure_block(fn, cap)

    # Always use BibTeX with ICLR style if available; otherwise still include BibTeX commands
    _write_references_bib(refs, latex_dir)
    body += [
        "",
        "\\bibliographystyle{iclr2024_conference}",
        "\\nocite{*}",
        "\\bibliography{references}",
    ]

    with open(main_tex, "w", encoding="utf-8") as f:
        f.write(pre + "\n".join(body) + "\n" + post)
    logger.info(f"Filled LaTeX template -> {main_tex} (figs={len(figs)}, bib=Yes)")


def compile_pdf(out_dir: str):
    """Compile the filled template in ``<out_dir>/latex`` to PDF (best effort).

    Steps: run the figure safety scan on the main ``.tex`` file, run one
    pdflatex pass, and - if the source contains a ``\\bibliography`` command
    and ``bibtex`` is installed - run bibtex followed by two more pdflatex
    passes. The main file is ``template.tex`` when present, otherwise the
    first ``.tex`` file found in the directory. Tool failures and non-zero
    exit codes are logged, never raised; if pdflatex is not installed the
    compile is skipped.

    Args:
        out_dir: Idea directory containing the ``latex`` sub-directory.
    """
    import subprocess

    logger = logging.getLogger("minimal")
    cwd = osp.join(out_dir, "latex")
    logger.info("Compiling PDF with pdflatex (2 passes)…")

    # Ensure all included graphics are present/valid; hide missing/invalid/too-dark images
    tex_name = "template.tex"
    try:
        # detect main tex
        if not osp.exists(osp.join(cwd, tex_name)):
            cands = [f for f in os.listdir(cwd) if f.endswith(".tex")]
            if cands:
                tex_name = cands[0]
        tex_main = osp.join(cwd, tex_name)
        if osp.exists(tex_main):
            report = scan_and_fix_graphics(tex_main)
            logger.info(
                "Figure safety: hidden=%d, fixed=%d, generated=%d, unchanged=%d",
                len(report.get("hidden", [])),
                len(report.get("fixed", [])),
                len(report.get("generated", [])),
                len(report.get("unchanged", [])),
            )
    except Exception:
        logger.exception("Figure safety scan failed; continuing.")

    # If pdflatex is not available, skip gracefully
    if shutil.which("pdflatex") is None:
        logger.warning("pdflatex not found; skipping PDF compile.")
        return

    def run_tool(cmd: List[str], label: str, failure_msg: str) -> None:
        """Run one LaTeX tool in the latex dir; log exit code or the failure."""
        try:
            res = subprocess.run(
                cmd,
                cwd=cwd,
                check=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=120,
            )
            logger.info(f"{label} exit code: {res.returncode}")
        except Exception:
            logger.exception(failure_msg)

    # Compile the file that was actually detected above, not a fixed name.
    pdflatex_cmd = ["pdflatex", "-interaction=nonstopmode", tex_name]

    # First LaTeX pass
    run_tool(pdflatex_cmd, "pdflatex pass 1", "pdflatex invocation failed.")

    # If bibliography commands present, run bibtex then two more LaTeX passes
    try:
        with open(osp.join(cwd, tex_name), "r", encoding="utf-8") as f:
            needs_bib = "\\bibliography" in f.read()
    except (OSError, UnicodeError):
        needs_bib = False

    if needs_bib and shutil.which("bibtex") is not None:
        run_tool(
            ["bibtex", osp.splitext(tex_name)[0]],
            "bibtex",
            "bibtex invocation failed.",
        )
        # Two more pdflatex passes to resolve references
        for i in range(2):
            run_tool(
                pdflatex_cmd,
                f"pdflatex (post-bib) pass {i+1}",
                "pdflatex (post-bib) failed.",
            )


def _process_one_idea(
    idea: Dict[str, Any],
    idx: int,
    total: int,
    out_dir: str,
    template_dir: str,
    enable_review: bool = False,
) -> Dict[str, Any]:
    """Process a single idea end-to-end. Returns a summary dict.

    Steps: request the paper package, request extra figures and render all of
    them, generate references, fill the LaTeX template, compile the PDF and -
    when *enable_review* is set - run an LLM review on the PDF (falling back
    to the ``.tex`` source, then to text assembled from the package).
    Artifacts go to ``<out_dir>/idea_NN``. Any failure is logged rather than
    raised, so a summary is always returned.

    Runs synchronously; safe to call in threads since each idea uses its own folder.

    Args:
        idea: Idea dict; ``Title``/``Name`` are used for display.
        idx: 1-based index of this idea (used for the folder name and logs).
        total: Total number of ideas (used for logs only).
        out_dir: Run directory under which the idea folder is created.
        template_dir: LaTeX template directory passed to ``fill_template``.
        enable_review: Whether to run the review step.

    Returns:
        Dict with ``index``, ``title``, ``dir``, ``decision`` and ``overall``;
        the last two are ``None`` unless a review produced them.
    """
    logger = logging.getLogger("minimal")
    tag = f"[{idx}/{total}]"
    idea_title = idea.get("Title") or idea.get("Name") or f"Idea {idx}"
    idea_dir = osp.join(out_dir, f"idea_{idx:02d}")
    os.makedirs(idea_dir, exist_ok=True)
    logger.info(f"{tag} Generating paper for: {idea_title}")

    review_decision, review_overall = None, None
    try:
        # 1) request package (sections + figs)
        pkg = request_paper_package(idea)
        with open(osp.join(idea_dir, "package.json"), "w", encoding="utf-8") as f:
            json.dump(pkg, f, indent=2)

        # Resolve the content once so every later step sees the same values;
        # a missing, null or empty title falls back to the idea title.
        paper_title = pkg.get("title") or idea.get("Title") or "Untitled"
        paper_abstract = pkg.get("abstract", "")
        paper_sections = pkg.get("sections", {}) or {}

        # 2) request additional figures and render all
        extra = request_more_figures(idea, k=2)
        all_figs_spec = (pkg.get("figures") or []) + (extra or [])
        logger.info(f"{tag} Rendering {len(all_figs_spec)} total figures…")
        figs = render_figures(all_figs_spec, idea_dir)

        # 3) generate references with LLM
        logger.info(f"{tag} Generating references with LLM…")
        refs = generate_references_with_llm(
            title=paper_title,
            abstract=paper_abstract,
            sections=paper_sections,
            num_refs=20,
        )
        logger.info(f"{tag} Generated {len(refs)} references.")

        # 4) fill template and compile
        fill_template(
            template_dir,
            idea_dir,
            paper_title,
            paper_abstract,
            paper_sections,
            figs,
            refs,
        )
        compile_pdf(idea_dir)

        # 5) run a review on the produced paper (best-effort, controlled by flag)
        if enable_review:
            try:
                pdf_path = osp.join(idea_dir, "latex", "template.pdf")
                tex_path = osp.join(idea_dir, "latex", "template.tex")
                if osp.exists(pdf_path):
                    paper_text = load_paper(pdf_path)
                    src = "pdf"
                elif osp.exists(tex_path):
                    with open(tex_path, "r", encoding="utf-8") as f:
                        paper_text = f.read()
                    src = "tex"
                else:
                    # Neither artifact exists: review the raw package text.
                    secs = _coerce_sections(paper_sections)
                    parts = [paper_title, paper_abstract]
                    parts += [f"{k}\n{v}" for k, v in secs.items()]
                    paper_text = "\n\n".join(parts)
                    src = "synth"
                logger.info(f"{tag} Running LLM review (source={src})…")

                # Review config is loaded at module level

                review = run_review(
                    paper_text,
                    model=None,
                    client=None,
                    num_reflections=1,
                    num_fs_examples=3,
                    num_reviews_ensemble=1,
                    temperature=1,
                )
                # Get review details using the function instead of global variable
                review_details = get_last_review_details()
                logger.info(f"{tag} Review details type: {type(review_details)}")
                if review_details:
                    logger.info(
                        f"{tag} Review details keys: {list(review_details.keys()) if isinstance(review_details, dict) else 'Not a dict'}"
                    )

                # Prepare output JSON: always include aggregated review under 'review'.
                # If details available (per-model outputs), include them for debugging/analysis.
                out_obj = {"review": review}
                try:
                    if review_details and isinstance(review_details, dict):
                        models = review_details.get("models")
                        indiv = review_details.get("individual_reviews")
                        if indiv and len(indiv) > 0:
                            out_obj["models"] = models
                            out_obj["individual_reviews"] = indiv
                            # Only add aggregated if it's different from the main review
                            aggregated = review_details.get("aggregated")
                            if aggregated and aggregated != review:
                                out_obj["aggregated"] = aggregated
                except Exception:
                    # Details are optional: still save the basic review, but
                    # leave a trace instead of failing silently.
                    logger.warning(
                        f"{tag} Could not attach per-model review details.",
                        exc_info=True,
                    )

                with open(
                    osp.join(idea_dir, "review.json"), "w", encoding="utf-8"
                ) as f:
                    json.dump(out_obj, f, indent=2)
                review_decision = (review or {}).get("Decision")
                review_overall = (review or {}).get("Overall")
                if review_decision or review_overall:
                    logger.info(
                        f"{tag} Review summary -> Decision: {review_decision}, Overall: {review_overall}"
                    )
            except Exception:
                logger.exception(
                    f"{tag} Review step failed; continuing without review."
                )
    except Exception:
        logger.exception(f"{tag} Idea processing failed.")

    return {
        "index": idx,
        "title": idea_title,
        "dir": idea_dir,
        "decision": review_decision,
        "overall": review_overall,
    }


def run_pipeline(
    template_dir: str,
    seed: Dict[str, Any],
    out_root: str,
    num_ideas: int = 3,
    enable_review: bool = False,
) -> str:
    """Run the whole idea -> paper pipeline and return the run directory.

    A timestamped directory is created under *out_root*; the generated ideas
    are saved to ``ideas.json``, each idea is processed in its own worker
    thread via ``_process_one_idea``, and the per-idea summaries are written
    to ``index.json`` sorted by idea index.

    Raises:
        RuntimeError: If idea generation returns nothing.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    logger = _setup_logging()  # console
    logger.info(
        f"Starting pipeline: template_dir={template_dir}, out_root={out_root}, num_ideas={num_ideas}"
    )
    os.makedirs(out_root, exist_ok=True)
    out_dir = osp.join(out_root, datetime.now().strftime("%Y%m%d_%H%M%S"))
    os.makedirs(out_dir, exist_ok=True)
    _setup_logging(out_dir)  # file
    logger.info(f"Run directory: {out_dir}")

    # 1) generate ideas and persist them before anything else can fail
    ideas = generate_ideas(seed, n=num_ideas)
    with open(osp.join(out_dir, "ideas.json"), "w", encoding="utf-8") as fh:
        json.dump(ideas, fh, indent=2)
    if not ideas:
        raise RuntimeError("No ideas generated")

    total = len(ideas)
    logger.info(
        f"Drafting {total} papers (parallel by idea)… Review enabled: {enable_review}"
    )
    # One worker per idea, deliberately uncapped
    concurrency = max(1, total)
    logger.info(f"Resolved concurrency: {concurrency} worker(s) (no cap)")

    papers: List[Dict[str, Any]] = []
    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        pending = [
            pool.submit(
                _process_one_idea,
                idea,
                position,
                total,
                out_dir,
                template_dir,
                enable_review,
            )
            for position, idea in enumerate(ideas, start=1)
        ]
        for done in as_completed(pending):
            try:
                outcome = done.result()
            except Exception:
                logging.getLogger("minimal").exception("An idea task crashed.")
            else:
                papers.append(outcome)
                try:
                    logger.info(
                        f"[{outcome.get('index')}/{total}] Finished idea: {outcome.get('title')}"
                    )
                except Exception:
                    pass

    # Completion order is arbitrary; store the summary ordered by idea index
    papers = sorted(papers, key=lambda entry: entry.get("index", 0))

    # Write a top-level run summary
    with open(osp.join(out_dir, "index.json"), "w", encoding="utf-8") as fh:
        json.dump({"papers": papers}, fh, indent=2)

    logger.info("Pipeline completed for all ideas.")
    return out_dir
