from __future__ import annotations

import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, List, Optional, Tuple

from .config import DATA_PATH


@dataclass
class NLItem:
    """
    One entry of the natural-language JSON data, normalised into typed fields.

    The first eight fields are required and positional; the remaining ones
    are optional metadata with neutral defaults.
    """

    # Identity of the entry.
    index: int  # used as the sort key and filter threshold for items
    label: str
    env: str

    # Position of the entry within the document.
    chapter: int
    section: int
    local_index: int | None  # None when no local number could be determined

    # Statement text and the context mapping it was found in.
    content: str
    context: dict[str, Any]

    # Optional metadata.
    nl_answer: str | None = None
    notes: Any | None = None
    number_components: list[int] | None = None
    # Mutable defaults go through default_factory so instances never share them.
    dependencies: list[Any] = field(default_factory=list)
    target_file: str | None = None
    raw: dict[str, Any] = field(default_factory=dict)


def _infer_position(entry: dict[str, Any]) -> Tuple[int, int, Optional[int]]:
    """
    Derive (chapter, section, local_index) from context first, then number_components.

    Resolution order:
      * chapter: context.chapter_number, else the leading whitespace-separated
        token of the context.chapter string, else number_components[0] (only
        when at least two components exist), else 1 (single-chapter paper).
      * section: context.section_number (an int is used as-is, a string
        contributes its last dot-separated part), else number_components[1].
      * local_index: number_components[2] when present, else the sole
        component of a one-element number_components (the "paper JSON"
        format, e.g. "Lemma 8" -> [8]), else entry["index"].

    A number_components value that is not a list/tuple and a context value
    that is not a dict are treated as absent instead of crashing.

    Returns:
        (chapter, section, local_index); local_index is None when neither
        number_components nor entry["index"] supplies one.

    Raises:
        ValueError: if no section can be determined for the entry.
    """
    components = entry.get("number_components")
    if not isinstance(components, (list, tuple)):
        # load_items ignores a non-list number_components; do the same here so
        # that e.g. a bare int does not blow up in len() / indexing below.
        components = None

    ctx = entry.get("context")
    if not isinstance(ctx, dict):
        ctx = {}

    chapter = ctx.get("chapter_number")
    if chapter is None and isinstance(ctx.get("chapter"), str):
        try:
            chapter = int(ctx["chapter"].split()[0])
        except (IndexError, ValueError):
            # Blank title or a title that does not start with a number.
            chapter = None

    section: int | None = None
    section_number = ctx.get("section_number")
    if isinstance(section_number, str):
        try:
            # "2.3" -> 3, "4" -> 4
            section = int(section_number.rpartition(".")[2])
        except ValueError:
            section = None
    elif isinstance(section_number, int):
        section = section_number

    # Fill whatever the context did not provide from [chapter, section, ...].
    # Conversion stops at the first component that is not int-like, leaving
    # the remaining values unresolved.
    if (chapter is None or section is None) and components and len(components) >= 2:
        try:
            if chapter is None:
                chapter = int(components[0])
            if section is None:
                section = int(components[1])
        except (TypeError, ValueError):
            pass

    if chapter is None:
        # Final fallback: treat as a single-chapter paper.
        chapter = 1

    if section is None:
        raise ValueError(f"Missing chapter/section info in entry: {entry}")

    # Pick the component that encodes the local number, if any.
    candidate = None
    if components:
        if len(components) >= 3:
            candidate = components[2]
        elif len(components) == 1:
            candidate = components[0]

    local_index = None
    if candidate is not None:
        try:
            local_index = int(candidate)
        except (TypeError, ValueError):
            local_index = None
    if local_index is None:
        local_index = entry.get("index")

    return int(chapter), int(section), int(local_index) if local_index is not None else None


def load_items(
    data_path: Optional[Path] = None,
    *,
    content_key: str = "content",
    require_content: bool = False,
) -> list[NLItem]:
    """
    Load natural-language items from the given JSON file (default: DATA_PATH) and return them sorted by index.

    Args:
        data_path: JSON file to read; a plain string path is accepted as well.
            Falls back to DATA_PATH when omitted.
        content_key: Entry key that holds the statement text. When that key is
            absent or null, the "content" key is used instead.
        require_content: When true, entries whose content_key value is missing
            or empty are skipped.

    Returns:
        One NLItem per retained entry, ordered by ascending index.

    Raises:
        KeyError: if a retained entry lacks "index", "label" or "env".
        ValueError: propagated from _infer_position.
    """
    path = data_path or DATA_PATH
    if isinstance(path, str):
        path = Path(path)
    raw = json.loads(path.read_text(encoding="utf-8"))

    answer_keys = ("nl_answer", "natural_language_answer", "answer", "informal_proof")
    items: list[NLItem] = []
    for entry in raw:
        content_value = entry.get(content_key)
        if require_content and not content_value:
            continue
        chapter, section, local_index = _infer_position(entry)

        # First alternative that is a non-blank string wins; a blank or
        # non-string value no longer hides a usable later alternative.
        nl_answer = None
        for key in answer_keys:
            candidate = entry.get(key)
            if isinstance(candidate, str) and candidate.strip():
                nl_answer = candidate.strip()
                break

        dependencies = entry.get("dependencies")
        if not isinstance(dependencies, list):
            dependencies = []

        number_components = entry.get("number_components")
        if not isinstance(number_components, list):
            # Rebuild the components from the inferred position.
            number_components = [int(chapter), int(section)]
            if local_index is not None:
                number_components.append(int(local_index))

        notes = entry.get("notes")
        if notes is None:
            notes = entry.get("note")

        target_file = entry.get("target_file")
        if isinstance(target_file, str) and target_file.strip():
            target_file = target_file.strip()
        else:
            target_file = None

        # Keep NLItem.content a string even if every candidate is null.
        content = content_value
        if content is None:
            content = entry.get("content")
        if content is None:
            content = ""

        # A null / non-dict context is stored as an empty mapping so the
        # field always matches its declared type.
        context = entry.get("context")
        if not isinstance(context, dict):
            context = {}

        items.append(
            NLItem(
                index=entry["index"],
                label=entry["label"],
                env=entry["env"],
                chapter=chapter,
                section=section,
                local_index=local_index,
                content=content,
                context=context,
                nl_answer=nl_answer,
                notes=notes,
                number_components=number_components,
                dependencies=dependencies,
                target_file=target_file,
                raw=entry,
            )
        )
    items.sort(key=lambda x: x.index)
    return items


def filter_items(items: List[NLItem], *, min_index: Optional[int] = None) -> list[NLItem]:
    """
    Return a new list holding the items whose index is at least min_index.

    When min_index is None nothing is filtered out and a shallow copy of
    items is returned.
    """
    if min_index is not None:
        return list(filter(lambda entry: entry.index >= min_index, items))
    return list(items)
