#!/usr/bin/env python3
"""
Export Generator Spec Table

Generates the world generation hyperparameters table for the paper appendix.
These parameters are frozen for the v1 benchmark.

Usage:
    python -m concept_synth.analysis.export_generator_spec_table --out paper/auto/appendix
"""

import argparse
from dataclasses import dataclass
from pathlib import Path
from typing import Optional


@dataclass(frozen=True)
class BandSpec:
    """Immutable record describing one (task, band) row of the v1 table.

    String fields are interpolated into the generated LaTeX verbatim, so
    they must already be LaTeX-safe (e.g. underscores escaped).
    """

    # Task name, shown in the "Task" column.
    task: str
    # Band name, shown in the "Band" column.
    band: str
    # Lower end of the "Domain" range (rendered as "min--max").
    domain_min: int
    # Upper end of the "Domain" range.
    domain_max: int
    # Number of worlds per instance as display text,
    # e.g. "4" or "7--8 YES + 2--3 NO".
    k_worlds: str
    # Bernoulli probability for the unary predicates.
    p_unary: float
    # Regular out-degree for the binary predicates.
    out_degree: int
    # Fraction of atoms masked as unknown (rendered as a percentage);
    # None for FullObs/CI.
    unknown_rate: Optional[float]
    # Display text naming the predicates eligible for masking;
    # None for FullObs/CI.
    unknown_preds: Optional[str]


# Frozen v1 benchmark parameters, one entry per (task, band) table row.
# The list order is the row order of the generated table; entries of the
# same task are kept adjacent so the task separators land in the right place.
V1_BAND_SPECS = [
    # --- FullObs: fully observed worlds, no unknown masking ---
    BandSpec(
        task="FullObs", band="simple", domain_min=5, domain_max=7,
        k_worlds="4", p_unary=0.4, out_degree=2,
        unknown_rate=None, unknown_preds=None,
    ),
    BandSpec(
        task="FullObs", band="easy", domain_min=5, domain_max=7,
        k_worlds="6", p_unary=0.4, out_degree=2,
        unknown_rate=None, unknown_preds=None,
    ),
    BandSpec(
        task="FullObs", band="medium", domain_min=7, domain_max=10,
        k_worlds="8", p_unary=0.4, out_degree=2,
        unknown_rate=None, unknown_preds=None,
    ),
    BandSpec(
        task="FullObs", band="hard", domain_min=8, domain_max=12,
        k_worlds="10", p_unary=0.4, out_degree=2,
        unknown_rate=None, unknown_preds=None,
    ),
    BandSpec(
        task="FullObs", band="extreme", domain_min=8, domain_max=12,
        k_worlds="10", p_unary=0.4, out_degree=2,
        unknown_rate=None, unknown_preds=None,
    ),
    # --- CI: no unknown masking ---
    BandSpec(
        task="CI", band="core", domain_min=7, domain_max=9,
        k_worlds="7--8 YES + 2--3 NO", p_unary=0.4, out_degree=2,
        unknown_rate=None, unknown_preds=None,
    ),
    BandSpec(
        task="CI", band="lift\\_mix", domain_min=7, domain_max=9,
        k_worlds="7--8 YES + 2--3 NO", p_unary=0.4, out_degree=2,
        unknown_rate=None, unknown_preds=None,
    ),
    # --- EC: the only task with unknown masking ---
    BandSpec(
        task="EC", band="core", domain_min=6, domain_max=8,
        k_worlds="3", p_unary=0.4, out_degree=2,
        unknown_rate=0.20, unknown_preds="$R,S$",
    ),
    BandSpec(
        task="EC", band="hard", domain_min=7, domain_max=9,
        k_worlds="3", p_unary=0.4, out_degree=2,
        unknown_rate=0.20, unknown_preds="$R$ only",
    ),
]


def generate_world_gen_params_table(output_path: Path, standalone: bool = False) -> None:
    """Write the world generation hyperparameters table as a LaTeX fragment.

    The output consists of a ``table`` environment with one row per entry of
    ``V1_BAND_SPECS`` (a midrule is inserted whenever the task changes between
    consecutive entries), followed by three explanatory paragraphs on unary
    sampling, binary sampling and unknown masking.

    Args:
        output_path: Path of the .tex file to write. Missing parent
            directories are created and an existing file is overwritten.
        standalone: If True, prepend the "auto-generated, do not edit"
            comment header. No preamble or ``document`` environment is
            emitted in either mode.
    """
    lines = []

    if standalone:
        lines.extend(
            [
                "% Auto-generated by export_generator_spec_table.py",
                "% DO NOT EDIT MANUALLY - regenerate with:",
                "%   python -m concept_synth.analysis.export_generator_spec_table",
                "",
            ]
        )

    # Table environment
    lines.extend(
        [
            "\\begin{table}[h]",
            "\\centering",
            "\\caption{\\textbf{World generation hyperparameters (v1).} "
            "$p_{\\text{unary}}$ = Bernoulli probability for unary predicates $P,Q$; "
            "out-degree = regular out-degree for binary predicates $R,S$; "
            "$k$ = number of worlds per instance; unknown rate applies to EC only.}",
            "\\label{tab:world_gen_params}",
            "\\small",
            "\\begin{tabular}{@{}llcccccc@{}}",
            "\\toprule",
            "Task & Band & Domain & $k$ & $p_{\\text{unary}}$ & Out-degree & Unknown rate & Unknown preds \\\\",
            "\\midrule",
        ]
    )

    # Emit one row per spec; a midrule separates runs of different tasks, so
    # this relies on V1_BAND_SPECS keeping entries of a task adjacent.
    current_task = None
    for spec in V1_BAND_SPECS:
        if current_task is not None and spec.task != current_task:
            lines.append("\\midrule")
        current_task = spec.task

        domain = f"{spec.domain_min}--{spec.domain_max}"

        if spec.unknown_rate is not None:
            # Format with "g" rather than int(): int() truncates, so a product
            # such as 0.29 * 100 == 28.999999999999996 would be printed as 28%
            # and a fractional rate such as 12.5% would lose its decimals.
            unknown_rate = f"{spec.unknown_rate * 100:g}\\%"
        else:
            unknown_rate = "---"

        # A missing (or empty) predicate list is shown as a dash.
        unknown_preds = spec.unknown_preds or "---"

        lines.append(
            f"{spec.task} & {spec.band} & {domain} & {spec.k_worlds} & "
            f"{spec.p_unary} & {spec.out_degree} & {unknown_rate} & {unknown_preds} \\\\"
        )

    # Close table
    lines.extend(
        [
            "\\bottomrule",
            "\\end{tabular}",
            "\\end{table}",
            "",
        ]
    )

    # Supporting text paragraphs.
    # NOTE: the numeric values below (0.4, 2 edges, 20%) are hard-coded prose
    # and are not derived from V1_BAND_SPECS; keep them in sync by hand.
    lines.extend(
        [
            "\\textbf{Unary sampling}: Each element $a \\in D$ is assigned "
            "$P(a)=\\text{true}$ independently with probability $p_{\\text{unary}}=0.4$, "
            "subject to a balance constraint requiring 15--85\\% of the domain to satisfy "
            "each unary predicate.",
            "",
            "\\textbf{Binary sampling}: For regular out-degree mode, each element $a$ "
            "samples exactly 2 outgoing edges for $R$ (uniformly from $D \\setminus \\{a\\}$) "
            "and exactly 2 outgoing edges for $S$, yielding expected edge density "
            "$\\approx 2/|D|$ per relation.",
            "",
            "\\textbf{Unknown masking (EC)}: For each world, we collect all ground atoms "
            "of the unknown-eligible predicates, shuffle them, and mark the first "
            "$\\lfloor \\text{unknown\\_rate} \\times \\text{total\\_atoms} \\rfloor$ as unknown. "
            "The unknown rate is 20\\% for both EC bands; the core band masks atoms from "
            "$R$ and $S$, while the hard band masks only $R$ atoms.",
            "",
        ]
    )

    # Write output. The encoding is pinned so the result does not depend on
    # the platform's locale; the trailing "" entry yields a final newline.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(lines), encoding="utf-8")

    print(f"Generated: {output_path}")


def main():
    """Command-line entry point.

    Reads the output directory and the standalone flag from the command line
    and writes ``tab_world_gen_params.tex`` into that directory.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Export world generation hyperparameters table",
    )
    cli.add_argument(
        "--out", "-o", default="concept_synth/paper/auto/appendix", help="Output directory"
    )
    cli.add_argument(
        "--standalone",
        help="Generate standalone document (with header comments)",
        action="store_true",
    )
    options = cli.parse_args()

    # The file name is fixed; only the containing directory is configurable.
    target = Path(options.out) / "tab_world_gen_params.tex"
    generate_world_gen_params_table(target, standalone=options.standalone)


# Run the CLI only when this module is executed as a script (directly or via
# ``python -m``), not when it is imported.
if __name__ == "__main__":
    main()
