import unittest

from scripts.make_paper_tables import (
    accepted_alternatives_table,
    aggregate_table,
    classification_table,
    execution_appendix_table,
    reconstruction_failure_class_table,
    execution_significance_table,
    execution_table,
    proxy_execution_gap_table,
    search_table,
)


class PaperTableTests(unittest.TestCase):
    """Smoke tests for the table builders in ``scripts.make_paper_tables``.

    Every test hands a builder a short list of row dicts whose values are
    strings, then checks that the rendered output contains the expected
    labels and formatted numbers.
    """

    def _assert_fragments(self, rendered, *fragments: str) -> None:
        """Assert, in the given order, that each fragment is in ``rendered``.

        Checking stops at the first missing fragment, the same as a plain
        sequence of individual ``assertIn`` calls.
        """
        for fragment in fragments:
            self.assertIn(fragment, rendered)

    def test_classification_table_contains_values(self) -> None:
        # A single raw / majority-class row; the output must show the
        # "Majority" label and "0.100" (only accuracy "0.1" can yield it).
        majority_row = {
            "representation": "raw",
            "model": "majority_class",
            "accuracy": "0.1",
            "macro_f1": "0.2",
            "top3_accuracy": "0.3",
        }
        rendered = classification_table([majority_row])
        self._assert_fragments(rendered, "Majority", "0.100")

    def test_aggregate_table_contains_stability_values(self) -> None:
        # Mean/std pairs for one premise + naive-Bayes row; the accuracy pair
        # is expected as "mean $\pm$ std" rounded to three decimals.
        stability_row = {
            "representation": "premise",
            "model": "text_naive_bayes",
            "accuracy_mean": "0.3774",
            "accuracy_std": "0.0238",
            "macro_f1_mean": "0.061",
            "macro_f1_std": "0.008",
            "top3_accuracy_mean": "0.6115",
            "top3_accuracy_std": "0.0254",
        }
        rendered = aggregate_table([stability_row])
        self._assert_fragments(
            rendered,
            "Future premise",
            "Naive Bayes",
            "0.377 $\\pm$ 0.024",
        )

    def test_search_table_contains_values(self) -> None:
        # The "family_guided" strategy must appear as "Hard family", and the
        # value "0.4" must be rendered as "0.400".
        guided_row = {
            "strategy": "family_guided",
            "family_success_at_1": "0.1",
            "family_success_at_5": "0.2",
            "exact_tactic_success_at_1": "0.3",
            "exact_tactic_success_at_5": "0.4",
        }
        rendered = search_table([guided_row])
        self._assert_fragments(rendered, "Hard family", "0.400")

    def test_execution_table_contains_coverage(self) -> None:
        # The same row object feeds both the main and the appendix builder,
        # main table first.
        row = {
            "strategy": "unguided",
            "accept_at_1_all": "0.443333",
            "accept_at_3_all": "0.593333",
            "accept_at_5_all": "0.68",
            "candidate_execution_coverage": "0.988",
            "reconstruction_failure_rate": "0.012",
            "accept_at_1_executable": "0.7",
            "accept_at_3_executable": "0.8",
            "accept_at_5_executable": "0.9",
            "executable_queries_at_5": "102",
            "reconstruction_unknown_identifier": "12",
            "reconstruction_elaboration": "6",
        }

        main_table = execution_table([row])
        self._assert_fragments(main_table, "Unguided", "0.680", "Coverage")

        appendix_table = execution_appendix_table([row])
        self._assert_fragments(appendix_table, "Exec.Q@5", "0.900")

    def test_execution_significance_table_contains_ci(self) -> None:
        # The bootstrap bounds must appear as a bracketed interval with
        # three decimals, next to the "Soft family" label.
        significance_row = {
            "strategy": "family_soft",
            "metric": "accept_at_5",
            "diff": "-0.0367",
            "bootstrap_ci_low": "-0.07",
            "bootstrap_ci_high": "-0.0033",
            "mcnemar_p": "0.0522",
        }
        rendered = execution_significance_table([significance_row])
        self._assert_fragments(rendered, "Soft family", "[-0.070, -0.003]")

    def test_proxy_execution_gap_table_contains_gap(self) -> None:
        # The gap "0.5106" must show up rounded to "0.511", together with the
        # "Trace E@5" text (presumably a column header).
        gap_row = {
            "strategy": "unguided",
            "proxy_exact_at_5": "0.1694",
            "lean_accept_at_5": "0.68",
            "lean_minus_proxy_exact_at_5": "0.5106",
        }
        rendered = proxy_execution_gap_table([gap_row])
        self._assert_fragments(rendered, "Trace E@5", "0.511")

    def test_reconstruction_failure_class_table_contains_taxonomy(self) -> None:
        # Input is a list of metric/value pairs. "159" equals 124 + 35, so it
        # is presumably the combined reconstruction-failure count.
        metric_rows = [
            {"metric": "sample_size", "value": "300"},
            {"metric": "n_candidates", "value": "7500"},
            {"metric": "reconstruction_unknown_identifier", "value": "124"},
            {"metric": "reconstruction_elaboration", "value": "35"},
            {"metric": "parse_error", "value": "0"},
            {"metric": "timeout", "value": "0"},
            {"metric": "infrastructure_error", "value": "0"},
        ]
        rendered = reconstruction_failure_class_table(metric_rows)
        self._assert_fragments(
            rendered,
            "159",
            "Unknown identifier",
            "Other tool failure",
        )

    def test_accepted_alternatives_table_contains_gap_terms(self) -> None:
        # Expects the "Non-trace" and "A-no-E" texts plus "0.600", which only
        # the "0.60" input can produce.
        alternatives_row = {
            "strategy": "unguided",
            "proxy_exact_at_5_on_sample": "0.12",
            "lean_accept_at_5": "0.67",
            "accepted_not_gold_at_5": "0.60",
            "accept_without_exact_at_5": "0.55",
            "exact_without_accept_at_5": "0.02",
        }
        rendered = accepted_alternatives_table([alternatives_row])
        self._assert_fragments(rendered, "Non-trace", "A-no-E", "0.600")


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
