# Import Python packages.
import copy
from typing import Any, List, Mapping, Optional, Tuple

# Import external packages.
import pandas as pd

# Import PyTest packages.
import pytest
from category_encoders import HashingEncoder  # type: ignore[import-untyped]

# Import PyTest external packages.
from py._path.local import LocalPath

# Import developing library.
import fin_tech_py_toolkit as lib

# Import testing library.
from ....utils import eq_dataframe, to_eq_plural_ordered
from ...utils import template_test_io, template_test_transform


# Type aliases.
# Both the input and the output of the transformation under test are lists of Pandas data frames.
Input = List[pd.DataFrame]
Output = List[pd.DataFrame]


# Runtime constants.
# Identifier of the hash-encoding transformation on Pandas data; the tests below pass it to the
# test templates and to the transformation factory.
IDENTIFIER = lib.transforms.TransformHashEncodePandas._IDENTIFIER


def synthesize(
    *, ood: Optional[str]
) -> Tuple[Tuple[Input, Output], Input, Output, Mapping[str, Any]]:
    r"""
    Synthesize test I/O for hash encoding.

    Args
    ----
    - ood
        Category representation reserved for out-of-distribution.
        If it is null, out-of-distribution is not allowed.
        Only its nullness is consulted here: when it is not null, one extra row of categories
        unseen by the reference encoder is appended to the input and output cases.

    Returns
    -------
    - example
        Input and output examples.
    - input
        Input case.
    - output
        Output case.
    - supplement
        Supplementary material for the synthesized test.
        It holds the `category_encoders` initialization keyword arguments of the reference encoder.
    """
    # Create the categorical columns.
    categorical1 = ["010", "010", "010", "020", "030"]
    categorical2 = ['"X"', '"X"', '"X"', '"Y"', '"Z"']
    categorical3 = ["A", "B", "C", "D", "E"]

    # Extend every column by one unseen category when out-of-distribution is allowed.
    # The expected output is obtained by running the reference encoder on the unseen categories
    # as they are, so the same extended columns serve both the input and the output case.
    has_ood = ood is not None
    case1 = categorical1 + (["040"] if has_ood else [])
    case2 = categorical2 + (['"U"'] if has_ood else [])
    case3 = categorical3 + (["F"] if has_ood else [])

    # Create the reference encoder, fitted only on the in-distribution columns to be encoded.
    n_components = 2
    encoder = HashingEncoder(n_components=n_components).fit(
        pd.DataFrame({"categorical1": categorical1, "categorical2": categorical2})
    )

    # Create input and output examples.
    # The second frame has no columns but carries one index entry per example row.
    example_input: Input = [
        pd.DataFrame({"categorical1": categorical1, "categorical2": categorical2}),
        pd.DataFrame([], columns=[], index=range(len(categorical1))),
        pd.DataFrame([], columns=[]),
        pd.DataFrame([], columns=[]),
    ]
    example_output: Output = []

    # Input case.
    # The third categorical column is absent from the examples.
    case_input: Input = [
        pd.DataFrame({"categorical1": case1, "categorical2": case2, "categorical3": case3}),
        pd.DataFrame([], columns=[], index=range(len(case1))),
        pd.DataFrame([], columns=[]),
        pd.DataFrame([], columns=[]),
    ]

    # Output case.
    # Only the raw values of the reference encoding are kept; column names are set explicitly.
    encoded = encoder.transform(pd.DataFrame({"categorical1": case1, "categorical2": case2}))
    case_output: Output = [
        pd.DataFrame({"categorical3": case3}),
        pd.DataFrame(encoded.values, columns=["categorical1-hash", "categorical2-hash"]),
        pd.DataFrame([], columns=[]),
        pd.DataFrame([], columns=[]),
    ]

    return (
        (example_input, example_output),
        case_input,
        case_output,
        {"category_encoders_init_kwargs": {"n_components": n_components}},
    )


@pytest.mark.parametrize(
    ("raw_input", "raw_output"),
    [
        pytest.param(
            ...,
            None,
            id="unsupport-input",
            marks=[pytest.mark.xfail(raises=lib.transforms.ErrorTransformUnsupportPartial)],
        ),
        pytest.param(
            None,
            ...,
            id="unsupport-output",
            marks=[pytest.mark.xfail(raises=lib.transforms.ErrorTransformUnsupportPartial)],
        ),
        pytest.param(None, None, id="both-null"),
    ],
)
def test_io(*, raw_input: Any, raw_output: Any) -> None:
    r"""
    Test input and output domain formalization of the hash-encoding transformation.

    The cases where exactly one of the raw domains is non-null are expected to fail with
    `ErrorTransformUnsupportPartial`; the case where both are null is expected to pass.

    Args
    ----
    - raw_input
        Raw input.
    - raw_output
        Raw output.

    Returns
    -------
    """
    # Initialize testing transformation.
    transform_factory = lib.transforms.FactoryTransform()

    # Build the pair of data frame list comparators handed to the template.
    comparators = (to_eq_plural_ordered(eq_dataframe), to_eq_plural_ordered(eq_dataframe))

    # Fitting arguments forwarded to the template.
    fitting = {"category_encoders_init_kwargs": {"n_components": 2}}

    # Run test template.
    template_test_io(
        IDENTIFIER,
        transform_factory,
        raw_input,
        raw_output,
        *comparators,
        fit_kwargs=fitting,
    )


@pytest.mark.parametrize("ood", [pytest.param(None, id="default"), pytest.param("<unk>", id="ood")])
def test_default(*, tmpdir: LocalPath, ood: Optional[str]) -> None:
    r"""
    Test transformation for hash encoding on Pandas data.

    Args
    ----
    - tmpdir
        Temporary directory for this test.
        It is automatically provided by PyTest, so its value should not be explicitly defined.
    - ood
        Category representation reserved for out-of-distribution.
        If it is null, out-of-distribution is not allowed.

    Returns
    -------
    """
    # Initialize testing transformation.
    workspace = str(tmpdir)
    transform_factory = lib.transforms.FactoryTransform()

    # Generate the examples, the input case, the expected output case and the supplement.
    example, given, expected, supplement = synthesize(ood=ood)

    # Build the pair of data frame list comparators handed to the template.
    comparators = (to_eq_plural_ordered(eq_dataframe), to_eq_plural_ordered(eq_dataframe))

    # Fit with the same encoder initialization arguments as the synthesized reference.
    fitting = {"category_encoders_init_kwargs": supplement["category_encoders_init_kwargs"]}

    # Run test template.
    template_test_transform(
        workspace,
        IDENTIFIER,
        transform_factory,
        example,
        given,
        expected,
        *comparators,
        fit_kwargs=fitting,
    )


def test_empty(*, tmpdir: LocalPath) -> None:
    r"""
    Test transformation for hash encoding on empty Pandas data.

    The input and output cases are whatever the transformation itself formalizes from null raw
    domains; the formalized input also serves as the input example.

    Args
    ----
    - tmpdir
        Temporary directory for this test.
        It is automatically provided by PyTest, so its value should not be explicitly defined.

    Returns
    -------
    """
    # Initialize testing transformation.
    workspace = str(tmpdir)
    transform_factory = lib.transforms.FactoryTransform()

    # Let a fresh transformation formalize null raw domains into the input and output cases.
    probe = transform_factory.from_args(IDENTIFIER)
    given = probe.input(None)
    expected = probe.output(None)

    # The examples reuse the formalized input and carry no output example.
    example: Tuple[Input, Output] = (given, [])

    # Build the pair of data frame list comparators handed to the template.
    comparators = (to_eq_plural_ordered(eq_dataframe), to_eq_plural_ordered(eq_dataframe))

    # Fitting arguments forwarded to the template.
    fitting = {"category_encoders_init_kwargs": {"n_components": 2}}

    # Run test template.
    template_test_transform(
        workspace,
        IDENTIFIER,
        transform_factory,
        example,
        given,
        expected,
        *comparators,
        fit_kwargs=fitting,
    )


@pytest.mark.xfail(raises=lib.transforms.ErrorTransformUnsupportPartial)
def test_missing_columns(*, tmpdir: LocalPath) -> None:
    r"""
    Test transformation for hash encoding with insufficient (categorical) columns.

    The input example carries a column named "category-missing" that the input case lacks, and
    the test is expected to fail with `ErrorTransformUnsupportPartial`.

    Args
    ----
    - tmpdir
        Temporary directory for this test.
        It is automatically provided by PyTest, so its value should not be explicitly defined.

    Returns
    -------
    """
    # Initialize testing transformation.
    workspace = str(tmpdir)
    transform_factory = lib.transforms.FactoryTransform()

    # Let a fresh transformation formalize null raw domains into the input and output cases.
    probe = transform_factory.from_args(IDENTIFIER)
    given = probe.input(None)
    expected = probe.output(None)

    # Derive the input example from an independent copy, so that the extra column is added to the
    # example only and the input case stays without it.
    example_given = copy.deepcopy(given)
    example_given[0]["category-missing"] = []
    example: Tuple[Input, Output] = (example_given, [])

    # Build the pair of data frame list comparators handed to the template.
    comparators = (to_eq_plural_ordered(eq_dataframe), to_eq_plural_ordered(eq_dataframe))

    # Fitting arguments forwarded to the template.
    fitting = {"category_encoders_init_kwargs": {"n_components": 2}}

    # Run test template.
    template_test_transform(
        workspace,
        IDENTIFIER,
        transform_factory,
        example,
        given,
        expected,
        *comparators,
        fit_kwargs=fitting,
    )
