# Import Python packages.
import copy
from typing import Any, List, Mapping, Optional, Tuple

# Import external packages.
import numpy as np
import pandas as pd

# Import PyTest packages.
import pytest

# Import PyTest external packages.
from py._path.local import LocalPath

# Import developing library.
import fin_tech_py_toolkit as lib

# Import testing library.
from ....utils import eq_dataframe, to_eq_plural_ordered
from ...utils import template_test_io, template_test_transform


# Type aliases.
# Both the transformation input and output are handled as ordered lists of Pandas data frames.
Input = List[pd.DataFrame]
Output = List[pd.DataFrame]


# Runtime constants.
# Identifier of the transformation under test, read from the private class attribute
# `_IDENTIFIER` of `TransformSDVPandas`; the tests below pass it to the factory and to the
# shared test templates.
IDENTIFIER = lib.transforms.TransformSDVPandas._IDENTIFIER


def synthesize(
    *, unk: int, ood: Optional[str], seed: int
) -> Tuple[Tuple[Input, Output], Input, Output, Mapping[str, Any]]:
    r"""
    Build the example, input and expected output frames used by the transform test.

    Args
    ----
    - unk
        Default value for unknown or rare categories.
    - ood
        Category representation reserved for out-of-distribution.
        If it is null, no out-of-distribution row is appended to the cases.
    - seed
        Random seed used to draw the noise of the expected encodings.

    Returns
    -------
    - example
        Input and output examples.
    - input
        Input case.
    - output
        Output case.
    - supplement
        Supplementary material for synthesized test (always an empty mapping here).
    """
    # One extra row is appended to every column when out-of-distribution is enabled.
    has_ood = ood is not None
    ood_key = str(ood)
    num_rows = 5 + int(has_ood)

    # In-distribution categorical columns.
    column1 = ["010", "010", "010", "020", "030"]
    column2 = ['"X"', '"X"', '"X"', '"Y"', '"Z"']
    column3 = ["A", "B", "C", "D", "E"]

    # Extra row of each column: raw categories on the input side, and the reserved
    # out-of-distribution symbol on the output side (the third column is never encoded).
    if has_ood:
        raw_tails = (["040"], ['"U"'], ["F"])
        encoded_tails = ([ood_key], [ood_key], ["F"])
    else:
        raw_tails = ([], [], [])
        encoded_tails = ([], [], [])

    # Encoding parameters: the normalized weights split [0, 1] into consecutive intervals;
    # the mean is the interval midpoint and the deviation is one sixth of the interval width.
    weights = np.array([1.0, 1.5, 3.0])
    weights = weights / weights.sum()
    edges = np.array([0.0, 0.0, 0.0, 0.0])
    edges[1:] = np.cumsum(weights)
    lowers, uppers = edges[:-1], edges[1:]
    means = (lowers + uppers) / 2.0
    stds = (uppers - lowers) / 6.0

    # Map each category to its (mean, deviation) pair; categories are listed in the order
    # of the intervals they occupy.
    encodings = {}
    for encoded_name, ranked in (
        ("categorical1-sdv", ["020", "030", "010"]),
        ("categorical2-sdv", ['"Y"', '"Z"', '"X"']),
    ):
        for rank, category in enumerate(ranked):
            encodings[category] = {encoded_name: (float(means[rank]), float(stds[rank]))}

    # The out-of-distribution symbol is encoded as the unknown value without any noise.
    encodings[ood_key] = {
        "categorical1-sdv": (float(unk), 0.0),
        "categorical2-sdv": (float(unk), 0.0),
    }

    # Input and output examples.
    example_input: Input = [
        pd.DataFrame({"categorical1": column1, "categorical2": column2}),
        pd.DataFrame([], columns=[], index=range(5)),
        pd.DataFrame([], columns=[]),
        pd.DataFrame([], columns=[]),
    ]
    example_output: Output = []

    # Input case.
    case_input: Input = [
        pd.DataFrame(
            {
                "categorical1": column1 + raw_tails[0],
                "categorical2": column2 + raw_tails[1],
                "categorical3": column3 + raw_tails[2],
            }
        ),
        pd.DataFrame([], columns=[], index=range(num_rows)),
        pd.DataFrame([], columns=[]),
        pd.DataFrame([], columns=[]),
    ]

    # Noise is drawn for the first encoded column, then for the second one; this order
    # must be kept so that the same seed yields the same expected values.
    rng = np.random.RandomState(seed)
    noise1 = rng.normal(0.0, 1.0, num_rows)
    noise2 = rng.normal(0.0, 1.0, num_rows)

    def encode(encoded_name, cells, noises):
        # Expected encoding of every cell: mean plus noise scaled by the deviation.
        return [
            encodings[cell][encoded_name][0] + noise * encodings[cell][encoded_name][1]
            for cell, noise in zip(cells, noises)
        ]

    # Output case.
    case_output: Output = [
        pd.DataFrame({"categorical3": column3 + encoded_tails[2]}),
        pd.DataFrame(
            {
                "categorical1-sdv": encode("categorical1-sdv", column1 + encoded_tails[0], noise1),
                "categorical2-sdv": encode("categorical2-sdv", column2 + encoded_tails[1], noise2),
            }
        ),
        pd.DataFrame([], columns=[]),
        pd.DataFrame([], columns=[]),
    ]
    return (example_input, example_output), case_input, case_output, {}


@pytest.mark.parametrize(
    ("raw_input", "raw_output"),
    [
        # `strict=True` turns an unexpected pass into a failure, so these cases really
        # assert that the error is raised instead of merely tolerating it.
        pytest.param(
            ...,
            None,
            id="unsupport-input",
            marks=[
                pytest.mark.xfail(
                    raises=lib.transforms.ErrorTransformUnsupportPartial, strict=True
                )
            ],
        ),
        pytest.param(
            None,
            ...,
            id="unsupport-output",
            marks=[
                pytest.mark.xfail(
                    raises=lib.transforms.ErrorTransformUnsupportPartial, strict=True
                )
            ],
        ),
        pytest.param(None, None, id="both-null"),
    ],
)
def test_io(*, raw_input: Any, raw_output: Any) -> None:
    r"""
    Test transformation input and output domain formalization.

    The cases passing a non-null raw input or raw output (``...``) are expected to raise
    ``ErrorTransformUnsupportPartial``; the case with both being null is expected to pass.

    Args
    ----
    - raw_input
        Raw input.
    - raw_output
        Raw output.

    Returns
    -------
    """
    # Initialize testing transformation.
    factory = lib.transforms.FactoryTransform()

    # Run test template, comparing inputs and outputs as ordered lists of data frames.
    template_test_io(
        IDENTIFIER,
        factory,
        raw_input,
        raw_output,
        to_eq_plural_ordered(eq_dataframe),
        to_eq_plural_ordered(eq_dataframe),
    )


@pytest.mark.parametrize(
    ("unk", "ood"),
    [
        pytest.param(0, None, id="default"),
        pytest.param(0, "<unk>", id="ood"),
    ],
)
def test_default(*, tmpdir: LocalPath, unk: int, ood: Optional[str]) -> None:
    r"""
    Test the transformation registered under ``IDENTIFIER`` on synthesized Pandas data.

    Args
    ----
    - tmpdir
        Temporary directory for this test.
        It is automatically provided by PyTest, so its value should not be explicitly defined.
    - unk
        Default value for unknown or rare categories.
    - ood
        Category representation reserved for out-of-distribution.
        If it is null, out-of-distribution is not allowed.

    Returns
    -------
    """
    # The same seed drives both the synthesized expectation and the transformation call.
    seed = 42

    # Prepare the working directory and the transformation factory.
    workdir = str(tmpdir)
    transform_factory = lib.transforms.FactoryTransform()

    # Synthesize the example, the input case and the expected output case.
    example, case_input, case_output, _ = synthesize(unk=unk, ood=ood, seed=seed)

    # Delegate the fit / transform checks to the shared template.
    template_test_transform(
        workdir,
        IDENTIFIER,
        transform_factory,
        example,
        case_input,
        case_output,
        to_eq_plural_ordered(eq_dataframe),
        to_eq_plural_ordered(eq_dataframe),
        fit_kwargs={"unk": unk, "ood": ood},
        transform_kwargs={"seed": seed},
    )


def test_empty(*, tmpdir: LocalPath) -> None:
    r"""
    Test the transformation registered under ``IDENTIFIER`` on its null input and output.

    Args
    ----
    - tmpdir
        Temporary directory for this test.
        It is automatically provided by PyTest, so its value should not be explicitly defined.

    Returns
    -------
    """
    # Prepare the working directory and the transformation factory.
    workdir = str(tmpdir)
    transform_factory = lib.transforms.FactoryTransform()

    # Let the transformation itself formalize the null input and the null output.
    transform = transform_factory.from_args(IDENTIFIER)
    case_input = transform.input(None)
    case_output = transform.output(None)

    # The example reuses the formalized input and has no output example.
    example: Tuple[Input, Output] = (case_input, [])

    # Delegate the fit / transform checks to the shared template.
    template_test_transform(
        workdir,
        IDENTIFIER,
        transform_factory,
        example,
        case_input,
        case_output,
        to_eq_plural_ordered(eq_dataframe),
        to_eq_plural_ordered(eq_dataframe),
    )


# `strict=True` turns an unexpected pass into a failure, so the test really asserts that the
# error is raised instead of merely tolerating it.
@pytest.mark.xfail(raises=lib.transforms.ErrorTransformUnsupportPartial, strict=True)
def test_missing_columns(*, tmpdir: LocalPath) -> None:
    r"""
    Test the transformation registered under ``IDENTIFIER`` with insufficient columns.

    The example input carries an extra ``category-missing`` column that the actual input
    case does not have; the test is expected to raise ``ErrorTransformUnsupportPartial``.

    Args
    ----
    - tmpdir
        Temporary directory for this test.
        It is automatically provided by PyTest, so its value should not be explicitly defined.

    Returns
    -------
    """
    # Initialize testing transformation.
    root = str(tmpdir)
    factory = lib.transforms.FactoryTransform()

    # Let the transformation itself formalize the null input and the null output.
    transform = factory.from_args(IDENTIFIER)
    case_input = transform.input(None)
    case_output = transform.output(None)

    # Build the example from an independent copy, so that the extra column exists only in
    # the example and not in the input case.
    example_input = copy.deepcopy(case_input)
    example_input[0]["category-missing"] = []
    example: Tuple[Input, Output]
    example = (example_input, [])

    # Run test template.
    template_test_transform(
        root,
        IDENTIFIER,
        factory,
        example,
        case_input,
        case_output,
        to_eq_plural_ordered(eq_dataframe),
        to_eq_plural_ordered(eq_dataframe),
    )
