# Import Python modules.
import abc
from typing import Any, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar, cast

# Import relatively from other modules.
from ..data import BaseData
from ..transforms import BaseTransform, ErrorTransformUnsupportPartial, TypeTransform


# Type variables.
# `Data` stands for the type of items held in dataset memory; it is bounded by the data base
# class.
Data = TypeVar("Data", bound="BaseData[Any]")


# Type aliases.
# `Input` is a sequence of (name, address) pairs where the address may be None.
# `Output` is a list of data items.
Input = Sequence[Tuple[str, Optional[str]]]
Output = List[Data]


# Self types.
# They annotate `self` or `cls` so that methods can be typed as returning the concrete subclass.
SelfTypeDataset = TypeVar("SelfTypeDataset", bound="TypeDataset")
SelfBaseDataset = TypeVar("SelfBaseDataset", bound="BaseDataset[Any]")


class TypeDataset(TypeTransform):
    r"""
    Metaclass of dataset.
    Every class created through it is validated by its identifier, then handed over to
    `TypeTransform.register_transform`.
    """

    @staticmethod
    def __new__(cls: Type["TypeDataset"], /, *args: Any, **kwargs: Any) -> Type["BaseDataset[Any]"]:
        r"""
        Create and register a new dataset class.

        Args
        ----
        - cls
            The metaclass used to create the new class.
            All remaining arguments are forwarded untouched to `abc.ABCMeta.__new__`.

        Returns
        -------
        - created
            The newly created class, cast into a dataset class.
        """
        # Build the class directly through `abc.ABCMeta`.
        # The result is generic for type checkers, thus it is cast into a dataset class.
        created = cast(Type["BaseDataset[Any]"], abc.ABCMeta.__new__(cls, *args, **kwargs))

        # Identifier is accepted only if it starts with an underscore, or if it starts with
        # "dataset." and has exactly three dot-separated parts (i.e., two dots).
        identifier = created._IDENTIFIER
        assert identifier.startswith("_") or (
            identifier.startswith("dataset.") and identifier.count(".") == 2
        ), (
            f'Registering dataset identifier "{identifier:s}" is not of format'
            f' "dataset.structure.title".'
        )

        # Register the created class under its identifier.
        TypeTransform.register_transform(created, identifier)
        return created


class BaseDataset(BaseTransform[Input, Output[Data]], metaclass=TypeDataset):
    r"""
    Base of dataset.
    Datasets are special transformations transforming (loading) string addresses (e.g., paths, URLs)
    into structured data in memory or file system.
    """
    # Transformation unique identifier.
    _IDENTIFIER = "_dataset"

    def __annotate__(self: SelfBaseDataset, /) -> None:
        r"""
        Annotate attributes at instance level.
        This only declares attribute types; no attribute is created by calling it.

        Args
        ----

        Returns
        -------
        """
        # NOTE(review): Python 3.14 (PEP 649) gives the name `__annotate__` a meaning of its own;
        # confirm this hook does not clash with it before upgrading.
        # Annotations at instance level.
        self._addresses: Mapping[str, str]
        self.memory: Sequence[Data]
        self.memory_names: Sequence[str]
        self._memory_indices: Mapping[str, int]

    def __init__(self: SelfBaseDataset, /, *args: Any, link: bool = True, **kwargs: Any) -> None:
        r"""
        Initialize the class.

        Args
        ----
        - link
            If True, the dataset will be treated as a link of original data, thus when we save the
            dataset, it will only save addresses of original data.
            If False, the dataset will create a copy of original data under its own format for
            saving.

        Returns
        -------
        """
        # Super call.
        # All the other arguments are forwarded to the base transformation.
        BaseTransform.__init__(self, *args, **kwargs)

        # Save essential attributes.
        self.link = link

    @classmethod
    @abc.abstractmethod
    def from_memalias(
        cls: Type[SelfBaseDataset],
        memory: Output[Data],
        memory_names: Sequence[str],
        /,
        *args: Any,
        addresses: Mapping[str, str] = {},
        **kwargs: Any,
    ) -> SelfBaseDataset:
        r"""
        Initialize the class from direct data memory aliasing.
        This is abstract and must be implemented by subclasses.

        Args
        ----
        - memory
            Data memory.
        - memory_names
            Named indices of memory slots.
        - addresses
            Named addresses corresponding to memory slots respectively.
            The default empty mapping is a single object shared by all calls, thus
            implementations must treat it as read-only.

        Returns
        -------
        - self
            Class instance itself.
        """

    @classmethod
    def from_storage(
        cls: Type[SelfBaseDataset], addresses: Input, /, *args: Any, **kwargs: Any
    ) -> SelfBaseDataset:
        r"""
        Initialize the class from storage addresses.

        Args
        ----
        - addresses
            Named addresses of essential data.
            All the other arguments are forwarded to both the constructor and `fit`.

        Returns
        -------
        - self
            Class instance itself.
        """
        # Create an empty dataset first.
        dataset = cls(*args, **kwargs)

        # Load dataset into memory.
        # Example output is irrelevant for loading, thus an empty list is passed.
        return dataset.fit(addresses, [], *args, **kwargs)

    def input(self: SelfBaseDataset, raw: Any, /) -> Input:
        r"""
        Convert raw data into input to the transformation.

        Args
        ----
        - raw
            Raw data.
            Only None is supported, and it is converted into an empty input.

        Returns
        -------
        - process
            Processed data compatible with the transformation.
        """
        # Conversion will vary according to raw data.
        if raw is None:
            # Empty dataset has nothing in memory, thus no configurations are required.
            return []
        else:
            # All the other cases are not supported.
            raise ErrorTransformUnsupportPartial(
                f"Try to formalize incompatible raw data into input domain of"
                f' "{self._IDENTIFIER:s}".'
            )

    def output(self: SelfBaseDataset, raw: Any, /) -> Output[Data]:
        r"""
        Convert raw data into output from the transformation.

        Args
        ----
        - raw
            Raw data.
            Only None is supported, and it is converted into an empty output.

        Returns
        -------
        - process
            Processed data compatible with the transformation.
        """
        # Conversion will vary according to raw data.
        if raw is None:
            # Empty dataset has nothing in memory, thus nothing can be accessible.
            return []
        else:
            # All the other cases are not supported.
            raise ErrorTransformUnsupportPartial(
                f"Try to formalize incompatible raw data into output domain of"
                f' "{self._IDENTIFIER:s}".'
            )

    def transform(
        self: SelfBaseDataset, input: Input, /, *args: Any, **kwargs: Any
    ) -> Output[Data]:
        r"""
        Transform input into output without inplacement.
        For datasets, it fetches in-memory data by queried names.

        Args
        ----
        - input
            Input to the transformation.
            Each item is a name and an optional address; a None address skips the address
            consistency check for that name.

        Returns
        -------
        - output
            Output from the transformation.
            Data are collected in the same order as queried names.
        """
        # Null input should be specially handled.
        if not input:
            # Null input should result in null output.
            return self.output(None)

        # Safety check before fetching data corresponding to input.
        # Checks are explicit raises rather than `assert` statements, so that they are still
        # performed under `python -O`; the exception type is kept as `AssertionError`.
        # Missing names are deduplicated and sorted to make the message deterministic.
        missing = sorted({name for name, _ in input if name not in self._addresses})
        if missing:
            missing_names = ", ".join(f'"{name:s}"' for name in missing)
            raise AssertionError("Fail to find named data: {:s}.".format(missing_names))

        # Queried addresses must agree with addresses of in-memory data.
        inconsistent = [
            (name, address)
            for name, address in input
            if address is not None and address != self._addresses[name]
        ]
        if inconsistent:
            details = ", ".join(
                f'"{name:s}" ("{str(address):s}" defies "{str(self._addresses[name]):s}")'
                for name, address in inconsistent
            )
            raise AssertionError(
                "Detect inconsistent addresses between querying and in-memory named data:"
                f" {details:s}"
            )

        # Construct a memory collection of fetching data.
        return [self.memory[self._memory_indices[name]] for name, _ in input]

    def fit(
        self: SelfBaseDataset, input: Input, output: Output[Data], /, *args: Any, **kwargs: Any
    ) -> SelfBaseDataset:
        r"""
        Fit transformation parameters by example input and output.
        For datasets, it loads data of given addresses into memory.

        Args
        ----
        - input
            Example input to the transformation.
        - output
            Example output from the transformation.
            It is not used by this implementation.

        Returns
        -------
        - self
            Class instance itself.
        """
        # Null input should be specially handled.
        if not input:
            # Do nothing.
            return self

        # Load dataset into memory.
        return self.memorize(input, *args, **kwargs)

    def memalias(
        self: SelfBaseDataset,
        memory: Output[Data],
        memory_names: Sequence[str],
        /,
        *args: Any,
        addresses: Optional[Mapping[str, str]] = None,
        **kwargs: Any,
    ) -> SelfBaseDataset:
        r"""
        Alias data memory by the memory of the class.
        Given arguments are stored by reference rather than copied.

        Args
        ----
        - memory
            Data memory.
        - memory_names
            Named indices of memory slots.
        - addresses
            Named addresses corresponding to memory slots respectively.
            If it is None, an empty mapping owned by this instance is used.

        Returns
        -------
        - self
            Class instance itself.
        """
        # Alias all attributes from the arguments.
        self.memory = memory
        self.memory_names = memory_names
        self._memory_indices = {name: i for i, name in enumerate(self.memory_names)}

        # Alias potentially mutable attributes.
        # A fresh mapping is created when no addresses are given, so that instances never share
        # a single default mapping object.
        self._addresses = {} if addresses is None else addresses
        return self

    @abc.abstractmethod
    def memorize(
        self: SelfBaseDataset,
        addresses: Input,
        /,
        *args: Any,
        cache_read: bool = True,
        cache_relpaths: Mapping[str, str] = {},
        **kwargs: Any,
    ) -> SelfBaseDataset:
        r"""
        Load structured content from addresses into memory.
        This is abstract and must be implemented by subclasses.

        Args
        ----
        - addresses
            Named addresses of essential data.
        - cache_read
            If True, non-local data (e.g., from URLs) will be cached into class cache directory on
            local file system.
        - cache_relpaths
            Relative local paths of caching data w.r.t. class cache directory.
            Index names should match with names from true addresses.
            The default empty mapping is a single object shared by all calls, thus
            implementations must treat it as read-only.

        Returns
        -------
        - self
            Class instance itself.
        """

    def get_memalias_args(self: SelfBaseDataset, /) -> Sequence[Any]:
        r"""
        Get positional arguments for memory aliasing the dataset.

        Args
        ----

        Returns
        -------
        - args
            Positional arguments for memory aliasing.
            They are in the same order as positional arguments of `memalias`.
        """
        # Collect essential attributes as arguments.
        return [self.memory, self.memory_names]

    def get_memalias_kwargs(self: SelfBaseDataset, /) -> Mapping[str, Any]:
        r"""
        Get keyword arguments for memory aliasing the dataset.

        Args
        ----

        Returns
        -------
        - kwargs
            Keyword arguments for memory aliasing.
        """
        # Collect essential attributes as arguments.
        return {"addresses": self._addresses}

    def get_metadata(self: SelfBaseDataset, /) -> Mapping[str, Any]:
        r"""
        Get metadata of the transformation.

        Args
        ----

        Returns
        -------
        - metadata
            Metadata of the transformation.
        """
        # Collect essential attributes as metadata.
        return {"link": self.link}

    def get_alphabetic_data(self: SelfBaseDataset, /) -> Mapping[str, Any]:
        r"""
        Get alphabetic data of the transformation.

        Args
        ----

        Returns
        -------
        - data
            Alphabetic data of the transformation.
        """
        # Collect all reading arguments.
        return {"addresses": self._addresses, "memory_names": self.memory_names}

    def set_metadata(
        self: SelfBaseDataset, metadata: Mapping[str, Any], /  # noqa: W504
    ) -> SelfBaseDataset:
        r"""
        Set metadata of the transformation.

        Args
        ----
        - metadata
            Metadata of the transformation.
            It must contain the key "link".

        Returns
        -------
        - self
            Class instance itself.
        """
        # Safety check.
        # It is an explicit raise so that it is still performed under `python -O`.
        if "link" not in metadata:
            raise AssertionError("Linking flag is missing.")

        # Overwrite attributes by loaded metadata.
        self.link = metadata["link"]
        return self

    def set_alphabetic_data(
        self: SelfBaseDataset, data: Mapping[str, Any], /  # noqa: W504
    ) -> SelfBaseDataset:
        r"""
        Set alphabetic data of the transformation.

        Args
        ----
        - data
            Alphabetic data of the transformation.
            It must contain keys "addresses" and "memory_names".

        Returns
        -------
        - self
            Class instance itself.
        """
        # Safety check.
        # They are explicit raises so that they are still performed under `python -O`.
        if "addresses" not in data:
            raise AssertionError("Source data file addresses are missing.")
        if "memory_names" not in data:
            raise AssertionError("Memory named indices are missing.")

        # Overwrite attributes by loaded metadata.
        self._addresses = data["addresses"]
        self.memory_names = data["memory_names"]

        # Named indices are rebuilt to stay consistent with loaded memory names.
        self._memory_indices = {name: i for i, name in enumerate(self.memory_names)}
        return self
