# Import Python packages.
import os
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar

# Import relatively from other modules.
from ...data import DataTabular
from ...io import download_file, mkdirs, rmfile
from .tabular import DatasetTabular


# Type aliases.
Input = Sequence[Tuple[str, Optional[str]]]
Output = List[DataTabular]


# Self types.
SelfDatasetTabularSimple = TypeVar("SelfDatasetTabularSimple", bound="DatasetTabularSimple")


class DatasetTabularSimple(DatasetTabular):
    r"""
    Dataset of simply structured tabular data.
    """
    # Transformation unique identifier.
    _IDENTIFIER = "dataset.tabular.simple"

    @classmethod
    def from_memalias(
        cls: Type[SelfDatasetTabularSimple],
        memory: Output,
        memory_names: Sequence[str],
        /,
        *args: Any,
        read_args: Optional[Mapping[str, Sequence[Any]]] = None,
        read_kwargs: Optional[Mapping[str, Mapping[str, str]]] = None,
        sorts: Optional[Tuple[str, str]] = None,
        **kwargs: Any,
    ) -> SelfDatasetTabularSimple:
        r"""
        Initialize the class from direct data memory aliasing.

        An instance is first created by `cls(*args, **kwargs)`, then the given memory is aliased
        into it through `memalias`, which receives the same extra positional and keyword
        arguments.

        Args
        ----
        - memory
            Data memory.
        - memory_names
            Named indices of memory slots.
        - read_args
            A collection of positional arguments for reading data from named addresses.
            It is in final format that arguments are indexed by name of each memory slot.
            If it is None, a fresh empty mapping is used.
        - read_kwargs
            A collection of keyword arguments for reading data from named addresses.
            It is in final format that arguments are indexed by name of each memory slot.
            If it is None, a fresh empty mapping is used.
        - sorts
            Pair of column and row sorting algorithms for disambiguation on tabular data.
            It is forwarded to `memalias` without change.

        Returns
        -------
        - self
            Class instance itself.
        """
        # Create an empty dataset first.
        dataset = cls(*args, **kwargs)

        # Reading arguments are aliased (not copied) by the instance, so a default shared by the
        # function object must never be forwarded; create a fresh mapping per call instead.
        return dataset.memalias(
            memory,
            memory_names,
            *args,
            read_args={} if read_args is None else read_args,
            read_kwargs={} if read_kwargs is None else read_kwargs,
            sorts=sorts,
            **kwargs,
        )

    def memalias(
        self: SelfDatasetTabularSimple,
        memory: Output,
        memory_names: Sequence[str],
        /,
        *args: Any,
        sorts: Optional[Tuple[str, str]] = None,
        read_args: Optional[Mapping[str, Sequence[Any]]] = None,
        read_kwargs: Optional[Mapping[str, Mapping[str, str]]] = None,
        **kwargs: Any,
    ) -> SelfDatasetTabularSimple:
        r"""
        Alias data memory by the memory of the class.

        Args
        ----
        - memory
            Data memory.
        - memory_names
            Named indices of memory slots.
        - sorts
            Column and row sorting algorithms for disambiguation on homogeneous tabular data from
            each named addresses.
        - read_args
            A collection of positional arguments for reading data from named addresses.
            It is in final format that arguments are indexed by name of each memory slot.
            If it is None, a fresh empty mapping is stored.
        - read_kwargs
            A collection of keyword arguments for reading data from named addresses.
            It is in final format that arguments are indexed by name of each memory slot.
            If it is None, a fresh empty mapping is stored.

        Returns
        -------
        - self
            Class instance itself.
        """
        # Super call.
        DatasetTabular.memalias(self, memory, memory_names, *args, sorts=sorts, **kwargs)

        # Alias potentially mutable attributes.
        # Explicitly given mappings are stored by reference on purpose; only the implicit default
        # is replaced by a fresh mapping so that instances never share a function-level default.
        self._read_args = {} if read_args is None else read_args
        self._read_kwargs = {} if read_kwargs is None else read_kwargs
        return self

    def memorize(
        self: SelfDatasetTabularSimple,
        addresses: Sequence[Tuple[str, Optional[str]]],
        /,
        *args: Any,
        sorts: Optional[Tuple[str, str]] = None,
        cache_read: bool = True,
        cache_relpaths: Mapping[str, str] = {},
        read_args: Sequence[Tuple[Sequence[str], Sequence[Any]]] = [],
        read_kwargs: Sequence[Tuple[Sequence[str], Mapping[str, Any]]] = [],
        read_silent_default: bool = True,
        **kwargs: Any,
    ) -> SelfDatasetTabularSimple:
        r"""
        Load structured content from addresses into memory.

        Every address which is not an existing local file is first downloaded into the class cache
        directory, then read by `DataTabular.from_csv`.
        Loaded data and finalized attributes are stored through `memalias`.
        Extra positional and keyword arguments (`args`, `kwargs`) are accepted but not used.

        Args
        ----
        - addresses
            Named addresses of essential data.
            If a name is repeated, the last address of that name is used.
        - sorts
            Column and row sorting algorithms for disambiguation on homogeneous tabular data from
            each named addresses.
            It must be provided.
        - cache_read
            If True, non-local data (e.g., from URLs) will be cached into class cache directory on
            local file system, and stored addresses are overwritten by cached paths.
            Otherwise, downloaded content is removed after reading, even if reading fails.
        - cache_relpaths
            Relative local paths of caching data w.r.t. class cache directory.
            Index names should match with names from true addresses.
            It is only looked up for addresses which are not existing local files.
        - read_args
            A collection of positional arguments for reading data from named addresses.
            Each item of the collection is a pair of name groups and sharing positional arguments.
            Name groups of different items should not overlap.
        - read_kwargs
            A collection of keyword arguments for reading data from named addresses.
            Each item of the collection is a pair of name groups and sharing keyword arguments.
            Name groups of different items should not overlap.
        - read_silent_default
            If True, allow reading handlers to silently use default arguments of their own if
            reading arguments corresponding to address names are not provided.
            Otherwise, reading arguments of all address names must be explicitly provided.
            Pass empty positional and keyword arguments to explicitly use default arguments.

        Returns
        -------
        - self
            Class instance itself.

        Raises
        ------
        - AssertionError
            Sorting algorithms are not provided, an address is None, or reading arguments of an
            address name are missing while silent default is not allowed.
        - KeyError
            Caching relative path is not provided for an address which is not a local file.
        """
        # Collect reading arguments and disambiguation sorting algorithms.
        if sorts is None:
            # This should be an assertion error.
            raise AssertionError(
                "For safety, silent tabular disambiguition is not allowed, but disambuigition"
                " sorting algorithms are not provided."
            )
        sort_columns, sort_rows = sorts

        # Copy collected arguments into fresh dictionaries since they may be autofilled below.
        read_args_ = {**self._collect_read_args(read_args)}
        read_kwargs_ = {**self._collect_read_kwargs(read_kwargs)}

        # Traverse named addresses, and autofill default arguments if it is allowed.
        # Validation raises explicitly (rather than using `assert` statements) so that it is still
        # performed when Python runs with optimization enabled.
        addresses_: Dict[str, str] = {}
        for name, address in addresses:
            # Explicitly save original address.
            if address is None:
                raise AssertionError(f'Address for tabular data "{name:s}" is missing.')
            addresses_[name] = address

            if read_silent_default:
                # Autofill reading arguments for focusing address name only when they are absent.
                read_args_.setdefault(name, [])
                read_kwargs_.setdefault(name, {})
            else:
                # Ensure arguments are defined if autofilling default arguments is not allowed.
                if name not in read_args_:
                    raise AssertionError(
                        f'Reading positional arguments are missing for address name "{name:s}".'
                    )
                if name not in read_kwargs_:
                    raise AssertionError(
                        f'Reading keyword arguments are missing for address name "{name:s}".'
                    )

        # Read structured data into memory.
        # Iterate over a snapshot because stored addresses may be overwritten by cached paths.
        memory = []
        memory_names: List[str] = []
        for name, address in list(addresses_.items()):
            # Ensure file system reading path is always local.
            if os.path.isfile(address):
                # Address is a local path.
                path = address
            else:
                # Save data to local file system under class cache directory.
                try:
                    relpath = cache_relpaths[name]
                except KeyError:
                    raise KeyError(
                        "Caching relative path is missing for non-local address name"
                        f' "{name:s}".'
                    ) from None
                path = os.path.join(self._cache, relpath)
                mkdirs(os.path.dirname(path))
                download_file(address, path)

            # Read structured data from local path.
            try:
                data = DataTabular.from_csv(
                    path,
                    *read_args_[name],
                    sort_columns=sort_columns,
                    sort_rows=sort_rows,
                    **read_kwargs_[name],
                )
            finally:
                # Downloaded content must not be left behind when caching is disabled, no matter
                # if reading succeeds or fails.
                if path != address and not cache_read:
                    rmfile(path)
            memory.append(data)
            memory_names.append(name)

            # Only when local path is different from address, caching must be handled.
            if path != address and cache_read:
                # Overwrite address by cached path.
                addresses_[name] = path

        # Store finalized data memory and attributes.
        return self.memalias(
            memory,
            memory_names,
            addresses=addresses_,
            sorts=(sort_columns, sort_rows),
            read_args=read_args_,
            read_kwargs=read_kwargs_,
        )

    @classmethod
    def _collect_read_args(
        cls: Type[SelfDatasetTabularSimple],
        raw: Sequence[Tuple[Sequence[str], Sequence[Any]]],
        /,  # noqa: W504
    ) -> Mapping[str, Sequence[Any]]:
        r"""
        Expand grouped positional reading arguments into a mapping indexed by address name.

        Args
        ----
        - raw
            Pairs of name groups and positional arguments shared by all names of the group.

        Returns
        -------
        - processed
            Positional reading arguments indexed by each individual address name.
            Names of the same group refer to the same argument object.
        """
        # Flatten name groups into (name, shared arguments) pairs in their given order.
        flattened = ((key, shared) for group, shared in raw for key in group)

        collected: Dict[str, Sequence[Any]] = {}
        for key, shared in flattened:
            # A name is only allowed to appear once over all groups.
            assert (
                key not in collected
            ), f'Reading positional arguments are duplicated for address name "{key:s}".'
            collected[key] = shared
        return collected

    @classmethod
    def _collect_read_kwargs(
        cls: Type[SelfDatasetTabularSimple],
        raw: Sequence[Tuple[Sequence[str], Mapping[str, Any]]],
        /,
    ) -> Mapping[str, Mapping[str, Any]]:
        r"""
        Expand grouped keyword reading arguments into a mapping indexed by address name.

        Args
        ----
        - raw
            Pairs of name groups and keyword arguments shared by all names of the group.

        Returns
        -------
        - processed
            Keyword reading arguments indexed by each individual address name.
            Names of the same group refer to the same argument object.
        """
        # Flatten name groups into (name, shared arguments) pairs in their given order.
        flattened = ((key, shared) for group, shared in raw for key in group)

        collected: Dict[str, Mapping[str, Any]] = {}
        for key, shared in flattened:
            # A name is only allowed to appear once over all groups.
            assert (
                key not in collected
            ), f'Reading keyword arguments are duplicated for address name "{key:s}".'
            collected[key] = shared
        return collected

    def get_memalias_args(self: SelfDatasetTabularSimple, /) -> Sequence[Any]:
        r"""
        Get positional arguments for memory aliasing the dataset.

        Args
        ----

        Returns
        -------
        - args
            Positional arguments for memory aliasing.
            It is a new list holding exactly the positional arguments of the parent class.
        """
        # This class adds no positional argument of its own on top of the parent class.
        inherited = DatasetTabular.get_memalias_args(self)
        return list(inherited)

    def get_memalias_kwargs(self: SelfDatasetTabularSimple, /) -> Mapping[str, Any]:
        r"""
        Get keyword arguments for memory aliasing the dataset.

        Args
        ----

        Returns
        -------
        - kwargs
            Keyword arguments for memory aliasing.
            It holds sorting algorithms and reading arguments of this class together with keyword
            arguments of the parent class, where the parent class wins on shared keys.
        """
        # Start from attributes owned by this class.
        collected: Dict[str, Any] = {
            "sorts": self._sorts,
            "read_args": self._read_args,
            "read_kwargs": self._read_kwargs,
        }

        # Merge parent keyword arguments afterwards so that they override shared keys.
        collected.update(DatasetTabular.get_memalias_kwargs(self))
        return collected

    def get_alphabetic_data(self: SelfDatasetTabularSimple, /) -> Mapping[str, Any]:
        r"""
        Get alphabetic data of the transformation.

        Args
        ----

        Returns
        -------
        - data
            Alphabetic data of the transformation.
            It holds sorting algorithms and reading arguments of this class together with
            alphabetic data of the parent class, where the parent class wins on shared keys.
        """
        # Start from attributes owned by this class.
        collected: Dict[str, Any] = {
            "sorts": self._sorts,
            "read_args": self._read_args,
            "read_kwargs": self._read_kwargs,
        }

        # Merge parent alphabetic data afterwards so that it overrides shared keys.
        collected.update(DatasetTabular.get_alphabetic_data(self))
        return collected

    def set_alphabetic_data(
        self: SelfDatasetTabularSimple, data: Mapping[str, Any], /  # noqa: W504
    ) -> SelfDatasetTabularSimple:
        r"""
        Set alphabetic data of the transformation.

        After the parent class has consumed the data, sorting algorithms and reading arguments are
        restored from it, and data memory is rebuilt either by reading from stored addresses (if
        the dataset is linking) or from the loaded content.
        The temporary `_content` attribute is deleted at the end in both cases.

        Args
        ----
        - data
            Alphabetic data of the transformation.
            It must contain "sorts", "read_args" and "read_kwargs".

        Returns
        -------
        - self
            Class instance itself.
        """
        # Let the parent class consume its own part first.
        DatasetTabular.set_alphabetic_data(self, data)

        # Everything this class needs must be present.
        assert "sorts" in data, "Sorting algorithms for data from addresses are missing."
        assert "read_args" in data, "Positional arguments for reading from addresses are missing."
        assert "read_kwargs" in data, "Keyword arguments for reading from addresses are missing."

        # Extract metadata of this class.
        sort_columns, sort_rows = data["sorts"]
        read_args = data["read_args"]
        read_kwargs = data["read_kwargs"]

        if not self.link:
            # Non-linking dataset restores attributes directly from loaded metadata.
            self._sorts = (sort_columns, sort_rows)
            self._read_args = read_args
            self._read_kwargs = read_kwargs

            # Wrap loaded content of each memory slot back into tabular data container.
            self.memory = [
                DataTabular(self._content[name], sort_columns=sort_columns, sort_rows=sort_rows)
                for name in self.memory_names
            ]
        else:
            # Linking dataset does not have processed numeric data, thus raw data is read again
            # from stored addresses.
            # Reading arguments were already finalized when data was prepared, so every name is
            # passed as its own group and silent autofilling is disabled.
            # Caching was also done at that time, so it is disabled here.
            named_addresses = list(self._addresses.items())
            grouped_args = [([name], positional) for name, positional in read_args.items()]
            grouped_kwargs = [([name], keyword) for name, keyword in read_kwargs.items()]
            self.memorize(
                named_addresses,
                sorts=(sort_columns, sort_rows),
                read_args=grouped_args,
                read_kwargs=grouped_kwargs,
                read_silent_default=False,
                cache_read=False,
            )

        # Loaded content is no longer needed after memory is ready.
        del self._content
        return self
