# pylint: skip-file
from typing import List
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
from tqdm import tqdm
import torch


regression_datasets = [
    "AustraliaRainfall",
    "HouseholdPowerConsumption1",
    "HouseholdPowerConsumption2",
    "BeijingPM25Quality",
    "BeijingPM10Quality",
    "Covid3Month",
    "LiveFuelMoistureContent",
    "FloodModeling1",
    "FloodModeling2",
    "FloodModeling3",
    "AppliancesEnergy",
    "BenzeneConcentration",
    "NewsHeadlineSentiment",
    "NewsTitleSentiment",
    "BIDMC32RR",
    "BIDMC32HR",
    "BIDMC32SpO2",
    "IEEEPPG",
    "PPGDalia",
]


def uniform_scaling(data, max_len):
    """
    This is a function to scale the time series uniformly
    :param data:
    :param max_len:
    :return:
    """
    seq_len = len(data)
    scaled_data = [data[int(j * seq_len / max_len)] for j in range(max_len)]

    return scaled_data


# The following code is adapted from the python package sktime to read .ts file.
class TsFileParseException(Exception):
    """
    Should be raised when parsing a .ts file and the format is incorrect.
    """

    pass


def load_from_tsfile_to_dataframe(
    full_file_path_and_name,
    return_separate_X_and_y=True,
    replace_missing_vals_with="NaN",
):
    """Loads data from a .ts file into a Pandas DataFrame.
    Parameters
    ----------
    full_file_path_and_name: str
        The full pathname of the .ts file to read.
    return_separate_X_and_y: bool
        true if X and Y values should be returned as separate Data Frames (X) and a numpy array (y), false otherwise.
        This is only relevant for data that
    replace_missing_vals_with: str
       The value that missing values in the text file should be replaced with prior to parsing.
    Returns
    -------
    DataFrame, ndarray
        If return_separate_X_and_y then a tuple containing a DataFrame and a numpy array containing the relevant 
        time-series and corresponding class values.
    DataFrame
        If not return_separate_X_and_y then a single DataFrame containing all time-series and (if relevant) a column 
        "class_vals" the associated class values.
    """

    # Initialize flags and variables used when parsing the file
    metadata_started = False
    data_started = False

    has_problem_name_tag = False
    has_timestamps_tag = False
    has_univariate_tag = False
    has_class_labels_tag = False
    has_target_labels_tag = False
    has_data_tag = False

    previous_timestamp_was_float = None
    previous_timestamp_was_int = None
    previous_timestamp_was_timestamp = None
    num_dimensions = None
    is_first_case = True
    instance_list = []
    class_val_list = []
    line_num = 0

    # Parse the file

    encoder = "utf-8"
    if "BIDMC" in full_file_path_and_name:
        encoder = "ISO-8859-1"
    with open(full_file_path_and_name, "r", encoding=encoder) as file:
        for line in tqdm(file):
            # Strip white space from start/end of line and change to lowercase for use below
            line = line.strip().lower()
            # Empty lines are valid at any point in a file
            if not line:
                line_num += 1
                continue
            
            # Check if this line contains metadata
            # Please note that even though metadata is stored in this function it is not currently published externally
            if line.startswith("@problemname"):
                # Check that the data has not started
                if data_started:
                    raise TsFileParseException("metadata must come before data")
                # Check that the associated value is valid
                tokens = line.split(" ")
                token_len = len(tokens)

                if token_len == 1:
                    raise TsFileParseException(
                        "problemname tag requires an associated value"
                    )

                line[len("@problemname") + 1 :]
                has_problem_name_tag = True
                metadata_started = True
            elif line.startswith("@timestamps"):
                # Check that the data has not started
                if data_started:
                    raise TsFileParseException("metadata must come before data")

                # Check that the associated value is valid
                tokens = line.split(" ")
                token_len = len(tokens)

                if token_len != 2:
                    raise TsFileParseException(
                        "timestamps tag requires an associated Boolean value"
                    )
                elif tokens[1] == "true":
                    timestamps = True
                elif tokens[1] == "false":
                    timestamps = False
                else:
                    raise TsFileParseException("invalid timestamps value")
                has_timestamps_tag = True
                metadata_started = True
            elif line.startswith("@univariate"):
                # Check that the data has not started
                if data_started:
                    raise TsFileParseException("metadata must come before data")

                # Check that the associated value is valid
                tokens = line.split(" ")
                token_len = len(tokens)
                if token_len != 2:
                    raise TsFileParseException(
                        "univariate tag requires an associated Boolean value"
                    )
                elif tokens[1] == "true":
                    _ = True
                elif tokens[1] == "false":
                    _ = False
                else:
                    raise TsFileParseException("invalid univariate value")

                has_univariate_tag = True
                metadata_started = True
            elif line.startswith("@classlabel"):
                # Check that the data has not started
                if data_started:
                    raise TsFileParseException("metadata must come before data")

                # Check that the associated value is valid
                tokens = line.split(" ")
                token_len = len(tokens)

                if token_len == 1:
                    raise TsFileParseException(
                        "classlabel tag requires an associated Boolean value"
                    )

                if tokens[1] == "true":
                    class_labels = True
                elif tokens[1] == "false":
                    class_labels = False
                else:
                    raise TsFileParseException("invalid classLabel value")

                # Check if we have any associated class values
                if token_len == 2 and class_labels:
                    raise TsFileParseException(
                        "if the classlabel tag is true then class values must be supplied"
                    )

                has_class_labels_tag = True
                [token.strip() for token in tokens[2:]]
                metadata_started = True
            elif line.startswith("@targetlabel"):
                # Check that the data has not started
                if data_started:
                    raise TsFileParseException("metadata must come before data")

                # Check that the associated value is valid
                tokens = line.split(" ")
                token_len = len(tokens)

                if token_len == 1:
                    raise TsFileParseException(
                        "targetlabel tag requires an associated Boolean value"
                    )

                if tokens[1] == "true":
                    target_labels = True
                elif tokens[1] == "false":
                    target_labels = False
                else:
                    raise TsFileParseException("invalid targetLabel value")

                has_target_labels_tag = True
                class_val_list = []
                metadata_started = True
            # Check if this line contains the start of data
            elif line.startswith("@data"):
                if line != "@data":
                    raise TsFileParseException(
                        "data tag should not have an associated value"
                    )

                if data_started and not metadata_started:
                    raise TsFileParseException("metadata must come before data")
                else:
                    has_data_tag = True
                    data_started = True
            # If the 'data tag has been found then metadata has been parsed and data can be loaded
            elif data_started:
                # Check that a full set of metadata has been provided
                incomplete_regression_meta_data = (
                    not has_problem_name_tag
                    or not has_timestamps_tag
                    or not has_univariate_tag
                    or not has_target_labels_tag
                    or not has_data_tag
                )
                incomplete_classification_meta_data = (
                    not has_problem_name_tag
                    or not has_timestamps_tag
                    or not has_univariate_tag
                    or not has_class_labels_tag
                    or not has_data_tag
                )
                if (
                    incomplete_regression_meta_data
                    and incomplete_classification_meta_data
                ):
                    raise TsFileParseException(
                        "a full set of metadata has not been provided before the data"
                    )

                # Replace any missing values with the value specified
                line = line.replace("?", replace_missing_vals_with)

                # Check if we dealing with data that has timestamps
                if timestamps:
                    # We're dealing with timestamps so cannot just split line on ':' as timestamps may contain one
                    has_another_value = False
                    has_another_dimension = False

                    timestamps_for_dimension = []
                    values_for_dimension = []

                    this_line_num_dimensions = 0
                    line_len = len(line)
                    char_num = 0

                    while char_num < line_len:
                        # Move through any spaces
                        while char_num < line_len and str.isspace(line[char_num]):
                            char_num += 1

                        # See if there is any more data to read in or if we should validate that read thus far

                        if char_num < line_len:
                            # See if we have an empty dimension (i.e. no values)
                            if line[char_num] == ":":
                                if len(instance_list) < (
                                    this_line_num_dimensions + 1
                                ):
                                    instance_list.append([])

                                instance_list[this_line_num_dimensions].append(
                                    pd.Series()
                                )
                                this_line_num_dimensions += 1

                                has_another_value = False
                                has_another_dimension = True

                                timestamps_for_dimension = []
                                values_for_dimension = []

                                char_num += 1
                            else:
                                # Check if we have reached a class label
                                if line[char_num] != "(" and target_labels:
                                    class_val = line[char_num:].strip()

                                    # if class_val not in class_val_list:
                                    #     raise TsFileParseException(
                                    #         "the class value '" + class_val + "' on line " + str(
                                    #             line_num + 1) + " is not valid")

                                    class_val_list.append(float(class_val))
                                    char_num = line_len

                                    has_another_value = False
                                    has_another_dimension = False

                                    timestamps_for_dimension = []
                                    values_for_dimension = []

                                else:
                                    # Read in the data contained within the next tuple

                                    if line[char_num] != "(" and not target_labels:
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " does not start with a '('"
                                        )

                                    char_num += 1
                                    tuple_data = ""

                                    while (
                                        char_num < line_len
                                        and line[char_num] != ")"
                                    ):
                                        tuple_data += line[char_num]
                                        char_num += 1

                                    if (
                                        char_num >= line_len
                                        or line[char_num] != ")"
                                    ):
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " does not end with a ')'"
                                        )

                                    # Read in any spaces immediately after the current tuple

                                    char_num += 1

                                    while char_num < line_len and str.isspace(
                                        line[char_num]
                                    ):
                                        char_num += 1

                                    # Check if there is another value or dimension to process after this tuple

                                    if char_num >= line_len:
                                        has_another_value = False
                                        has_another_dimension = False

                                    elif line[char_num] == ",":
                                        has_another_value = True
                                        has_another_dimension = False

                                    elif line[char_num] == ":":
                                        has_another_value = False
                                        has_another_dimension = True

                                    char_num += 1

                                    # Get the numeric value for the tuple by reading from the end of the tuple data backwards to the last comma

                                    last_comma_index = tuple_data.rfind(",")

                                    if last_comma_index == -1:
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " contains a tuple that has no comma inside of it"
                                        )

                                    try:
                                        value = tuple_data[last_comma_index + 1 :]
                                        value = float(value)

                                    except ValueError:
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " contains a tuple that does not have a valid numeric value"
                                        )

                                    # Check the type of timestamp that we have

                                    timestamp = tuple_data[0:last_comma_index]

                                    try:
                                        timestamp = int(timestamp)
                                        timestamp_is_int = True
                                        timestamp_is_timestamp = False
                                    except ValueError:
                                        timestamp_is_int = False

                                    if not timestamp_is_int:
                                        try:
                                            timestamp = float(timestamp)
                                            timestamp_is_float = True
                                            timestamp_is_timestamp = False
                                        except ValueError:
                                            timestamp_is_float = False

                                    if (
                                        not timestamp_is_int
                                        and not timestamp_is_float
                                    ):
                                        try:
                                            timestamp = timestamp.strip()
                                            timestamp_is_timestamp = True
                                        except ValueError:
                                            timestamp_is_timestamp = False

                                    # Make sure that the timestamps in the file (not just this dimension or case) are consistent

                                    if (
                                        not timestamp_is_timestamp
                                        and not timestamp_is_int
                                        and not timestamp_is_float
                                    ):
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " contains a tuple that has an invalid timestamp '"
                                            + timestamp
                                            + "'"
                                        )

                                    if (
                                        previous_timestamp_was_float is not None
                                        and previous_timestamp_was_float
                                        and not timestamp_is_float
                                    ):
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " contains tuples where the timestamp format is inconsistent"
                                        )

                                    if (
                                        previous_timestamp_was_int is not None
                                        and previous_timestamp_was_int
                                        and not timestamp_is_int
                                    ):
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " contains tuples where the timestamp format is inconsistent"
                                        )

                                    if (
                                        previous_timestamp_was_timestamp is not None
                                        and previous_timestamp_was_timestamp
                                        and not timestamp_is_timestamp
                                    ):
                                        raise TsFileParseException(
                                            "dimension "
                                            + str(this_line_num_dimensions + 1)
                                            + " on line "
                                            + str(line_num + 1)
                                            + " contains tuples where the timestamp format is inconsistent"
                                        )

                                    # Store the values

                                    timestamps_for_dimension += [timestamp]
                                    values_for_dimension += [value]

                                    #  If this was our first tuple then we store the type of timestamp we had

                                    if (
                                        previous_timestamp_was_timestamp is None
                                        and timestamp_is_timestamp
                                    ):
                                        previous_timestamp_was_timestamp = True
                                        previous_timestamp_was_int = False
                                        previous_timestamp_was_float = False

                                    if (
                                        previous_timestamp_was_int is None
                                        and timestamp_is_int
                                    ):
                                        previous_timestamp_was_timestamp = False
                                        previous_timestamp_was_int = True
                                        previous_timestamp_was_float = False

                                    if (
                                        previous_timestamp_was_float is None
                                        and timestamp_is_float
                                    ):
                                        previous_timestamp_was_timestamp = False
                                        previous_timestamp_was_int = False
                                        previous_timestamp_was_float = True

                                    # See if we should add the data for this dimension

                                    if not has_another_value:
                                        if len(instance_list) < (
                                            this_line_num_dimensions + 1
                                        ):
                                            instance_list.append([])

                                        if timestamp_is_timestamp:
                                            timestamps_for_dimension = (
                                                pd.DatetimeIndex(
                                                    timestamps_for_dimension
                                                )
                                            )

                                        instance_list[
                                            this_line_num_dimensions
                                        ].append(
                                            pd.Series(
                                                index=timestamps_for_dimension,
                                                data=values_for_dimension,
                                            )
                                        )
                                        this_line_num_dimensions += 1

                                        timestamps_for_dimension = []
                                        values_for_dimension = []

                        elif has_another_value:
                            raise TsFileParseException(
                                "dimension "
                                + str(this_line_num_dimensions + 1)
                                + " on line "
                                + str(line_num + 1)
                                + " ends with a ',' that is not followed by another tuple"
                            )

                        elif has_another_dimension and target_labels:
                            raise TsFileParseException(
                                "dimension "
                                + str(this_line_num_dimensions + 1)
                                + " on line "
                                + str(line_num + 1)
                                + " ends with a ':' while it should list a class value"
                            )

                        elif has_another_dimension and not target_labels:
                            if len(instance_list) < (this_line_num_dimensions + 1):
                                instance_list.append([])

                            instance_list[this_line_num_dimensions].append(
                                pd.Series(dtype=np.float32)
                            )
                            this_line_num_dimensions += 1
                            num_dimensions = this_line_num_dimensions

                        # If this is the 1st line of data we have seen then note the dimensions

                        if not has_another_value and not has_another_dimension:
                            if num_dimensions is None:
                                num_dimensions = this_line_num_dimensions

                            if num_dimensions != this_line_num_dimensions:
                                raise TsFileParseException(
                                    "line "
                                    + str(line_num + 1)
                                    + " does not have the same number of dimensions as the previous line of data"
                                )

                    # Check that we are not expecting some more data, and if not, store that processed above

                    if has_another_value:
                        raise TsFileParseException(
                            "dimension "
                            + str(this_line_num_dimensions + 1)
                            + " on line "
                            + str(line_num + 1)
                            + " ends with a ',' that is not followed by another tuple"
                        )

                    elif has_another_dimension and target_labels:
                        raise TsFileParseException(
                            "dimension "
                            + str(this_line_num_dimensions + 1)
                            + " on line "
                            + str(line_num + 1)
                            + " ends with a ':' while it should list a class value"
                        )

                    elif has_another_dimension and not target_labels:
                        if len(instance_list) < (this_line_num_dimensions + 1):
                            instance_list.append([])

                        instance_list[this_line_num_dimensions].append(pd.Series())
                        this_line_num_dimensions += 1
                        num_dimensions = this_line_num_dimensions

                    # If this is the 1st line of data we have seen then note the dimensions

                    if (
                        not has_another_value
                        and num_dimensions != this_line_num_dimensions
                    ):
                        raise TsFileParseException(
                            "line "
                            + str(line_num + 1)
                            + " does not have the same number of dimensions as the previous line of data"
                        )

                    # Check if we should have class values, and if so that they are contained in those listed in the metadata

                    if target_labels and len(class_val_list) == 0:
                        raise TsFileParseException(
                            "the cases have no associated class values"
                        )
                else:
                    dimensions = line.split(":")
                    # If first row then note the number of dimensions (that must be the same for all cases)
                    if is_first_case:
                        num_dimensions = len(dimensions)

                        if target_labels:
                            num_dimensions -= 1

                        for _ in range(0, num_dimensions):
                            instance_list.append([])
                        is_first_case = False

                    # See how many dimensions that the case whose data in represented in this line has
                    this_line_num_dimensions = len(dimensions)

                    if target_labels:
                        this_line_num_dimensions -= 1

                    # All dimensions should be included for all series, even if they are empty
                    if this_line_num_dimensions != num_dimensions:
                        raise TsFileParseException(
                            "inconsistent number of dimensions. Expecting "
                            + str(num_dimensions)
                            + " but have read "
                            + str(this_line_num_dimensions)
                        )

                    # Process the data for each dimension
                    for dim in range(0, num_dimensions):
                        dimension = dimensions[dim].strip()

                        if dimension:
                            data_series = dimension.split(",")
                            data_series = [float(i) for i in data_series]
                            instance_list[dim].append(pd.Series(data_series))
                        else:
                            instance_list[dim].append(pd.Series())

                    if target_labels:
                        class_val_list.append(
                            float(dimensions[num_dimensions].strip())
                        )
                        
            line_num += 1

    # Check that the file was not empty
    if line_num:
        # Check that the file contained both metadata and data
        complete_regression_meta_data = (
            has_problem_name_tag
            and has_timestamps_tag
            and has_univariate_tag
            and has_target_labels_tag
            and has_data_tag
        )
        complete_classification_meta_data = (
            has_problem_name_tag
            and has_timestamps_tag
            and has_univariate_tag
            and has_class_labels_tag
            and has_data_tag
        )

        if (
            metadata_started
            and not complete_regression_meta_data
            and not complete_classification_meta_data
        ):
            raise TsFileParseException("metadata incomplete")
        elif metadata_started and not data_started:
            raise TsFileParseException("file contained metadata but no data")
        elif metadata_started and data_started and len(instance_list) == 0:
            raise TsFileParseException("file contained metadata but no data")

        # Create a DataFrame from the data parsed above
        data = pd.DataFrame(dtype=np.float32)

        for dim in range(0, num_dimensions):
            data["dim_" + str(dim)] = instance_list[dim]

        # Check if we should return any associated class labels separately

        if target_labels:
            if return_separate_X_and_y:
                return data, np.asarray(class_val_list)
            else:
                data["class_vals"] = pd.Series(class_val_list)
                return data
        else:
            return data
    else:
        raise TsFileParseException("empty file")


def process_data(X, min_len, normalise=None):
    """
    This is a function to process the data, i.e. convert dataframe to numpy array
    :param X:
    :param min_len:
    :param normalise:
    :return:
    """
    tmp = []
    for i in tqdm(range(len(X))):
        _x = X.iloc[i, :].copy(deep=True)

        # 1. find the maximum length of each dimension
        all_len = [len(y) for y in _x]
        max_len = max(all_len)

        # 2. adjust the length of each dimension
        _y = []
        for y in _x:
            # 2.1 fill missing values
            if y.isnull().any():
                y = y.interpolate(method="linear", limit_direction="both")

            # 2.2. if length of each dimension is different, uniformly scale the shorter ones to the max length
            if len(y) < max_len:
                y = uniform_scaling(y, max_len)
            _y.append(y)
        _y = np.array(np.transpose(_y))

        # 3. adjust the length of the series, chop of the longer series
        _y = _y[:min_len, :]

        # 4. normalise the series
        if normalise == "standard":
            scaler = StandardScaler().fit(_y)
            _y = scaler.transform(_y)
        if normalise == "minmax":
            scaler = MinMaxScaler().fit(_y)
            _y = scaler.transform(_y)

        tmp.append(_y)
    X = np.array(tmp)
    return X


class StandardScaler:
    def __init__(self):
        self.mean = 0.0
        self.std = 1.0

    def fit(self, data):
        self.mean = data.mean(0)
        self.std = data.std(0)

    def transform(self, data):
        mean = (
            torch.from_numpy(self.mean).type_as(data).to(data.device)
            if torch.is_tensor(data)
            else self.mean
        )
        std = (
            torch.from_numpy(self.std).type_as(data).to(data.device)
            if torch.is_tensor(data)
            else self.std
        )
        return (data - mean) / std

    def inverse_transform(self, data):
        mean = (
            torch.from_numpy(self.mean).type_as(data).to(data.device)
            if torch.is_tensor(data)
            else self.mean
        )
        std = (
            torch.from_numpy(self.std).type_as(data).to(data.device)
            if torch.is_tensor(data)
            else self.std
        )
        return (data * std) + mean


class TimeFeature:
    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.isocalendar().week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.
    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
    """

    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    offset = to_offset(freq_str)

    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)


def time_features(dates, timeenc=1, freq="h") -> np.ndarray:
    """
    > `time_features` takes in a `dates` dataframe with a 'dates' column and extracts the date down to `freq` where freq can be any of the following if `timeenc` is 0:
    > * m - [month]
    > * w - [month]
    > * d - [month, day, weekday]
    > * b - [month, day, weekday]
    > * h - [month, day, weekday, hour]
    > * t - [month, day, weekday, hour, *minute]
    >
    > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]):
    > * Q - [month]
    > * M - [month]
    > * W - [Day of month, week of year]
    > * D - [Day of week, day of month, day of year]
    > * B - [Day of week, day of month, day of year]
    > * H - [Hour of day, day of week, day of month, day of year]
    > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]
    > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]

    *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.
    """
    if timeenc == 0:
        dates["month"] = dates.date.apply(lambda row: row.month, 1)
        dates["day"] = dates.date.apply(lambda row: row.day, 1)
        dates["weekday"] = dates.date.apply(lambda row: row.weekday(), 1)
        dates["hour"] = dates.date.apply(lambda row: row.hour, 1)
        dates["minute"] = dates.date.apply(lambda row: row.minute, 1)
        dates["minute"] = dates.minute.map(lambda x: x // 15)
        freq_map = {
            "y": [],
            "m": ["month"],
            "w": ["month"],
            "d": ["month", "day", "weekday"],
            "b": ["month", "day", "weekday"],
            "h": ["month", "day", "weekday", "hour"],
            "t": ["month", "day", "weekday", "hour", "minute"],
        }
        return dates[freq_map[freq.lower()]].values
    else:
        dates = pd.to_datetime(dates.date.values)
        return np.vstack(
            [feat(dates) for feat in time_features_from_frequency_str(freq)]
        ).transpose(1, 0)
