# Import Python packages.
import importlib.util as imputil
import os
import sys
from types import ModuleType
from typing import Any, Dict, Tuple, Union

# Import external packages.
import catboost  # type: ignore[import-untyped]
import lightgbm
import numpy as np
import pandas as pd
import xgboost
from sklearn.ensemble import IsolationForest  # type: ignore[import-untyped]
from sklearn.linear_model import SGDOneClassSVM  # type: ignore[import-untyped]
from sklearn.neighbors import LocalOutlierFactor  # type: ignore[import-untyped]
from sklearn.svm import OneClassSVM  # type: ignore[import-untyped]


def rcimport(relpath: str, /) -> ModuleType:
    r"""
    Runtime command import.

    Load a Python source file as a module, register it in ``sys.modules``
    under its file stem (base name without extension), execute it and return
    it.

    Args
    ----
    - relpath
        Relative path of importing module w.r.t. current file.

    Returns
    -------
    - module
        Module.

    Raises
    ------
    - ImportError
        If no import spec or no loader can be derived from the resolved path.
    """
    # Resolve the path against the directory holding the current file.
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), relpath))
    name, _ = os.path.splitext(os.path.basename(path))

    # Build the import spec.
    # Raise explicitly instead of asserting: asserts are stripped under `-O`,
    # which would turn a bad path into an opaque attribute error on `None`.
    spec = imputil.spec_from_file_location(name, path)
    if spec is None:
        raise ImportError(f"cannot build an import spec for {path!r}", name=name, path=path)
    loader = spec.loader
    if loader is None:
        raise ImportError(f"import spec for {path!r} has no loader", name=name, path=path)
    module = imputil.module_from_spec(spec)

    # Register the module before executing it, and drop it again if execution
    # fails so that no half-initialized module stays behind in `sys.modules`.
    sys.modules[name] = module
    try:
        loader.exec_module(module)
    except BaseException:
        sys.modules.pop(name, None)
        raise
    return module


# Get evaluation module.
# `evaluate.py` is resolved relative to this file and executed at import time.
# The experiment instances below call `evaluate_.evaluate(scores, labels)` and
# iterate over the `.items()` of its result to build their profiles.
evaluate_ = rcimport("evaluate.py")


def instance_xgboost(
    train: Tuple[pd.DataFrame, pd.DataFrame],
    valid: Tuple[pd.DataFrame, pd.DataFrame],
    test: Tuple[pd.DataFrame, pd.DataFrame],
    /,
    *,
    weight_pos: float = 1.0,
    num_leaves: int = 32,
    max_depth: int = 6,
    learning_rate: float = 0.1,
    num_boost_round: int = 10,
) -> Dict[str, Any]:
    r"""
    Run one experiment with an XGBoost binary classifier.

    The booster is trained on the training data while the validation data is
    registered as an evaluation set, then scored on validation and test data.

    Args
    ----
    - train
        Training features and labels.
    - valid
        Validation features and labels.
    - test
        Test features and labels.
    - weight_pos
        Positive label weight (passed as `scale_pos_weight`).
    - num_leaves
        Maximum leaves of a tree (passed as `max_leaves`).
    - max_depth
        Maximum depth of a tree.
    - learning_rate
        Learning rate.
    - num_boost_round
        Maximum number of trees.

    Returns
    -------
    - profile
        Experiment profile: every metric returned by the evaluation module,
        keyed as `valid-<metric>` and `test-<metric>`.
    """
    # Split every dataset into features and labels.
    train_features, train_labels = train
    valid_features, valid_labels = valid
    test_features, test_labels = test

    # Wrap training and validation data with labels flattened to 1D.
    hyperparams = {
        "max_leaves": num_leaves,
        "max_depth": max_depth,
        "learning_rate": learning_rate,
        "seed": 42,
        "objective": "binary:logistic",
        "scale_pos_weight": weight_pos,
        "verbosity": 0,
    }
    train_matrix = xgboost.DMatrix(
        train_features, label=np.reshape(train_labels.values, len(train_labels))
    )
    valid_matrix = xgboost.DMatrix(
        valid_features, label=np.reshape(valid_labels.values, len(valid_labels))
    )

    # Train the booster silently with the validation set being monitored.
    booster = xgboost.train(
        params=hyperparams,
        num_boost_round=num_boost_round,
        dtrain=train_matrix,
        evals=[(valid_matrix, "valid")],
        verbose_eval=False,
    )

    # Score validation and test features (label-free matrices).
    valid_scores = booster.predict(xgboost.DMatrix(valid_features))
    test_scores = booster.predict(xgboost.DMatrix(test_features))

    # Evaluate validation first, then test, on flattened scores and labels.
    valid_metrics = evaluate_.evaluate(
        np.reshape(valid_scores, len(valid_scores)),
        np.reshape(valid_labels.values, len(valid_labels)),
    )
    test_metrics = evaluate_.evaluate(
        np.reshape(test_scores, len(test_scores)),
        np.reshape(test_labels.values, len(test_labels)),
    )

    # Merge both metric sets under split-prefixed names.
    profile = {f"valid-{key:s}": value for key, value in valid_metrics.items()}
    profile.update({f"test-{key:s}": value for key, value in test_metrics.items()})
    return profile


def instance_catboost(
    train: Tuple[pd.DataFrame, pd.DataFrame],
    valid: Tuple[pd.DataFrame, pd.DataFrame],
    test: Tuple[pd.DataFrame, pd.DataFrame],
    /,
    *,
    weight_pos: float = 1.0,
    depth: int = 6,
    learning_rate: float = 0.1,
    iterations: int = 10,
) -> Dict[str, Any]:
    r"""
    Run one experiment with a CatBoost binary classifier.

    The classifier is fitted on the training data with the validation data as
    its evaluation set, then scored on validation and test data using the
    second column of `predict_proba`.

    Args
    ----
    - train
        Training features and labels.
    - valid
        Validation features and labels.
    - test
        Test features and labels.
    - weight_pos
        Positive label weight (passed as `scale_pos_weight`).
    - depth
        Maximum depth of a tree.
    - learning_rate
        Learning rate.
    - iterations
        Maximum number of trees.

    Returns
    -------
    - profile
        Experiment profile: every metric returned by the evaluation module,
        keyed as `valid-<metric>` and `test-<metric>`.
    """
    # Split every dataset into features and labels.
    train_features, train_labels = train
    valid_features, valid_labels = valid
    test_features, test_labels = test

    # Configure a silent classifier which never writes files to disk.
    settings = {
        "depth": depth,
        "learning_rate": learning_rate,
        "random_seed": 42,
        "iterations": iterations,
        "loss_function": "Logloss",
        "scale_pos_weight": weight_pos,
        "silent": True,
        "allow_writing_files": False,
    }
    classifier = catboost.CatBoostClassifier(**settings)

    # Fit on training data with labels flattened to 1D, monitoring validation.
    train_targets = np.reshape(train_labels.values, len(train_labels))
    valid_targets = np.reshape(valid_labels.values, len(valid_labels))
    classifier.fit(
        train_features,
        train_targets,
        eval_set=(valid_features, valid_targets),
        silent=True,
    )

    # Keep only the second probability column as the score.
    valid_scores = classifier.predict_proba(valid_features)[:, 1]
    test_scores = classifier.predict_proba(test_features)[:, 1]

    # Evaluate validation first, then test, on flattened scores and labels.
    valid_metrics = evaluate_.evaluate(
        np.reshape(valid_scores, len(valid_scores)),
        np.reshape(valid_labels.values, len(valid_labels)),
    )
    test_metrics = evaluate_.evaluate(
        np.reshape(test_scores, len(test_scores)),
        np.reshape(test_labels.values, len(test_labels)),
    )

    # Merge both metric sets under split-prefixed names.
    profile = {f"valid-{key:s}": value for key, value in valid_metrics.items()}
    profile.update({f"test-{key:s}": value for key, value in test_metrics.items()})
    return profile


def instance_lightgbm(
    train: Tuple[pd.DataFrame, pd.DataFrame],
    valid: Tuple[pd.DataFrame, pd.DataFrame],
    test: Tuple[pd.DataFrame, pd.DataFrame],
    /,
    *,
    weight_pos: float = 1.0,
    num_leaves: int = 32,
    max_depth: int = 6,
    learning_rate: float = 0.1,
    num_boost_round: int = 10,
) -> Dict[str, Any]:
    r"""
    Experiment instance with a LightGBM model.

    The booster is trained on the training data with the validation data
    registered as a validation set named "valid", then scored on validation
    and test data.

    Args
    ----
    - train
        Training features and labels.
    - valid
        Validation features and labels.
    - test
        Test features and labels.
    - weight_pos
        Positive label weight (passed as `scale_pos_weight`).
    - num_leaves
        Maximum leaves of a tree.
    - max_depth
        Maximum depth of a tree.
    - learning_rate
        Learning rate.
    - num_boost_round
        Maximum number of trees.

    Returns
    -------
    - profile
        Experiment profile: every metric returned by the evaluation module,
        keyed as `valid-<metric>` and `test-<metric>`.
    """
    # Parse data.
    train_features, train_labels = train
    valid_features, valid_labels = valid
    test_features, test_labels = test

    # Fit the model by training and validation data.
    # Label frames are flattened to 1D before being handed to LightGBM.
    model = lightgbm.train(
        params=dict(
            num_leaves=num_leaves,
            max_depth=max_depth,
            learning_rate=learning_rate,
            seed=42,
            objective="binary",
            scale_pos_weight=weight_pos,
            verbosity=-1,
        ),
        num_boost_round=num_boost_round,
        train_set=lightgbm.Dataset(
            train_features, label=np.reshape(train_labels.values, len(train_labels))
        ),
        valid_sets=[
            lightgbm.Dataset(
                valid_features, label=np.reshape(valid_labels.values, len(valid_labels))
            )
        ],
        valid_names=["valid"],
    )

    # Make inference on validation and test data.
    valid_scores = model.predict(valid_features)
    test_scores = model.predict(test_features)

    # Evaluate validation first, then test, on flattened scores and labels.
    valid_metrics = evaluate_.evaluate(
        np.reshape(valid_scores, len(valid_scores)),
        np.reshape(valid_labels.values, len(valid_labels)),
    )
    test_metrics = evaluate_.evaluate(
        np.reshape(test_scores, len(test_scores)),
        np.reshape(test_labels.values, len(test_labels)),
    )

    # Merge both metric sets under split-prefixed names.
    profile = {
        **{f"valid-{key:s}": value for key, value in valid_metrics.items()},
        **{f"test-{key:s}": value for key, value in test_metrics.items()},
    }
    return profile


def instance_one_class_svm(
    train: Tuple[pd.DataFrame, pd.DataFrame],
    valid: Tuple[pd.DataFrame, pd.DataFrame],
    test: Tuple[pd.DataFrame, pd.DataFrame],
    /,
    *,
    weight_pos: float = 1.0,
    outlier_fraction: float = 0.15,
    kernel: str = "rbf",
) -> Dict[str, Any]:
    r"""
    Experiment instance with an One-class SVM model.

    The model is fitted without labels on the tuning data (training and
    validation features concatenated), then evaluated on validation and test
    data.

    Args
    ----
    - train
        Training features and labels.
    - valid
        Validation features and labels.
    - test
        Test features and labels.
    - weight_pos
        Positive label weight.
        Not used by this model; presumably kept so that all instances share
        the same keyword arguments.
    - outlier_fraction
        Outlier proportion assumed in tuning data (passed as `nu`).
    - kernel
        SVM kernel type.

    Returns
    -------
    - profile
        Experiment profile, keyed as `valid-<metric>` and `test-<metric>`.
        When the tuning data has more than 50,000 rows and the kernel is not
        "rbf", no model is fitted and every metric is negative infinity.
    """
    # Parse data.
    # Training labels are not needed since the model is fitted unsupervised.
    train_features, _ = train
    valid_features, valid_labels = valid
    test_features, test_labels = test

    # Fit the model accordingly by tuning data size.
    tune_features = pd.concat([train_features, valid_features], axis=0)
    if len(tune_features) > 50_000:
        if kernel != "rbf":
            # Non-Gaussian kernel can not be approximated in such way, so bail
            # out before building any model.
            metric_names = ["rocauc", "ap", "f1", "acc", "prec", "recl"]
            return {
                **{f"valid-{key:s}": float("-inf") for key in metric_names},
                **{f"test-{key:s}": float("-inf") for key in metric_names},
            }
        # Large dataset has to use a SGD approximation.
        # NOTE(review): scikit-learn documents `SGDOneClassSVM` as a linear
        # model that approximates a kernelized One-class SVM only when paired
        # with a kernel approximation transformer; confirm the plain linear
        # model is what is intended here.
        model = SGDOneClassSVM(nu=outlier_fraction, random_state=42)
    else:
        # Otherwise, regular size dataset can be tuned directly.
        model = OneClassSVM(nu=outlier_fraction, kernel=kernel)
    model.fit(tune_features)

    # Make inference on validation and test data.
    # Predictions of +1 become score 0.0 and predictions of -1 become score
    # 1.0, so that predicted outliers are the positive class.
    valid_scores = model.predict(valid_features)
    valid_scores = 1.0 - (valid_scores + 1.0) * 0.5
    test_scores = model.predict(test_features)
    test_scores = 1.0 - (test_scores + 1.0) * 0.5

    # Evaluate validation first, then test, on flattened scores and labels.
    valid_metrics = evaluate_.evaluate(
        np.reshape(valid_scores, len(valid_scores)),
        np.reshape(valid_labels.values, len(valid_labels)),
    )
    test_metrics = evaluate_.evaluate(
        np.reshape(test_scores, len(test_scores)),
        np.reshape(test_labels.values, len(test_labels)),
    )
    profile = {
        **{f"valid-{key:s}": value for key, value in valid_metrics.items()},
        **{f"test-{key:s}": value for key, value in test_metrics.items()},
    }
    return profile


def instance_isolation_forest(
    train: Tuple[pd.DataFrame, pd.DataFrame],
    valid: Tuple[pd.DataFrame, pd.DataFrame],
    test: Tuple[pd.DataFrame, pd.DataFrame],
    /,
    *,
    weight_pos: float = 1.0,
    outlier_fraction: Union[float, str] = "auto",
    n_estimators: int = 100,
    max_samples: Union[float, str] = "auto",
) -> Dict[str, Any]:
    r"""
    Experiment instance with an Isolation Forest model.

    The model is fitted without labels on the tuning data (training and
    validation features concatenated), then evaluated on validation and test
    data.

    Args
    ----
    - train
        Training features and labels.
    - valid
        Validation features and labels.
    - test
        Test features and labels.
    - weight_pos
        Positive label weight.
        Not used by this model; presumably kept so that all instances share
        the same keyword arguments.
    - outlier_fraction
        Outlier proportion assumed in tuning data (passed as `contamination`).
    - n_estimators
        Maximum number of trees.
    - max_samples
        Maximum number of learning samples.

    Returns
    -------
    - profile
        Experiment profile: every metric returned by the evaluation module,
        keyed as `valid-<metric>` and `test-<metric>`.
    """
    # Parse data.
    # Training labels are not needed since the model is fitted unsupervised.
    train_features, _ = train
    valid_features, valid_labels = valid
    test_features, test_labels = test

    # Fit the model by tuning data.
    tune_features = pd.concat([train_features, valid_features], axis=0)
    model = IsolationForest(
        contamination=outlier_fraction,
        n_estimators=n_estimators,
        max_samples=max_samples,
        random_state=42,
    )
    model.fit(tune_features)

    # Make inference on validation and test data.
    # Predictions of +1 become score 0.0 and predictions of -1 become score
    # 1.0, so that predicted outliers are the positive class.
    valid_scores = model.predict(valid_features)
    valid_scores = 1.0 - (valid_scores + 1.0) * 0.5
    test_scores = model.predict(test_features)
    test_scores = 1.0 - (test_scores + 1.0) * 0.5

    # Evaluate validation first, then test, on flattened scores and labels.
    valid_metrics = evaluate_.evaluate(
        np.reshape(valid_scores, len(valid_scores)),
        np.reshape(valid_labels.values, len(valid_labels)),
    )
    test_metrics = evaluate_.evaluate(
        np.reshape(test_scores, len(test_scores)),
        np.reshape(test_labels.values, len(test_labels)),
    )
    profile = {
        **{f"valid-{key:s}": value for key, value in valid_metrics.items()},
        **{f"test-{key:s}": value for key, value in test_metrics.items()},
    }
    return profile


def instance_local_outlier_factor(
    train: Tuple[pd.DataFrame, pd.DataFrame],
    valid: Tuple[pd.DataFrame, pd.DataFrame],
    test: Tuple[pd.DataFrame, pd.DataFrame],
    /,
    *,
    weight_pos: float = 1.0,
    outlier_fraction: Union[float, str] = "auto",
    n_neighbors: int = 20,
    leaf_size: int = 30,
) -> Dict[str, Any]:
    r"""
    Experiment instance with a Local Outlier Factor model.

    The model is fitted and predicted in one step, twice: once on the tuning
    data (training and validation features concatenated) to score the
    validation rows, and once on tuning plus test data to score the test rows.

    Args
    ----
    - train
        Training features and labels.
    - valid
        Validation features and labels.
    - test
        Test features and labels.
    - weight_pos
        Positive label weight.
        Not used by this model; presumably kept so that all instances share
        the same keyword arguments.
    - outlier_fraction
        Outlier proportion assumed in tuning data (passed as `contamination`).
    - n_neighbors
        Number of neighbors for K-neighbor queries.
    - leaf_size
        Leaf size passed to tree algorithm.

    Returns
    -------
    - profile
        Experiment profile: every metric returned by the evaluation module,
        keyed as `valid-<metric>` and `test-<metric>`.
    """
    # Parse data.
    # Training labels are not needed since the model is fitted unsupervised.
    train_features, _ = train
    valid_features, valid_labels = valid
    test_features, test_labels = test

    # Stack the datasets and remember where each trailing part starts.
    tune_features = pd.concat([train_features, valid_features], axis=0)
    full_features = pd.concat([tune_features, test_features], axis=0)
    num_trains = len(train_features)
    num_tunes = len(tune_features)
    model = LocalOutlierFactor(
        contamination=outlier_fraction, n_neighbors=n_neighbors, leaf_size=leaf_size
    )

    # Make inference on validation and test data.
    # Only the trailing rows of each fit are kept: validation rows follow the
    # training rows, and test rows follow the tuning rows.
    # Predictions of +1 become score 0.0 and predictions of -1 become score
    # 1.0, so that predicted outliers are the positive class.
    valid_scores = model.fit_predict(tune_features)[num_trains:]
    valid_scores = 1.0 - (valid_scores + 1.0) * 0.5
    test_scores = model.fit_predict(full_features)[num_tunes:]
    test_scores = 1.0 - (test_scores + 1.0) * 0.5

    # Evaluate validation first, then test, on flattened scores and labels.
    valid_metrics = evaluate_.evaluate(
        np.reshape(valid_scores, len(valid_scores)),
        np.reshape(valid_labels.values, len(valid_labels)),
    )
    test_metrics = evaluate_.evaluate(
        np.reshape(test_scores, len(test_scores)),
        np.reshape(test_labels.values, len(test_labels)),
    )
    profile = {
        **{f"valid-{key:s}": value for key, value in valid_metrics.items()},
        **{f"test-{key:s}": value for key, value in test_metrics.items()},
    }
    return profile
