from typing import Callable

import numpy as np
import pandas as pd

from algo.iforest import IForest
from algo.get_expected_depth_1d import get_depths
from lib.constants import (
    N_TREES_RANGE,
    DATASETS,
    N_TRIALS,
    SUBSAMPLE_SIZE,
)
from lib.runner import Runner
from lib.result_manager import ResultManager
from lib.utils import subsample


def calc_mse_error(X: np.ndarray, n_trees: int) -> float:
    """Return the mean squared difference between iForest and ExpectedDepth depths.

    An ``IForest`` built with ``n_trees`` is fitted on ``X`` and queried for
    the depths of those same points. The reference depths come from
    ``get_depths`` applied to ``X`` flattened to one dimension.

    Args:
        X (np.ndarray): The points to evaluate. The caller in this file
            passes a single feature as an ``(n, 1)`` column.
        n_trees (int): The number of trees handed to ``IForest``.

    Returns:
        float: The mean of the squared element-wise depth differences.
    """
    # Fit the forest first, then query it on the training points themselves.
    forest = IForest(n_trees)
    forest.fit(X)
    forest_depths = forest.get_depths(X)

    # The 1-D expected-depth routine is given a flat view of the same points.
    flat_points = X.reshape(-1)
    reference_depths = get_depths(flat_points)

    squared_errors = np.square(forest_depths - reference_depths)
    return float(squared_errors.mean())


def get_target_func(X: np.ndarray, n_trees: int) -> Callable[[], float]:
    """Build a zero-argument callable that evaluates ``calc_mse_error``.

    The returned function closes over ``X`` and ``n_trees`` so it can be
    handed to code that expects a callable taking no arguments (in this file,
    ``Runner.run``). Nothing is computed until the returned function is
    called; each call evaluates ``calc_mse_error(X, n_trees=n_trees)`` anew.

    Args:
        X (np.ndarray): The points forwarded to ``calc_mse_error``.
        n_trees (int): The number of trees forwarded to ``calc_mse_error``.

    Returns:
        Callable[[], float]: A function returning the mse for the bound
        arguments.
    """

    def func() -> float:
        return calc_mse_error(X, n_trees=n_trees)

    return func


def main():
    """Run the depth-mse experiment for every dataset, tree count and feature.

    Each entry of ``DATASETS`` is read as a CSV. For every value in
    ``N_TREES_RANGE`` and every column of the frame, the combination is
    skipped when ``ResultManager`` reports that results already exist.
    Otherwise any existing results for it are cleaned, the column is
    subsampled and reshaped into a single-feature column, the target function
    is run through ``Runner``, and ``runner.values`` is pushed to the result
    manager.
    """
    results = ResultManager()
    trial_runner = Runner(n_trials=N_TRIALS)

    for dataset, path in DATASETS.items():
        frame = pd.read_csv(path)
        for n_trees in N_TREES_RANGE:
            for feature in frame.columns:
                task_name = f"{dataset}_n_trees_{n_trees}_feature_{feature}"

                # Guard clause: nothing to do when this combination is done.
                if results.check_results_exist(dataset, n_trees, feature):
                    print(f"Skipping {task_name} because it already exists")
                    continue

                # Clear whatever is stored for this combination before rerunning.
                results.clean_existing_results(dataset, n_trees, feature)

                column = frame[feature].values
                points = subsample(column).reshape(-1, 1)

                trial_runner.run(get_target_func(points, n_trees), task_name)
                results.push_results(
                    dataset, n_trees, feature, trial_runner.values
                )


# Run the experiment sweep only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
