import numpy as np
import pandas as pd
import os
from sklearn.datasets import load_iris, fetch_kddcup99
from lib.constants import DATASETS_DIR


def add_random_noise(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the dataframe with small uniform noise added.

    The noise is drawn uniformly from ``[-level / 3, level / 3)`` where
    ``level`` is the smallest nonzero absolute difference between
    consecutive rows over all columns, capped at ``1e-5``. Two values that
    are one such step apart therefore cannot swap order after the noise is
    applied.

    Args:
        df (pd.DataFrame): The dataframe to add noise to. Every column must
            be convertible to float. The input is left unmodified.

    Returns:
        pd.DataFrame: A new float dataframe with the same index and columns
            as ``df`` and noise added to every value.
    """
    max_noise_level = 1e-5
    values = df.to_numpy(dtype=float)

    # The minimum nonzero difference between consecutive values, taken over
    # every column. NaN steps compare False against 0 and are skipped.
    steps = np.abs(np.diff(values, axis=0))
    nonzero_steps = steps[steps > 0]

    # Fall back to the cap when there is no nonzero step (constant columns,
    # fewer than two rows, empty frame) so the level is never NaN.
    noise_level = max_noise_level
    if nonzero_steps.size:
        noise_level = min(float(nonzero_steps.min()), max_noise_level)

    n_rows, n_cols = values.shape
    # Drawn as (columns, rows) and transposed so the random stream is
    # consumed one column at a time.
    noise = np.random.uniform(
        -noise_level / 3,
        noise_level / 3,
        (n_cols, n_rows),
    ).T
    return pd.DataFrame(values + noise, index=df.index, columns=df.columns)


def dump_synthetic_data(sample_size: int = 100):
    """Build a dataframe of synthetic samples from three distributions.

    Args:
        sample_size (int): Number of rows to draw for each column.

    Returns:
        pd.DataFrame: Columns "Normal" (mean 0, scale 1), "Uniform"
            (between 0 and 1) and "Exponential" (scale 1), each holding
            ``sample_size`` draws from ``np.random``.
    """
    shape = (sample_size,)
    # Dict values are evaluated top to bottom, so the global random stream
    # is consumed in the order Normal, Uniform, Exponential.
    return pd.DataFrame(
        {
            "Normal": np.random.normal(0, 1, shape),
            "Uniform": np.random.uniform(0, 1, shape),
            "Exponential": np.random.exponential(1, shape),
        }
    )


def dump_iris_data():
    """Load the iris dataset and return its data with random noise added.

    Returns:
        The result of passing the ``data`` frame from
        ``load_iris(as_frame=True)`` through ``add_random_noise``.
    """
    features = load_iris(as_frame=True).data
    return add_random_noise(features)


def dump_http_data():
    """Load the "http" subset of KDD Cup 99 and return it with noise added.

    Returns:
        The result of passing the ``data`` frame from
        ``fetch_kddcup99(subset="http", as_frame=True)`` through
        ``add_random_noise``.
    """
    features = fetch_kddcup99(subset="http", as_frame=True).data
    return add_random_noise(features)


def dump_smtp_data():
    """Load the "smtp" subset of KDD Cup 99 and return it with noise added.

    Returns:
        The result of passing the ``data`` frame from
        ``fetch_kddcup99(subset="smtp", as_frame=True)`` through
        ``add_random_noise``.
    """
    features = fetch_kddcup99(subset="smtp", as_frame=True).data
    return add_random_noise(features)


def main():
    """Build every dataset and write each one to a CSV in DATASETS_DIR.

    The output directory is created if it does not exist. All datasets are
    built first, then each is written to ``<DATASETS_DIR>/<name>.csv``
    without the index column.
    """
    os.makedirs(DATASETS_DIR, exist_ok=True)

    builders = (
        ("synthetic", dump_synthetic_data),
        ("iris", dump_iris_data),
        ("http", dump_http_data),
        ("smtp", dump_smtp_data),
    )
    # Build everything before writing anything, in the listed order.
    frames = {label: build() for label, build in builders}

    for label, frame in frames.items():
        frame.to_csv(f"{DATASETS_DIR}/{label}.csv", index=False)


# Run the dataset export only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
