# https://github.com/gmingas/DataSynthesiser
# https://github.com/gmingas/DataSynthesiser/blob/master/docs/cr-datasynthesizer-privacy.pdf


import json
import os
import numpy as np
import pandas as pd
import random
import sys

from DataSynthesizer.DataDescriber import DataDescriber
from DataSynthesizer.DataGenerator import DataGenerator
from DataSynthesizer.ModelInspector import ModelInspector
from DataSynthesizer.lib.utils import display_bayesian_network


# When the last column of ``df_`` has fewer distinct values than this, it is
# left out of the scaler round-trip (it is neither inverse-transformed before
# training nor transformed after sampling). Independent of ``category_threshold``.
_UNSCALED_LAST_COLUMN_NUNIQUE_LIMIT = 11


def _restore_original_scale(df_, scaler, last_column_unscaled):
    """Undo ``scaler`` on ``df_`` and return a frame with string column names.

    When ``last_column_unscaled`` is true, only the leading columns are passed
    through ``scaler.inverse_transform`` and the last column is appended as-is.
    """
    if last_column_unscaled:
        x_train = pd.DataFrame(scaler.inverse_transform(df_.iloc[:, :-1]))
        # Attach the last column by position. Assigning the Series directly
        # would align on index labels, which fills the column with NaN (or
        # mismatched rows) whenever ``df_`` does not carry the same index as
        # the freshly built ``x_train`` (e.g. a shuffled train split).
        last_column = df_.iloc[:, -1].copy()
        last_column.index = x_train.index
        x_train[x_train.shape[1]] = last_column
    else:
        x_train = pd.DataFrame(scaler.inverse_transform(df_))

    x_train.columns = x_train.columns.astype(str)
    return x_train


def _build_attribute_metadata(x_train, category_threshold, other_categorical, keys):
    """Derive the per-column dictionaries handed to ``DataDescriber``.

    Returns a tuple ``(datatypes, categorical_attributes, candidate_keys)``:

    * ``datatypes`` maps a column name to ``"Integer"``, ``"Float"``,
      ``"DateTime"`` or ``"String"`` based on the column dtype (columns of any
      other dtype are absent from the mapping).
    * ``categorical_attributes`` maps every column name to a bool. A column is
      categorical when all of its values are 0/1, when it has an integer dtype
      and fewer than ``category_threshold`` distinct values, or when it is
      listed in ``other_categorical``.
    * ``candidate_keys`` maps every column name to whether it is in ``keys``.
    """
    integer_types = ["int_", "intp", "int8", "int16", "int32", "int64"]
    # The "float_" alias is omitted: it is redundant with "float64" and was
    # removed in NumPy 2.0.
    float_types = ["float16", "float32", "float64"]

    integer_cols = x_train.select_dtypes(include=integer_types).columns
    is_low_cardinality = (x_train.nunique() < category_threshold).to_numpy()
    low_cardinality_cols = x_train.columns[is_low_cardinality]
    low_cardinality_int_cols = low_cardinality_cols[
        low_cardinality_cols.isin(integer_cols)
    ]
    # Kept from the original implementation: shows which integer columns were
    # auto-detected as categorical.
    print(low_cardinality_int_cols)

    binary_cols = x_train.columns[x_train.isin([0, 1]).all().to_numpy()]

    # Names are compared as strings, so integer entries in
    # ``other_categorical`` match the stringified positional column names.
    categorical_names = {str(col) for col in binary_cols}
    categorical_names.update(str(col) for col in low_cardinality_int_cols)
    categorical_names.update(str(col) for col in other_categorical)

    categorical_attributes = {
        col: col in categorical_names for col in x_train.columns
    }
    candidate_keys = {col: col in keys for col in x_train.columns}

    dtype_groups = (
        ("Integer", integer_types),
        ("Float", float_types),
        ("DateTime", ["datetime"]),
        ("String", ["object"]),
    )
    datatypes = {}
    for label, dtypes in dtype_groups:
        for col in x_train.select_dtypes(include=dtypes).columns:
            datatypes[col] = label

    return datatypes, categorical_attributes, candidate_keys


def PrivBayes_runner(
    seed,
    epsilon,
    delta,
    n_s,
    df_,
    category_threshold,
    k,
    histogram_bins,
    in_path,
    out_dir,
    wandb,
    model_id,
    scaler,
    other_categorical=None,
    keys=None,
):
    """Fit DataSynthesizer in correlated-attribute mode and sample synthetic rows.

    The column metadata (datatype, categorical flag, candidate-key flag) is
    inferred from ``df_`` after undoing ``scaler``; the model itself is fitted
    on the CSV at ``in_path``. The description and the sampled data are written
    to ``out_dir`` as ``description.json`` and ``synthetic_data.csv``.

    NOTE(review): the metadata is keyed by positional column names ("0", "1",
    ...), so the CSV at ``in_path`` presumably needs matching headers - confirm
    against the caller.

    Args:
        seed: Seed for ``random``, the legacy NumPy global RNG and the
            describer.
        epsilon: Forwarded to the describer as ``epsilon``.
        delta: Unused; accepted for interface compatibility.
        n_s: Number of synthetic rows to generate.
        df_: Scaled training data as a DataFrame. If its last column has fewer
            than 11 distinct values, that column is treated as unscaled.
        category_threshold: Integer columns with fewer distinct values than
            this are flagged categorical; also forwarded to ``DataDescriber``.
        k: Forwarded to the describer as ``k``.
        histogram_bins: Forwarded to ``DataDescriber``.
        in_path: Path of the dataset file the describer reads.
        out_dir: Directory receiving ``description.json`` and
            ``synthetic_data.csv``.
        wandb: Unused; accepted for interface compatibility.
        model_id: Unused; accepted for interface compatibility.
        scaler: Object providing ``inverse_transform`` and ``transform``.
        other_categorical: Extra column names to flag as categorical.
        keys: Column names to flag as candidate keys.

    Returns:
        A tuple ``(synth_data, None)``. ``synth_data`` is a DataFrame when the
        last column is treated as unscaled; otherwise it is whatever
        ``scaler.transform`` returns.
    """
    other_categorical = [] if other_categorical is None else other_categorical
    keys = [] if keys is None else keys

    # Seed the global RNGs. (A bare ``np.random.default_rng(seed)`` call only
    # builds a Generator that is thrown away, so it is not issued here.)
    random.seed(seed)
    np.random.seed(seed)

    last_column_unscaled = (
        df_.iloc[:, -1].nunique() < _UNSCALED_LAST_COLUMN_NUNIQUE_LIMIT
    )

    x_train = _restore_original_scale(df_, scaler, last_column_unscaled)
    datatypes, categorical_attributes, candidate_keys = _build_attribute_metadata(
        x_train, category_threshold, other_categorical, keys
    )

    describer = DataDescriber(
        category_threshold=category_threshold, histogram_bins=histogram_bins
    )

    # Train the Bayesian network
    describer.describe_dataset_in_correlated_attribute_mode(
        dataset_file=in_path,
        epsilon=epsilon,
        k=k,
        attribute_to_datatype=datatypes,
        attribute_to_is_categorical=categorical_attributes,
        attribute_to_is_candidate_key=candidate_keys,
        seed=seed,
    )

    # Persist the learned description and show the network structure.
    description_file = os.path.join(out_dir, "description.json")
    data_file = os.path.join(out_dir, "synthetic_data.csv")
    describer.save_dataset_description_to_file(description_file)
    display_bayesian_network(describer.bayesian_network)

    generator = DataGenerator()
    generator.generate_dataset_in_correlated_attribute_mode(n_s, description_file)
    generator.save_synthetic_data(data_file)

    # Read the samples back and re-apply the scaler, mirroring the branch
    # taken when the training data was un-scaled above.
    synth_data = pd.read_csv(data_file, index_col=None)
    if last_column_unscaled:
        synth_data.iloc[:, :-1] = scaler.transform(synth_data.iloc[:, :-1])
    else:
        synth_data = scaler.transform(synth_data)
    return synth_data, None
