import numpy as np
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import fcluster
from utility_functions import *
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import roc_auc_score, root_mean_squared_error
from xgboost import XGBClassifier, XGBRegressor
import time
from loguru import logger
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import re


class icfesl:

    @staticmethod
    def group_categorical_features(X:pd.DataFrame, x_vars:list, distance_threshold:float):
        """
        group features based on Hamming distance

        :param X: input DataFrame to cluster features on
        :param x_vars: the list of features to cluster
        :param distance_threshold: Hamming distance threshold to cluster features
        :return: clustered features
        """

        grouping = pd.DataFrame(columns=['feature','cluster'])
        grouping.feature = x_vars
        distance_matrix = pdist(X[x_vars].to_numpy().T, metric='hamming')
        linked = linkage(distance_matrix, method='complete')
        grouping.cluster = fcluster(linked, t=distance_threshold, criterion='distance')
        res = {}
        for c in np.unique(grouping.cluster):
            res[c] = grouping.loc[grouping['cluster']==c,'feature'].tolist()
        return res

    @staticmethod
    def combine_features(X:pd.DataFrame, grouping:dict, group_name:dict=None):
        """
        function to combine features based on grouping

        :param X: DataFrame before combining
        :param grouping: grouping schema
        :param group_name: names of the groups
        :return: DataFrame after combining
        """

        if group_name == None:
            group_name = {}
            for key in grouping.keys():
                newkey = grouping[key][0]
                group_name[newkey] = grouping[key]
            grouping = group_name

        df_ = X.copy()
        for group in grouping.keys():
            members = grouping[group]
            if len(members) == 1:
                pass
            else:
                df_ = df_.drop(members, axis=1)

                newcolumn = pd.Series(np.array([False for s in range(X.shape[0])]))
                for member in members:
                    if member not in X.columns:
                        print(f'column {member} not in input X: ignored...')
                    else:
                        newcolumn = newcolumn | X[member]

                if df_.shape[0] > 0:
                    df_ = pd.concat([df_, pd.DataFrame(newcolumn.astype(int), columns=[group])], axis=1)
                else:
                    df_ = newcolumn
        return df_


    @staticmethod
    def collapse_categorical_features(coeffs:pd.DataFrame, dpdx_n:list, H_n:dict, coef_col:str, cat_vars:list, X:pd.DataFrame, y:pd.Series, separator:str='::', mtype:str='regression', alpha:float=0.05, min_obs:int=10, stop_criterion:str='auto', intertia_opt:float=0.05, inertia_cutoff:float=0.05):
        """
        function to cluster one-hot-encoded features

        For every feature in ``cat_vars`` the coefficients of its OHE levels
        with at least ``min_obs`` observations are clustered with weighted
        K-Means. The number of clusters grows from 1 until a stopping criterion
        fires or every level sits in its own cluster.

        Side effect: a boolean ``sufficient_obs`` column is written into the
        caller's ``coeffs`` DataFrame.

        :param coeffs: coefficient table of OHE features; must hold the columns
                       ``feature``, ``obs_count`` and ``coef_col``
        :param dpdx_n: list of derivative values of observations
        :param H_n: indices of the lists of 1-entries of OHE features, keyed by
                    the row label of the feature in ``coeffs`` (only read for
                    classification)
        :param coef_col: coefficient column in coeffs
        :param cat_vars: list of categorical features before OHE
        :param X: OHE observations
        :param y: target
        :param separator: separator between the categorical feature name and its level in OHE names
        :param mtype: problem type regression or classification
        :param alpha: regularization factor used in f_get_all_coef; added to the
                      observation counts that weight K-Means for regression
        :param min_obs: number of obs required to be considered in clustering
        :param stop_criterion: stopping criterion for K-Means when clustering coefficients
                               ('gap', 'inertia' or 'auto' for both)
        :param intertia_opt: threshold for inertia level stopping between 0 and 1
        :param inertia_cutoff: threshold for inertia improvement cutoff
        :return: tuple of five items
                 cluster_group: clustered features, keyed ``"<feature>+<cluster number>"``
                 criterion: stopping criteria for each categorical feature clustering
                 inertia: inertia per tried cluster count, for each categorical feature
                 gap_stats: gap-statistics of each categorical feature clustering
                 var_inf: sum of the per-feature relative prediction changes divided by
                          the squared norm of the last clustered prediction vector;
                          ``np.nan`` when no feature was clustered
        """

        assert stop_criterion in ['gap', 'inertia', 'auto'], 'stopping criterion must be gap, inertia or auto'

        X = X.to_numpy()

        y = y.to_numpy()

        # NOTE: this writes into the DataFrame owned by the caller.
        coeffs['sufficient_obs'] = (coeffs['obs_count']>= min_obs)

        assert mtype in ['regression','classification'], 'mtype must be either regression or classification'

        # Reference predictions made with the un-clustered coefficients.
        if mtype == 'regression':
            offset = np.mean(y)
            y_pred0 = ols_predict(coeffs[coef_col].to_numpy(), X, offset)
        else:
            y_pred0 = logit_predict(coeffs[coef_col].to_numpy(), X)

        cluster = {}
        gap_stats = {}
        inertia = {}
        criterion = []
        var_inf = []

        for var in cat_vars:

            df_filtered = coeffs[coeffs['feature'].str.startswith(f'{var}{separator}') & (coeffs['sufficient_obs']==True)]

            max_cluster = df_filtered.shape[0]

            if max_cluster==0:
                cluster[var] = []
                gap_stats[var] = []
                inertia[var] = []
                criterion.append('empty coeffs:skipped')
                continue
            elif max_cluster==1:
                # A single level is trivially its own cluster.
                cluster[var] = ['0%%'+'%%'.join(df_filtered['feature'].tolist())]
                gap_stats[var] = [0]
                inertia[var] = [0]
                criterion.append('optimal:skipped')
                continue
            else:
                cluster[var] = []
                gap_stats[var] = []
                inertia[var] = []

            coef_low = df_filtered[coef_col].min()

            coef_high = df_filtered[coef_col].max()

            coefs = df_filtered[coef_col].to_numpy().reshape(-1, 1)

            baseline_d = np.sum(np.square(coefs))

            # The K-Means sample weights do not depend on the number of
            # clusters, so compute them once per feature.
            if mtype == 'regression':
                weights = df_filtered.obs_count+alpha
            else:
                weights = []
                for x in df_filtered.feature.tolist():
                    idx = H_n[coeffs.index[coeffs.feature==x].values[0]]
                    weights.append(np.dot(dpdx_n[idx], dpdx_n[idx]))
                weights = np.array(weights)

            for i in range(1, max_cluster + 1):
                # NOTE(review): resetting the list on every pass keeps only the
                # latest gap statistic, so the gap rule below (which needs two
                # entries) can never fire. Kept as-is; confirm the intent.
                gap_stats[var] = []
                approx = coeffs.copy()
                approx['group'] = -1

                # The weights must go in by keyword: the second positional
                # parameter of scikit-learn's KMeans.fit is the ignored `y`.
                # (assumes KMeans is scikit-learn's - it arrives via a star import)
                kmean = KMeans(init="k-means++", n_clusters=i, random_state=0).fit(coefs, sample_weight=weights)

                inertia[var].append(kmean.inertia_)

                clustered_dps = {}
                approx.loc[df_filtered.index,'group'] = kmean.labels_

                baseline_center = 0
                for j in range(len(kmean.cluster_centers_)):
                    c_index = [index for index, value in enumerate(kmean.labels_) if value == j]
                    clustered_dps[j] = [coefs[k][0] for k in c_index]
                    # Replace every member coefficient by its cluster centre.
                    approx.loc[approx['group'] == j, coef_col] = kmean.cluster_centers_[j][0]
                    baseline_center += len(clustered_dps[j])*(kmean.cluster_centers_[j]**2)

                gap_st, std = gap_stat(clustered_dps=clustered_dps, low=coef_low, high=coef_high, n_simu=10*len(clustered_dps.keys()))
                gap_stats[var].append(gap_st)

                if stop_criterion == 'gap' or stop_criterion == 'auto':
                    if (len(gap_stats[var]) > 1) and (gap_stats[var][-1]<=gap_stats[var][-2]+std):
                        criterion.append('optimal: gap')
                        break

                if stop_criterion == 'inertia' or stop_criterion == 'auto':

                    baseline = baseline_d + baseline_center

                    if kmean.inertia_/ baseline <= intertia_opt:
                        criterion.append('optimal: inertia below threshold')
                        break
                    elif (len(inertia[var]) > 1) and ((inertia[var][-2] - inertia[var][-1])/inertia[var][-1]) <= inertia_cutoff:
                        criterion.append('optimal: inertia improve meets cutoff threshold')
                        break
            else:
                # No rule stopped the search: every cluster count was tried.
                criterion.append('max-steps')

            # Encode each cluster as "<number>%%<feature>%%<feature>...".
            for c in range(len(np.unique(kmean.labels_))):
                cluster[var].append(f'{c}%%' + '%%'.join(approx.loc[approx['group']==c,'feature'].tolist()))

            if mtype == 'regression':
                ypred = ols_predict(approx[coef_col].to_numpy(), X, offset)
            else:
                ypred = logit_predict(approx[coef_col].to_numpy(), X)

            # Relative change of the predictions caused by clustering this feature.
            var_inf.append(np.linalg.norm(ypred-y_pred0)/np.linalg.norm(y_pred0))

        cluster_group = {}

        for key in cluster.keys():

            for cl in cluster[key]:
                items = cl.split('%%')
                cl_num = items[0]
                cluster_group[f"{key}+{cl_num}"] = items[1:]

        if len(var_inf) > 0:
            return cluster_group, criterion, inertia, gap_stats, np.sum(var_inf)/np.dot(np.squeeze(ypred),np.squeeze(ypred))

        # Same arity as the normal path, so callers can always unpack five values.
        return cluster_group, criterion, inertia, gap_stats, np.nan


    @staticmethod
    def f_get_all_coef(X: pd.DataFrame, y:pd.Series, mtype: str = 'regression', alpha:float=0.05, max_iter:int=100):
        """
        function to calculate coefficient vector

        :param X: observations
        :param y: target
        :param mtype: problem type
        :param alpha: L2 factor
        :max_iter: max iterations
        :return: coefficient vector
               : converged
        """

        assert mtype in ['regression', 'classification'], 'mtype must be regression or classification'

        if mtype == 'regression':
            coeffs = pd.DataFrame(columns=['feature', 'coef'])
            coeffs.feature = X.columns.tolist()
            y = y - np.mean(y)
            coef, converged = f_Ols(X, y, alpha, max_iter)
            coeffs.coef = coef
            return coeffs, converged
        else:
            coeffs = pd.DataFrame(columns=['feature', 'coef'])
            coeffs.feature = ['const'] + X.columns.tolist()
            coef, converged = f_Logit(X, y, alpha, max_iter)
            coeffs.coef = coef
            return coeffs, converged

    @staticmethod
    def f_get_dummies(df: pd.DataFrame, varlist: list, encoder: OneHotEncoder = None):
        """
        One hot encoding

        The encoded columns are named ``<feature>::<category>`` and replace the
        original ``varlist`` columns, which are dropped from the result.

        :param df: input DataFrame to encode
        :param varlist: list of features to encode
        :param encoder: pre-trained OHE encoder; when None a new encoder is
                        fitted on ``df[varlist]``
        :return: One hot encoded features; when no encoder was passed in, a
                 tuple ``(encoded DataFrame, fitted encoder)`` is returned instead
        """

        return_encoder = encoder is None
        if return_encoder:
            encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False, drop='first')
            encoded_features = encoder.fit_transform(df[varlist])
        else:
            encoded_features = encoder.transform(df[varlist])

        raw_names = encoder.get_feature_names_out(varlist)

        # Swap only the leading "<feature>_" for "<feature>::". Rewriting every
        # occurrence would also mangle category values that happen to contain
        # "<some feature>_".
        # NOTE(review): if one feature name is a prefix of another (e.g. 'a' and
        # 'a_b') the first match in varlist order wins, as before.
        feature_names = []
        for name in raw_names:
            for var in varlist:
                prefix = f"{var}_"
                if name.startswith(prefix):
                    name = f"{var}::{name[len(prefix):]}"
                    break
            feature_names.append(name)

        encoded_df = pd.DataFrame(encoded_features, columns=feature_names, index=df.index)

        df_final = pd.concat([df.drop(columns=varlist), encoded_df], axis=1)

        if return_encoder:
            return df_final, encoder
        else:
            return df_final


    @staticmethod
    def regularized_search_algorun(X:pd.DataFrame, y:pd.Series, X_test:pd.DataFrame, y_test:pd.Series, varlist:list, mtype:str='classification',
                                      alphas:list=[0.05], max_iter:int=100, cbine_column:bool=False, distance_threshold:float=0.1, min_obs:int=20, stop_criterion:str='auto',
                                      intertia_opt:float=0.05, inertia_cutoff:float=0.05,xgboost:bool=True, figure:bool=False, figsize:tuple=(6.4,4.8),
                                      model_weights:list = [0.5, 0.5]):

        """
        function to loop through regularization factors and find the optimal one and corresponding clustering schema

        For every factor in ``alphas`` the coefficients are fitted, the OHE
        levels are clustered and combined, and a regression model (plus
        optionally xgboost) is trained on the combined features and scored on
        both the training and the testing data.

        :param X: OHE observations for training
        :param y: target for training
        :param X_test: OHE observations for testing
        :param y_test: target for testing
        :param varlist: list of categorical features before OHE
        :param mtype: problem type; 'classification' scores with AUROC, anything
                      else is treated as regression and scored with RMSE
        :param alphas: list of regularization factors to run
        :param max_iter: max iterations handed to the coefficient fit
        :param cbine_column: if turn on Hamming distance clustering as preprocessing
        :param distance_threshold: Hamming distance filter threshold
        :param min_obs: number of obs to be considered in feature clustering
        :param stop_criterion: stopping criterion for K-Means when clustering coefficients
        :param intertia_opt: threshold for inertia level stopping between 0 and 1
        :param inertia_cutoff: threshold for inertia improvement cutoff
        :param xgboost: if turn on xgboost
        :param figure: if return decision plots
        :param figsize: decision plot figure size
        :param model_weights: weights of fit metrics to auto-select optimal alpha
                              (first entry for the regression model, second for xgboost)
        :return: fit_info_panel: fit information panel
                 index_of_best: index of the best converged experiment, -1 if none converged
                 [fig2, fig1]: decision plot and run summary plot (only when figure is True)
                 cluster_groups: clusters of every experiment
                 criterion: stopping criteria for each categorical feature clustering
                 inertia: inertia of each categorical feature clustering
                 gap_stats: gap-statistics of each categorical feature clustering
                 cgrouping: Hamming distance clustering preprocessing grouping
                            (only when cbine_column is True)
        """

        icfesl_converged = []
        cluster_groups = []
        criterions = []
        inertias = []
        gap_statss = []
        var_infs = []
        dof = []
        reg_fit_time = []
        xgb_fit_time = []

        reg_training_auroc = []
        reg_testing_auroc = []
        xgb_training_auroc = []
        xgb_testing_auroc = []

        reg_training_rmse = []
        reg_testing_rmse = []
        xgb_training_rmse = []
        xgb_testing_rmse = []

        if cbine_column:
            cgrouping = icfesl.group_categorical_features(X, X.columns.tolist(), distance_threshold)
            X2 = icfesl.combine_features(X, cgrouping)
            X2_test = icfesl.combine_features(X_test, cgrouping)
        else:
            X2 = X
            X2_test = X_test

        # Number of 1-entries per OHE column, later merged onto the coefficients.
        dummy_features_count = pd.DataFrame(columns=['feature','obs_count'])
        dummy_features_count.feature = X2.columns.tolist()

        for f in X2.columns.tolist():
            dummy_features_count.loc[dummy_features_count['feature']==f,'obs_count'] = X2[f].sum()

        if mtype == 'classification':

            X2 = sm.add_constant(X2, has_constant='skip')

            X2_test = sm.add_constant(X2_test, has_constant='skip')

        H_n = {}

        # Row *positions* of the 1-entries of every column after the first.
        # Positions rather than index labels, because they are used to index
        # the positional numpy array dpdx_n.
        for j in range(1,X2.shape[1]):
            H_n[j] = np.flatnonzero(X2.iloc[:,j].to_numpy() == 1).tolist()

        for alpha in alphas:

            logger.info(f'running algorithm with L2 regularization factor = {alpha} ------>')

            # max_iter is forwarded so the caller's setting is honoured.
            coeffs, converged = icfesl.f_get_all_coef(X2.loc[:, X2.columns != 'const'], y, mtype=mtype, alpha=alpha, max_iter=max_iter)

            icfesl_converged.append(converged)

            coeffs = coeffs.merge(dummy_features_count, on='feature', how='left')

            dpdx_n = np.array([dpdx(X2.iloc[i, :], coeffs.coef) for i in range(X2.shape[0])])

            cluster_group, criterion, inertia, gap_stats, var_inf = icfesl.collapse_categorical_features(coeffs, dpdx_n, H_n,'coef', varlist, X2, y,'::', mtype, alpha, min_obs, stop_criterion, intertia_opt, inertia_cutoff)

            X3 = icfesl.combine_features(X2, cluster_group)

            X3_test = icfesl.combine_features(X2_test, cluster_group)

            cluster_groups.append(cluster_group)
            criterions.append(criterion)
            inertias.append(inertia)
            gap_statss.append(gap_stats)
            var_infs.append(var_inf)

            if mtype == 'classification':
                logger.info(f'Running logit with ICFESL encoding')
                start = time.time()
                model = sm.GLM(y, sm.add_constant(X3, has_constant='skip'), family=sm.families.Binomial()).fit(disp=False)
                end = time.time()
                reg_fit_time.append(round(end - start, 4))
                dof.append(model.df_model)

                y_pred = model.predict(sm.add_constant(X3, has_constant='skip'))
                auroc = roc_auc_score(y, y_pred)
                reg_training_auroc.append(auroc)

                y_pred = model.predict(sm.add_constant(X3_test, has_constant='skip'))
                auroc = roc_auc_score(y_test, y_pred)
                reg_testing_auroc.append(auroc)

                if xgboost:
                    model = XGBClassifier(n_estimators=100, random_state=200)
                    logger.info(f'Running xgbClassifier with ICFESL encoding')
                    start = time.time()
                    model.fit(X3, y)
                    end = time.time()
                    xgb_fit_time.append(round(end - start, 4))

                    y_pred = model.predict_proba(X3)[:, 1]
                    auroc = roc_auc_score(y, y_pred)
                    xgb_training_auroc.append(auroc)

                    y_pred = model.predict_proba(X3_test)[:, 1]
                    auroc = roc_auc_score(y_test, y_pred)
                    xgb_testing_auroc.append(auroc)

            else:
                logger.info(f'Running OLS with ICFESL encoding')
                start = time.time()
                model = sm.OLS(y, sm.add_constant(X3, has_constant='skip')).fit(disp=False)
                end = time.time()
                reg_fit_time.append(round(end - start, 4))
                dof.append(model.df_model)

                y_pred = model.predict(sm.add_constant(X3, has_constant='skip'))
                rmse = root_mean_squared_error(y, y_pred)
                reg_training_rmse.append(rmse)

                y_pred = model.predict(sm.add_constant(X3_test, has_constant='skip'))
                rmse = root_mean_squared_error(y_test, y_pred)
                reg_testing_rmse.append(rmse)

                if xgboost:
                    model = XGBRegressor(n_estimators=100, random_state=200)
                    logger.info(f'Running xgbRegressor with ICFESL encoding')
                    start = time.time()
                    model.fit(X3, y)
                    end = time.time()
                    xgb_fit_time.append(round(end - start, 4))

                    y_pred = model.predict(X3)
                    rmse = root_mean_squared_error(y, y_pred)
                    xgb_training_rmse.append(rmse)

                    y_pred = model.predict(X3_test)
                    rmse = root_mean_squared_error(y_test, y_pred)
                    xgb_testing_rmse.append(rmse)

            logger.info(f'Completed: running algorithm with L2 regularization factor = {alpha} ------>')

        fit_info_panel = pd.DataFrame()
        fit_info_panel['Experiment'] = pd.Series(range(len(alphas)))
        fit_info_panel['dof'] = pd.Series(dof)
        fit_info_panel['reg_fit_time'] = pd.Series(reg_fit_time)

        if mtype == 'classification':

            fit_info_panel['reg_training_auroc'] = pd.Series(reg_training_auroc)
            fit_info_panel['reg_testing_auroc'] = pd.Series(reg_testing_auroc)

            if xgboost:
                fit_info_panel['xgb_fit_time'] = pd.Series(xgb_fit_time)
                fit_info_panel['xgb_training_auroc'] = pd.Series(xgb_training_auroc)
                fit_info_panel['xgb_testing_auroc'] = pd.Series(xgb_testing_auroc)

        else:

            fit_info_panel['reg_training_rmse'] = pd.Series(reg_training_rmse)
            fit_info_panel['reg_testing_rmse'] = pd.Series(reg_testing_rmse)

            if xgboost:
                fit_info_panel['xgb_fit_time'] = pd.Series(xgb_fit_time)
                fit_info_panel['xgb_training_rmse'] = pd.Series(xgb_training_rmse)
                fit_info_panel['xgb_testing_rmse'] = pd.Series(xgb_testing_rmse)

        fit_info_panel['var_inf'] = pd.Series(var_infs)

        # Higher score is better: RMSE sums are negated, AUROC sums are not.
        scores = []

        for i in range(len(alphas)):

            if mtype == 'regression':
                score = model_weights[0]*(reg_training_rmse[i] + reg_testing_rmse[i])
                if xgboost:
                    score += model_weights[1]*(xgb_training_rmse[i] + xgb_testing_rmse[i])

                scores.append(-score)
            else:
                score = model_weights[0] * (reg_training_auroc[i] + reg_testing_auroc[i])
                if xgboost:
                    score += model_weights[1] * (xgb_training_auroc[i] + xgb_testing_auroc[i])

                scores.append(score)

            # Experiments after three consecutive declines are left out of the selection.
            if i >= 3 and icfesl_converged[i] and scores[i-1]<scores[i-2] and scores[i-2]<scores[i-3]:
                logger.info('search stopped: model fit scores are decreasing...')
                break

        # Pick the best score among the converged experiments only. Zeroing the
        # score of a non-converged run instead would let it beat every
        # (negative) regression score.
        converged_runs = [i for i in range(len(scores)) if icfesl_converged[i]]

        if converged_runs:
            index_of_best = max(converged_runs, key=lambda run: scores[run])
        else:
            index_of_best = -1

        if figure:
            fig1, ax1 = plt.subplots(figsize=figsize)
            logit_line, = ax1.plot(fit_info_panel['Experiment'], np.log2(fit_info_panel['reg_fit_time']), label='log(logit)', color='blue')
            lines = [logit_line]
            if xgboost:
                xgb_line, = ax1.plot(fit_info_panel['Experiment'], np.log2(fit_info_panel['xgb_fit_time']), label='log(xgboost)', color='red')
                lines.append(xgb_line)

            ax1.set_xlabel('Experiment')
            ax1.set_ylabel('Training Time', color='black')
            ax1.tick_params('y', colors='black')

            if xgboost:
                ax1.set_ylim(np.log2(fit_info_panel[['reg_fit_time','xgb_fit_time']]).min().min()- 0.5,
                             np.log2(fit_info_panel[['reg_fit_time','xgb_fit_time']]).max().max()+ 0.5)
            else:
                ax1.set_ylim(np.min(np.log2(fit_info_panel['reg_fit_time'])) - 0.5, np.max(np.log2(fit_info_panel['reg_fit_time'])) + 0.5)

            ax2 = ax1.twinx()
            ax2.set_ylabel('Rate', color='black')
            ax2.tick_params('y', colors='black')

            var_inf_line, = ax2.plot(fit_info_panel['Experiment'], fit_info_panel['var_inf'], label='variance inflation', linestyle='dashed', color='orange')

            lines.append(var_inf_line)

            labels = [l.get_label() for l in lines]
            ax1.legend(lines, labels, loc='lower right')
            plt.title('Run Summary Plot')
            # Close the figure so it is handed back without being displayed here.
            plt.close(fig1)

            fig2, ax1 = plt.subplots(figsize=figsize)

            if mtype == 'classification':
                logit_train, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['reg_training_auroc'], label='logit_train', linestyle = 'solid', color='blue')
                logit_test, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['reg_testing_auroc'], label='logit_test', linestyle = 'dashed', color='blue')
                lines = [logit_train, logit_test]
                if xgboost:
                    xgb_train, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['xgb_training_auroc'], label='xgb_train', linestyle='solid', color='red')
                    xgb_test, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['xgb_testing_auroc'], label='xgb_test', linestyle='dashed', color='red')
                    lines += [xgb_train, xgb_test]

                ax1.set_xlabel('Experiment')
                ax1.set_ylabel('AUROC', color='black')
                ax1.tick_params('y', colors='black')

            else:
                logit_train, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['reg_training_rmse'], label='logit_train', linestyle = 'solid', color='blue')
                logit_test, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['reg_testing_rmse'], label='logit_test', linestyle = 'dashed', color='blue')
                lines = [logit_train, logit_test]
                if xgboost:
                    xgb_train, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['xgb_training_rmse'], label='xgb_train', linestyle='solid', color='red')
                    xgb_test, = plt.plot(fit_info_panel['Experiment'], fit_info_panel['xgb_testing_rmse'], label='xgb_test', linestyle='dashed', color='red')
                    lines += [xgb_train, xgb_test]

                ax1.set_xlabel('Experiment')
                ax1.set_ylabel('RMSE', color='black')
                ax1.tick_params('y', colors='black')


            ax2 = ax1.twinx()
            ax2.set_ylabel('Dof', color='black')
            ax2.tick_params('y', colors='black')

            dof_line, = ax2.plot(fit_info_panel['Experiment'], fit_info_panel['dof'], label='Dof', linestyle='dotted', color='orange')

            ax2.axvline(x=index_of_best, linestyle='--', linewidth=2, label='Best Experiment')

            lines.append(dof_line)
            labels = [l.get_label() for l in lines]
            ax1.legend(lines, labels, loc='lower right')
            plt.title('Decision Plot')
            plt.close(fig2)


            if cbine_column:
                return fit_info_panel, index_of_best, [fig2, fig1], cluster_groups, criterions, inertias, gap_statss, cgrouping,
            else:
                return fit_info_panel, index_of_best, [fig2, fig1], cluster_groups, criterions, inertias, gap_statss

        else:
            if cbine_column:
                return fit_info_panel, index_of_best, cluster_groups, criterions, inertias, gap_statss, cgrouping,
            else:
                return fit_info_panel, index_of_best, cluster_groups, criterions, inertias, gap_statss



