"""Functions to estimate binning-induced grouping loss."""
import numpy as np
from sklearn.neighbors import KNeighborsRegressor


def check_2D_array(x):
    """Return ``x`` as a single-feature column array of shape (n, 1).

    Parameters
    ----------
    x : array-like
        A scalar, a 1-D sequence of n values, or a 2-D array of shape
        (n, 1). Anything ``np.asarray`` accepts is allowed, so plain
        lists and tuples work as well as ndarrays.

    Returns
    -------
    ndarray of shape (n, 1)

    Raises
    ------
    ValueError
        If ``x`` is 2-D with more than one column, or has more than
        two dimensions.
    """
    # Coerce first so that list/tuple inputs expose ``ndim``/``shape``.
    x = np.asarray(x)

    # A scalar (0-d) is treated as one sample; 1-D input is one
    # feature observed n times.
    if x.ndim <= 1:
        return x.reshape(-1, 1)

    if x.ndim > 2:
        raise ValueError(f'x must be at most 2 dimensional. '
                         f'Got shape {x.shape}')

    if x.shape[1] != 1:
        raise ValueError(f'x must have one feature. Got shape '
                         f'{x.shape}')

    return x

class CEstimator():
    """k-nearest-neighbours regression of labels on one-dimensional scores.

    Parameters
    ----------
    y_scores : array-like of shape (n,) or (n, 1)
        Scores used as the single regression feature. Stored as an
        (n, 1) array in ``self.y_scores``.
    y_labels : array-like
        Regression targets paired with ``y_scores``. Stored as given
        (after conversion to an ndarray) in ``self.y_labels``.
    n_neighbors : int, default=2000
        Number of neighbours averaged by the regressor. It is capped at
        the number of training samples when predicting, because
        scikit-learn refuses to query more neighbours than were fitted.
    """

    def __init__(self, y_scores, y_labels, n_neighbors=2000):
        y_scores = np.array(y_scores)
        y_labels = np.array(y_labels)

        y_scores = check_2D_array(y_scores)

        self.y_scores = y_scores
        self.y_labels = y_labels
        self.n_neighbors = n_neighbors

    def _c_hat(self, test_scores):
        """Fit the regressor on the stored data and predict at ``test_scores``.

        ``test_scores`` may be any array-like holding a single feature;
        it is validated/reshaped by ``check_2D_array``. The regressor is
        refit on every call, so changes to ``self.y_scores`` or
        ``self.y_labels`` are always taken into account.
        """
        # Coerce here so list/tuple input works regardless of what
        # ``check_2D_array`` itself accepts.
        test_scores = check_2D_array(np.asarray(test_scores))

        train_scores = self.y_scores.reshape(-1, 1)
        # Never ask for more neighbours than there are training samples.
        n_neighbors = min(self.n_neighbors, train_scores.shape[0])

        est = KNeighborsRegressor(n_neighbors=n_neighbors)
        est.fit(train_scores, self.y_labels)
        return est.predict(test_scores)

    def c_hat(self):
        """Return the regressor's predictions at the training scores."""
        return self._c_hat(self.y_scores.reshape(-1, 1))


def estimate_GL_induced(c_hat, y_scores, bins):
    """Estimate GL induced for the Brier score.

    The estimate is the average, weighted by bin population, of the
    variance of ``c_hat`` inside each score bin.

    Parameters
    ----------
    c_hat : array-like of n values
        Per-sample estimates, aligned with ``y_scores``. Flattened to
        1-D, so shapes (n,) and (n, 1) are both accepted.
    y_scores : array-like of n values
        Scores used to assign each sample to a bin. Flattened to 1-D.
    bins : sequence of float
        Monotonic bin edges as understood by ``np.digitize``;
        ``len(bins) - 1`` bins are defined.

    Returns
    -------
    float
        Count-weighted mean of the within-bin variances of ``c_hat``.

    Notes
    -----
    Scores greater than or equal to the last edge are assigned to the
    last bin. Scores below the first edge are not clipped and end up in
    a separate group of their own.
    """
    # Flatten both inputs so that the boolean masks built from
    # ``y_scores`` always match ``c_hat`` (e.g. (n, 1) scores with
    # (n,) estimates), and so that plain lists are accepted.
    c_hat = np.asarray(c_hat).ravel()
    y_scores = np.asarray(y_scores).ravel()

    n_bins = len(bins) - 1
    y_bins = np.digitize(y_scores, bins=bins) - 1
    # Fold the right edge (and anything beyond it) into the last bin.
    y_bins = np.clip(y_bins, a_min=None, a_max=n_bins-1)

    uniques, counts = np.unique(y_bins, return_counts=True)
    var = [np.var(c_hat[y_bins == i]) for i in uniques]

    GL_ind = np.vdot(var, counts)/np.sum(counts)

    return GL_ind


def estimate_CL_induced(c_hat, y_scores, bins):
    """Estimate CL induced for the Brier score.

    The estimate is minus the average, weighted by bin population, of
    the variance of ``y_scores - c_hat`` inside each score bin.

    Parameters
    ----------
    c_hat : array-like of n values, or scalar
        Per-sample estimates, aligned with ``y_scores``. Flattened to
        1-D, so shapes (n,) and (n, 1) are both accepted; a scalar is
        broadcast against ``y_scores``.
    y_scores : array-like of n values
        Scores used both in the residual and to assign each sample to a
        bin. Flattened to 1-D.
    bins : sequence of float
        Monotonic bin edges as understood by ``np.digitize``;
        ``len(bins) - 1`` bins are defined.

    Returns
    -------
    float
        Negated count-weighted mean of the within-bin variances of
        ``y_scores - c_hat``.

    Notes
    -----
    Scores greater than or equal to the last edge are assigned to the
    last bin. Scores below the first edge are not clipped and end up in
    a separate group of their own.
    """
    # Flatten both inputs before subtracting: mixing an (n, 1) array
    # with an (n,) array would otherwise broadcast the residual to an
    # (n, n) matrix instead of an element-wise difference.
    c_hat = np.asarray(c_hat).ravel()
    y_scores = np.asarray(y_scores).ravel()

    n_bins = len(bins) - 1
    y_bins = np.digitize(y_scores, bins=bins) - 1
    # Fold the right edge (and anything beyond it) into the last bin.
    y_bins = np.clip(y_bins, a_min=None, a_max=n_bins-1)

    uniques, counts = np.unique(y_bins, return_counts=True)

    S_minus_C = y_scores - c_hat
    var = [np.var(S_minus_C[y_bins == i]) for i in uniques]

    CL_ind = -np.vdot(var, counts)/np.sum(counts)

    return CL_ind
