import numpy as np
from collections import deque
from statsmodels.tsa.stattools import adfuller


class ConvergenceCheck:
    '''
    checks optimization convergence using stationarity and optimal region conditions

    scores are fed one at a time by calling the instance; the most recent
    "patience" scores are kept in a sliding window. convergence is reported
    when the window is full, the best score was not improved by the current
    call, and the window is both stationary and close to the best score.

    arguments:
        optimization: "min" if lower scores are better, "max" if higher are better
        patience: window size, an int >= 2
        tolerance: positive float, threshold on the distance between the
            window boundary (mean -/+ std) and the best score
        p_value: significance level (strictly between 0 and 1) for the
            stationarity test

    raises:
        AssertionError: if any argument is invalid
    '''
    def __init__(self, optimization='min', patience=20, tolerance=1e-3, p_value=0.05):
        # validation is raised explicitly (not via "assert" statements) so that it
        # still runs under "python -O"; AssertionError and the messages are kept
        # so existing callers observe the same failures as before
        if not (isinstance(patience, int) and patience >= 2):
            raise AssertionError("Invalid value for argument \"patience\": {}".format(patience))
        if not (isinstance(tolerance, float) and tolerance > 0):
            raise AssertionError("Invalid value for argument \"tolerance\": {}".format(tolerance))
        if optimization not in ['min', 'max']:
            raise AssertionError(
                "Invalid value for argument \"optimization\": {}, which should be either \"min\" or \"max\".".format(optimization))
        if not (p_value > 0 and p_value < 1):
            raise AssertionError(
                "Invalid value for argument \"p_value\": {}, which should be within 0 and 1.".format(p_value))

        self.patience = patience
        self.tolerance = tolerance
        self.optimization = optimization
        self.p_value = p_value
        # bounded window: appending beyond "patience" entries drops the oldest one
        self.queue = deque(maxlen=patience)
        self.best_score = None

    def __call__(self, score):
        '''
        record a new score and return True if the optimization is considered convergent

        raises ValueError if score cannot be converted to float
        '''
        try:
            score = float(score)
        except (TypeError, ValueError) as err:
            raise ValueError("Invalid value for input \"score\": {}".format(score)) from err

        # add current score to the queue of observation
        self.queue.append(score)

        # skip convergence check if the best score was just updated
        if self._update_best_score(score):
            return False

        # skip convergence check until sufficient observations are enqueued
        if len(self.queue) < self.patience:
            return False

        # consider convergent when scores in the queue are both stationary and in optimal region
        return self._is_stationary() and self._is_in_optimal_region()

    def _update_best_score(self, score):
        ''' if the current score is a new extreme (min / max), update the best_score and return True '''
        if self.best_score is None:
            improved = True
        elif self.optimization == 'min':
            improved = score < self.best_score
        else:
            improved = (self.optimization == 'max') and (score > self.best_score)

        if improved:
            self.best_score = score
        return improved

    def _is_in_optimal_region(self):
        '''
        determine if scores in the queue are near optimal based on their distribution:
        for minimization: boundary = mean - std (p-value around 16%)
        for maximization: boundary = mean + std (p-value around 16%)
        return True if relative difference between boundary and best_score is within tolerance
        '''
        series = np.array(self.queue)

        if self.optimization == 'min':
            boundary = np.mean(series) - np.std(series)
        else:
            assert self.optimization == 'max'
            boundary = np.mean(series) + np.std(series)

        ref_diff = abs(boundary - self.best_score)
        if abs(self.best_score) >= 1e-8:
            # relative difference; the absolute difference is kept as-is when
            # best_score is near zero to avoid dividing by a tiny number
            ref_diff = ref_diff / abs(self.best_score)

        return bool(ref_diff < self.tolerance)

    def _is_stationary(self):
        '''
        determine if scores in the queue form a stationary series using Augmented Dickey-Fuller test
        return True if p-value < threshold (reject null hypothesis)
        a window whose scores are all identical is reported as stationary without running the test
        '''
        series = np.array(self.queue)

        # a window with no variation is trivially stationary, and the ADF
        # regression is degenerate on it (recent statsmodels releases reject
        # constant input with ValueError), so decide this case here
        if series.size > 0 and series.max() == series.min():
            return True

        # use adaptive lag selection for small-sample behavior
        # NOTE(review): adfuller may also reject very short series; confirm the
        # smallest "patience" it accepts before relying on tiny windows
        results = adfuller(series, autolag='AIC')

        return bool(results[1] < self.p_value)

    def reset(self):
        ''' empty the queue and clear the best_score record '''
        self.queue.clear()
        self.best_score = None