import torch
import numpy as np

def hill_estimator(x, k):
    """
    Hill's estimator for tail index alpha using the top-k order statistics.

    With ascending order statistics X_(1) <= ... <= X_(n), the mean log-excess

        gamma_hat = (1/k) * sum_{i=1..k} [log X_(n-i+1) - log X_(n-k)]

    estimates the extreme-value index gamma = 1/alpha, so the tail index
    estimate returned here is its reciprocal, alpha_hat = 1 / gamma_hat.

    Parameters
    ----------
    x : array-like
        Sample of positive observations (x_i > 0). Non-finite entries
        (NaN, +/-inf) are discarded before anything else is checked.
    k : int
        Number of top order statistics to use (1 <= k < n), where n is the
        number of finite observations.

    Returns
    -------
    alpha_hat : float
        Hill estimate of the tail index alpha. ``inf`` is returned when the
        top k + 1 observations are all equal (mean log-excess of zero).

    Raises
    ------
    ValueError
        If any finite observation is <= 0, or if k is outside [1, n - 1].

    Notes
    -----
    Requires strictly positive data. Sorts ascending internally.
    """
    x = np.asarray(x, dtype=float)
    x = x[np.isfinite(x)]
    if np.any(x <= 0):
        raise ValueError("Hill estimator requires strictly positive data (x_i > 0).")
    n = x.size
    if not (1 <= k < n):
        raise ValueError(f"k must satisfy 1 <= k < n (n={n}).")

    x_sorted = np.sort(x)               # ascending
    x_thresh = x_sorted[n - k - 1]      # X_(n-k), the threshold order statistic
    tail = x_sorted[n - k:]             # the k largest observations

    # Mean log-excess over the threshold: this estimates gamma = 1/alpha.
    gamma_hat = np.mean(np.log(tail) - np.log(x_thresh))
    if gamma_hat <= 0:
        # tail >= x_thresh, so this only happens when all top k + 1 values tie;
        # report an infinite tail index instead of dividing by zero.
        return float("inf")
    return float(1.0 / gamma_hat)

def hill_sequence(x, k_max=None):
    """
    Compute Hill estimates for k = 1..k_max (useful for a Hill plot).

    Parameters
    ----------
    x : array-like
        Positive data. Non-finite entries (NaN, +/-inf) are dropped first,
        so n below is the number of finite observations.
    k_max : int or None
        Maximum k. If None, uses floor(n**0.9) clipped to at least 5 and at
        most n-1; the upper bound wins when n - 1 < 5.

    Returns
    -------
    ks : ndarray  shape (m,)
        k values 1..k_max (empty when k_max < 1, e.g. fewer than two finite
        observations with the default k_max).
    hills : ndarray shape (m,)
        Value returned by hill_estimator(x, k) for each k.

    Raises
    ------
    ValueError
        Propagated from hill_estimator, e.g. for non-positive data or an
        explicit k_max >= n.
    """
    x = np.asarray(x, dtype=float)
    # hill_estimator discards non-finite values; filter here as well so the
    # default k_max is derived from the sample size it will actually see.
    x = x[np.isfinite(x)]
    n = x.size
    if k_max is None:
        k_max = int(np.clip(n**0.9, 5, n - 1))
    ks = np.arange(1, k_max + 1)
    hills = np.array([hill_estimator(x, int(k)) for k in ks], dtype=float)
    return ks, hills

def select_k_stability(x, k_min=5, k_max=None, window=20, rel_tol=0.05, enforce_monotone=False):
    """
    Pick k by a simple 'stability plateau' heuristic on the Hill sequence.

    Idea
    ----
    Choose the smallest k where the Hill curve remains relatively flat in a
    trailing window: the coefficient of variation (std/mean) within the window
    is below rel_tol. If none qualify, return argmin of rolling std as fallback.

    Parameters
    ----------
    x : array-like
        Positive data.
    k_min : int
        Minimum k to consider.
    k_max : int or None
        Maximum k to consider; default as in hill_sequence.
    window : int
        Window length for stability check (>= 1). Only k >= window are
        candidates, because a full trailing window is required.
    rel_tol : float
        Relative tolerance: require std(window)/|mean(window)| <= rel_tol.
    enforce_monotone : bool
        If True, additionally require the local slope of the Hill curve
        (np.gradient of hills w.r.t. ks) to satisfy
        |slope| <= rel_tol * |Hill(k)| (very conservative). The test is on
        the absolute slope, so steep decreases are rejected too.

    Returns
    -------
    k_star : int
        Selected k.
    info : dict
        Diagnostic info: {'ks', 'hills', 'rolling_mean', 'rolling_std',
        'mask', 'rel_var'}. Rolling arrays are NaN where no full window is
        available; 'mask' marks the candidate range; 'rel_var' is the
        rolling std divided by |rolling mean|.

    Raises
    ------
    ValueError
        If window < 1, if no k satisfies k >= max(k_min, window) within the
        Hill sequence, or if the fallback finds no usable rolling std.
        Errors from hill_sequence are propagated.
    """
    if window < 1:
        raise ValueError("window must be a positive integer.")

    ks, hills = hill_sequence(x, k_max=k_max)
    mask_range = (ks >= k_min) & (ks >= window)
    if not mask_range.any():
        # Without this guard the fallback below would fail inside
        # np.nanargmin with an unhelpful "empty sequence" message.
        largest_k = int(ks[-1]) if ks.size else 0
        raise ValueError(
            f"No candidate k: need k >= max(k_min, window) = {max(k_min, window)}, "
            f"but the Hill sequence only reaches k = {largest_k}."
        )

    # rolling stats over the last `window` points up to each index;
    # indices before window - 1 have no full window and stay NaN
    roll_mean = np.full(hills.shape, np.nan, dtype=float)
    roll_std = np.full(hills.shape, np.nan, dtype=float)
    for end in range(window - 1, len(hills)):
        seg = hills[end - window + 1:end + 1]
        roll_mean[end] = seg.mean()
        # sample std is undefined for a single point; treat it as perfectly flat
        roll_std[end] = seg.std(ddof=1) if window > 1 else 0.0

    # coefficient of variation; the floor avoids division by a zero mean
    rel_var = np.abs(roll_std / np.maximum(np.abs(roll_mean), 1e-12))
    stable = (rel_var <= rel_tol)       # NaN compares False, i.e. not stable

    # Optionally ignore points where recent slope is too large
    if enforce_monotone:
        slope = np.gradient(hills, ks)
        stable &= (np.abs(slope) <= rel_tol * np.maximum(np.abs(hills), 1e-12))

    # pick the first k that is stable and within candidate range
    stable_idx = np.flatnonzero(stable & mask_range)
    if stable_idx.size > 0:
        k_star = int(ks[stable_idx[0]])
    else:
        # fallback: pick k with minimal rolling std within candidate range
        cand_std = roll_std[mask_range]
        if np.all(np.isnan(cand_std)):
            raise ValueError(
                "No usable rolling std in the candidate range; cannot select k."
            )
        k_star = int(ks[mask_range][np.nanargmin(cand_std)])

    info = dict(
        ks=ks, hills=hills, rolling_mean=roll_mean, rolling_std=roll_std,
        mask=mask_range, rel_var=rel_var
    )
    return k_star, info

def fit_hill(x, k=None, auto=True, **auto_kwargs):
    """
    Convenience wrapper: optionally pick k, then evaluate the Hill estimator.

    Parameters
    ----------
    x : array-like
        Positive data.
    k : int or None
        Number of top order statistics to use. When None, k is chosen by
        select_k_stability provided auto is True.
    auto : bool
        Permit automatic selection of k when k is None.
    **auto_kwargs
        Forwarded to select_k_stability (e.g., k_min, window, rel_tol).
        Not used when k is given explicitly.

    Returns
    -------
    alpha_hat : float
        Value returned by hill_estimator(x, k_used).
    k_used : int
        The k actually used.
    diag : dict
        Diagnostics from select_k_stability (empty if k given).

    Raises
    ------
    ValueError
        If k is None while auto is False.
    """
    diag = {}
    if k is None:
        # No explicit k: either select one automatically or refuse.
        if not auto:
            raise ValueError("k is None and auto=False; provide a k or enable auto.")
        k, diag = select_k_stability(x, **auto_kwargs)

    return hill_estimator(x, k), k, diag
