


import os, sys

# Sanity check: print the interpreter path and the CONDA_PREFIX environment
# variable so it is visible which environment this session runs in
# (it should be your conda env).
print("python:", sys.executable)
print("CONDA_PREFIX:", os.environ.get("CONDA_PREFIX"))

# Put the parent of the current working directory at the front of sys.path so
# that modules located there can be imported (the notebook lives in notebooks/).
# NOTE: ".." is resolved against the current working directory, so this must
# run before the imports below.
sys.path.insert(0, os.path.abspath(".."))

import cppimport.import_hook  # enables compile-on-import; must precede the import below
import lemon_mcf              # this will compile lemon_mcf.cpp on first import

# Show which file the extension module was actually loaded from.
print("lemon_mcf loaded from:", lemon_mcf.__file__)





















import numpy as np

def _haar_orthonormal_matrix(d: int, rng: np.random.Generator) -> np.ndarray:
    """
    Draw a random d x d orthonormal matrix via QR of a Gaussian matrix.

    A d x d matrix with independent standard normal entries is drawn from
    ``rng`` and factorised as ``A = Q R``. Column j of ``Q`` is then multiplied
    by the sign of ``R[j, j]`` (a zero diagonal entry counts as +1). This sign
    correction is meant to bring the distribution of the result closer to the
    Haar measure on O(d).
    """
    gaussian = rng.standard_normal((d, d))
    q_factor, r_factor = np.linalg.qr(gaussian)

    r_diag = np.diag(r_factor)
    # sign(0) == 0 would zero out a whole column, so zeros are mapped to +1.
    col_signs = np.where(r_diag == 0, 1.0, np.sign(r_diag))

    # Broadcasting along the last axis scales column j by col_signs[j].
    return q_factor * col_signs

def generate_synthetic_convex_quadratic_dataset(
    d: int,
    m: float,
    M: float,
    X: int,
    sigma2: float,
    K: int,
    seed: int | None = None
):
    if d <= 0:
        raise ValueError("d must be positive.")
    if not (M >= m):
        raise ValueError("Require M >= m.")
    if m <= 0:
        raise ValueError("Require m > 0 so that Q is positive definite.")
    if K <= 0:
        raise ValueError("K must be positive.")
    if sigma2 < 0:
        raise ValueError("sigma2 must be nonnegative.")

    rng = np.random.default_rng(seed)

    # Handle X: scalar or length-d vector of bounds
    X_arr = np.asarray(X, dtype=int)
    if X_arr.ndim == 0:
        X_arr = np.full(d, int(X_arr))
    if X_arr.shape != (d,):
        raise ValueError(f"X must be a scalar or an array of shape ({d},).")
    if np.any(X_arr < 0):
        raise ValueError("All entries of X must be nonnegative integers.")

    # (i) sample eigenvalues
    lambdas = rng.uniform(m, M, size=d)

    # (ii) sample orthonormal matrix U
    U = _haar_orthonormal_matrix(d, rng)

    # Build Q = U diag(lambdas) U^T
    Q = U @ np.diag(lambdas) @ U.T

    # Helper to sample integer vectors uniformly from [-X, X]^d (per-coordinate bounds)
    def sample_int_box(num: int) -> np.ndarray:
        out = np.empty((num, d), dtype=int)
        for i in range(d):
            Xi = int(X_arr[i])
            out[:, i] = rng.integers(-Xi, Xi + 1, size=num)
        return out

    # (iii) sample x*
    x_star = sample_int_box(1).reshape(-1)  # shape (d,)

    # (iv) sample K vectors x^1,...,x^K
    X_samples = sample_int_box(K)  # shape (K, d)

    # (v) sample Gaussian noises
    sigma = float(np.sqrt(sigma2))
    eps = rng.normal(loc=0.0, scale=sigma, size=K)

    # (vi) compute y^k = (x^k - x*)^T Q (x^k - x*) + eps^k
    diffs = X_samples - x_star  # broadcast, shape (K, d)
    quad = np.einsum("bi,ij,bj->b", diffs, Q, diffs)  # shape (K,)
    y = quad + eps

    result = {
        "U": U,
        "lambdas": lambdas,
        "Q": Q,
        "x_star": x_star,
        "X_samples": X_samples,
        "y": y,
    }
    
    return result


# --- quick example ---
# Small reproducible draw (fixed seed) followed by a few sanity printouts.
data = generate_synthetic_convex_quadratic_dataset(
    d=5,
    m=0.5,
    M=3.0,
    X=10,
    sigma2=0.25,
    K=100,
    seed=0,
)

print("x_star:", data["x_star"])
print("X_samples shape:", np.shape(data["X_samples"]))
print("y shape:", np.shape(data["y"]))
# Smallest eigenvalue of Q (eigvalsh treats its argument as symmetric).
print("min eigenvalue(Q) ~", np.linalg.eigvalsh(data["Q"]).min())





































