{"cells":[{"cell_type":"markdown","metadata":{"id":"PU0j8xr5ma-2"},"source":["# Length-Scale Selection for Job Corps Data (Semi-Synthetic)\n","\n","This notebook reproduces the length-scale and ridge parameter tuning experiments for the Job Corps dataset.\n","\n","**Workflow:**\n","1. **Data Loading:** Loads the Job Corps empirical application data (`emp_app.csv`) and performs one-hot encoding.\n","2. **Range Estimation:** Calculates heuristics based on median pairwise distances to determine reasonable search ranges for length-scales ($\\ell_X$ and $\\ell_T$).\n","3. **Hyperparameter Tuning:** Performs a 2D Grid Search using Nyström Kernel Ridge Regression with Leave-One-Out Cross-Validation (LOOCV) to find the optimal length-scales and regularization parameter ($\\beta$)."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":31363,"status":"ok","timestamp":1769398090840,"user":{"displayName":"D K","userId":"02556183042422178006"},"user_tz":300},"id":"y6aTW-uzl-Oe","outputId":"cc3f37c4-fcd8-421a-b691-603df7cf338a"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n","Working Directory: /content/drive/MyDrive/Colab Notebooks/CTE_Baseline\n"]}],"source":["import sys\n","import pathlib\n","import numpy as np\n","import pandas as pd\n","import logging\n","from typing import Tuple, List, Dict, Optional, Union\n","\n","# --- Environment Setup ---\n","# If running in Colab, mount drive. 
Otherwise, assume local paths.\n","try:\n","    from google.colab import drive\n","    drive.mount('/content/drive')\n","    BASE_DIR = pathlib.Path(\"/content/drive/MyDrive/Colab Notebooks/CTE_Baseline\")\n","except ImportError:\n","    # Local execution assumption\n","    BASE_DIR = pathlib.Path(\".\").resolve()\n","\n","sys.path.append(str(BASE_DIR))\n","\n","# Project specific imports\n","from KRR_methods.data_jobcorps import make_Xss\n","from KRR_methods.algorithms.length_selection import tune_length2d_and_beta_loocv_krr_nystrom\n","\n","print(f\"Working Directory: {BASE_DIR}\")"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2708,"status":"ok","timestamp":1769398099280,"user":{"displayName":"D K","userId":"02556183042422178006"},"user_tz":300},"id":"Go0JeNj_mHuB","outputId":"4a4f0d25-ba60-4593-c773-1ec216aa2c76"},"outputs":[{"output_type":"stream","name":"stdout","text":["Loading data from: emp_app.csv...\n","Data Loaded. 
Shapes -> X: (4024, 138), T: (4024,), Y: (4024,)\n"]}],"source":["def load_and_preprocess_jobcorps(base_dir: pathlib.Path) -> Tuple[pd.DataFrame, pd.Series, pd.Series]:\n","    \"\"\"\n","    Loads the Job Corps dataset, performs one-hot encoding for categorical variables,\n","    and returns the covariates (X), treatment (T), and outcome (Y).\n","    \"\"\"\n","    # Define paths\n","    emp_dir = base_dir / \"DML_methods\" / \"Data_and_Results\"\n","    data_path = emp_dir / \"emp_app.csv\"\n","\n","    if not data_path.exists():\n","        raise FileNotFoundError(f\"Data file not found at: {data_path}\")\n","\n","    print(f\"Loading data from: {data_path.name}...\")\n","    data = pd.read_csv(data_path, index_col=0)\n","\n","    # Shuffle once (fixed seed) – ensures reproducibility matching original script\n","    data = data.sample(frac=1, random_state=20)\n","\n","    # Preprocessing: Convert int64 columns to dummy variables (One-Hot Encoding)\n","    # This matches the preprocessing steps of the original empirical application.\n","    data_processed = pd.concat(\n","        [\n","            data.select_dtypes(exclude=\"int64\"),\n","            pd.get_dummies(\n","                data.select_dtypes(\"int64\").astype(\"category\"),\n","                drop_first=True,\n","                dtype=float,\n","            ),\n","        ],\n","        axis=1,\n","    )\n","\n","    X = data_processed.drop([\"d\", \"y\"], axis=1)  # Covariates\n","    T = data_processed[\"d\"]                      # Treatment\n","    Y = data_processed[\"y\"]                      # Outcome\n","\n","    print(f\"Data Loaded. 
# ============================================================
# Utility Functions for Length Scale Heuristics
# ============================================================

def pairwise_dist_median(Z: np.ndarray, max_pairs: int = 200_000, seed: int = 123) -> float:
    """
    Median of the pairwise Euclidean distances between the rows of `Z`.

    If the number of distinct pairs n*(n-1)/2 is at most `max_pairs`, every
    pair is used. Otherwise `max_pairs` random index pairs are drawn (with a
    seeded generator) and the median of their distances is returned.

    Args:
        Z: Array-like of shape (n, d). A 1-D input is treated as (n, 1).
        max_pairs: Threshold for exact calculation vs. subsampling; also the
            number of sampled pairs when subsampling.
        seed: Random seed for subsampling (unused in the exact branch).

    Returns:
        The median distance as a Python float.

    Raises:
        ValueError: If `Z` has more than 2 dimensions, fewer than 2 rows,
            or if `max_pairs` is smaller than 1.
    """
    # Always work in float64: integer input previously crashed inside
    # np.sqrt(..., dtype=Z.dtype), and float32 lost precision.
    Z = np.asarray(Z, dtype=np.float64)
    if Z.ndim == 1:
        Z = Z.reshape(-1, 1)
    elif Z.ndim != 2:
        raise ValueError(f"Z must be 1-D or 2-D, got ndim={Z.ndim}")

    n = Z.shape[0]
    if n < 2:
        # No pair exists; the median would silently be NaN.
        raise ValueError("pairwise_dist_median needs at least 2 rows.")

    max_pairs = int(max_pairs)
    if max_pairs < 1:
        raise ValueError("max_pairs must be at least 1.")

    total_pairs = n * (n - 1) // 2

    if total_pairs <= max_pairs:
        # Exact calculation for small datasets, using
        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
        G = Z @ Z.T
        sq = np.sum(Z * Z, axis=1, keepdims=True)
        # Numerical stability: round-off can make the expression slightly negative
        D2 = np.maximum(sq + sq.T - 2.0 * G, 0.0)
        dists = np.sqrt(D2[np.triu_indices(n, k=1)])
    else:
        # Subsampling for large datasets
        rng = np.random.default_rng(seed)
        i = rng.integers(0, n, size=max_pairs)
        j = rng.integers(0, n, size=max_pairs)

        # Ensure we don't compute the distance of a point to itself
        same = (i == j)
        j[same] = (j[same] + 1) % n

        dists = np.linalg.norm(Z[i] - Z[j], axis=1)

    return float(np.median(dists))


def matern_c(nu: float) -> float:
    """
    Returns the scaling constant c(nu) such that z = c * r / ell.
    Formula: c = sqrt(2 * nu)

    Raises:
        ValueError: If `nu` is not strictly positive (NaN is rejected too).
    """
    if not nu > 0:
        raise ValueError("nu must be positive.")
    return float(np.sqrt(2.0 * nu))


def solve_z_for_tau(rho: float, nu: float = 1.5, tol: float = 1e-12, max_iter: int = 200) -> float:
    """
    Solves for z > 0 such that tau(z; nu) = rho.
    tau(z; nu) is the correlation function for the Matérn kernel.

    Supported nu values:
      - 0.5 : tau(z) = exp(-z)                      (closed form, no iteration)
      - 1.5 : tau(z) = (1 + z) * exp(-z)
      - 2.5 : tau(z) = (1 + z + z^2/3) * exp(-z)

    Args:
        rho: Target correlation, strictly between 0 and 1.
        nu: Matérn smoothness; must be (numerically) one of the values above.
        tol: Bisection stops once the bracket is narrower than this.
        max_iter: Upper bound on bisection steps.

    Returns:
        The root z as a float (midpoint of the final bracket).

    Raises:
        ValueError: If `rho` is outside (0, 1) or `nu` is unsupported.
    """
    import math

    if not (0.0 < rho < 1.0):
        raise ValueError(f"rho must be in (0, 1), got {rho}")

    # Special case: nu = 0.5 (Laplace) => z = -log(rho)
    if abs(nu - 0.5) < 1e-9:
        return -math.log(rho)

    # Polynomial components for nu=1.5 and nu=2.5
    if abs(nu - 1.5) < 1e-9:
        def P(z): return 1.0 + z
    elif abs(nu - 2.5) < 1e-9:
        def P(z): return 1.0 + z + (z**2) / 3.0
    else:
        raise ValueError("solve_z_for_tau currently supports nu in {0.5, 1.5, 2.5}.")

    def f(z):
        return P(z) * math.exp(-z) - rho

    # f(0) = 1 - rho > 0 because rho < 1, so 0 is always a valid lower bracket.
    z_lo, z_hi = 0.0, 50.0

    # Grow the upper bracket until f changes sign.
    while f(z_hi) > 0 and z_hi < 1e6:
        z_hi *= 2.0

    # Bisection
    for _ in range(max_iter):
        z_mid = 0.5 * (z_lo + z_hi)
        if f(z_mid) > 0:
            z_lo = z_mid
        else:
            z_hi = z_mid
        if (z_hi - z_lo) < tol:
            break

    return 0.5 * (z_lo + z_hi)


# ============================================================
# Wrappers for Estimating Length Scales (ell) for X and T
# ============================================================

def _scale_by_divisor(values: np.ndarray, divisor, name: str) -> np.ndarray:
    """Divide `values` by `divisor`, rejecting a zero divisor instead of producing inf/NaN."""
    divisor = float(divisor)
    if divisor == 0.0:
        raise ValueError(f"{name} must be non-zero.")
    return values / divisor


def _ell_results_for_rhos(r_med: float, rhos, kernel_type: str, nu):
    """
    Convert each target correlation rho into a length-scale for one kernel family.

    `kernel_type` must already be normalised to 'laplace', 'matern' or 'gaussian'.
    Returns `(reported_nu, results)` where `results` holds one dict per rho.
    """
    if kernel_type == "matern":
        scale = matern_c(nu)          # z = c * r / ell
        reported_nu = float(nu)
    else:
        scale = 1.0                   # laplace / gaussian: z = r / ell
        reported_nu = None

    results = []
    for rho in rhos:
        if kernel_type == "matern":
            z_rho = solve_z_for_tau(rho, nu=nu)
        elif kernel_type == "laplace":
            # exp(-r/ell) = rho  =>  z = -log(rho)
            z_rho = solve_z_for_tau(rho, nu=0.5)
        else:
            # exp(-(r/ell)^2) = rho  =>  z = sqrt(-log(rho))
            # NOTE(review): this is the exp(-r^2/ell^2) convention (no factor 1/2);
            # confirm it matches the Gaussian kernel used downstream.
            if not (0.0 < rho < 1.0):
                raise ValueError(f"rho must be in (0, 1), got {rho}")
            z_rho = np.sqrt(-np.log(rho))

        ell_rho = (scale * r_med) / z_rho
        results.append({
            "rho": float(rho), "z_rho": float(z_rho),
            "ell_rho": float(ell_rho), "kernel_type": kernel_type, "nu": reported_nu
        })
    return reported_nu, results


def estimate_ell_for_jobcorps_X_only(
    X_origin, rhos=(0.5, 0.8), nu=1.5, kernel_type="matern",
    max_pairs=200_000, seed=42, x_divisor=1
) -> dict:
    """
    Estimate length-scale parameters (ell) for X based on median pairwise distances
    and target correlation levels (rho).

    For every rho the returned ell is the length-scale at which two points
    separated by the median distance have kernel correlation rho.

    Args:
        X_origin: Array-like covariates, converted to a float array.
        rhos: Iterable of target correlations, each in (0, 1).
        nu: Matérn smoothness (only used when kernel_type is 'matern').
        kernel_type: 'matern' or 'gaussian' (case-insensitive).
        max_pairs, seed: Forwarded to `pairwise_dist_median`.
        x_divisor: X is divided by this value before distances are computed.

    Returns:
        dict with keys 'r_median', 'kernel_type', 'nu' and 'results'
        (one dict per rho with 'rho', 'z_rho', 'ell_rho', 'kernel_type', 'nu').

    Raises:
        ValueError: Unsupported kernel_type, zero divisor, or invalid rho / nu.
    """
    kernel_type = str(kernel_type).lower()
    # Validate before the (potentially expensive) distance computation.
    if kernel_type not in ("matern", "gaussian"):
        raise ValueError("kernel_type must be 'matern' or 'gaussian'.")

    X_scaled = _scale_by_divisor(np.asarray(X_origin, dtype=float), x_divisor, "x_divisor")
    r_med = pairwise_dist_median(X_scaled, max_pairs=max_pairs, seed=seed)

    out_nu, results = _ell_results_for_rhos(r_med, rhos, kernel_type, nu)
    return {"r_median": float(r_med), "kernel_type": kernel_type, "nu": out_nu, "results": results}


def estimate_ell_for_T_only(
    T_origin, rhos=(0.5, 0.8), nu=1.5, kernel_type="matern",
    max_pairs=200_000, seed=42, t_divisor=1
) -> dict:
    """
    Estimate length-scale parameters (ell) for T based on median pairwise distances.

    Args:
        T_origin: Array-like treatment values; flattened to a column vector.
        rhos: Iterable of target correlations, each in (0, 1).
        nu: Matérn smoothness (only used when kernel_type is 'matern').
        kernel_type: 'laplace', 'matern' or 'gaussian' (case-insensitive).
        max_pairs, seed: Forwarded to `pairwise_dist_median`.
        t_divisor: T is divided by this value before distances are computed.

    Returns:
        dict with keys 'r_median', 'kernel_type', 'nu' and 'results'
        (one dict per rho with 'rho', 'z_rho', 'ell_rho', 'kernel_type', 'nu').

    Raises:
        ValueError: Unsupported kernel_type, zero divisor, or invalid rho / nu.
    """
    kernel_type = str(kernel_type).lower()
    # Validate before the (potentially expensive) distance computation.
    if kernel_type not in ("laplace", "matern", "gaussian"):
        raise ValueError("kernel_type must be one of {'laplace', 'matern', 'gaussian'}.")

    T_arr = np.asarray(T_origin, dtype=float).reshape(-1, 1)
    T_scaled = _scale_by_divisor(T_arr, t_divisor, "t_divisor")
    r_med = pairwise_dist_median(T_scaled, max_pairs=max_pairs, seed=seed)

    out_nu, results = _ell_results_for_rhos(r_med, rhos, kernel_type, nu)
    return {"r_median": float(r_med), "kernel_type": kernel_type, "nu": out_nu, "results": results}
Estimate ranges for T\n","print(\"\\n--- Estimating Range for T (Laplace) ---\")\n","out_t = estimate_ell_for_T_only(T, rhos=(0.15, 0.85), kernel_type=\"laplace\")\n","print(\"Laplace:\", out_t)"]},{"cell_type":"markdown","metadata":{"id":"L8lio4nxmV1u"},"source":["## Step 2: 2D Grid Search with LOOCV\n","\n","We perform a grid search over the length-scales for $X$ and $T$. For each combination of length-scales, we optimize the ridge regularization parameter ($\\beta$) using Leave-One-Out Cross-Validation (LOOCV) via Nyström approximation.\n","\n","* **Kernel:** Product kernel $k((x,t), (x',t')) = k_X(x, x') \\cdot k_T(t, t')$\n","* **Approximation:** Nyström method with $m=700$ landmark points."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"35d0IWahmXlI","executionInfo":{"status":"ok","timestamp":1767786868674,"user_tz":-540,"elapsed":465373,"user":{"displayName":"D K","userId":"02556183042422178006"}},"outputId":"e9caa8c0-d4b8-44c5-b20d-dda433d60918"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n","### Laplace kernel (Matern nu=0.5) — Grid Search Execution\n","Best ell_x = 13.0, ell_t = 6000.0, beta* = 0.3178656453779402, lambda* = 7.899245660485591e-05, LOOCV MSE = 1305.6361993509345\n","\n","========================================\n","Best 2D Hyperparameter Result:\n","========================================\n","  ell_x* = 13.0\n","  ell_t* = 6000.0\n","  beta* = 0.317866\n","  lambda* = 7.899246e-05\n","  MSE* = 1305.6362\n"]}],"source":["def main() -> None:\n","    # Use the semi-synthetic X generated earlier\n","    Xss_data = make_Xss(X)\n","\n","    print(\"\\n### Laplace kernel (Matern nu=0.5) — Grid Search Execution\")\n","\n","    # Grid definition based on the heuristic results from Step 1\n","    length_grid_x = [7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]\n","    length_grid_t = [250, 500, 1000, 2000, 3000, 4000, 5000, 6000]\n","\n","    # Run the 2D tuning\n","    # 
def tune_t_only_length_loocv_nystrom(
    T,
    Y,
    length_grid_t,
    *,
    m: int = 700,
    kernel_type_f: str = "matern",
    nu_t: float = 0.5,
    beta_bounds=(1e-4, 1e2),
    random_state: int = 0,
    suppress_internal_prints: bool = True,
):
    """
    T-only LOOCV length selection using Nyström approximation by reusing the existing 2D tuner.

    Key trick:
      - Create X_dummy as a constant (all-zero) column so that the X-kernel becomes
        constant (k_X == 1) for any stationary kernel.
      - The product kernel then reduces to the T-kernel, i.e., T-only regression.

    The 2D tuner is called with a single dummy ell_x value [1.0], so ell_x is irrelevant.

    Args:
        T: Array-like treatment values (flattened to 1-D floats).
        Y: Array-like outcomes (flattened to 1-D floats); same length as T.
        length_grid_t: Iterable of candidate length-scales for T.
        m: Requested number of Nyström landmarks; capped at the sample size.
        kernel_type_f: Kernel family name forwarded to the 2D tuner.
        nu_t: Smoothness for the T kernel, forwarded as a float.
        beta_bounds: Regularization search range forwarded to the 2D tuner.
        random_state: Seed forwarded to the 2D tuner.
        suppress_internal_prints: If True, stdout/stderr written while the
            tuner runs are redirected into throw-away buffers.

    Returns:
        dict with 'best_length_t', 'best_beta', 'best_lambda' (None if the tuner
        does not report it), 'best_mse', 'nystrom_m', 'nu_t', 'beta_bounds' and
        'full_result' (the raw tuner output, kept for debugging).

    Raises:
        ValueError: If T and Y do not contain the same number of values.
    """
    # Local imports keep this block self-contained within the notebook.
    import contextlib
    import io

    T_arr = np.asarray(T, dtype=float).reshape(-1)
    Y_arr = np.asarray(Y, dtype=float).reshape(-1)
    n = T_arr.shape[0]
    if Y_arr.shape[0] != n:
        # Fail fast instead of after the tuner has started working.
        raise ValueError(
            f"T and Y must have the same length, got {n} and {Y_arr.shape[0]}."
        )

    # Constant dummy X so k_X(x_i, x_j) == 1 for stationary kernels
    X_dummy = np.zeros((n, 1), dtype=float)

    # ell_x is meaningless in this setup; keep it singleton to minimize computation
    length_grid_x_dummy = [1.0]

    # Nyström landmarks cannot exceed n
    m_eff = int(min(m, n))

    # One call site for both modes: the redirections are entered only when requested,
    # so the tuner arguments cannot drift apart between the two branches.
    with contextlib.ExitStack() as stack:
        if suppress_internal_prints:
            stack.enter_context(contextlib.redirect_stdout(io.StringIO()))
            stack.enter_context(contextlib.redirect_stderr(io.StringIO()))
        res_raw = tune_length2d_and_beta_loocv_krr_nystrom(
            X_dummy,
            T_arr,
            Y_arr,
            length_grid_x=length_grid_x_dummy,
            length_grid_t=list(length_grid_t),
            m=m_eff,
            kernel_type_f=kernel_type_f,
            nu_x=0.5,               # arbitrary; X is dummy
            nu_t=float(nu_t),
            beta_bounds=beta_bounds,
            random_state=random_state,
        )

    # Return a clean T-only result (do NOT expose/print best_length_x)
    return {
        "best_length_t": res_raw["best_length_t"],
        "best_beta": res_raw["best_beta"],
        # The 2D tuner result is read with a 'best_lambda' key elsewhere in this
        # notebook; surface it here too, tolerating its absence.
        "best_lambda": res_raw["best_lambda"] if "best_lambda" in res_raw else None,
        "best_mse": res_raw["best_mse"],
        "nystrom_m": m_eff,
        "nu_t": float(nu_t),
        "beta_bounds": beta_bounds,
        # Keep full result only if you want debugging; remove this line if you want it minimal
        "full_result": res_raw,
    }
K","userId":"02556183042422178006"}},"outputId":"1d17ec96-a840-4523-ed24-bd25a0a501c2"},"execution_count":7,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","========================================\n","Best T-only Nyström-LOOCV Result\n","========================================\n","  ell_t* = 3000.0\n","  beta*  = 3.928282\n","  MSE*   = 1.413240e+03\n","  m      = 700\n"]}]}],"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyM2Gwc+CVtxggxE38SsDvCk"},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}