{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================================================\n",
    "# CHUNK 1: SETUP - Algorithms, Helper Functions, and Definitions\n",
    "# ============================================================================\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.linalg import norm\n",
    "from hj_prox import compute_prox_HJ\n",
    "import time\n",
    "\n",
    "eps = 1e-5\n",
    "\n",
    "# Plotting configuration\n",
    "plt.rcParams.update({'font.size': 20})\n",
    "\n",
    "\n",
    "# ============================================================================\n",
    "# Helper Functions\n",
    "# ============================================================================\n",
    "\n",
    "def soft_threshold(x, threshold):\n",
    "    \"\"\"Proximal operator for L1 norm (soft thresholding).\"\"\"\n",
    "    return np.sign(x) * np.maximum(np.abs(x) - threshold, 0)\n",
    "\n",
    "\n",
    "def prox_group_lasso(x, groups, threshold):\n",
    "    \"\"\"Proximal operator for group L2 norm.\"\"\"\n",
    "    x_prox = x.copy()\n",
    "    for start, end in groups:\n",
    "        group = x[start:end]\n",
    "        group_norm = norm(group, 2)\n",
    "        if group_norm > 0:\n",
    "            scale = np.maximum(1 - threshold * np.sqrt(end - start) / group_norm, 0)\n",
    "            x_prox[start:end] = scale * group\n",
    "        else:\n",
    "            x_prox[start:end] = 0\n",
    "    return x_prox\n",
    "\n",
    "\n",
    "def gradient_f_numpy(X, y, beta):\n",
    "    \"\"\"Gradient of the smooth term f(β) = (1/2)||y - Xβ||².\"\"\"\n",
    "    return X.T @ (X @ beta - y)\n",
    "\n",
    "\n",
    "def objective_numpy(X, y, beta, groups, lambda1, lambda2):\n",
    "    \"\"\"Compute the full objective function value.\"\"\"\n",
    "    residual = y - X @ beta\n",
    "    smooth_term = 0.5 * np.sum(residual ** 2)\n",
    "    l1_term = lambda1 * np.sum(np.abs(beta))\n",
    "    \n",
    "    group_term = 0\n",
    "    for start, end in groups:\n",
    "        group_term += lambda2 * np.sqrt(end - start) * norm(beta[start:end], 2)\n",
    "    \n",
    "    return smooth_term + l1_term + group_term\n",
    "\n",
    "\n",
    "def l1_penalty_batch(beta_batch, lambda1):\n",
    "    \"\"\"Compute L1 penalty for batch of coefficient vectors.\n",
    "    Args:\n",
    "        beta_batch: shape (n_samples, n_features)\n",
    "        lambda1: L1 penalty parameter\n",
    "    Returns:\n",
    "        penalties: shape (n_samples,)\n",
    "    \"\"\"\n",
    "    return lambda1 * torch.abs(beta_batch).sum(dim=1)\n",
    "\n",
    "\n",
    "def group_penalty_batch(beta_batch, groups, lambda2):\n",
    "    \"\"\"Compute group L2 penalty for batch of coefficient vectors.\n",
    "    Args:\n",
    "        beta_batch: shape (n_samples, n_features)\n",
    "        groups: list of (start, end) tuples\n",
    "        lambda2: group penalty parameter\n",
    "    Returns:\n",
    "        penalties: shape (n_samples,)\n",
    "    \"\"\"\n",
    "    penalties = torch.zeros(beta_batch.shape[0], device=beta_batch.device)\n",
    "    for start, end in groups:\n",
    "        group_size = end - start\n",
    "        # Compute L2 norm for each sample's group\n",
    "        group_norms = torch.norm(beta_batch[:, start:end], p=2, dim=1)\n",
    "        penalties += lambda2 * np.sqrt(group_size) * group_norms\n",
    "    return penalties\n",
    "\n",
    "\n",
    "def compute_gradient(beta, X, y):\n",
    "    \"\"\"Compute gradient of smooth term: ∇f(β) = X^T(Xβ - y)\"\"\"\n",
    "    return X.T @ (X @ beta - y)\n",
    "\n",
    "\n",
    "def total_objective(beta, X, y, groups, lambda1, lambda2):\n",
    "    \"\"\"Compute total objective value.\"\"\"\n",
    "    residual = y - X @ beta\n",
    "    smooth_part = 0.5 * (residual**2).sum()\n",
    "    l1_part = lambda1 * torch.abs(beta).sum()\n",
    "    \n",
    "    group_part = 0\n",
    "    for start, end in groups:\n",
    "        group_size = end - start\n",
    "        group_norm = torch.norm(beta[start:end], p=2)\n",
    "        group_part += lambda2 * np.sqrt(group_size) * group_norm\n",
    "    \n",
    "    return smooth_part + l1_part + group_part\n",
    "\n",
    "\n",
    "# ============================================================================\n",
    "# Algorithm 1: Davis-Yin with Analytical Proximal Operators\n",
    "# ============================================================================\n",
    "\n",
    "def davis_yin_standard(X, y, groups, lambda1, lambda2, gamma=None, max_iter=1000, tol=1e-25):\n",
    "    \"\"\"Standard Davis-Yin with exact proximal operators.\"\"\"\n",
    "    n_samples, n_features = X.shape\n",
    "    \n",
    "    # Compute Lipschitz constant if gamma not provided\n",
    "    if gamma is None:\n",
    "        L = norm(X.T @ X, 2)\n",
    "        gamma = 1.0 / L\n",
    "    \n",
    "    # Initialize variables\n",
    "    beta = np.zeros(n_features)\n",
    "    z = beta.copy()\n",
    "    \n",
    "    objectives = []\n",
    "    \n",
    "    print(\"Starting Standard Davis-Yin Algorithm...\")\n",
    "    print(f\"λ₁ = {lambda1}, λ₂ = {lambda2}, γ = {gamma}\")\n",
    "    print(\"-\" * 70)\n",
    "    \n",
    "    for iteration in range(max_iter):\n",
    "        t_start = time.time()\n",
    "        beta_old = beta.copy()\n",
    "        \n",
    "        # Davis-Yin steps\n",
    "        # Step 1: w = prox_{γh}(z)\n",
    "        w = prox_group_lasso(z, groups, gamma * lambda2)\n",
    "        \n",
    "        # Step 2: beta = prox_{γg}(2w - z - γ∇f(w))\n",
    "        grad = gradient_f_numpy(X, y, w)\n",
    "        temp = 2 * w - z - gamma * grad\n",
    "        beta = soft_threshold(temp, gamma * lambda1)\n",
    "        \n",
    "        # Step 3: z = z + (beta - w)\n",
    "        z = z + (beta - w)\n",
    "        \n",
    "        # Compute objective\n",
    "        obj_val = objective_numpy(X, y, beta, groups, lambda1, lambda2)\n",
    "        objectives.append(obj_val)\n",
    "        \n",
    "        t_iter = time.time() - t_start\n",
    "        \n",
    "        # Check convergence\n",
    "        residual = norm(beta - beta_old) / (norm(beta_old) + 1e-10)\n",
    "        \n",
    "        if iteration % 5 == 0:\n",
    "            print(f\"Iteration {iteration:4d}: Objective = {obj_val:.2f}, \"\n",
    "                  f\"||Δβ|| = {residual:.2e}, time = {t_iter:.3f}s\")\n",
    "        \n",
    "        if residual < tol:\n",
    "            print(f\"\\nConverged at iteration {iteration}\")\n",
    "            break\n",
    "    \n",
    "    return beta, objectives\n",
    "\n",
    "\n",
    "# ============================================================================\n",
    "# Algorithm 2: Davis-Yin with HJ-Prox\n",
    "# ============================================================================\n",
    "\n",
    "def davis_yin_hjprox(X, y, groups, lambda1=0.1, lambda2=0.1, \n",
    "                     gamma=1.0, gamma_min=1e-4, gamma_decay=0.99,\n",
    "                     max_iter=1000, tol=1e-15,\n",
    "                     int_samples_l1=100, int_samples_group=100,\n",
    "                     delta_l1=1e-1, delta_group=1e-1,\n",
    "                     beta_init=None, verbose=True, device='cpu'):\n",
    "    \"\"\"\n",
    "    Davis-Yin algorithm using HJ-Prox for non-smooth terms with adaptive step size.\n",
    "    \n",
    "    Minimizes: f(β) + g(β) + h(β)\n",
    "    where:\n",
    "        f(β) = 0.5||y - Xβ||²  (smooth, use gradient)\n",
    "        g(β) = λ₁||β||₁        (non-smooth, use HJ-Prox)\n",
    "        h(β) = λ₂∑√pₘ||β^(m)||₂ (non-smooth, use HJ-Prox)\n",
    "        \n",
    "    Parameters:\n",
    "    -----------\n",
    "    X : array-like, shape (n_samples, n_features)\n",
    "        Design matrix\n",
    "    y : array-like, shape (n_samples,)\n",
    "        Response vector\n",
    "    groups : list of tuples\n",
    "        List of (start, end) tuples defining groups\n",
    "    lambda1 : float\n",
    "        L1 penalty parameter\n",
    "    lambda2 : float\n",
    "        Group penalty parameter\n",
    "    gamma : float\n",
    "        Initial step size parameter\n",
    "    gamma_min : float\n",
    "        Minimum allowed value for gamma\n",
    "    gamma_decay : float\n",
    "        Factor to multiply gamma by when objective doesn't decrease\n",
    "    max_iter : int\n",
    "        Maximum number of iterations\n",
    "    tol : float\n",
    "        Convergence tolerance\n",
    "    int_samples_l1 : int\n",
    "        Number of samples for L1 HJ-Prox\n",
    "    int_samples_group : int\n",
    "        Number of samples for group HJ-Prox\n",
    "    delta_l1 : float\n",
    "        Smoothing parameter for L1 HJ-Prox\n",
    "    delta_group : float\n",
    "        Smoothing parameter for group HJ-Prox\n",
    "    beta_init : array-like, optional\n",
    "        Initial value for beta\n",
    "    verbose : bool\n",
    "        Whether to print progress\n",
    "    device : str\n",
    "        Device to use ('cpu' or 'cuda')\n",
    "        \n",
    "    Returns:\n",
    "    --------\n",
    "    beta_final : numpy array\n",
    "        Optimized coefficients\n",
    "    objectives : list\n",
    "        Objective values at each iteration\n",
    "    \"\"\"\n",
    "    # Convert to torch tensors\n",
    "    X = torch.tensor(X, dtype=torch.float32, device=device)\n",
    "    y = torch.tensor(y, dtype=torch.float32, device=device)\n",
    "    \n",
    "    n_samples, n_features = X.shape\n",
    "    \n",
    "    # Initialize beta\n",
    "    if beta_init is None:\n",
    "        beta = torch.zeros(n_features, device=device)\n",
    "    else:\n",
    "        if isinstance(beta_init, torch.Tensor):\n",
    "            beta = beta_init.to(device=device, dtype=torch.float32)\n",
    "        else:\n",
    "            beta = torch.tensor(beta_init, dtype=torch.float32, device=device)\n",
    "        \n",
    "        if beta.shape[0] != n_features:\n",
    "            raise ValueError(f\"beta_init must have shape ({n_features},), but got shape {beta.shape}\")\n",
    "    \n",
    "    z = beta.clone()\n",
    "    \n",
    "    objectives = []\n",
    "    \n",
    "    # Define penalty functions for HJ-Prox\n",
    "    def l1_func(beta_batch):\n",
    "        return l1_penalty_batch(beta_batch, lambda1)\n",
    "    \n",
    "    def group_func(beta_batch):\n",
    "        return group_penalty_batch(beta_batch, groups, lambda2)\n",
    "    \n",
    "    # Initialize objective value\n",
    "    prev_obj = total_objective(beta, X, y, groups, lambda1, lambda2).item()\n",
    "    \n",
    "    if verbose:\n",
    "        print(\"Starting Davis-Yin with HJ-Prox (adaptive step size)...\")\n",
    "        print(f\"λ₁ = {lambda1}, λ₂ = {lambda2}\")\n",
    "        print(f\"Initial γ = {gamma}, γ_min = {gamma_min}, γ_decay = {gamma_decay}\")\n",
    "        if beta_init is not None:\n",
    "            print(f\"Using custom initial beta with ||β₀|| = {torch.norm(beta):.4f}\")\n",
    "        else:\n",
    "            print(\"Starting from zero initialization\")\n",
    "        print(\"-\" * 70)\n",
    "    \n",
    "    for iteration in range(max_iter):\n",
    "        t_start = time.time()\n",
    "        beta_old = beta.clone()\n",
    "        \n",
    "        # Compute delta with annealing schedule\n",
    "        delta = 1500000 / (iteration + 1)**(2 + eps)\n",
    "        \n",
    "        # Step 1: w = prox_{γh}(z) - proximal of group penalty\n",
    "        z_flat = z.view(-1, 1)\n",
    "        w_flat, ls_iters_group, _ = compute_prox_HJ(\n",
    "            z_flat, gamma, group_func,\n",
    "            delta=delta, int_samples=int_samples_group,\n",
    "            alpha=1.0\n",
    "        )\n",
    "        w = w_flat.view(n_features)\n",
    "        \n",
    "        # Step 2: beta = prox_{γg}(2w - z - γ∇f(w)) - proximal of L1 penalty\n",
    "        grad_w = compute_gradient(w, X, y)\n",
    "        v = 2 * w - z - gamma * grad_w\n",
    "        v_flat = v.view(-1, 1)\n",
    "        \n",
    "        beta_flat, ls_iters_l1, _ = compute_prox_HJ(\n",
    "            v_flat, gamma, l1_func,\n",
    "            delta=delta, int_samples=int_samples_l1,\n",
    "            alpha=1.0\n",
    "        )\n",
    "        beta = beta_flat.view(n_features)\n",
    "        \n",
    "        # Step 3: z = z + (beta - w)\n",
    "        z = z + (beta - w)\n",
    "        \n",
    "        # Compute objective\n",
    "        obj_val = total_objective(beta, X, y, groups, lambda1, lambda2)\n",
    "        current_obj = obj_val.item()\n",
    "        objectives.append(current_obj)\n",
    "        \n",
    "        # Track time\n",
    "        t_iter = time.time() - t_start\n",
    "        \n",
    "        # Compute convergence metric\n",
    "        diff = torch.norm(beta - beta_old).item()\n",
    "        rel_diff = diff / (torch.norm(beta_old).item() + 1e-10)\n",
    "        \n",
    "        if verbose and iteration % 10 == 0:\n",
    "            print(f\"Iteration {iteration:4d}: Objective = {current_obj:.6f}, \"\n",
    "                  f\"||Δβ|| = {diff:.2e}, γ = {gamma:.2e}, \"\n",
    "                  f\"time = {t_iter:.3f}s, ls_iters = {ls_iters_l1 + ls_iters_group}\")\n",
    "        \n",
    "        # Adaptive step size: decrease gamma if objective increased\n",
    "        if current_obj > prev_obj * 1.01 and gamma > gamma_min:\n",
    "            old_gamma = gamma\n",
    "            gamma = max(gamma * 1, gamma_min)\n",
    "            if verbose and iteration % 10 != 0:\n",
    "                print(f\"  -> Objective increased, reducing γ from {old_gamma:.2e} to {gamma:.2e}\")\n",
    "        \n",
    "        prev_obj = current_obj\n",
    "        \n",
    "        # Check convergence\n",
    "        if rel_diff < tol:\n",
    "            if verbose:\n",
    "                print(f\"\\nConverged at iteration {iteration}\")\n",
    "                print(f\"Final objective: {current_obj:.6f}\")\n",
    "                print(f\"Final ||β|| = {torch.norm(beta).item():.4f}\")\n",
    "                print(f\"Non-zero coefficients: {(torch.abs(beta) > 1e-6).sum().item()}\")\n",
    "            break\n",
    "    \n",
    "    if iteration == max_iter - 1 and verbose:\n",
    "        print(f\"\\nReached maximum iterations ({max_iter})\")\n",
    "        print(f\"Final objective: {current_obj:.6f}\")\n",
    "    \n",
    "    # Convert back to numpy\n",
    "    beta_final = beta.cpu().numpy()\n",
    "    \n",
    "    return beta_final, objectives\n",
    "\n",
    "\n",
    "print(\"✓ All algorithms and helper functions loaded successfully\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================================================\n",
    "# CHUNK 2: DATA GENERATION\n",
    "# ============================================================================\n",
    "\n",
    "def generate_sparse_group_data(n_samples=500):\n",
    "    \"\"\"Generate dataset suitable for Sparse Group LASSO.\"\"\"\n",
    "    np.random.seed(42)\n",
    "    \n",
    "    group_sizes = [10, 10, 10, 10, 10, 10]\n",
    "    n_features = sum(group_sizes)\n",
    "    \n",
    "    # Generate correlated features within groups\n",
    "    X = np.random.randn(n_samples, n_features)\n",
    "    feature_idx = 0\n",
    "    for g, size in enumerate(group_sizes):\n",
    "        group_correlation = 0.75\n",
    "        for i in range(1, size):\n",
    "            X[:, feature_idx + i] = (group_correlation * X[:, feature_idx] + \n",
    "                                     np.sqrt(1 - group_correlation**2) * X[:, feature_idx + i])\n",
    "        feature_idx += size\n",
    "    \n",
    "    # Create true coefficients with group structure\n",
    "    beta_true = np.zeros(n_features)\n",
    "    # Group 1: features 2 and 5\n",
    "    beta_true[2] = 1\n",
    "    beta_true[5] = 1\n",
    "    # Group 3: features 23, 26, and 29\n",
    "    beta_true[23] = 1\n",
    "    beta_true[26] = 1\n",
    "    beta_true[29] = 1\n",
    "    # Group 5: feature 45\n",
    "    beta_true[45] = 1\n",
    "    # Group 6: features 53, 54, 55\n",
    "    beta_true[53] = 1\n",
    "    beta_true[54] = 1\n",
    "    beta_true[55] = 1\n",
    "    \n",
    "    # Generate response\n",
    "    y = X @ beta_true + 0.25 * np.random.randn(n_samples)\n",
    "    \n",
    "    # Create group indices\n",
    "    group_indices = []\n",
    "    start = 0\n",
    "    for size in group_sizes:\n",
    "        group_indices.append((start, start + size))\n",
    "        start += size\n",
    "    \n",
    "    group_names = ['group 1', 'group 2', 'group 3', 'group 4', 'group 5', 'group 6']\n",
    "    \n",
    "    return X, y, beta_true, group_indices, group_names\n",
    "\n",
    "\n",
    "print(\"\\n\" + \"=\"*70)\n",
    "print(\"COMPARISON: STANDARD DAVIS-YIN vs HJ-PROX DAVIS-YIN\")\n",
    "print(\"=\"*70)\n",
    "\n",
    "# Generate data\n",
    "n = 300\n",
    "X, y, beta_true, group_indices, group_names = generate_sparse_group_data(n)\n",
    "\n",
    "print(f\"\\n✓ Dataset: {X.shape[0]} samples, {X.shape[1]} features in {len(group_indices)} groups\")\n",
    "print(f\"✓ True model has {np.sum(beta_true != 0)} non-zero coefficients\")\n",
    "\n",
    "# Set parameters\n",
    "lambda1 = 0.15\n",
    "lambda2 = 0.01\n",
    "L = norm(X.T @ X, 2)\n",
    "gamma = 1.0 / L\n",
    "\n",
    "print(f\"✓ Lipschitz constant L = {L:.2f}\")\n",
    "print(f\"✓ Step size γ = {gamma:.6f}\")\n",
    "print(f\"✓ Penalty parameters: λ1={lambda1}, λ2={lambda2}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================================================\n",
    "# CHUNK 3: RUN ALGORITHM 1 - Analytical Davis-Yin\n",
    "# ============================================================================\n",
    "\n",
    "print(\"\\n\" + \"=\"*70)\n",
    "print(\"Running Algorithm 1: Standard Davis-Yin (Exact Proximals)...\")\n",
    "print(\"=\"*70)\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "beta_Analytical, obj_Analytical = davis_yin_standard(\n",
    "    X, y, group_indices, lambda1*n, lambda2*n, gamma=gamma*0.0072, max_iter=10000\n",
    ")\n",
    "\n",
    "elapsed_time = time.time() - start_time\n",
    "\n",
    "print(f\"\\n✓ DYS-Analytical completed\")\n",
    "print(f\"  - Runtime: {elapsed_time:.2f} seconds\")\n",
    "print(f\"  - Converged in {len(obj_Analytical)} iterations\")\n",
    "print(f\"  - Final objective: {obj_Analytical[-1]:.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================================================\n",
    "# CHUNK 4: RUN ALGORITHM 2 - Davis-Yin with HJ-Prox\n",
    "# ============================================================================\n",
    "\n",
    "print(\"\\n\" + \"=\"*70)\n",
    "print(\"Running Algorithm 2: Davis-Yin with HJ-Prox...\")\n",
    "print(\"=\"*70)\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "beta_HJ, obj_HJ = davis_yin_hjprox(\n",
    "    X, y, group_indices, lambda1*n, lambda2*n, gamma=gamma*0.0072, max_iter=10000,\n",
    "    int_samples_l1=10000, int_samples_group=10000,\n",
    "    delta_l1=.15, delta_group=.1, gamma_decay=1, gamma_min=gamma*0.005, verbose=True\n",
    ")\n",
    "\n",
    "elapsed_time = time.time() - start_time\n",
    "\n",
    "print(f\"\\n✓ DYS-HJ completed\")\n",
    "print(f\"  - Runtime: {elapsed_time:.2f} seconds\")\n",
    "print(f\"  - Converged in {len(obj_HJ)} iterations\")\n",
    "print(f\"  - Final objective: {obj_HJ[-1]:.6f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================================================\n",
    "# CHUNK 5: GENERATE FIGURES\n",
    "# ============================================================================\n",
    "\n",
    "print(\"\\n\" + \"=\"*70)\n",
    "print(\"Generating figures...\")\n",
    "print(\"=\"*70)\n",
    "\n",
    "# Get x positions\n",
    "x_pos = np.arange(len(beta_true))\n",
    "\n",
    "# --- Figure 1: Ground Truth ---\n",
    "plt.figure(figsize=(11, 10))\n",
    "plt.plot(x_pos, beta_true, 'o', markersize=10,\n",
    "         markerfacecolor='none', markeredgecolor='black',\n",
    "         markeredgewidth=3)\n",
    "# Add vertical separators\n",
    "for start, end in group_indices[:-1]:\n",
    "    plt.axvline(x=end-0.5, color='gray', linestyle='--', alpha=0.3)\n",
    "plt.ylabel('Coefficient Value', fontsize=40)\n",
    "plt.xlabel('Coefficients', fontsize=40)\n",
    "plt.title('Ground Truth', fontsize=40)\n",
    "plt.grid(True, alpha=0.3)\n",
    "plt.gca().tick_params(axis='both', which='major', labelsize=40)\n",
    "plt.tight_layout()\n",
    "plt.savefig('sparse_group_lasso_ground_truth.pdf', format='pdf', dpi=600, bbox_inches='tight')\n",
    "plt.show()\n",
    "plt.close()\n",
    "\n",
    "# --- Figure 2: HJ-DYS Solution ---\n",
    "plt.figure(figsize=(11, 10))\n",
    "plt.plot(x_pos, beta_HJ, 's', markersize=8, color='blue')\n",
    "# Add vertical separators\n",
    "for start, end in group_indices[:-1]:\n",
    "    plt.axvline(x=end-0.5, color='gray', linestyle='--', alpha=0.3)\n",
    "plt.ylabel('Coefficient Value', fontsize=40)\n",
    "plt.xlabel('Coefficient Index', fontsize=40)\n",
    "plt.title('DYS-HJ', fontsize=40)\n",
    "plt.grid(True, alpha=0.3)\n",
    "plt.gca().tick_params(axis='both', which='major', labelsize=40)\n",
    "plt.tight_layout()\n",
    "plt.savefig('sparse_group_lasso_dys_hj.pdf', format='pdf', dpi=600, bbox_inches='tight')\n",
    "plt.show()\n",
    "plt.close()\n",
    "\n",
    "# --- Figure 3: DYS Solution ---\n",
    "plt.figure(figsize=(11, 10))\n",
    "plt.plot(x_pos, beta_Analytical, '^', markersize=8, color='red')\n",
    "# Add vertical separators\n",
    "for start, end in group_indices[:-1]:\n",
    "    plt.axvline(x=end-0.5, color='gray', linestyle='--', alpha=0.3)\n",
    "plt.ylabel('Coefficient Value', fontsize=40)\n",
    "plt.xlabel('Coefficients Index', fontsize=40)\n",
    "plt.title('DYS', fontsize=40)\n",
    "plt.grid(True, alpha=0.3)\n",
    "plt.gca().tick_params(axis='both', which='major', labelsize=40)\n",
    "plt.tight_layout()\n",
    "plt.savefig('sparse_group_lasso_dys.pdf', format='pdf', dpi=600, bbox_inches='tight')\n",
    "plt.show()\n",
    "plt.close()\n",
    "\n",
    "# --- Figure 4: Objective Function Convergence ---\n",
    "plt.figure(figsize=(11, 10))\n",
    "plt.semilogy(obj_HJ, '-', linewidth=3,\n",
    "             label=f'DYS-HJ: {obj_HJ[-1]:.3f}')\n",
    "plt.semilogy(obj_Analytical, '--', linewidth=3,\n",
    "             label=f'DYS: {obj_Analytical[-1]:.3f}')\n",
    "plt.ylabel('Objective Value (log scale)', fontsize=40)\n",
    "plt.xlabel('Iteration', fontsize=40)\n",
    "plt.title('DYS Convergence', fontsize=40)\n",
    "plt.legend(fontsize=40, loc='upper left')\n",
    "plt.grid(True, alpha=0.3, which='both')\n",
    "tick_positions = np.arange(0, 10000 + 1, 2500)\n",
    "plt.gca().set_xticks(tick_positions)\n",
    "plt.gca().tick_params(axis='both', which='major', labelsize=40)\n",
    "plt.tight_layout()\n",
    "plt.savefig('sparse_group_lasso_objectives.pdf', format='pdf', dpi=600, bbox_inches='tight')\n",
    "plt.show()\n",
    "plt.close()\n",
    "\n",
    "print(\"✓ All figures saved: sparse_group_lasso_ground_truth.pdf, sparse_group_lasso_dys_hj.pdf, sparse_group_lasso_dys.pdf, sparse_group_lasso_objectives.pdf\")\n",
    "print(\"\\n\" + \"=\"*70)\n",
    "print(\"✓ ALL EXPERIMENTS COMPLETED SUCCESSFULLY\")\n",
    "print(\"=\"*70)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
