{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "JSHhjYhBlqiL"
   },
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "id": "Ab04eJTHlqBV"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "np.random.seed(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "dk5-CRlZEpsj"
   },
   "source": [
    "# Example 1 - Heavy tail\n",
    "\n",
    "In this example, we fix the source space $\\mathbb{R}^{10}$.\n",
    "We define $$ f_{\\mu}(x) \\propto \\frac{1}{(1 + \\|x\\|^2)^{\\frac{d+1}{2}}}\\ .$$\n",
    "The cost is $$c(x,y) = \\frac{1}{2}\\|x-y\\|^2\\ . $$\n",
    "\n",
    "$\\nu$ is generated randomly with $M = 46$ points, as is $\\mathbf{g}^*$. The probability weights, approximated by Monte-Carlo, give $w_{\\min} = 0.00015$ for this example.\n",
    "\n",
    "We have $\\mu(B(0,1)) \\geq 1 - \\frac{1}{2}w_{\\min}$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "id": "QZoTLNtKOTB-"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00364 0.05212 0.07001 0.03057 0.29715 0.18916 0.06237 0.0573  0.13799\n",
      " 0.09969]\n"
     ]
    }
   ],
   "source": [
    "def draw_heavy_tail(k, d):\n",
    "    \"\"\"\n",
    "    Samples `k` points from the probability distribution in `d` dimensions\n",
    "    with density proportional to 1 / (1 + ||x||^2)^(d+1)/2.\n",
    "    \"\"\"\n",
    "    def radial_density(r, d):\n",
    "        \"\"\"Radial density function proportional to r^(d-1) / (1 + r^2)^((d+1)/2).\"\"\"\n",
    "        return r**(d-1) / (1 + r**2)**((d+1)/2)\n",
    "\n",
    "    def sample_radius(d, size):\n",
    "        \"\"\"\n",
    "        Samples the radius `r` using rejection sampling in batches.\n",
    "        \"\"\"\n",
    "        scale = 1.0  # Exponential parameter\n",
    "        max_density = radial_density(0, d)  # Upper bound for rejection\n",
    "        if max_density == 0 or np.isnan(max_density):\n",
    "            max_density = 1  # Safeguard for edge cases\n",
    "\n",
    "        radii = []\n",
    "        batch_size = 10000\n",
    "        while len(radii) < size:\n",
    "            # Sample a batch of proposals\n",
    "            r_proposal = np.random.exponential(scale=scale, size=batch_size)\n",
    "            # Compute acceptance probabilities for the batch\n",
    "            acceptance_probs = radial_density(r_proposal, d) / max_density\n",
    "            # Perform rejection sampling\n",
    "            accepted = r_proposal[np.random.uniform(0, 1, size=batch_size) < acceptance_probs]\n",
    "            radii.extend(accepted)\n",
    "        return np.array(radii[:size])\n",
    "\n",
    "    def sample_unit_sphere(k, d):\n",
    "        \"\"\"\n",
    "        Samples `k` points uniformly from the unit sphere in `d` dimensions.\n",
    "        \"\"\"\n",
    "        z = np.random.normal(size=(k, d))  # Gaussian samples\n",
    "        return z / np.linalg.norm(z, axis=1, keepdims=True)  # Normalize to unit sphere\n",
    "\n",
    "    # Sample radii and directions\n",
    "    radii = sample_radius(d, k)\n",
    "    directions = sample_unit_sphere(k, d)\n",
    "\n",
    "    # Combine radii and directions to get final samples\n",
    "    samples = radii[:, np.newaxis] * directions\n",
    "    return samples\n",
    "\n",
    "def compute_weights(Y, g_opt, draw, K, d):\n",
    "    \"\"\"\n",
    "    Computes the weights using vectorized sampling and batch processing.\n",
    "    \"\"\"\n",
    "    weights = np.zeros(Y.shape[0])\n",
    "    batch_size = 10000\n",
    "\n",
    "    for _ in range(K // batch_size):\n",
    "        # Draw a batch of samples\n",
    "        x_batch = draw(batch_size, d)\n",
    "        # Compute distances and find the closest index for each sample\n",
    "        distances = 0.5 * np.linalg.norm(Y[None, :, :] - x_batch[:, None, :], axis=2)**2 - g_opt\n",
    "        j_batch = np.argmin(distances, axis=1)\n",
    "        # Update weights\n",
    "        for j in j_batch:\n",
    "            weights[j] += 1\n",
    "    return weights / K\n",
    "\n",
    "# Parameters\n",
    "# J: number of points y_j, d: dimension, K: number of Monte-Carlo samples.\n",
    "# These are notebook-level names; a later cell reads `d` without redefining it.\n",
    "J = 10\n",
    "d = 10\n",
    "K = 100000\n",
    "\n",
    "# Generate data: points Y uniform in [0, 1)^d and offsets g_opt uniform in [0, 1)\n",
    "Y = np.random.rand(J, d)\n",
    "g_opt = np.random.rand(J)\n",
    "\n",
    "# Compute the weights by Monte-Carlo under the heavy-tailed source sampler\n",
    "weights = compute_weights(Y, g_opt, draw_heavy_tail, K, d)\n",
    "print(weights)\n",
    "\n",
    "# Optional step, currently disabled: drop points whose weight is below\n",
    "# 0.1 / J and recompute the weights on the remaining points.\n",
    "# threshold = 0.1 / J\n",
    "# mask = weights > threshold\n",
    "# Y, g_opt = Y[mask], g_opt[mask]\n",
    "\n",
    "# weights = compute_weights(Y, g_opt, draw_heavy_tail, K, d)\n",
    "\n",
    "# Keep the results in memory and write them to an .npz archive in the working directory\n",
    "generated_data = {'Y': Y, 'g_opt': g_opt, 'weights': weights}\n",
    "np.savez('generated_data_heavy_Tail.npz', Y=Y, g_opt=g_opt, weights=weights)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WIrTAL4gMZel"
   },
   "source": [
    "# (1,1)-PW inequality but not Hölder\n",
    "\n",
    "For this example, we use a measure that satisfies a (1,1)-Poincaré-Wirtinger inequality but whose density is not $\\alpha$-Hölder. We take $$f_{\\mu}(x) = \\frac{1}{2\\sqrt{x}}\\mathbf{1}_{x \\in [0,1]}\\ . $$\n",
    "\n",
    "We sample $M=1000$ points $(y_1, \\dots, y_M)$ from $\\mathcal{U}(0,1)$ and generate $\\mathbf{g}^*$ randomly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "2CRkCyu5OyRF"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.     0.     0.     0.1756 0.     0.     0.8244 0.     0.     0.    ]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "def draw_PW(beta, size=1):\n",
    "    \"\"\"\n",
    "    Samples from the distribution rho(x) = (1-beta)x^(-beta) on [0,1].\n",
    "    \n",
    "    Parameters:\n",
    "    - beta: Parameter in (0, 1).\n",
    "    - size: Number of samples.\n",
    "    \n",
    "    Returns:\n",
    "    - samples: Array of samples.\n",
    "    \"\"\"\n",
    "    u = np.random.uniform(0, 1, size)  # Step 1: Uniform samples\n",
    "    samples = u ** (1 / (1 - beta))    # Step 2: Transform using inverse CDF\n",
    "    return samples\n",
    "\n",
    "def draw_heavy_tail(k, d):\n",
    "    \"\"\"\n",
    "    Samples `k` points from the probability distribution in `d` dimensions\n",
    "    with density proportional to 1 / (1 + ||x||^2)^(d+1)/2.\n",
    "    \"\"\"\n",
    "    def radial_density(r, d):\n",
    "        \"\"\"Radial density function proportional to r^(d-1) / (1 + r^2)^((d+1)/2).\"\"\"\n",
    "        return r**(d-1) / (1 + r**2)**((d+1)/2)\n",
    "\n",
    "    def sample_radius(d, size):\n",
    "        \"\"\"\n",
    "        Samples the radius `r` using rejection sampling in batches.\n",
    "        \"\"\"\n",
    "        scale = 1.0\n",
    "        max_density = radial_density(0, d)\n",
    "        if max_density == 0 or np.isnan(max_density):\n",
    "            max_density = 1.0\n",
    "\n",
    "        radii = []\n",
    "        batch_size = 10000\n",
    "        while len(radii) < size:\n",
    "            r_proposal = np.random.exponential(scale=scale, size=batch_size)\n",
    "            acceptance_probs = radial_density(r_proposal, d) / max_density\n",
    "            accepted = r_proposal[np.random.uniform(0, 1, size=batch_size) < acceptance_probs]\n",
    "            radii.extend(accepted)\n",
    "        return np.array(radii[:size])\n",
    "\n",
    "    def sample_unit_sphere(k, d):\n",
    "        z = np.random.normal(size=(k, d))\n",
    "        return z / np.linalg.norm(z, axis=1, keepdims=True)\n",
    "\n",
    "    radii = sample_radius(d, k)\n",
    "    directions = sample_unit_sphere(k, d)\n",
    "    return radii[:, np.newaxis] * directions\n",
    "\n",
    "# Parameters\n",
    "J = 10\n",
    "# The source measure lives on [0, 1], so the points y_j are scalars. Set `d`\n",
    "# explicitly: relying on a value left over from an earlier cell would make the\n",
    "# scalar samples broadcast against every coordinate of higher-dimensional Y.\n",
    "d = 1\n",
    "beta = 0.5\n",
    "K = 100000\n",
    "\n",
    "# Generate initial data\n",
    "Y = np.random.rand(J, d)\n",
    "g_opt = np.random.rand(J)\n",
    "\n",
    "# Compute initial weights\n",
    "weights = np.zeros(J)\n",
    "batch_size = 10000\n",
    "\n",
    "for _ in range(K // batch_size):\n",
    "    # draw_PW returns a flat array; add a trailing axis to get shape (batch_size, 1)\n",
    "    x_batch = draw_PW(beta, size=batch_size)[:, np.newaxis]\n",
    "    distances = 0.5 * np.linalg.norm(Y[None, :, :] - x_batch[:, None, :], axis=2)**2 - g_opt\n",
    "    j_batch = np.argmin(distances, axis=1)\n",
    "    # Tally the whole batch at once instead of one Python-level increment per sample\n",
    "    weights += np.bincount(j_batch, minlength=J)\n",
    "\n",
    "weights /= K\n",
    "print(weights)\n",
    "\n",
    "# Saving is disabled for this example; uncomment to export the generated data.\n",
    "# generated_data = {'Y': Y, 'g_opt': g_opt, 'weights': weights}\n",
    "# np.savez('generated_data_Poincare.npz', Y=Y, g_opt=g_opt, weights=weights)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.54914153, 0.69500241])"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the current value of `g_opt` (it is reassigned by each experiment cell).\n",
    "g_opt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.1023647, 0.0903239, 0.0224177, 0.5202587, 0.0293722, 0.0008516,\n",
       "       0.0827107, 0.0713543, 0.0255179, 0.0548283])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the current value of `weights` (it is reassigned by each experiment cell).\n",
    "weights"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Different costs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.01687 0.04591 0.0488  0.23873 0.01679 0.01084 0.58559 0.02164 0.0078\n",
      " 0.00703]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "# Parameters\n",
    "J = 10\n",
    "d = 50\n",
    "K  = 100000\n",
    "\n",
    "# Generate initial data\n",
    "Y = np.random.rand(J, d)\n",
    "g_opt = np.random.rand(J)\n",
    "\n",
    "# Compute initial weights\n",
    "weights = np.zeros(J)\n",
    "batch_size = 10000\n",
    "\n",
    "for _ in range(K // batch_size):\n",
    "    x_batch = np.random.rand(batch_size, d)# Reshape for compatibility\n",
    "    distances = np.linalg.norm(Y[None, :, :] - x_batch[:, None, :], axis=2)**(1.5) - g_opt\n",
    "    j_batch = np.argmin(distances, axis=1)\n",
    "    for j in j_batch:\n",
    "        weights[j] += 1\n",
    "\n",
    "weights /= K\n",
    "print(weights)\n",
    "\n",
    "# Save data\n",
    "generated_data = {'Y': Y, 'g_opt': g_opt, 'weights': weights}\n",
    "np.savez('generated_data_power1_5.npz', Y=Y, g_opt=g_opt, weights=weights)\n",
    "\n",
    "# print(\"Filtered J:\", J)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "drpasgd",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
