{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": [
        "Wt01ptwqQB0R",
        "wY9A8xgh6ET4",
        "TJfFp6iRrqAE",
        "vJyfXMeYZcuC",
        "TI2UCRpAr3he",
        "1TLj2ZQahQmx"
      ]
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Generating Data"
      ],
      "metadata": {
        "id": "h97tJGNx4CAK"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np"
      ],
      "metadata": {
        "id": "gkRDaU-bPIJS"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def generate_offline_data(N, d1, d2,\n",
        "                          alpha_prime, beta_prime,\n",
        "                          x_min, x_max,\n",
        "                          y_min, y_max,\n",
        "                          price_bounds,\n",
        "                          sigma,\n",
        "                          seed=None):\n",
        "\n",
        "    if seed is not None:\n",
        "        np.random.seed(seed)\n",
        "    l, u = price_bounds\n",
        "    x_hat = np.random.uniform(x_min, x_max, size=(N, d1))\n",
        "    y_hat = np.random.uniform(y_min, y_max, size=(N, d2))\n",
        "    p_hat = np.random.uniform(l, u, size=N)\n",
        "    eps_hat = np.random.normal(0, sigma, size=N)\n",
        "    D_hat = x_hat.dot(alpha_prime) + (y_hat.dot(beta_prime)) * p_hat + eps_hat\n",
        "    Phi_off = np.hstack([x_hat, (y_hat * p_hat[:, None])])\n",
        "    Sigma_off = Phi_off.T @ Phi_off\n",
        "    min_eig = np.linalg.eigvalsh(Sigma_off).min()\n",
        "    max_eig = np.linalg.eigvalsh(Sigma_off).max()\n",
        "    b_off = Phi_off.T @ D_hat\n",
        "    return x_hat, y_hat, p_hat, D_hat, min_eig,max_eig, Sigma_off, b_off\n",
        "\n",
        "\n",
        "def generate_online_features_and_noise(T, d1, d2,\n",
        "                                        x_min, x_max, y_max, y_min,\n",
        "                                        sigma,\n",
        "                                        seed=None):\n",
        "\n",
        "    if seed is not None:\n",
        "        np.random.seed(seed)\n",
        "    x = np.random.uniform(x_min, x_max, size=(T, d1))\n",
        "    y = np.random.uniform(y_min, y_max, size=(T, d2))\n",
        "    eps = np.random.normal(0, sigma, size=T)\n",
        "    return x, y, eps"
      ],
      "metadata": {
        "id": "NDAmbswA4JDm"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Scalar Price Elasticity"
      ],
      "metadata": {
        "id": "DpZswvOU4vbV"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Preliminary"
      ],
      "metadata": {
        "id": "Wt01ptwqQB0R"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "d1, d2 = 5, 1\n",
        "T = 1000\n",
        "n_runs = 20\n",
        "x_min, x_max, y_min, y_max = 1, 2, 1, 2\n",
        "alpha_min, alpha_max, beta_min, beta_max = 0.5, 1, -2, -1\n",
        "sigma = 0.1\n",
        "alpha_on = np.random.uniform(alpha_min, alpha_max, size=d1)\n",
        "beta_on = np.random.uniform(beta_min, beta_max, size=d2)\n",
        "\n",
        "\n",
        "price_bounds = (d1*alpha_min*x_min/(-2*d2*beta_min*y_max), d1*alpha_max*x_max/(-2*d2*beta_max*y_min))\n",
        "l, u = price_bounds\n",
        "\n",
        "\n",
        "lam = 1\n",
        "delta = 1/T**2\n",
        "R = 0.1\n",
        "\n",
        "# Θ† box bounds\n",
        "\n",
        "theta_box = [(alpha_min, alpha_max)] * d1 + [(beta_min, beta_max)] * d2\n",
        "\n",
        "# Compute S and L\n",
        "coord_max = [max(abs(b), abs(a)) for (a, b) in theta_box]\n",
        "S = np.sqrt(sum(m**2 for m in coord_max))\n",
        "L = np.sqrt(x_max**2 + (y_max * u)**2)\n",
        "d = d1 + d2\n",
        "\n",
        "# Generate data\n",
        "x_list = []\n",
        "y_list = []\n",
        "eps_list = []\n",
        "for i in range(n_runs):\n",
        "  x, y, eps = generate_online_features_and_noise(T, d1, d2, x_min, x_max, y_min, y_max, sigma)\n",
        "  x_list.append(x)\n",
        "  y_list.append(y)\n",
        "  eps_list.append(eps)\n",
        "w = [R * np.sqrt(d * np.log(1 + t * L**2 / d*lam) + 2 * np.log(1/delta)) + np.sqrt(lam) * S for t in range(T)]"
      ],
      "metadata": {
        "id": "yLWbkcLqQEee"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "N= 35000\n",
        "alpha_off = alpha_on-2*T**(-5/16)\n",
        "beta_off = beta_on-2*T**(-5/16)\n",
        "bias_bound1 = 1.1*np.sqrt(np.linalg.norm(alpha_on - alpha_off)**2 + np.linalg.norm(beta_on - beta_off)**2)\n",
        "bias_bound2 = 10*np.sqrt(np.linalg.norm(alpha_on - alpha_off)**2 + np.linalg.norm(beta_on - beta_off)**2)\n",
        "\n",
        "\n",
        "x_hat, y_hat, p_hat, D_hat,min_eig,max_eig, V_off,b_off = generate_offline_data(\n",
        "        N, d1, d2, alpha_off, beta_off, x_min, x_max, y_min, y_max, price_bounds, sigma, seed=123)\n",
        "w_hat1 = [lam*S/(lam+min_eig) + bias_bound1+np.sqrt(2*np.log(1/delta) + d * np.log(1 + t * L**2 / d*lam))/np.sqrt(lam+min_eig)+R*(np.sqrt(d)+np.sqrt(2*np.log(1/delta)))/np.sqrt(lam+min_eig) for t in range(T)]\n",
        "w_hat2 = [lam*S/(lam+min_eig) + bias_bound2+np.sqrt(2*np.log(1/delta) + d * np.log(1 + t * L**2 / d*lam))/np.sqrt(lam+min_eig)+R*(np.sqrt(d)+np.sqrt(2*np.log(1/delta)))/np.sqrt(lam+min_eig) for t in range(T)]"
      ],
      "metadata": {
        "id": "pbCA_s-SQdIS"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "w1 = [R * np.sqrt(d * np.log(1 + (N+t) * L**2 / d*lam) + 2 * np.log(1/delta)) + np.sqrt(lam) * S for t in range(T)]"
      ],
      "metadata": {
        "id": "nQ2u8bmBrc6h"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "w_1 = [lam*S/np.sqrt(lam+min_eig) + bias_bound1*max_eig/np.sqrt(lam+max_eig)+np.sqrt(2*np.log(1/delta) + d * np.log(1 + t * L**2 / d*lam))+R*(np.sqrt(d)+np.sqrt(2*np.log(1/delta))) for t in range(T)]\n",
        "w_2 = [lam*S/np.sqrt(lam+min_eig) + bias_bound2*max_eig/np.sqrt(lam+max_eig)+np.sqrt(2*np.log(1/delta) + d * np.log(1 + t * L**2 / d*lam))+R*(np.sqrt(d)+np.sqrt(2*np.log(1/delta))) for t in range(T)]"
      ],
      "metadata": {
        "id": "PqETKPb2zA7h"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## UCB"
      ],
      "metadata": {
        "id": "wY9A8xgh6ET4"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from scipy.optimize import minimize\n",
        "import matplotlib.pyplot as plt"
      ],
      "metadata": {
        "id": "qqfdzbsV6HcW"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def select_p_and_theta(x, y, theta_hat, V, w, l, u, theta_box_bounds):\n",
        "    \"\"\"\n",
        "    Solve jointly:\n",
        "      max_{p ∈ [l,u], θ ∈ C ∩ Θ†} p * (θ^T [x; y*p])\n",
        "      C = {θ: (θ-θ_hat)^T V (θ-θ_hat) ≤ w^2}\n",
        "      Θ† by box bounds\n",
        "    Returns (p_opt, θ_opt)\n",
        "    \"\"\"\n",
        "    d = theta_hat.shape[0]\n",
        "    # objective (negative for minimizer)\n",
        "    def obj(z):\n",
        "        p = z[0]\n",
        "        theta = z[1:]\n",
        "        phi = np.concatenate([x, y * p])\n",
        "        return -p * (theta.dot(phi))\n",
        "    # ellipsoid constraint\n",
        "    def ellipsoid(z):\n",
        "        theta = z[1:]\n",
        "        diff = theta - theta_hat\n",
        "        return w**2 - diff.dot(V.dot(diff))\n",
        "    cons = ({'type': 'ineq', 'fun': ellipsoid},)\n",
        "    # bounds for p and θ\n",
        "    bounds = [(l, u)] + theta_box_bounds\n",
        "    # initial guess\n",
        "    z0 = np.concatenate([[(l+u)/2], theta_hat])\n",
        "    sol = minimize(\n",
        "        obj, z0,\n",
        "        bounds=bounds,\n",
        "        constraints=cons,\n",
        "        method='SLSQP',\n",
        "        options={'ftol':1e-6, 'maxiter':100}\n",
        "    )\n",
        "    if not sol.success:\n",
        "        return l, theta_hat\n",
        "    p_opt = float(sol.x[0])\n",
        "    theta_opt = sol.x[1:]\n",
        "    return p_opt, theta_opt"
      ],
      "metadata": {
        "id": "lvWkvtBaPAyl"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "cum_regret_UCB = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "    # Initialize\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "    theta_hat = np.zeros(d)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "    for t in range(1, T+1):\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        w_t = w[t-1]\n",
        "\n",
        "        p_t, _ = select_p_and_theta(xt, yt, theta_hat, V, w_t, l, u, theta_box)\n",
        "        # True demand and revenue\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "\n",
        "    cum_regret_UCB[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "ekd_1PDZPMvM"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## UCB-Offline"
      ],
      "metadata": {
        "id": "TJfFp6iRrqAE"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "cum_regret_UCB_offline = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "\n",
        "    # Initialize\n",
        "    V = lam * np.eye(d) + V_off\n",
        "    b = np.zeros(d)+b_off\n",
        "    theta_hat = np.linalg.solve(V, b)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    # Simulation\n",
        "    for t in range(1, T+1):\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        w_t = w1[t-1]\n",
        "\n",
        "        p_t, _ = select_p_and_theta(xt, yt, theta_hat, V, w_t, l, u, theta_box)\n",
        "        # True demand and revenue\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "\n",
        "    cum_regret_UCB_offline[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "xWADgJCYrsx0"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Thompson Sampling"
      ],
      "metadata": {
        "id": "vJyfXMeYZcuC"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def run_thompson_sampling_online(x, y, eps,\n",
        "                                 alpha_true, beta_true,\n",
        "                                 lam, R,\n",
        "                                 price_bounds,\n",
        "                                 delta=None):\n",
        "    \"\"\"\n",
        "    Run pure online Thompson Sampling for pricing.\n",
        "    Inputs:\n",
        "      x, y, eps   : online data\n",
        "      alpha_true, beta_true : true parameters\n",
        "      lam         : prior precision (ridge)\n",
        "      R           : noise std dev\n",
        "      price_bounds: (l, u)\n",
        "    Returns:\n",
        "      prices      : array of chosen prices\n",
        "      cum_regret  : cumulative regret over time\n",
        "    \"\"\"\n",
        "    T, d1 = x.shape[0], x.shape[1]\n",
        "    d2 = y.shape[1]\n",
        "    d = d1 + d2\n",
        "    l, u = price_bounds\n",
        "\n",
        "    # Prior precision V and sufficient-stat b\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "\n",
        "    prices = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    for t in range(T):\n",
        "        # Posterior mean & covariance\n",
        "        theta_mean = np.linalg.solve(V, b)\n",
        "        Sigma =  np.linalg.inv(V)\n",
        "\n",
        "        # Thompson sample\n",
        "        theta_tilde = np.random.multivariate_normal(theta_mean, Sigma)\n",
        "        alpha_tilde = theta_tilde[:d1]\n",
        "        beta_tilde  = theta_tilde[d1:]\n",
        "\n",
        "        # Select price (closed-form argmax)\n",
        "        num = - alpha_tilde.dot(x[t])\n",
        "        den =  2 * beta_tilde.dot(y[t])\n",
        "        p_t = np.clip(num/den, l, u)\n",
        "\n",
        "        # Observe demand and compute regret\n",
        "        demand = alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_t + eps[t]\n",
        "        demand_true = alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        # Oracle\n",
        "        p_star = np.clip(- alpha_true.dot(x[t]) / (2 * beta_true.dot(y[t])), l, u)\n",
        "        rev_star = p_star * (alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_star)\n",
        "        regrets[t] = rev_star - revenue_true\n",
        "        prices[t] = p_t\n",
        "\n",
        "        # Update posterior\n",
        "        phi_t = np.concatenate([x[t], y[t] * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "\n",
        "    cum_regret = np.cumsum(regrets)\n",
        "    return prices, cum_regret"
      ],
      "metadata": {
        "id": "EsJMdh1DZWTw"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "cum_regret_TS = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "\n",
        "    prices, cum_regret = run_thompson_sampling_online(\n",
        "    x, y, eps,\n",
        "    alpha_on, beta_on,\n",
        "    lam, R,\n",
        "    price_bounds)\n",
        "    cum_regret_TS[i] = cum_regret"
      ],
      "metadata": {
        "id": "LoZumXkGZa7B"
      },
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Thompson Sampling-Offline"
      ],
      "metadata": {
        "id": "TI2UCRpAr3he"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def initialize_posterior(x_hat, y_hat, p_hat, D_hat, d1, d2, lam):\n",
        "    \"\"\"\n",
        "    Build initial posterior precision matrix V and sufficient-stat b\n",
        "    from offline data.\n",
        "    \"\"\"\n",
        "    N = x_hat.shape[0]\n",
        "    d = d1 + d2\n",
        "\n",
        "    # design matrix for offline:\n",
        "    Phi_off = np.zeros((N, d))\n",
        "    for i in range(N):\n",
        "        Phi_off[i, :d1]   = x_hat[i]\n",
        "        Phi_off[i, d1:]   = y_hat[i] * p_hat[i]\n",
        "\n",
        "    # ridge‐precision + offline Gram\n",
        "    V = lam * np.eye(d) + Phi_off.T @ Phi_off\n",
        "    b = Phi_off.T @ D_hat\n",
        "    return V, b\n",
        "\n",
        "def get_posterior(V, b, R):\n",
        "    \"\"\"\n",
        "    Given precision V and sufficient-stat b, return posterior mean\n",
        "    and covariance Sigma for θ ~ N(mean, Sigma).\n",
        "    \"\"\"\n",
        "    theta_mean = np.linalg.solve(V, b)\n",
        "    Sigma      = np.linalg.inv(V)\n",
        "    return theta_mean, Sigma\n",
        "\n",
        "def run_thompson_sampling(x_hat, y_hat, p_hat, D_hat,\n",
        "                          x, y, eps,\n",
        "                          alpha_true, beta_true,\n",
        "                          d1, d2,\n",
        "                          lam, R,\n",
        "                          l, u):\n",
        "    \"\"\"\n",
        "    Runs Thompson‐Sampling pricing:\n",
        "      - offline init from (x_hat, y_hat, p_hat, D_hat)\n",
        "      - online rounds over (x, y, eps)\n",
        "    Returns:\n",
        "      prices:    array of length T\n",
        "      cum_regret: cumulative regret array length T\n",
        "    \"\"\"\n",
        "    # offline initialization\n",
        "    V, b = initialize_posterior(x_hat, y_hat, p_hat, D_hat, d1, d2, lam)\n",
        "    theta_mean, Sigma = get_posterior(V, b, R)\n",
        "\n",
        "\n",
        "    T = x.shape[0]\n",
        "    prices = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    for t in range(T):\n",
        "        # 1) sample θ̃ ~ N(theta_mean, Sigma)\n",
        "        theta_tilde = np.random.multivariate_normal(theta_mean, Sigma)\n",
        "        alpha_tilde = theta_tilde[:d1]\n",
        "        beta_tilde  = theta_tilde[d1:]\n",
        "\n",
        "\n",
        "        # 2) choose price p_t = argmax p·(α̃ᵀx_t + β̃ᵀy_t p)\n",
        "        #    oracle closed-form: - (α̃·x) / [2 (β̃·y)], clipped to [l,u]\n",
        "\n",
        "        num = - alpha_tilde.dot(x[t])\n",
        "        den =  2 * beta_tilde.dot(y[t])\n",
        "        p_t = np.clip(num/den, l, u)\n",
        "\n",
        "        # 3) observe demand and compute regret\n",
        "        demand_t = alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_t + eps[t]\n",
        "        demand_true = alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_t\n",
        "        rev      = p_t * demand_t\n",
        "        rev_true = p_t * demand_true\n",
        "        # oracle revenue\n",
        "        num_star = - alpha_true.dot(x[t])\n",
        "        den_star =  2 * beta_true.dot(y[t])\n",
        "        p_star   = np.clip(num_star/den_star, l, u)\n",
        "        rev_star = p_star * (alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_star)\n",
        "        regrets[t] = rev_star - rev_true\n",
        "\n",
        "        prices[t] = p_t\n",
        "\n",
        "        # 4) update posterior stats\n",
        "        phi_t = np.concatenate([x[t], y[t] * p_t])\n",
        "        V    += np.outer(phi_t, phi_t)\n",
        "        b    += phi_t * demand_t\n",
        "        theta_mean, Sigma = get_posterior(V, b, R)\n",
        "\n",
        "    cum_regret = np.cumsum(regrets)\n",
        "    return prices, cum_regret"
      ],
      "metadata": {
        "id": "J0pWRcGnr4o1"
      },
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "cum_regret_TS_offline = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "\n",
        "    prices, cum_regret = run_thompson_sampling(\n",
        "        x_hat, y_hat, p_hat, D_hat,\n",
        "        x, y, eps,\n",
        "        alpha_on, beta_on,\n",
        "        d1, d2,\n",
        "        lam, R,\n",
        "        l, u\n",
        "    )\n",
        "    cum_regret_TS_offline[i] = cum_regret"
      ],
      "metadata": {
        "id": "iVh5ZLZ5GfQX"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## CO3"
      ],
      "metadata": {
        "id": "1TLj2ZQahQmx"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def select_price(x, y, theta_hat, theta_hatoff, V,V_, w, w_off,w_, l, u, theta_box_bounds):\n",
        "\n",
        "    # joint optimization over p,theta\n",
        "    def obj(z):\n",
        "        p = z[0]\n",
        "        theta = z[1:]\n",
        "        phi = np.concatenate([x, y * p])\n",
        "        return -p * (theta.dot(phi))\n",
        "    def cons_off(z):\n",
        "        theta = z[1:]\n",
        "        return w_off**2 - np.linalg.norm(theta - theta_hatoff)**2\n",
        "    def cons_on(z):\n",
        "        theta = z[1:]\n",
        "        diff = theta - theta_hat\n",
        "        return w**2 - diff.dot(V.dot(diff))\n",
        "    def cons_(z):\n",
        "        theta = z[1:]\n",
        "        diff = theta - theta_hatoff\n",
        "        return w_**2 - diff.dot(V_.dot(diff))\n",
        "    cons = ({'type':'ineq','fun':cons_off}, {'type':'ineq','fun':cons_on}, {'type':'ineq','fun':cons_})\n",
        "    bounds = [(l, u)] + theta_box_bounds\n",
        "    z0 = np.concatenate([[(l+u)/2], theta_hat])\n",
        "    sol = minimize(obj, z0, bounds=bounds, constraints=cons,\n",
        "                    method='SLSQP', options={'ftol':1e-6,'maxiter':1000})\n",
        "    if not sol.success:\n",
        "        print('yes')\n",
        "        return l, theta_hat\n",
        "    p_opt = float(sol.x[0]); theta_tilde = sol.x[1:]\n",
        "    return p_opt, theta_tilde"
      ],
      "metadata": {
        "id": "t5vnQZNRGdFd"
      },
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "cum_regret_CO31 = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "\n",
        "\n",
        "    # Initialize\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "    theta_hat = np.zeros(d)\n",
        "    V_off = lam * np.eye(d) + V_off\n",
        "    b_off = b_off\n",
        "    theta_hatoff = np.linalg.solve(V_off, b_off)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    # Simulation\n",
        "    for t in range(1, T+1):\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        wt = w[t-1]\n",
        "        wt_off= w_hat1[t-1]\n",
        "        wt_off1= w_1[t-1]\n",
        "\n",
        "        p_t, _ = select_price(xt, yt, theta_hat, theta_hatoff, V,V_off, wt, wt_off,wt_off1, l, u, theta_box)\n",
        "        # True demand and revenue\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        V_off += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        b_off += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "        theta_hatoff = np.linalg.solve(V_off, b_off)\n",
        "\n",
        "    cum_regret_CO31[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "FDlwyQXBhVRC"
      },
      "execution_count": 17,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "cum_regret_CO32 = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "    # Initialize\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "    theta_hat = np.zeros(d)\n",
        "    V_off = lam * np.eye(d) + V_off\n",
        "    b_off = b_off\n",
        "    theta_hatoff = np.linalg.solve(V_off, b_off)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    # Simulation\n",
        "    for t in range(1, T+1):\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        wt = w[t-1]\n",
        "        wt_off= w_hat2[t-1]\n",
        "        wt_off1= w_2[t-1]\n",
        "\n",
        "        p_t, _ = select_price(xt, yt, theta_hat, theta_hatoff, V,V_off, wt, wt_off,wt_off1, l, u, theta_box)\n",
        "        # True demand and revenue\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        V_off += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        b_off += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "        theta_hatoff = np.linalg.solve(V_off, b_off)\n",
        "\n",
        "    cum_regret_CO32[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "QYL_xUEuGtMW"
      },
      "execution_count": 18,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "t = np.arange(1, T+1)\n",
        "plt.figure(dpi=300)\n",
        "\n",
        "mean = cum_regret_UCB.mean(axis=0)\n",
        "sem = cum_regret_UCB.std(axis=0) / np.sqrt(n_runs)\n",
        "plt.plot(t, mean, label='UCB',marker='o',markevery=100)\n",
        "plt.fill_between(t, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "\n",
        "mean = cum_regret_UCB_offline.mean(axis=0)\n",
        "sem = cum_regret_UCB_offline.std(axis=0) / np.sqrt(n_runs)\n",
        "plt.plot(t, mean, label='UCB-Offline',marker='o',markevery=100)\n",
        "plt.fill_between(t, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "\n",
        "mean = cum_regret_TS.mean(axis=0)\n",
        "sem = cum_regret_TS.std(axis=0) / np.sqrt(n_runs)\n",
        "plt.plot(t, mean, label='TS',marker='v',linestyle='--',markevery=100)\n",
        "plt.fill_between(t, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "\n",
        "mean = cum_regret_TS_offline.mean(axis=0)\n",
        "sem = cum_regret_TS_offline.std(axis=0) / np.sqrt(n_runs)\n",
        "plt.plot(t, mean, label='TS-Offline',marker='v',linestyle='--',markevery=100)\n",
        "plt.fill_between(t, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "\n",
        "mean = cum_regret_CO31.mean(axis=0)\n",
        "sem = cum_regret_CO31.std(axis=0) / np.sqrt(n_runs)\n",
        "plt.plot(t, mean, label='CO3-$1.1V_{\\operatorname{true}}$',marker='s',linestyle='-.', markevery=100)\n",
        "plt.fill_between(t, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "\n",
        "mean = cum_regret_CO32.mean(axis=0)\n",
        "sem = cum_regret_CO32.std(axis=0) / np.sqrt(n_runs)\n",
        "plt.plot(t, mean, label='CO3-$10V_{\\operatorname{true}}$', linestyle='-.', marker='s',markevery=200)\n",
        "plt.fill_between(t, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "\n",
        "\n",
        "\n",
        "plt.xlabel('$T$', fontsize=15)\n",
        "plt.ylabel('Cumulative regret', fontsize=15)\n",
        "plt.xticks(fontsize = 15)\n",
        "plt.yticks(fontsize = 15)\n",
        "\n",
        "plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)\n",
        "\n",
        "plt.legend(loc='upper left', fontsize=15)\n",
        "plt.ylim(0, 200)\n",
        "plt.tight_layout()\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "k4yvue_Jijna"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# General Price Elasticity"
      ],
      "metadata": {
        "id": "UoXIN4BkQuKa"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Preliminary"
      ],
      "metadata": {
        "id": "7PGvUkLdQ49_"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Problem dimensions, horizon and number of independent replications.\n",
        "d1, d2 = 5, 5\n",
        "T = 1000\n",
        "n_runs = 20\n",
        "# Ranges of the features (x, y) and of the true parameters (alpha, beta).\n",
        "x_min, x_max, y_min, y_max = 1, 2, 1, 2\n",
        "alpha_min, alpha_max, beta_min, beta_max = 0.5, 1, -2, -1\n",
        "sigma = 0.1\n",
        "# True online parameters, drawn from the global NumPy RNG (no seed is set in this cell).\n",
        "alpha_on = np.random.uniform(alpha_min, alpha_max, size=d1)\n",
        "beta_on = np.random.uniform(beta_min, beta_max, size=d2)\n",
        "\n",
        "\n",
        "# Price interval [l, u]: smallest and largest value of -alpha.x / (2 * beta.y) over the ranges above.\n",
        "price_bounds = (d1*alpha_min*x_min/(-2*d2*beta_min*y_max), d1*alpha_max*x_max/(-2*d2*beta_max*y_min))\n",
        "l, u = price_bounds\n",
        "\n",
        "\n",
        "lam = 1+u**2  # ridge regularisation\n",
        "delta = 1/T**2  # confidence parameter\n",
        "R = 0.1  # noise bound\n",
        "\n",
        "# Θ† box bounds: one (low, high) pair per coordinate of θ = [alpha; beta]\n",
        "\n",
        "theta_box = [(alpha_min, alpha_max)] * d1 + [(beta_min, beta_max)] * d2\n",
        "\n",
        "# Compute S and L\n",
        "# S: Euclidean norm of the largest absolute value allowed in each coordinate of θ.\n",
        "coord_max = [max(abs(b), abs(a)) for (a, b) in theta_box]\n",
        "S = np.sqrt(sum(m**2 for m in coord_max))\n",
        "L = np.sqrt(x_max**2 + (y_max * u)**2)\n",
        "d = d1 + d2\n",
        "\n",
        "# Generate data: one independent online sample path per replication\n",
        "x_list = []\n",
        "y_list = []\n",
        "eps_list = []\n",
        "for i in range(n_runs):\n",
        "  x, y, eps = generate_online_features_and_noise(T, d1, d2, x_min, x_max, y_min, y_max, sigma)\n",
        "  x_list.append(x)\n",
        "  y_list.append(y)\n",
        "  eps_list.append(eps)\n",
        "# Online confidence radius for each round. The log term is log(1 + t*L^2/(d*lam));\n",
        "# the parentheses matter because `/ d*lam` would multiply by lam instead of dividing.\n",
        "w = [R * np.sqrt(d * np.log(1 + t * L**2 / (d * lam)) + 2 * np.log(1/delta)) + np.sqrt(lam) * S for t in range(T)]"
      ],
      "metadata": {
        "id": "MXs-4uzXQ1km"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Number of offline samples.\n",
        "N= 35000\n",
        "# Offline parameters: the online ones shifted down by 2*T^(-5/16) in every coordinate.\n",
        "alpha_off = alpha_on-2*T**(-5/16)\n",
        "beta_off = beta_on-2*T**(-5/16)\n",
        "# Bias bounds handed to the algorithm: 1.1x and 10x the true Euclidean offline/online gap.\n",
        "bias_bound1 = 1.1*np.sqrt(np.linalg.norm(alpha_on - alpha_off)**2 + np.linalg.norm(beta_on - beta_off)**2)\n",
        "bias_bound2 = 10*np.sqrt(np.linalg.norm(alpha_on - alpha_off)**2 + np.linalg.norm(beta_on - beta_off)**2)\n",
        "\n",
        "\n",
        "# generate_offline_data returns (..., min_eig, max_eig, Sigma_off, b_off); the starred\n",
        "# target absorbs whatever sits between min_eig and the last two values, so the\n",
        "# unpacking no longer fails with 'too many values to unpack'.\n",
        "# Note: seed=123 re-seeds the global NumPy RNG inside generate_offline_data.\n",
        "x_hat, y_hat, p_hat, D_hat, min_eig, *_, V_off, b_off = generate_offline_data(\n",
        "        N, d1, d2, alpha_off, beta_off, x_min, x_max, y_min, y_max, price_bounds, sigma, seed=123)\n",
        "# Offline confidence radii (one per round) for the two bias bounds.\n",
        "# The log term is log(1 + t*L^2/(d*lam)); `/ d*lam` would multiply by lam instead.\n",
        "w_hat1 = [lam*S/(lam+min_eig) + bias_bound1+np.sqrt(2*np.log(1/delta) + d * np.log(1 + t * L**2 / (d * lam)))/np.sqrt(lam+min_eig)+R*(np.sqrt(d)+np.sqrt(2*np.log(1/delta)))/np.sqrt(lam+min_eig) for t in range(T)]\n",
        "w_hat2 = [lam*S/(lam+min_eig) + bias_bound2+np.sqrt(2*np.log(1/delta) + d * np.log(1 + t * L**2 / (d * lam)))/np.sqrt(lam+min_eig)+R*(np.sqrt(d)+np.sqrt(2*np.log(1/delta)))/np.sqrt(lam+min_eig) for t in range(T)]"
      ],
      "metadata": {
        "id": "6ES8YdNoRbXq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Confidence radius for UCB warm-started with the N offline samples (sample count N+t).\n",
        "# The log term is log(1 + (N+t)*L^2/(d*lam)); `/ d*lam` would multiply by lam instead.\n",
        "w1 = [R * np.sqrt(d * np.log(1 + (N+t) * L**2 / (d * lam)) + 2 * np.log(1/delta)) + np.sqrt(lam) * S for t in range(T)]"
      ],
      "metadata": {
        "id": "SXZ1A0A5Rdcq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## UCB"
      ],
      "metadata": {
        "id": "PmnRsStmRfZv"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def select_p_and_theta(x, y, theta_hat, V, w, l, u, theta_box_bounds):\n",
        "    \"\"\"\n",
        "    Solve jointly:\n",
        "      max_{p ∈ [l,u], θ ∈ C ∩ Θ†} p * (θ^T [x; y*p])\n",
        "      C = {θ: (θ-θ_hat)^T V (θ-θ_hat) ≤ w^2}\n",
        "      Θ† by box bounds\n",
        "    Returns (p_opt, θ_opt)\n",
        "    \"\"\"\n",
        "    d = theta_hat.shape[0]\n",
        "    # objective (negative for minimizer)\n",
        "    def obj(z):\n",
        "        p = z[0]\n",
        "        theta = z[1:]\n",
        "        phi = np.concatenate([x, y * p])\n",
        "        return -p * (theta.dot(phi))\n",
        "    # ellipsoid constraint\n",
        "    def ellipsoid(z):\n",
        "        theta = z[1:]\n",
        "        diff = theta - theta_hat\n",
        "        return w**2 - diff.dot(V.dot(diff))\n",
        "    cons = ({'type': 'ineq', 'fun': ellipsoid},)\n",
        "    # bounds for p and θ\n",
        "    bounds = [(l, u)] + theta_box_bounds\n",
        "    # initial guess\n",
        "    z0 = np.concatenate([[(l+u)/2], theta_hat])\n",
        "    sol = minimize(\n",
        "        obj, z0,\n",
        "        bounds=bounds,\n",
        "        constraints=cons,\n",
        "        method='SLSQP',\n",
        "        options={'ftol':1e-6, 'maxiter':100}\n",
        "    )\n",
        "    if not sol.success:\n",
        "        return l, theta_hat\n",
        "    p_opt = float(sol.x[0])\n",
        "    theta_opt = sol.x[1:]\n",
        "    return p_opt, theta_opt"
      ],
      "metadata": {
        "id": "zeL2z5nhRhar"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Cumulative regret of the purely online UCB policy: one row per replication, one column per round.\n",
        "cum_regret_UCB = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "    # Initialize the ridge statistics: V = lam*I, b = 0, so the first estimate is the zero vector\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "    theta_hat = np.zeros(d)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "    for t in range(1, T+1):\n",
        "        # rounds are 1-based here, the arrays are 0-based\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        w_t = w[t-1]\n",
        "\n",
        "        # optimistic price; the optimistic parameter returned alongside is not needed\n",
        "        p_t, _ = select_p_and_theta(xt, yt, theta_hat, V, w_t, l, u, theta_box)\n",
        "        # True demand and revenue (with and without the noise term)\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        # regret is measured on noiseless revenue\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update the ridge statistics with the noisy observation\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "\n",
        "    cum_regret_UCB[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "LfUIkYX_Rk8b"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## UCB-Offline"
      ],
      "metadata": {
        "id": "OSTl9jSkRm0c"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Cumulative regret of UCB warm-started with the offline statistics: one row per replication.\n",
        "cum_regret_UCB_offline = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "\n",
        "    # Initialize from the offline Gram matrix and moment vector.\n",
        "    # Both expressions build new arrays, so V_off and b_off are not modified by the updates below.\n",
        "    V = lam * np.eye(d) + V_off\n",
        "    b = np.zeros(d)+b_off\n",
        "    theta_hat = np.linalg.solve(V, b)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    # Simulation\n",
        "    for t in range(1, T+1):\n",
        "        # rounds are 1-based here, the arrays are 0-based\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        # radius computed with the pooled sample count N + t\n",
        "        w_t = w1[t-1]\n",
        "\n",
        "        p_t, _ = select_p_and_theta(xt, yt, theta_hat, V, w_t, l, u, theta_box)\n",
        "        # True demand and revenue (with and without the noise term)\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        # regret is measured on noiseless revenue\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update the pooled statistics with the noisy online observation\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "\n",
        "    cum_regret_UCB_offline[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "kJC30q-GRoQQ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Thompson Sampling"
      ],
      "metadata": {
        "id": "qHXuilL1RsXb"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def run_thompson_sampling_online(x, y, eps,\n",
        "                                 alpha_true, beta_true,\n",
        "                                 lam, R,\n",
        "                                 price_bounds,\n",
        "                                 delta=None):\n",
        "    \"\"\"\n",
        "    Run pure online Thompson Sampling for pricing.\n",
        "    Inputs:\n",
        "      x, y, eps   : online data\n",
        "      alpha_true, beta_true : true parameters\n",
        "      lam         : prior precision (ridge)\n",
        "      R           : noise std dev\n",
        "      price_bounds: (l, u)\n",
        "    Returns:\n",
        "      prices      : array of chosen prices\n",
        "      cum_regret  : cumulative regret over time\n",
        "    \"\"\"\n",
        "    T, d1 = x.shape[0], x.shape[1]\n",
        "    d2 = y.shape[1]\n",
        "    d = d1 + d2\n",
        "    l, u = price_bounds\n",
        "\n",
        "    # Prior precision V and sufficient-stat b\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "\n",
        "    prices = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    for t in range(T):\n",
        "        # Posterior mean & covariance\n",
        "        theta_mean = np.linalg.solve(V, b)\n",
        "        Sigma =  np.linalg.inv(V)\n",
        "\n",
        "        # Thompson sample\n",
        "        theta_tilde = np.random.multivariate_normal(theta_mean, Sigma)\n",
        "        alpha_tilde = theta_tilde[:d1]\n",
        "        beta_tilde  = theta_tilde[d1:]\n",
        "\n",
        "        # Select price (closed-form argmax)\n",
        "        num = - alpha_tilde.dot(x[t])\n",
        "        den =  2 * beta_tilde.dot(y[t])\n",
        "        p_t = np.clip(num/den, l, u)\n",
        "\n",
        "        # Observe demand and compute regret\n",
        "        demand = alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_t + eps[t]\n",
        "        demand_true = alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        # Oracle\n",
        "        p_star = np.clip(- alpha_true.dot(x[t]) / (2 * beta_true.dot(y[t])), l, u)\n",
        "        rev_star = p_star * (alpha_true.dot(x[t]) + (beta_true.dot(y[t])) * p_star)\n",
        "        regrets[t] = rev_star - revenue_true\n",
        "        prices[t] = p_t\n",
        "\n",
        "        # Update posterior\n",
        "        phi_t = np.concatenate([x[t], y[t] * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "\n",
        "    cum_regret = np.cumsum(regrets)\n",
        "    return prices, cum_regret"
      ],
      "metadata": {
        "id": "S_qwhOfyRqkC"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Cumulative regret of pure online Thompson Sampling: one row per replication, one column per round.\n",
        "cum_regret_TS = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    # same online sample paths as the other policies\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "\n",
        "    # the returned prices are not used below; only the regret curve is stored\n",
        "    prices, cum_regret = run_thompson_sampling_online(\n",
        "    x, y, eps,\n",
        "    alpha_on, beta_on,\n",
        "    lam, R,\n",
        "    price_bounds)\n",
        "    cum_regret_TS[i] = cum_regret"
      ],
      "metadata": {
        "id": "BBSERpeQRv6j"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## TS-Offline"
      ],
      "metadata": {
        "id": "ci81YtPmRxxG"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def initialize_posterior(x_hat, y_hat, p_hat, D_hat, d1, d2, lam):\n",
        "    \"\"\"\n",
        "    Build initial posterior precision matrix V and sufficient-stat b\n",
        "    from offline data.\n",
        "    \"\"\"\n",
        "    N = x_hat.shape[0]\n",
        "    d = d1 + d2\n",
        "\n",
        "    # design matrix for offline:\n",
        "    Phi_off = np.zeros((N, d))\n",
        "    for i in range(N):\n",
        "        Phi_off[i, :d1]   = x_hat[i]\n",
        "        Phi_off[i, d1:]   = y_hat[i] * p_hat[i]\n",
        "\n",
        "    # ridge‐precision + offline Gram\n",
        "    V = lam * np.eye(d) + Phi_off.T @ Phi_off\n",
        "    b = Phi_off.T @ D_hat\n",
        "    return V, b\n",
        "\n",
        "def get_posterior(V, b, R):\n",
        "    \"\"\"\n",
        "    Given precision V and sufficient-stat b, return posterior mean\n",
        "    and covariance Sigma for θ ~ N(mean, Sigma).\n",
        "    \"\"\"\n",
        "    theta_mean = np.linalg.solve(V, b)\n",
        "    Sigma      = np.linalg.inv(V)\n",
        "    return theta_mean, Sigma\n",
        "\n",
        "def run_thompson_sampling(x_hat, y_hat, p_hat, D_hat,\n",
        "                          x, y, eps,\n",
        "                          alpha_true, beta_true,\n",
        "                          d1, d2,\n",
        "                          lam, R,\n",
        "                          l, u):\n",
        "    \"\"\"\n",
        "    Runs Thompson‐Sampling pricing:\n",
        "      - offline init from (x_hat, y_hat, p_hat, D_hat)\n",
        "      - online rounds over (x, y, eps)\n",
        "    Inputs:\n",
        "      alpha_true, beta_true : true parameters used to simulate demand\n",
        "      d1, d2 : feature dimensions\n",
        "      lam    : ridge term of the precision matrix\n",
        "      R      : forwarded to get_posterior\n",
        "      l, u   : price bounds\n",
        "    Returns:\n",
        "      prices:    array of length T\n",
        "      cum_regret: cumulative regret array length T\n",
        "    \"\"\"\n",
        "    # offline initialization\n",
        "    V, b = initialize_posterior(x_hat, y_hat, p_hat, D_hat, d1, d2, lam)\n",
        "    theta_mean, Sigma = get_posterior(V, b, R)\n",
        "\n",
        "    T = x.shape[0]\n",
        "    prices = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    for t in range(T):\n",
        "        # 1) sample θ̃ ~ N(theta_mean, Sigma)\n",
        "        theta_tilde = np.random.multivariate_normal(theta_mean, Sigma)\n",
        "        a_tilde = theta_tilde[:d1].dot(x[t])   # sampled demand intercept\n",
        "        s_tilde = theta_tilde[d1:].dot(y[t])   # sampled demand slope in price\n",
        "\n",
        "        # 2) choose price p_t = argmax over [l,u] of p·(α̃ᵀx_t + β̃ᵀy_t p)\n",
        "        if s_tilde < 0:\n",
        "            # concave revenue: the clipped stationary point is the maximiser\n",
        "            p_t = np.clip(-a_tilde / (2 * s_tilde), l, u)\n",
        "        else:\n",
        "            # convex or linear revenue: the stationary point is a minimiser (or\n",
        "            # undefined), so the maximum sits at one of the two end points\n",
        "            p_t = l if l * (a_tilde + s_tilde * l) >= u * (a_tilde + s_tilde * u) else u\n",
        "\n",
        "        # 3) observe demand and compute regret (regret uses the noiseless revenue)\n",
        "        a_true = alpha_true.dot(x[t])\n",
        "        s_true = beta_true.dot(y[t])\n",
        "        demand_true = a_true + s_true * p_t\n",
        "        demand_t = demand_true + eps[t]\n",
        "        rev_true = p_t * demand_true\n",
        "        # oracle revenue\n",
        "        p_star   = np.clip(-a_true / (2 * s_true), l, u)\n",
        "        rev_star = p_star * (a_true + s_true * p_star)\n",
        "        regrets[t] = rev_star - rev_true\n",
        "\n",
        "        prices[t] = p_t\n",
        "\n",
        "        # 4) update posterior stats\n",
        "        phi_t = np.concatenate([x[t], y[t] * p_t])\n",
        "        V    += np.outer(phi_t, phi_t)\n",
        "        b    += phi_t * demand_t\n",
        "        theta_mean, Sigma = get_posterior(V, b, R)\n",
        "\n",
        "    cum_regret = np.cumsum(regrets)\n",
        "    return prices, cum_regret"
      ],
      "metadata": {
        "id": "rGDB3qxWR0LG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Cumulative regret of Thompson Sampling initialised from the offline data: one row per replication.\n",
        "cum_regret_TS_offline = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    # same online sample paths as the other policies\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "\n",
        "    # the offline posterior is rebuilt from the raw offline sample inside every call\n",
        "    prices, cum_regret = run_thompson_sampling(\n",
        "        x_hat, y_hat, p_hat, D_hat,\n",
        "        x, y, eps,\n",
        "        alpha_on, beta_on,\n",
        "        d1, d2,\n",
        "        lam, R,\n",
        "        l, u\n",
        "    )\n",
        "    cum_regret_TS_offline[i] = cum_regret"
      ],
      "metadata": {
        "id": "2qd_wCU2R3qb"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## GCO3"
      ],
      "metadata": {
        "id": "WFJNNJWyR4US"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def select_price(x, y, theta_hat, theta_hatoff, V, w, w_off, l, u, theta_box_bounds):\n",
        "\n",
        "    # joint optimization over p,theta\n",
        "    def obj(z):\n",
        "        p = z[0]\n",
        "        theta = z[1:]\n",
        "        phi = np.concatenate([x, y * p])\n",
        "        return -p * (theta.dot(phi))\n",
        "    def cons_off(z):\n",
        "        theta = z[1:]\n",
        "        return w_off**2 - np.linalg.norm(theta - theta_hatoff)**2\n",
        "    def cons_on(z):\n",
        "        theta = z[1:]\n",
        "        diff = theta - theta_hat\n",
        "        return w**2 - diff.dot(V.dot(diff))\n",
        "    cons = ({'type':'ineq','fun':cons_off}, {'type':'ineq','fun':cons_on})\n",
        "    bounds = [(l, u)] + theta_box_bounds\n",
        "    z0 = np.concatenate([[(l+u)/2], theta_hat])\n",
        "    sol = minimize(obj, z0, bounds=bounds, constraints=cons,\n",
        "                    method='SLSQP', options={'ftol':1e-6,'maxiter':1000})\n",
        "    if not sol.success:\n",
        "        return l, theta_hat\n",
        "    p_opt = float(sol.x[0]); theta_tilde = sol.x[1:]\n",
        "    return p_opt, theta_tilde"
      ],
      "metadata": {
        "id": "SQb9heswR6rh"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Cumulative regret of GCO3 with the offline radius w_hat1: one row per replication.\n",
        "cum_regret_GCO31 = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "    # Initialize the purely online ridge statistics\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "    theta_hat = np.zeros(d)\n",
        "    # Per-run copies of the offline statistics. The notebook-level V_off / b_off\n",
        "    # must stay untouched: updating them in place would leak this run's online data\n",
        "    # (and one more lam*I per run) into every later run and into later cells.\n",
        "    V_pool = lam * np.eye(d) + V_off\n",
        "    b_pool = np.array(b_off, dtype=float)\n",
        "    theta_hatoff = np.linalg.solve(V_pool, b_pool)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    # Simulation\n",
        "    for t in range(1, T+1):\n",
        "        # rounds are 1-based here, the arrays are 0-based\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        wt = w[t-1]\n",
        "        wt_off= w_hat1[t-1]\n",
        "\n",
        "        p_t, _ = select_price(xt, yt, theta_hat, theta_hatoff, V, wt, wt_off, l, u, theta_box)\n",
        "        # True demand and revenue (with and without the noise term)\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        # regret is measured on noiseless revenue\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update both the online and the pooled (offline + online) statistics\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        V_pool += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        b_pool += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "        theta_hatoff = np.linalg.solve(V_pool, b_pool)\n",
        "\n",
        "    cum_regret_GCO31[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "NDFhK2bXR9H4"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Cumulative regret of GCO3 with the offline radius w_hat2: one row per replication.\n",
        "cum_regret_GCO32 = np.zeros((n_runs, T))\n",
        "# Simulation\n",
        "for i in range(n_runs):\n",
        "    x, y, eps = x_list[i], y_list[i], eps_list[i]\n",
        "    # Initialize the purely online ridge statistics\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "    theta_hat = np.zeros(d)\n",
        "    # Per-run copies of the offline statistics. The notebook-level V_off / b_off\n",
        "    # must stay untouched: updating them in place would leak this run's online data\n",
        "    # (and one more lam*I per run) into every later run and into later cells.\n",
        "    V_pool = lam * np.eye(d) + V_off\n",
        "    b_pool = np.array(b_off, dtype=float)\n",
        "    theta_hatoff = np.linalg.solve(V_pool, b_pool)\n",
        "\n",
        "\n",
        "    # Storage\n",
        "    prices = np.zeros(T)\n",
        "    demands = np.zeros(T)\n",
        "    regrets = np.zeros(T)\n",
        "\n",
        "    # Simulation\n",
        "    for t in range(1, T+1):\n",
        "        # rounds are 1-based here, the arrays are 0-based\n",
        "        xt, yt, et = x[t-1], y[t-1], eps[t-1]\n",
        "        wt = w[t-1]\n",
        "        wt_off= w_hat2[t-1]\n",
        "\n",
        "        p_t, _ = select_price(xt, yt, theta_hat, theta_hatoff, V, wt, wt_off, l, u, theta_box)\n",
        "        # True demand and revenue (with and without the noise term)\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        # regret is measured on noiseless revenue\n",
        "        regrets[t-1] = revenue_star - revenue_true\n",
        "        prices[t-1] = p_t\n",
        "        demands[t-1] = demand\n",
        "\n",
        "        # Update both the online and the pooled (offline + online) statistics\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        V_pool += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        b_pool += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "        theta_hatoff = np.linalg.solve(V_pool, b_pool)\n",
        "\n",
        "    cum_regret_GCO32[i] = np.cumsum(regrets)"
      ],
      "metadata": {
        "id": "T6MxlOQyR-3E"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Mean cumulative regret of every policy with a band of +/- 2 standard errors across runs.\n",
        "t = np.arange(1, T+1)\n",
        "plt.figure(dpi=300)\n",
        "\n",
        "# (regret matrix, legend label, line style) in drawing order; the order fixes the colour cycle.\n",
        "# The mathtext labels are raw strings so that '\\o' is not read as a string escape.\n",
        "curves = [\n",
        "    (cum_regret_UCB, 'UCB', dict(marker='o', markevery=100)),\n",
        "    (cum_regret_UCB_offline, 'UCB-Offline', dict(marker='o', markevery=100)),\n",
        "    (cum_regret_TS, 'TS', dict(marker='v', linestyle='--', markevery=100)),\n",
        "    (cum_regret_TS_offline, 'TS-Offline', dict(marker='v', linestyle='--', markevery=100)),\n",
        "    (cum_regret_GCO31, r'GCO3-$1.1V_{\\operatorname{true}}$', dict(marker='s', linestyle='-.', markevery=100)),\n",
        "    (cum_regret_GCO32, r'GCO3-$10V_{\\operatorname{true}}$', dict(marker='s', linestyle='-.', markevery=200)),\n",
        "]\n",
        "for cum_regret_runs, label, style in curves:\n",
        "    mean = cum_regret_runs.mean(axis=0)\n",
        "    sem = cum_regret_runs.std(axis=0) / np.sqrt(n_runs)\n",
        "    plt.plot(t, mean, label=label, **style)\n",
        "    plt.fill_between(t, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "\n",
        "\n",
        "\n",
        "plt.xlabel('$T$', fontsize=15)\n",
        "plt.ylabel('Cumulative regret', fontsize=15)\n",
        "plt.xticks(fontsize = 15)\n",
        "plt.yticks(fontsize = 15)\n",
        "\n",
        "plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)\n",
        "\n",
        "plt.legend(loc='upper left', fontsize=15)\n",
        "\n",
        "plt.tight_layout()\n",
        "plt.ylim(0, 100)\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "tNEhbF53SBhM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Robustness"
      ],
      "metadata": {
        "id": "wrjypIfBSEF7"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def select_p_and_theta1(a,b, l, u):\n",
        "    \"\"\"\n",
        "    Solve jointly:\n",
        "      max_{p ∈ [l,u], θ ∈ C ∩ Θ†} p * (θ^T [x; y*p])\n",
        "      C = {θ: (θ-θ_hat)^T V (θ-θ_hat) ≤ w^2}\n",
        "      Θ† by box bounds\n",
        "    Returns (p_opt, θ_opt)\n",
        "    \"\"\"\n",
        "\n",
        "    def obj(z):\n",
        "        p = z\n",
        "\n",
        "        return -p * (a + b*p)\n",
        "\n",
        "    bounds = [(l, u)]\n",
        "    # initial guess\n",
        "    z0 = (l+u)/2\n",
        "    sol = minimize(\n",
        "        obj, z0,\n",
        "        bounds=bounds,\n",
        "        method='SLSQP',\n",
        "        options={'ftol':1e-6, 'maxiter':100}\n",
        "    )\n",
        "    if not sol.success:\n",
        "        return l\n",
        "    p_opt = float(sol.x)\n",
        "\n",
        "    return p_opt"
      ],
      "metadata": {
        "id": "6qYthGWxSDjZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "d1, d2 = 5, 5\n",
        "T = 5000\n",
        "\n",
        "x_min, x_max, y_min, y_max = 1, 2, 1, 2\n",
        "alpha_min, alpha_max, beta_min, beta_max = 0.5, 1, -2, -1\n",
        "sigma = 0.1\n",
        "\n",
        "alpha_on = np.random.uniform(alpha_min, alpha_max, size=d1)\n",
        "beta_on = np.random.uniform(beta_min, beta_max, size=d2)\n",
        "\n",
        "\n",
        "price_bounds = (d1*alpha_min*x_min/(-2*d2*beta_min*y_max), d1*alpha_max*x_max/(-2*d2*beta_max*y_min))\n",
        "l, u = price_bounds\n",
        "\n",
        "\n",
        "lam = 1+u**2\n",
        "delta = 1/T**2\n",
        "R = 0.1  # noise bound|\n",
        "\n",
        "# Θ† box bounds\n",
        "\n",
        "theta_box = [(alpha_min, alpha_max)] * d1 + [(beta_min, beta_max)] * d2\n",
        "\n",
        "# Compute S and L\n",
        "coord_max = [max(abs(b), abs(a)) for (a, b) in theta_box]\n",
        "S = np.sqrt(sum(m**2 for m in coord_max))\n",
        "L = np.sqrt(x_max**2 + (y_max * u)**2)\n",
        "d = d1 + d2\n",
        "w = [R * np.sqrt(d * np.log(1 + t * L**2 / d*lam) + 2 * np.log(1/delta)) + np.sqrt(lam) * S for t in range(T)]"
      ],
      "metadata": {
        "id": "QJch0glqSGdZ"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Robustness experiment: total regret of plain UCB versus an explore-then-decide policy,\n",
        "# for `interval` offline bias levels and `n_runs` replications each.\n",
        "interval =10\n",
        "n_runs = 20\n",
        "\n",
        "alpha = 1/4\n",
        "T_prime = int(T**alpha)  # length of the initial exploration phase\n",
        "N= 200000\n",
        "regrets = np.zeros((interval, n_runs))\n",
        "regrets_UCB = np.zeros((interval, n_runs))\n",
        "\n",
        "\n",
        "for j in range(interval):\n",
        "  # offline bias added to every coordinate: T^(-j/10)\n",
        "  bias = T**(-(j/10))\n",
        "\n",
        "  alpha_off = alpha_on+bias\n",
        "  beta_off = beta_on+bias\n",
        "\n",
        "  # generate_offline_data returns (..., min_eig, max_eig, Sigma_off, b_off); the starred\n",
        "  # target absorbs whatever sits between min_eig and the last two values, so the\n",
        "  # unpacking no longer fails with 'too many values to unpack'.\n",
        "  x_hat, y_hat, p_hat, D_hat, min_eig, *_, V_off, b_off = generate_offline_data(\n",
        "        N, d1, d2, alpha_off, beta_off, x_min, x_max, y_min, y_max, price_bounds, sigma)\n",
        "  # ridge estimate from the offline data only\n",
        "  Theta_off = np.linalg.solve(lam * np.eye(d1 + d2) + V_off, b_off)\n",
        "\n",
        "  for i in range(n_runs):\n",
        "    x, y, eps = generate_online_features_and_noise(T, d1, d2, x_min, x_max, y_min, y_max, sigma)\n",
        "\n",
        "    regret = 0\n",
        "    regret_UCB = 0\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "    theta_hat = np.zeros(d)\n",
        "\n",
        "    # ---- baseline: plain online UCB over the whole horizon ----\n",
        "    for t in range(T):\n",
        "      xt, yt, et = x[t], y[t], eps[t]\n",
        "      w_t = w[t]\n",
        "\n",
        "      p_t, _ = select_p_and_theta(xt, yt, theta_hat, V, w_t, l, u, theta_box)\n",
        "      # True demand and revenue (with and without the noise term)\n",
        "      demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "      demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "      revenue = p_t * demand\n",
        "      revenue_true = p_t * demand_true\n",
        "\n",
        "      # Oracle optimal price & revenue\n",
        "      p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "      revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "      regret_UCB += revenue_star - revenue_true\n",
        "\n",
        "      # Update\n",
        "      phi_t = np.concatenate([xt, yt * p_t])\n",
        "      V += np.outer(phi_t, phi_t)\n",
        "      b += phi_t * demand\n",
        "      theta_hat = np.linalg.solve(V, b)\n",
        "    regrets_UCB[j][i] = regret_UCB\n",
        "\n",
        "    # ---- exploration phase: T_prime rounds at a random end-point price ----\n",
        "    V = lam * np.eye(d)\n",
        "    b = np.zeros(d)\n",
        "\n",
        "    for t in range(T_prime):\n",
        "      # pick p_t ∈ {l, u} uniformly\n",
        "      p_t = np.random.choice([l, u])\n",
        "      # observe demand\n",
        "      d_t = x[t].dot(alpha_on) + (y[t].dot(beta_on)) * p_t + eps[t]\n",
        "      d_true = x[t].dot(alpha_on) + (y[t].dot(beta_on)) * p_t\n",
        "      # regret\n",
        "      p_star = np.clip(- alpha_on.dot(x[t])/(2*beta_on.dot(y[t])), l, u)\n",
        "      r_star = p_star * (alpha_on.dot(x[t]) + beta_on.dot(y[t])*p_star)\n",
        "      regret += r_star - p_t*d_true\n",
        "      # update\n",
        "      phi = np.concatenate([x[t], y[t] * p_t])\n",
        "      V += np.outer(phi, phi)\n",
        "      b += phi * d_t\n",
        "\n",
        "    Theta_on = np.linalg.solve(V, b)\n",
        "\n",
        "    # ---- decision: trust the offline estimate if it is close enough to the online one ----\n",
        "    noise_term = R*(np.sqrt(d)+np.sqrt(2* np.log(3/delta)))\n",
        "    min_eig_on = np.linalg.eigvalsh(V).min()  # computed once, used twice in the threshold\n",
        "    threshold = lam*S/(lam+min_eig) + noise_term/np.sqrt(lam+min_eig) + lam*S/min_eig_on + noise_term/np.sqrt(min_eig_on)\n",
        "\n",
        "    if np.linalg.norm(Theta_off - Theta_on)*6 <= threshold:\n",
        "      for t in range(T_prime, T):\n",
        "        # greedy price under the offline estimate; the intercept/slope get their own\n",
        "        # names so the statistic vector b is not overwritten by a scalar\n",
        "        a_t = Theta_off[:d1].dot(x[t])\n",
        "        b_t = Theta_off[d1:].dot(y[t])\n",
        "        p_t = select_p_and_theta1(a_t, b_t, l, u)\n",
        "        d_t = x[t].dot(alpha_on) + (y[t].dot(beta_on)) * p_t + eps[t]\n",
        "        d_true = x[t].dot(alpha_on) + (y[t].dot(beta_on)) * p_t\n",
        "        p_star = np.clip(- alpha_on.dot(x[t])/(2*beta_on.dot(y[t])), l, u)\n",
        "        r_star = p_star * (alpha_on.dot(x[t]) + beta_on.dot(y[t])*p_star)\n",
        "        regret += r_star - p_t*d_true\n",
        "\n",
        "    else:\n",
        "\n",
        "      # offline data rejected: run UCB from scratch on the remaining rounds\n",
        "      # (the exploration statistics are discarded here)\n",
        "      V = lam * np.eye(d)\n",
        "      b = np.zeros(d)\n",
        "      theta_hat = np.zeros(d)\n",
        "\n",
        "      for t in range(T_prime, T):\n",
        "\n",
        "        xt, yt, et = x[t], y[t], eps[t]\n",
        "        # the radius is indexed by the number of UCB rounds played so far\n",
        "        w_t = w[t-T_prime]\n",
        "\n",
        "        p_t, _ = select_p_and_theta(xt, yt, theta_hat, V, w_t, l, u, theta_box)\n",
        "        # True demand and revenue (with and without the noise term)\n",
        "        demand = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t + et\n",
        "        demand_true = xt.dot(alpha_on) + (yt.dot(beta_on)) * p_t\n",
        "        revenue = p_t * demand\n",
        "        revenue_true = p_t * demand_true\n",
        "\n",
        "        # Oracle optimal price & revenue\n",
        "        p_star = np.clip(-xt.dot(alpha_on) / (2 * (yt.dot(beta_on))), l, u)\n",
        "        revenue_star = p_star * (xt.dot(alpha_on) + (yt.dot(beta_on)) * p_star)\n",
        "\n",
        "        regret += revenue_star - revenue_true\n",
        "\n",
        "        # Update\n",
        "        phi_t = np.concatenate([xt, yt * p_t])\n",
        "        V += np.outer(phi_t, phi_t)\n",
        "        b += phi_t * demand\n",
        "        theta_hat = np.linalg.solve(V, b)\n",
        "\n",
        "    regrets[j][i] = regret"
      ],
      "metadata": {
        "id": "ymW4Rl1ISIYg"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "regrets = regrets[::-1]"
      ],
      "metadata": {
        "id": "iPY49gD7SNUZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "m, n = regrets.shape\n",
        "x = (np.arange(m)-9)/5\n",
        "\n",
        "\n",
        "mean = regrets.mean(axis=1)\n",
        "sem  = regrets.std(axis=1) / np.sqrt(n_runs)\n",
        "\n",
        "mean_UCB = regrets_UCB.mean(axis=1)\n",
        "sem_UCB = regrets_UCB.std(axis=1) / np.sqrt(n_runs)"
      ],
      "metadata": {
        "id": "1ihqQhNLSNyk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "fig, ax = plt.subplots(dpi=300)\n",
        "\n",
        "\n",
        "ax.plot(x, mean, marker='s',linestyle = '--', linewidth=1,label = 'RCO3')\n",
        "ax.fill_between(x, mean-2*sem, mean+2*sem, alpha=0.3)\n",
        "ax.plot(x, mean_UCB, marker='o', linewidth=1,label = 'UCB')\n",
        "ax.fill_between(x, mean_UCB-2*sem_UCB, mean_UCB+2*sem_UCB, alpha=0.3)\n",
        "ax.set_xlabel(r'$V_{\\mathrm{true}}^2\\in\\Theta(T^{x})$', fontsize=15)\n",
        "ax.set_ylabel(r'Cumulative regret',fontsize=15)\n",
        "#ax.set_yscale('log')\n",
        "ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)\n",
        "ax.tick_params(axis='both', which='major', labelsize=15)\n",
        "plt.legend(fontsize=15)\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "58shFzMXSQFe"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "888xzPz9SK4q"
      }
    }
  ]
}