{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86067865-0742-4b44-a0e2-bc87447a977a",
   "metadata": {
    "is_executing": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%load_ext line_profiler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c613b1ca-4366-4c13-b0d4-47e581195d5b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "import os\n",
    "import torch\n",
    "from scipy.linalg import LinAlgError\n",
    "import numpy as np\n",
    "from torch_geometric.data import Batch, HeteroData\n",
    "from scipy.sparse import coo_array\n",
    "\n",
    "from sklearn.datasets import make_sparse_spd_matrix\n",
    "from utils.evaluation import normalize_cons, data_inactive_constraints, data_contraint_heuristic, gurobi_solve_qp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5253b78c-64f5-4b2d-b4d9-fd7107bbdebd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rng = np.random.RandomState(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca5baa7a-a9fa-41b9-aa05-755f29c333b1",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "root = 'datasets/foundation_1000_2000'\n",
    "os.mkdir(root)\n",
    "os.mkdir(os.path.join(root, 'processed'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26cbdb3b-3931-4ae5-a4b5-a6da6b315bd8",
   "metadata": {},
   "source": [
    "### SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48e31733-3880-4299-bb84-1e7bb191dd7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def soft_svm(nums, dims, lamb, density, rng):\n",
    "    A1 = rng.normal(1 / dims / density, 1 / dims / density, (nums // 2, dims))  # generate gaussian\n",
    "    A2 = rng.normal(-1 / dims / density, 1 / dims / density, (nums // 2, dims))  # generate gaussian\n",
    "    A = np.vstack([A1, A2])\n",
    "    A[rng.rand(*A.shape) > density] = 0.\n",
    "\n",
    "    # labels\n",
    "    b = np.hstack([np.ones(nums // 2) * 1., np.ones(nums // 2) * -1.])\n",
    "\n",
    "    A = A * b[:, None]\n",
    "\n",
    "    A = -np.concatenate([A, np.eye(nums) * 1.], axis=1).astype(np.float64)\n",
    "    b = -np.ones(nums).astype(np.float64)\n",
    "\n",
    "    P = np.diag(np.concatenate([np.ones(dims), np.zeros(nums)], axis=0)).astype(np.float64)\n",
    "    q = np.concatenate([np.zeros(dims), lamb * np.ones(nums)], axis=0).astype(np.float64)\n",
    "\n",
    "    return A, b, P, q\n",
    "\n",
    "def surrogate_gen():\n",
    "    A, b, P, q = soft_svm(100, 100, 0.5, 0.05, rng)\n",
    "    return P, A, b, q, None, None, 0., float('inf')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7fa446d4-2985-4c51-928c-b484c841e446",
   "metadata": {},
   "source": [
    "### portfolio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5e134a9-d60f-44c7-bd3f-a60a1b48a6ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "def portfolio(n_assets, density, rng):\n",
    "    P = make_sparse_spd_matrix(n_dim=n_assets, alpha=1 - density / 2.,\n",
    "                               smallest_coef=0.1, largest_coef=0.9, random_state=rng).astype(np.float64)\n",
    "    # P += np.eye(P.shape[1])\n",
    "    q = np.zeros(n_assets, dtype=np.float64)\n",
    "\n",
    "    A = -rng.randn(1, n_assets) * 0.01\n",
    "    Aeq = np.ones((1, n_assets)) * 0.01\n",
    "\n",
    "    # we scale b to 100, and the solution becomes ratio in percentage, and does not affect the solution up to a scale\n",
    "    b = np.array([-1.])\n",
    "    beq = np.ones(1, dtype=np.float64)\n",
    "    return P, A, b, q, Aeq, beq\n",
    "\n",
    "def surrogate_gen():\n",
    "    return *portfolio(100, 0.05, rng), 0., float('inf')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "40aa0590-240a-4f16-b2b3-86f387a14ac2",
   "metadata": {},
   "source": [
    "### Lasso"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21d3c5b2-aa8f-4b6f-8227-73488a98b4b1",
   "metadata": {},
   "source": [
    "https://medium.com/@nicolay.huarancay/lasso-regression-with-gurobi-and-setting-coefficients-approach-179be20eea14"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "991438ed-99e8-4484-9894-9c45234706fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def lasso(n_samples, n_features, density, reg, rng=None):\n",
    "    # Generate random design matrix\n",
    "    X = rng.randn(n_samples, n_features)\n",
    "    X[rng.rand(*X.shape) > density] = 0.\n",
    "    \n",
    "    # Generate sparse ground-truth weights\n",
    "    w_true = rng.randn(n_features)\n",
    "\n",
    "    # Generate target with noise\n",
    "    noise = rng.randn(n_samples) * 0.5\n",
    "    y = X @ w_true + noise\n",
    "\n",
    "    Q = 0.5 * X.T @ X\n",
    "    c = -X.T @ y\n",
    "    Z = np.zeros_like(Q)\n",
    "    Q = np.block([[Q, Z], [Z, Z]])\n",
    "    c = np.hstack([c, np.ones(c.shape[0]) * reg])\n",
    "    \n",
    "    A = np.block([[-np.eye(n_features), -np.eye(n_features)], [np.eye(n_features), -np.eye(n_features)]])\n",
    "    b = np.zeros(n_samples * 2)\n",
    "    \n",
    "    return Q, A, b, c\n",
    "\n",
    "def surrogate_gen():\n",
    "    Q, A, b, c = lasso(50, 50, 0.02, 0.1, rng)\n",
    "    return Q, A, b, c, None, None, np.hstack([np.ones(50) * -float('inf'), np.zeros(50)]), float('inf')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e4e01d11-8f2c-40a3-be31-7c8250564aac",
   "metadata": {},
   "source": [
    "# create ineq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07eb0423-ed04-42e9-accd-483f224901f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "ips = []\n",
    "graphs = []\n",
    "pkg_idx = 0\n",
    "success_cnt = 0\n",
    "\n",
    "max_iter = 15000\n",
    "num = 10000\n",
    "\n",
    "pbar = tqdm(range(max_iter))\n",
    "for i in pbar:\n",
    "    P, A, b, c, Aeq, beq, lb, ub = surrogate_gen()\n",
    "    P = P / np.abs(P).max()\n",
    "    c = c / (np.abs(c).max() + 1.e-10)  # does not change the result\n",
    "    A, b = normalize_cons(A, b)\n",
    "    Aeq, beq = normalize_cons(Aeq, beq)\n",
    "    \n",
    "    try:\n",
    "        solution, duals = gurobi_solve_qp(P, c, A, b, Aeq, beq, lb)\n",
    "        assert solution is not None\n",
    "        \n",
    "    except (AssertionError, LinAlgError):\n",
    "        continue\n",
    "    else:\n",
    "        if solution is not None:\n",
    "            inactive_idx = data_inactive_constraints(A, b, solution)\n",
    "            heur_idx = data_contraint_heuristic(P, A, b, c)\n",
    "\n",
    "            obj = 0.5 * solution @ P @ solution + c.dot(solution)\n",
    "\n",
    "            P = torch.from_numpy(P).to(torch.float)\n",
    "            P_where = torch.where(P)\n",
    "            \n",
    "            A = torch.from_numpy(A).to(torch.float)\n",
    "            b = torch.from_numpy(b).to(torch.float)\n",
    "            if Aeq is not None:\n",
    "                A = torch.vstack([A, torch.from_numpy(Aeq).to(torch.float)])\n",
    "                b = torch.hstack([b, torch.from_numpy(beq).to(torch.float)])\n",
    "            c = torch.from_numpy(c).to(torch.float)\n",
    "            x = torch.from_numpy(solution).to(torch.float)\n",
    "\n",
    "            A_where = torch.where(A)\n",
    "            data = HeteroData(\n",
    "                cons={\n",
    "                    'num_nodes': b.shape[0],\n",
    "                    'x': torch.empty(b.shape[0]),\n",
    "                     },\n",
    "                vals={\n",
    "                    'num_nodes': c.shape[0],\n",
    "                    'x': torch.empty(c.shape[0]),\n",
    "                },\n",
    "                cons__to__vals={'edge_index': torch.vstack(A_where),\n",
    "                                'edge_attr': A[A_where][:, None]},\n",
    "                vals__to__vals={'edge_index': torch.vstack(P_where),\n",
    "                                'edge_attr': P[P_where][:, None]},\n",
    "                x_solution=x,\n",
    "                duals=torch.from_numpy(duals).float(),\n",
    "                obj_solution=torch.tensor(obj).float(),\n",
    "                q=c,\n",
    "                b=b,\n",
    "                inactive_idx=torch.from_numpy(inactive_idx).long(),\n",
    "                heur_idx=torch.from_numpy(heur_idx).long(),\n",
    "            )\n",
    "            success_cnt += 1\n",
    "            graphs.append(data)\n",
    "\n",
    "    if len(graphs) >= 1000 or success_cnt == num:\n",
    "        torch.save(Batch.from_data_list(graphs), f'{root}/processed/batch{pkg_idx}.pt')\n",
    "        pkg_idx += 1\n",
    "        graphs = []\n",
    "\n",
    "    if success_cnt >= num:\n",
    "        break\n",
    "\n",
    "    pbar.set_postfix({'suc': success_cnt, 'obj': obj})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ba781a6-076b-462a-9366-0fb6a3457468",
   "metadata": {},
   "outputs": [],
   "source": [
    "from data.dataset import LPDataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f9f79d2-e484-4f1c-a35c-3924381ad80f",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = LPDataset(root, 'test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e39d6d22-91bd-4a44-9a27-599fceadf2a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = ds[10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e35e0481-6183-4e67-aec9-073a382af917",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
