{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1ff22e8-98da-477f-86dc-29eca6225baf",
   "metadata": {},
   "source": [
    "# gp2Scale III -- Topo"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3473f81d-4935-4e9c-ab5c-7679e2f29cd8",
   "metadata": {},
   "source": [
    "Make sure you\n",
    "\n",
    "- make a new environment\n",
    "\n",
    "- activate it\n",
    "\n",
    "- pip install ipykernel\n",
    "\n",
    "- python3 -m ipykernel install --user --name env --display-name MyEnvironment\n",
    "\n",
    "- pip install everything_else\n",
    "\n",
    "- make sure the notebook uses the right kernel\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af1e0909-65dd-4c1f-8f13-e6b5aed09fc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from gpcam import GPOptimizer\n",
    "import matplotlib.pyplot as plt\n",
    "import random\n",
    "import topo_kernelGPU\n",
    "import torch"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "45f0be8d-8625-4974-9b03-afbc83b0049f",
   "metadata": {},
   "source": [
    "run this in the terminal on Perlmutter\n",
    "\n",
    "#./launch-dask-module.sh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f6ad26f-6d0b-4282-a380-485780a559c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import dask\n",
    "from dask.distributed import Client\n",
    "import os\n",
    "import time\n",
    "\n",
    "scheduler_file = os.path.join(os.environ[\"SCRATCH\"], \"scheduler_filetopo.json\")\n",
    "\n",
    "dask.config.config[\"distributed\"][\"dashboard\"][\"link\"] = \"{JUPYTERHUB_SERVICE_PREFIX}proxy/{host}:{port}/status\" \n",
    "\n",
    "while True:\n",
    "    time.sleep(2)\n",
    "    if os.path.isfile(scheduler_file):\n",
    "        print(\"file found\")\n",
    "        time.sleep(2)\n",
    "        client = Client(scheduler_file=scheduler_file)\n",
    "        break\n",
    "print(\"waiting for workers\")\n",
    "client.wait_for_workers(16)\n",
    "print(client)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39c501bf-bd24-4e5a-ab22-deea98f32503",
   "metadata": {},
   "outputs": [],
   "source": [
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b09030f-713e-4da0-a372-74f9966c1247",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train = np.genfromtxt(\"./data/x_train_2dtopo.csv\", delimiter=\" \")\n",
    "y_train = np.genfromtxt(\"./data/y_train_2dtopo.csv\", delimiter=\" \")\n",
    "\n",
    "x_test = np.genfromtxt(\"./data/x_test_2dtopo.csv\", delimiter=\" \")\n",
    "y_test = np.genfromtxt(\"./data/y_test_2dtopo.csv\", delimiter=\" \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2755913",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(x_train.shape)\n",
    "print(x_test.shape)\n",
    "print(np.min(x_train[:,0]), np.max(x_train[:,0]))\n",
    "print(np.min(x_train[:,1]), np.max(x_train[:,1]))\n",
    "print(\" \")\n",
    "print(np.min(y_train), np.max(y_train))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a669f550-e7c7-4dba-8d75-4793c0f859ea",
   "metadata": {},
   "source": [
    "### Wendland, no bumps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1ca48fd-444c-4aee-856e-a3317c2add73",
   "metadata": {},
   "outputs": [],
   "source": [
    "hps_bounds = np.zeros((7,2))\n",
    "hps_bounds[0] = np.array([12, 17.])\n",
    "hps_bounds[1] = np.array([0.02,   0.3])\n",
    "hps_bounds[2] = np.array([12, 17.])\n",
    "hps_bounds[3] = np.array([0.001,   0.3])\n",
    "\n",
    "hps_bounds[4] = np.array([-np.pi, np.pi])\n",
    "\n",
    "hps_bounds[5] = np.array([12., 15.]) #signal std dev offset\n",
    "hps_bounds[6] = np.array([.02, 0.4]) #signal std dev slope\n",
    "\n",
    "\n",
    "init_hps = np.zeros((len(hps_bounds)))\n",
    "init_hps[0] = 15. ##the higher, the lower the max length scale in the domain will be\n",
    "init_hps[1] = 0.1 #the higher, the more dependence of the length scale on elevation \n",
    "init_hps[2] = 15. ##the higher, the lower the max lengthscale in the domain will be\n",
    "init_hps[3] = 0.1 #the higher, the more dependence of the lengthscale on elevation \n",
    "\n",
    "init_hps[4] = 0.\n",
    "\n",
    "init_hps[5] = 14 #2.\n",
    "init_hps[6] = 0.2\n",
    "\n",
    "init_hps = np.random.uniform(size = len(hps_bounds), low = hps_bounds[:,0], high = hps_bounds[:,1])\n",
    "\n",
    "#found through training, disable for a fresh run\n",
    "init_hps = np.array([1.35314150e+01, 1.05295736e-01, 1.64352174e+01, 1.19212875e-02,\n",
    " 2.88918622e-01, 1.44259274e+01, 6.58457839e-02])\n",
    "\n",
    "\n",
    "\n",
    "my_gp2S = GPOptimizer(x_train,y_train,init_hyperparameters=init_hps, kernel_function = topo_kernelGPU.kernel, \n",
    "                      gp2Scale = True, gp2Scale_batch_size = 4000, gp2Scale_dask_client = client, \n",
    "                      compute_device=\"gpu\", noise_variances=np.ones(y_train.shape), logging = False, \n",
    "                      )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdff1510-321c-4cd8-b9e5-ae6943c8422b",
   "metadata": {},
   "outputs": [],
   "source": [
    "st = time.time()\n",
    "print(\"Likelihood: \", my_gp2S.log_likelihood(hyperparameters = init_hps))\n",
    "print(\"exec time: \",time.time() - st)\n",
    "\n",
    "sparsity = float(my_gp2S.prior.K.nnz) / float(my_gp2S.prior.K.shape[0]**2) #would be 0 for full sparsity aka all zeros\n",
    "\n",
    "print(\"sparsity: \", sparsity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae32825c-cf68-4d51-b054-56b5cb57f6ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "def in_bounds(v,bounds):\n",
    "    if any(v<bounds[:,0]) or any(v>bounds[:,1]):\n",
    "        ia = list(np.where(v > bounds[:,1])[0])\n",
    "        ib = list(np.where(v < bounds[:,0])[0])\n",
    "        return False, ia + ib\n",
    "    return True, None\n",
    "\n",
    "\n",
    "def prior_function(theta,args):\n",
    "    bounds = args[\"bounds\"]\n",
    "    d = in_bounds(theta, bounds)\n",
    "    if d[0]:\n",
    "        prior = 0. #+ #np.sum(np.log(pis[one_ampl_ind])) + np.sum(np.log(1.-pis[zero_ampl_ind]))\n",
    "        #print(\"PRIOR=\", prior, theta,flush = True)\n",
    "        return prior\n",
    "    else:\n",
    "        print(\"                    PRIOR eval out of bounds\", d[1], theta[d[1]], flush = True)\n",
    "        return -np.inf\n",
    "\n",
    "def proposal_distribution_normal(x0, hps, obj):\n",
    "    cov = obj.prop_args[\"prop_Sigma\"]\n",
    "    #print(cov)\n",
    "    proposal_hps = np.zeros((len(x0)))\n",
    "    proposal_hps = np.random.multivariate_normal(\n",
    "        mean = x0, cov = cov, size = 1).reshape(len(x0))\n",
    "    return proposal_hps\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad340ebd",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from gpcam.gp_mcmc import gpMCMC\n",
    "\n",
    "def func(hps, args):\n",
    "    np.save(\"last_hps_backup\", hps)\n",
    "    result = my_gp2S.log_likelihood(hyperparameters=hps)\n",
    "    print(\"                         f(x): \", result)\n",
    "    return result\n",
    "\n",
    "def write_results(obj): np.save(\"current_trace\", obj.trace)\n",
    "\n",
    "from gpcam.gp_mcmc import ProposalDistribution\n",
    "lengthscale_ind = [i for i in range(0,7)]\n",
    "\n",
    "\n",
    "#initial proposal Sigma\n",
    "axis_std_lengthscale = (hps_bounds[lengthscale_ind, 1] - hps_bounds[lengthscale_ind,0])/100.\n",
    "init_s_ls = np.diag(axis_std_lengthscale**2)\n",
    "\n",
    "\n",
    "#normal proposal distr. for core, Wendland, bump positions, and radii\n",
    "pd1 = ProposalDistribution(lengthscale_ind, proposal_dist = proposal_distribution_normal,\n",
    "                        init_prop_Sigma = init_s_ls, adapt_callable=\"normal\", K=10, ID = \"core\")\n",
    "\n",
    "my_mcmc = gpMCMC(func, prior_function, [pd1], args={\"bounds\":hps_bounds})\n",
    "print(init_hps)\n",
    "mcmc_result = my_mcmc.run_mcmc(x0=init_hps, info=True, n_updates=100, run_in_every_iteration=write_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a48356d-d97d-4b9e-ac87-c811142da05b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import sparse\n",
    "hps1 = np.median(mcmc_result[\"x\"][9:], axis=0)\n",
    "print(\"FINAL HYPERPARAMETERS\")\n",
    "print(hps1)\n",
    "\n",
    "\n",
    "my_gp2S.set_hyperparameters(hps1)\n",
    "print(my_gp2S.log_likelihood(my_gp2S.get_hyperparameters()))\n",
    "np.save(\"full_gp2ScaleHPS\", my_gp2S.get_hyperparameters())\n",
    "\n",
    "sparsity = float(my_gp2S.prior.K.nnz) / float(my_gp2S.prior.K.shape[0]**2) #would be 0 for full sparsity i.e. all zeros\n",
    "\n",
    "print(\"sparsity: \", sparsity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "18b659f4-73db-4cdf-8fba-1f0989049020",
   "metadata": {},
   "outputs": [],
   "source": [
    "rmse = my_gp2S.rmse(x_test, y_test)\n",
    "print(\"rmse: \", rmse)\n",
    "rmse = my_gp2S.crps(x_test, y_test)\n",
    "print(\"rmse: \", rmse)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7270453-19ce-4991-967f-2d287e131cbe",
   "metadata": {},
   "source": [
    "## Bump Kernel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0bdb1e8-34cb-4c12-a997-2675829cd559",
   "metadata": {},
   "outputs": [],
   "source": [
    "hps_bounds = np.zeros((410,2))\n",
    "hps_bounds[0] = np.array([12, 17.])\n",
    "hps_bounds[1] = np.array([0.02,   0.3])\n",
    "hps_bounds[2] = np.array([12, 17.])\n",
    "hps_bounds[3] = np.array([0.001,   0.3])\n",
    "\n",
    "hps_bounds[4] = np.array([-np.pi, np.pi])\n",
    "\n",
    "hps_bounds[5] = np.array([12., 15.]) #signal std dev offset\n",
    "hps_bounds[6] = np.array([.02, 0.4]) #signal std dev slope\n",
    "\n",
    "##BUMPS\n",
    "hps_bounds[7:107]   = np.array([0., 1.])  #all x-pos\n",
    "hps_bounds[107:207] = np.array([0., 1.]) # all y-pos\n",
    "\n",
    "hps_bounds[207:307] = np.array([0.0001,0.03]) #radii\n",
    "hps_bounds[307:407] = np.array([0.0,1.0]) #amplitudes\n",
    "                 \n",
    "hps_bounds[407:409] = np.array([[0.04,0.06], #wendland length scale x\n",
    "                                [0.04,0.06], #wendland length scale y\n",
    "                               ])\n",
    "\n",
    "hps_bounds[409] = np.array([[0.01,100.]])\n",
    "\n",
    "\n",
    "init_hps = np.zeros((len(hps_bounds)))\n",
    "\n",
    "#found through training, disable for fresh run\n",
    "init_hps[0:7] = np.array([1.35314150e+01, 1.05295736e-01, 1.64352174e+01, 1.19212875e-02, 2.88918622e-01, 1.44259274e+01, 6.58457839e-02])\n",
    "init_hps[7:409] = np.random.uniform(low = hps_bounds[7:409,0],\n",
    "                                                high= hps_bounds[7:409,1],\n",
    "                                                size = len(hps_bounds[7:409]))\n",
    "init_hps[307:407] = 0.\n",
    "\n",
    "\n",
    "from loguru import logger\n",
    "logger.disable(\"fvgp\")\n",
    "\n",
    "my_gp2S = GPOptimizer(x_train,y_train,init_hyperparameters=init_hps, kernel_function = topo_kernelGPU.kernel_bump, \n",
    "                      gp2Scale = True, gp2Scale_batch_size = 4000, gp2Scale_dask_client = client, \n",
    "                      compute_device=\"gpu\", noise_variances=np.ones(y_train.shape)\n",
    "                      )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb4fe3f9-cbdc-4e59-a80c-62863163a307",
   "metadata": {},
   "outputs": [],
   "source": [
    "st = time.time()\n",
    "print(\"Likelihood: \", my_gp2S.log_likelihood(hyperparameters = init_hps))\n",
    "print(\"exec time: \",time.time() - st)\n",
    "\n",
    "sparsity = float(my_gp2S.prior.K.nnz) / float(my_gp2S.prior.K.shape[0]**2) #would be 0 for full sparsity aka all zeros\n",
    "\n",
    "print(\"sparsity: \", sparsity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "144b1a8a-9175-4805-b7ee-2d0d5fcc8ef4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def in_bounds(v,bounds):\n",
    "    if any(v<bounds[:,0]) or any(v>bounds[:,1]):\n",
    "        ia = list(np.where(v > bounds[:,1])[0])\n",
    "        ib = list(np.where(v < bounds[:,0])[0])\n",
    "        return False, ia + ib\n",
    "    return True, None\n",
    "\n",
    "\n",
    "def prior_function(theta,args):\n",
    "    bounds = args[\"bounds\"]\n",
    "    d = in_bounds(theta, bounds)\n",
    "    pis = np.zeros((100)) + 0.4\n",
    "    amplitudes = theta[307:407]\n",
    "    zero_ampl_ind = np.where(amplitudes == 0.)[0]\n",
    "    one_ampl_ind  = np.where(amplitudes == 1.)[0]\n",
    "    \n",
    "    if d[0]:\n",
    "        prior = 0. + np.sum(np.log(pis[one_ampl_ind])) + np.sum(np.log(1.-pis[zero_ampl_ind]))\n",
    "        #print(\"PRIOR=\", prior, theta,flush = True)\n",
    "        return prior\n",
    "    else:\n",
    "        #print(\"         PRIOR 0 --- out of bounds\", d[1], theta[d[1]], bounds[d[1]], flush = True)\n",
    "        return -np.inf\n",
    "\n",
    "def proposal_distribution_normal(x0, hps, obj):\n",
    "    cov = obj.prop_args[\"prop_Sigma\"]\n",
    "    #print(cov)\n",
    "    proposal_hps = np.zeros((len(x0)))\n",
    "    proposal_hps = np.random.multivariate_normal(\n",
    "        mean = x0, cov = cov, size = 1).reshape(len(x0))\n",
    "    return proposal_hps\n",
    "\n",
    "from scipy.stats import binom\n",
    "def proposal_distribution_binom(x0, hps, obj):\n",
    "    #pi are the probabilities that the corresponding amplitudes are 1.\n",
    "    pi = np.zeros((100)) + 0.4\n",
    "    amplitudes = np.asarray(binom.rvs(1, pi, size=len(x0)))\n",
    "    zero_ampl_ind = np.where(amplitudes == 0.)[0]\n",
    "    #print(len(zero_ampl_ind), \"of 100 are 0\")\n",
    "    return amplitudes\n",
    "\n",
    "\n",
    "def proposal_distribution_beta(x0, hps, obj):\n",
    "    amplitudes = hps[307:407]\n",
    "    proposal_hps = beta.rvs(1. + amplitudes, 2. - amplitudes, size=100)\n",
    "    return proposal_hps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e469c1a2-051d-48d9-99af-9d32c1c5851d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from gpcam.gp_mcmc import gpMCMC\n",
    "from gpcam.gp_mcmc import ProposalDistribution\n",
    "def func(hps, args):\n",
    "    np.save(\"last_hps_backup\", hps)\n",
    "    result = my_gp2S.log_likelihood(hyperparameters=hps)\n",
    "    print(\"f(x): \", result)\n",
    "    return result\n",
    "\n",
    "def write_results(obj): np.save(\"current_trace\", obj.trace)\n",
    "\n",
    "\n",
    "#lengthscale_ind = [i for i in range(0,80)]\n",
    "core_ind = [i for i in range(0,7)] + [407, 408, 409]\n",
    "bump_pos_ind = [i for i in range(7,207)]\n",
    "radii_hps_ind  =  [i for i in range(207,307)]\n",
    "binom_ampl_hps_ind  =  [i for i in range(307,407)]\n",
    "\n",
    "\n",
    "\n",
    "#initial proposal Sigma\n",
    "axis_std_core = (hps_bounds[core_ind, 1] - hps_bounds[core_ind,0])/100.\n",
    "init_s_ls = np.diag(axis_std_core**2)\n",
    "\n",
    "axis_std_bump_pos = (hps_bounds[bump_pos_ind, 1] - hps_bounds[bump_pos_ind, 0])/100.\n",
    "init_s_bump_pos = np.diag(axis_std_bump_pos**2)\n",
    "\n",
    "axis_std_bump_rad = (hps_bounds[radii_hps_ind, 1] - hps_bounds[radii_hps_ind, 0])/100.\n",
    "init_s_bump_rad = np.diag(axis_std_bump_rad**2)\n",
    "\n",
    "\n",
    "#normal proposal distr. for core, Wendland, bump positions, and radii\n",
    "pd1 = ProposalDistribution(core_ind, proposal_dist = proposal_distribution_normal,\n",
    "                        init_prop_Sigma = init_s_ls, adapt_callable=\"normal\", K=10, ID = \"core\")\n",
    "pd2 = ProposalDistribution(bump_pos_ind, proposal_dist = proposal_distribution_normal,\n",
    "                        init_prop_Sigma = init_s_bump_pos, adapt_callable=\"normal\", K=10, ID = \"core\")\n",
    "pd3 = ProposalDistribution(radii_hps_ind, proposal_dist = proposal_distribution_normal,\n",
    "                        init_prop_Sigma = init_s_bump_rad, adapt_callable=\"normal\", K=10, ID = \"core\")\n",
    "pd4 = ProposalDistribution(binom_ampl_hps_ind, proposal_dist = proposal_distribution_binom, adapt_callable=None, ID = \"core\")\n",
    "\n",
    "\n",
    "my_mcmc = gpMCMC(func, prior_function, [pd1, pd2, pd3, pd4], args={\"bounds\":hps_bounds})\n",
    "mcmc_result = my_mcmc.run_mcmc(x0=init_hps, info=True, n_updates=10, run_in_every_iteration=write_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd712529-206d-4d74-a62a-f2637dd6b1dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import sparse\n",
    "hps1 = np.median(mcmc_result[\"x\"][7:], axis=0)\n",
    "print(\"FINAL HYPERPARAMETERS\")\n",
    "\n",
    "my_gp2S.set_hyperparameters(hps1)\n",
    "\n",
    "sparsity = float(my_gp2S.prior.K.nnz) / float(my_gp2S.prior.K.shape[0]**2) #would be 0 for full sparsity i.e. all zeros\n",
    "sparse.save_npz(\"sparse_matrix1Mill\",my_gp2S.prior.K)\n",
    "\n",
    "print(\"sparsity: \", sparsity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16b2a7c4-e2bd-479e-a79d-4fd2bfeb9d5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "rmse = my_gp2S.rmse(x_test, y_test)\n",
    "print(\"rmse: \", rmse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb8003a5-b9d4-492d-96e7-cb3c479aff65",
   "metadata": {},
   "outputs": [],
   "source": [
    "crps = my_gp2S.crps(x_test, y_test)\n",
    "print(\"crps: \", crps)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "topoenv",
   "language": "python",
   "name": "tomovenv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
