{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Multi-Fidelity Deep Gaussian process benchmark\n",
    "\n",
    "This notebook replicates the benchmark experiments from the paper:\n",
    "\n",
    "[Deep Gaussian Processes for Multi-fidelity Modeling (Kurt Cutajar, Mark Pullin, Andreas Damianou, Neil Lawrence, Javier González)](https://arxiv.org/abs/1903.07320)\n",
    "\n",
    "Note that the code for one of the benchmark models in the paper, \"Deep Multi-fidelity Gaussian process\", is not publically available and so does not appear in this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from prettytable import PrettyTable\n",
    "import numpy as np\n",
    "import scipy.stats\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "import emukit.examples.multi_fidelity_dgp\n",
    "\n",
    "from emukit.examples.multi_fidelity_dgp.baseline_model_wrappers import LinearAutoRegressiveModel, NonLinearAutoRegressiveModel, HighFidelityGp\n",
    "\n",
    "from emukit.core import ContinuousParameter, ParameterSpace\n",
    "from emukit.core.initial_designs import LatinDesign\n",
    "from emukit.examples.multi_fidelity_dgp.multi_fidelity_deep_gp import MultiFidelityDeepGP\n",
    "\n",
    "from emukit.test_functions.multi_fidelity import (multi_fidelity_borehole_function, multi_fidelity_branin_function,\n",
    "                                                  multi_fidelity_park_function, multi_fidelity_hartmann_3d,\n",
    "                                                  multi_fidelity_currin_function)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Parameters for different benchmark functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import namedtuple\n",
    "\n",
    "Function = namedtuple('Function', ['name', 'y_scale', 'noise_level', 'do_x_scaling', 'num_data', 'fcn'])\n",
    "\n",
    "borehole = Function(name='borehole', y_scale=100, noise_level=[0.05, 0.1], do_x_scaling=True, num_data=[60, 5], \n",
    "                    fcn=multi_fidelity_borehole_function)\n",
    "branin = Function(name='branin', y_scale=1, noise_level=[0., 0., 0.], do_x_scaling=False, num_data=[80, 30, 10], \n",
    "                    fcn=multi_fidelity_branin_function)\n",
    "currin = Function(name='currin', y_scale=1, noise_level=[0., 0.], do_x_scaling=False, num_data=[12, 5], \n",
    "                    fcn=multi_fidelity_currin_function)\n",
    "park = Function(name='park', y_scale=1, noise_level=[0., 0.], do_x_scaling=False, num_data=[30, 5], \n",
    "                    fcn=multi_fidelity_park_function)\n",
    "hartmann_3d = Function(name='hartmann', y_scale=100, noise_level=[0., 0., 0.], do_x_scaling=False, num_data=[80, 40, 20], \n",
    "                    fcn=multi_fidelity_hartmann_3d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to repeat test across different random seeds.\n",
    "\n",
    "def do_benchmark(fcn_tuple):\n",
    "    metrics = dict()\n",
    "\n",
    "    # Some random seeds to use\n",
    "    seeds = [123, 184, 202, 289, 732]\n",
    "\n",
    "    for i, seed in enumerate(seeds):\n",
    "        run_name = str(seed) + str(fcn_tuple.num_data)\n",
    "        metrics[run_name] = test_function(fcn_tuple, seed)\n",
    "        print('After ' + str(i+1) + ' runs of ' + fcn_tuple.name)\n",
    "        print_metrics(metrics)\n",
    "\n",
    "    return metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print metrics as table \n",
    "def print_metrics(metrics):\n",
    "    model_names = list(list(metrics.values())[0].keys())\n",
    "    metric_names = ['r2', 'mnll', 'rmse']\n",
    "    table = PrettyTable(['model'] + metric_names)\n",
    "\n",
    "    for name in model_names:\n",
    "        mean = []\n",
    "        for metric_name in metric_names:\n",
    "            mean.append(np.mean([metric[name][metric_name] for metric in metrics.values()]))\n",
    "        table.add_row([name] + mean)\n",
    "\n",
    "    print(table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_function(fcn, seed):\n",
    "    np.random.seed(seed)\n",
    "\n",
    "    x_test, y_test, X, Y = generate_data(fcn, 1000)\n",
    "\n",
    "    mf_dgp_fix_lf_mean = MultiFidelityDeepGP(X, Y, n_iter=5000)\n",
    "    mf_dgp_fix_lf_mean.name = 'mf_dgp_fix_lf_mean'\n",
    "\n",
    "    models = [HighFidelityGp(X, Y), LinearAutoRegressiveModel(X, Y), NonLinearAutoRegressiveModel(X, Y), mf_dgp_fix_lf_mean]\n",
    "    return benchmark_models(models, x_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def benchmark_models(models, x_test, y_test):\n",
    "    metrics = dict()\n",
    "    for model in models:\n",
    "        model.optimize()\n",
    "        y_mean, y_var = model.predict(x_test)\n",
    "        metrics[model.name] = calculate_metrics(y_test, y_mean, y_var)\n",
    "        print('+ ######################## +')\n",
    "        print(model.name, 'r2', metrics[model.name]['r2'])\n",
    "        print('+ ######################## + ')\n",
    "    return metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_data(fcn_tuple, n_test_points):\n",
    "    \"\"\"\n",
    "    Generates train and test data for\n",
    "    \"\"\"\n",
    "    \n",
    "    # A different definition of the parameter space for the branin function was used in the paper\n",
    "    if fcn_tuple.name == 'branin':\n",
    "        fcn, space = fcn_tuple.fcn()\n",
    "        new_space = ParameterSpace([ContinuousParameter('x1', -5., 0.), ContinuousParameter('x2', 10., 15.)])\n",
    "    else:\n",
    "        fcn, space = fcn_tuple.fcn()\n",
    "        new_space = ParameterSpace(space._parameters[:-1])\n",
    "    \n",
    "    do_x_scaling = fcn_tuple.do_x_scaling\n",
    "    \n",
    "    \n",
    "    # Generate training data\n",
    "    \n",
    "    latin = LatinDesign(new_space)\n",
    "    X = [latin.get_samples(n) for n in fcn_tuple.num_data]\n",
    "    \n",
    "    # Scale X if required\n",
    "    if do_x_scaling:\n",
    "        scalings = X[0].std(axis=0)\n",
    "    else:\n",
    "        scalings = np.ones(X[0].shape[1])\n",
    "        \n",
    "    for x in X:\n",
    "        x /= scalings\n",
    "    \n",
    "    Y = []\n",
    "    for i, x in enumerate(X):\n",
    "        Y.append(fcn.f[i](x * scalings))\n",
    "    \n",
    "    y_scale = fcn_tuple.y_scale\n",
    "    \n",
    "    # scale y and add noise if required\n",
    "    noise_levels = fcn_tuple.noise_level\n",
    "    if any([n > 0 for n in noise_levels]):\n",
    "        for y, std_noise in zip(Y, noise_levels):\n",
    "            y /= y_scale + std_noise * np.random.randn(y.shape[0], 1)\n",
    "    \n",
    "    # Generate test data\n",
    "    x_test = latin.get_samples(n_test_points)\n",
    "    x_test /= scalings\n",
    "    y_test = fcn.f[-1](x_test * scalings)\n",
    "    y_test /= y_scale\n",
    "\n",
    "    i_highest_fidelity = (len(fcn_tuple.num_data) - 1) * np.ones((x_test.shape[0], 1))\n",
    "    x_test = np.concatenate([x_test, i_highest_fidelity], axis=1)\n",
    "    print(X[1].shape)\n",
    "    return x_test, y_test, X, Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_metrics(y_test, y_mean_prediction, y_var_prediction):\n",
    "    # R2\n",
    "    r2 = r2_score(y_test, y_mean_prediction)\n",
    "    # RMSE\n",
    "    rmse = np.sqrt(mean_squared_error(y_test, y_mean_prediction))\n",
    "    # Test log likelihood\n",
    "    mnll = -np.sum(scipy.stats.norm.logpdf(y_test, loc=y_mean_prediction, scale=np.sqrt(y_var_prediction)))/len(y_test)\n",
    "    return {'r2': r2, 'rmse': rmse, 'mnll': mnll}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "metrics = []\n",
    "metrics.append(do_benchmark(branin))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
