{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Multi-fidelity Modeling and Experimental Design (Active Learning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# General imports\n",
    "#numpy==1.24.3\n",
    "#GPy==1.10.0\n",
    "#emukit==0.4.10\n",
    "#pandas==2.0.1\n",
    "import numpy as np \n",
    "np.random.seed(20)\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.backends.backend_pdf import PdfPages\n",
    "from matplotlib import colors as mcolors\n",
    "colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)\n",
    "import sys\n",
    "import os\n",
    "sys.path.append('../utilities')\n",
    "import simulation_utils as sim\n",
    "import plotting_utils as plotting\n",
    "import drawing_utils as draw_mfsm\n",
    "import multi_fidelity_surrogate_model as mfsm\n",
    "from emukit.multi_fidelity.convert_lists_to_array import convert_x_list_to_array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "version = 'v1.6'\n",
    "file_in=f'Ge77_rates_CNP_{version}.csv'\n",
    "if not os.path.exists(f'out/{version}'):\n",
    "   os.makedirs(f'out/{version}')\n",
    "if not os.path.exists(f'in/Ge77_rates_new_samples_{version}.csv'):\n",
    "   fout = open(f'in/Ge77_rates_new_samples_{version}.csv')\n",
    "   fout.write(\"#\\n ,Sample,Mode,Radius[cm],Thickness[cm],NPanels,Theta[deg],Length[cm],Ge-77[nevents],Ge-77_CNP,Ge-77_CNP_err\")\n",
    "   fout.close()\n",
    "   \n",
    "\n",
    "# Set parameter name/x_labels -> needs to be consistent with data input file\n",
    "x_labels=['Radius[cm]','Thickness[cm]','NPanels', 'Theta[deg]', 'Length[cm]']\n",
    "x_labels_out = ['Radius [cm]','Thickness [cm]','NPanels', 'Angle [deg]', 'Length [cm]']\n",
    "y_label_cnp = 'Ge-77_CNP'\n",
    "y_err_label_cnp = 'Ge-77_CNP_err'\n",
    "y_label_sim = 'rGe77[nuc/(kg*yr)]'\n",
    "\n",
    "# Set parameter boundaries\n",
    "xmin=[0,0,0,0,0]\n",
    "xmax=[265,20,360,90,150]\n",
    "\n",
    "# Set parameter boundaries for aquisition function\n",
    "xlow=[90,2,4,0,1]\n",
    "xhigh=[250,15,360,90,150]\n",
    "\n",
    "# Assign costs\n",
    "low_fidelity_cost = 1.\n",
    "high_fidelity_cost = 2000.\n",
    "\n",
    "# Set a fixed point in space for drawings\n",
    "x_fixed = [160, 2, 40, 45, 20]\n",
    "# number of sigma for error band drawing on prediction\n",
    "factor=1.\n",
    "\n",
    "# Get LF noise from file\n",
    "#with open(f'in/{file_in}') as f:\n",
    "#    first_line = f.readline()\n",
    "#LF_noise=np.round(float(first_line.split(' +')[0].split('= ')[1]),3)\n",
    "\n",
    "# Get HF and LF data samples from file\n",
    "\n",
    "data=pd.read_csv(f'in/{file_in}')\n",
    "#data=data[[f'Mode', x_labels[0], x_labels[1], x_labels[2], x_labels[3], x_labels[4],y_label_cnp,y_err_label_cnp,y_label_sim]]\n",
    "\n",
    "LF_noise=np.mean(data.loc[data['Mode']==0.][y_err_label_cnp].to_numpy())\n",
    "HF_noise=np.mean(data.loc[data['Mode']==1.][y_err_label_cnp].to_numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "#data.plot(kind='scatter', x=x_labels[2], y=x_labels[3], alpha=0.4, s=data[x_labels[4]]*2, label='Ge-77',\n",
    "#figsize=(12, 8), c=y_label, cmap=plt.get_cmap('jet'), colorbar=True)\n",
    "#plt.legend()\n",
    "#plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "x_train_l, x_train_h, y_train_l, y_train_h = ([],[],[],[])\n",
    "row_h=data.index[data['Mode'] == 1].tolist()\n",
    "row_l=data.index[data['Mode'] == 0].tolist()\n",
    "\n",
    "x_train_hf_sim = data.loc[data['Mode']==1.][x_labels].to_numpy().tolist()\n",
    "y_train_hf_sim = data.loc[data['Mode']==1.][y_label_sim].to_numpy().tolist()\n",
    "\n",
    "x_train_hf_cnp = data.loc[data['Mode']==1.][x_labels].to_numpy().tolist()\n",
    "y_train_hf_cnp = data.loc[data['Mode']==1.][ y_label_cnp].to_numpy().tolist()\n",
    "\n",
    "x_train_lf_sim = data.loc[data['Mode']==0.][x_labels].to_numpy().tolist()\n",
    "y_train_lf_sim = data.loc[data['Mode']==0.][ y_label_sim].to_numpy().tolist()\n",
    "\n",
    "x_train_lf_cnp = data.loc[data['Mode']==0.][x_labels].to_numpy().tolist()\n",
    "y_train_lf_cnp = data.loc[data['Mode']==0.][ y_label_cnp].to_numpy().tolist()\n",
    "\n",
    "x_train_lf_cnp, x_train_hf_cnp, x_train_hf_sim, y_train_lf_cnp, y_train_hf_cnp, y_train_hf_sim = (np.atleast_2d(x_train_lf_cnp), np.atleast_2d(x_train_hf_cnp), np.atleast_2d(x_train_hf_cnp), np.atleast_2d(y_train_lf_cnp).T, np.atleast_2d(y_train_hf_cnp).T, np.atleast_2d(y_train_hf_sim).T)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mf_model = mfsm.linear_multi_fidelity_model(x_train_lf_cnp, y_train_lf_cnp, LF_noise, x_train_hf_cnp, y_train_hf_cnp, HF_noise, x_train_hf_sim, y_train_hf_sim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "leg_label = []\n",
    "ncol=1\n",
    "nrow=int(np.ceil(len(x_labels)/ncol))\n",
    "fig1,ax1  = plt.subplots(2,3, figsize=(15, 5),layout=\"constrained\")\n",
    "fig2,ax2  = plt.subplots(nrow,ncol, figsize=(5, 12),layout=\"constrained\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "add_new_sample=True\n",
    "sample=0\n",
    "total_samples=20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "while ( sample < total_samples and sample <= max(mfsm.get_num_new_samples(version)[0],mfsm.get_num_new_samples(version)[1])+1 ):\n",
    "    print('Sample #', sample)\n",
    "    # add new data point to training data and update model with new training data\n",
    "    x_new_data=np.array([])\n",
    "    y_new_data_h=np.array([])\n",
    "    if sample > 0:\n",
    "        x_train_lf_cnp, y_train_lf_cnp, x_train_hf_cnp, y_train_hf_cnp, x_train_hf_sim, y_train_hf_sim, mf_model = mfsm.add_samples(x_train_lf_cnp, y_train_lf_cnp, x_train_hf_cnp, y_train_hf_cnp, x_train_hf_sim, y_train_hf_sim, mf_model, x_labels, [y_label_cnp, y_label_sim] ,sample-1, version)\n",
    "\n",
    "    # run the model drawing\n",
    "    draw_mfsm.draw_model(mf_model, xmin, xmax, x_labels_out, factor, version)#\n",
    "    fig1 = draw_mfsm.draw_model_updated(fig1, mf_model, xmin, xmax, x_labels_out, factor, version, x_fixed=x_fixed)\n",
    "    fig1.show()\n",
    "    fig1.savefig(\"update.png\")\n",
    "\n",
    "    # find the next data point\n",
    "    x_next_sample, us_acquisition = mfsm.max_acquisition_integrated_variance_reduction(mf_model, xlow, xhigh, x_labels)\n",
    "    print(f'next suggested point to simulated is at: {x_next_sample}')\n",
    "    sim.print_geant4_macro(x_next_sample[0],f\"n{sample}\",mode=\"LF\",version=version)\n",
    "    sim.print_geant4_macro(x_next_sample[0],f\"n{sample}\",mode=\"HF\",version=version)\n",
    "    plotting.draw_moderator_config(x_next_sample[0][0],x_next_sample[0][1],x_next_sample[0][2],x_next_sample[0][3],x_next_sample[0][4])\n",
    "    fig2 = draw_mfsm.draw_acquisition_func(fig2, us_acquisition, xlow, xhigh, x_labels_out, np.array(x_next_sample), version, x_fixed=x_fixed)\n",
    "    xi =x_next_sample[0,:-1]\n",
    "\n",
    "    sample+=1\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "draw_mfsm.draw_model_acquisition_func(fig1,fig2,x_labels,version)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def model_validation(mf_model, file_in, x_labels, y_label, version):\n",
    "        data=pd.read_csv(file_in)\n",
    "        #data=data[[f'Mode', x_labels[0], x_labels[1], x_labels[2], x_labels[3], x_labels[4],y_label]]\n",
    "\n",
    "        x_train_hf_sim = data.loc[data['Mode']==1.][x_labels].to_numpy().tolist()\n",
    "        y_train_hf_sim = data.loc[data['Mode']==1.][y_label].to_numpy().tolist()\n",
    "        x_train_hf_sim, y_train_hf_sim = (np.atleast_2d(x_train_hf_sim), np.atleast_2d(y_train_hf_sim).T)\n",
    "\n",
    "        counter_1sigma = 0\n",
    "        counter_2sigma = 0\n",
    "        counter_3sigma = 0\n",
    "\n",
    "        mfsm_model_mean = np.empty(shape=[0, 0])\n",
    "        mfsm_model_std = np.empty(shape=[0, 0])\n",
    "        hf_data=[]\n",
    "        x=[]\n",
    "        for i in range(len(x_train_hf_sim)):\n",
    "\n",
    "                SPLIT = 1\n",
    "                x_plot = (np.atleast_2d(x_train_hf_sim[i]))\n",
    "                X_plot = convert_x_list_to_array([x_plot , x_plot, x_plot])\n",
    "                hhf_mean_mf_model, hhf_var_mf_model = mf_model.predict(X_plot[2*SPLIT:])\n",
    "                hhf_std_mf_model = np.sqrt(hhf_var_mf_model)\n",
    "\n",
    "                hf_data.append(y_train_hf_sim[i])\n",
    "                x.append(i)\n",
    "                mfsm_model_mean=np.append(mfsm_model_mean,hhf_mean_mf_model[0,0])\n",
    "                mfsm_model_std=np.append(mfsm_model_std,hhf_std_mf_model[0,0])\n",
    "                if (y_train_hf_sim[i] < hhf_mean_mf_model+hhf_std_mf_model) and (y_train_hf_sim[i] > hhf_mean_mf_model-hhf_std_mf_model):\n",
    "                        counter_1sigma += 1\n",
    "                if (y_train_hf_sim[i] < hhf_mean_mf_model+2*hhf_std_mf_model) and (y_train_hf_sim[i] > hhf_mean_mf_model-2*hhf_std_mf_model):\n",
    "                        counter_2sigma += 1\n",
    "                if (y_train_hf_sim[i] < hhf_mean_mf_model+3*hhf_std_mf_model) and (y_train_hf_sim[i] > hhf_mean_mf_model-3*hhf_std_mf_model):\n",
    "                        counter_3sigma += 1\n",
    "\n",
    "        print(\"1 sigma: \", counter_1sigma/len(hf_data)*100.,\" %\" )\n",
    "        print(\"2 sigma: \", counter_2sigma/len(hf_data)*100.,\" %\" )\n",
    "        print(\"3 sigma: \", counter_3sigma/len(hf_data)*100.,\" %\" )\n",
    "\n",
    "        fig = plt.subplots(figsize=(12, 2.5))\n",
    "        #plt.bar(x=np.arange(len(mfsm_model_mean)), height=mfsm_model_mean, color=\"lightgray\", label='RESuM')\n",
    "        plt.fill_between(x=np.arange(len(mfsm_model_mean)), y1=mfsm_model_mean-3*mfsm_model_std, y2=mfsm_model_mean+3*mfsm_model_std, color=\"coral\",alpha=0.2, label=r'$\\pm 3\\sigma$')\n",
    "        plt.fill_between(x=np.arange(len(mfsm_model_mean)), y1=mfsm_model_mean-2*mfsm_model_std, y2=mfsm_model_mean+2*mfsm_model_std, color=\"yellow\",alpha=0.2, label=r'$\\pm 2\\sigma$')\n",
    "        plt.fill_between(x=np.arange(len(mfsm_model_mean)), y1=mfsm_model_mean-mfsm_model_std, y2=mfsm_model_mean+mfsm_model_std, color=\"green\",alpha=0.2, label=r'RESuM $\\pm 1\\sigma$')\n",
    "        plt.xlabel('HF Simulation Trial Number')\n",
    "        plt.ylim(0.,0.55)\n",
    "        plt.ylabel(r'$y_{raw}$')\n",
    "        plt.plot(x[:],hf_data[:],'.',color=\"black\", label=\"HF Validation Data\")\n",
    "        handles, labels = plt.gca().get_legend_handles_labels()\n",
    "        order = [3,2,1,0]\n",
    "        plt.legend([handles[idx] for idx in order],[labels[idx] for idx in order],loc=9, bbox_to_anchor=(0.665,1.),ncol=5)\n",
    "        plt.savefig(f'out/{version}/model-validation_{version}.pdf')\n",
    "        return fig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "while ( sample < total_samples and sample <= max(mfsm.get_num_new_samples(version)[0],mfsm.get_num_new_samples(version)[1])+1 ):\n",
    "    print('Sample #', sample)\n",
    "    # add new data point to training data and update model with new training data\n",
    "    x_new_data=np.array([])\n",
    "    y_new_data_h=np.array([])\n",
    "    if sample > 0:\n",
    "        x_train_lf_cnp, y_train_lf_cnp, x_train_hf_cnp, y_train_hf_cnp, x_train_hf_sim, y_train_hf_sim, mf_model = mfsm.add_samples(x_train_lf_cnp, y_train_lf_cnp, x_train_hf_cnp, y_train_hf_cnp, x_train_hf_sim, y_train_hf_sim, mf_model, x_labels, [y_label_cnp, y_label_sim] ,sample-1, version)\n",
    "\n",
    "    # run the model drawing\n",
    "    draw_mfsm.draw_model(mf_model, xmin, xmax, x_labels_out, factor, version)#\n",
    "    fig1 = draw_mfsm.draw_model_updated(fig1, mf_model, xmin, xmax, x_labels_out, factor, version, x_fixed=x_fixed)\n",
    "    fig1.show()\n",
    "    fig1.savefig(\"update.png\")\n",
    "\n",
    "    # find the next data point\n",
    "    x_next_sample, us_acquisition = mfsm.max_acquisition_integrated_variance_reduction(mf_model, xlow, xhigh, x_labels)\n",
    "    print(f'next suggested point to simulated is at: {x_next_sample}')\n",
    "    sim.print_geant4_macro(x_next_sample[0],f\"n{sample}\",mode=\"LF\",version=version)\n",
    "    sim.print_geant4_macro(x_next_sample[0],f\"n{sample}\",mode=\"HF\",version=version)\n",
    "    plotting.draw_moderator_config(x_next_sample[0][0],x_next_sample[0][1],x_next_sample[0][2],x_next_sample[0][3],x_next_sample[0][4])\n",
    "    fig2 = draw_mfsm.draw_acquisition_func(fig2, us_acquisition, xlow, xhigh, x_labels_out, np.array(x_next_sample), version, x_fixed=x_fixed)\n",
    "    xi =x_next_sample[0,:-1]\n",
    "\n",
    "    sample+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = model_validation(mf_model,\"in/hf_validation_data_v1.2.csv\",x_labels, y_label_sim, version)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_fmin = [150,10,360,0,3]\n",
    "f_min = 1.\n",
    "dx_fmin=[]\n",
    "dy_fmin=[]\n",
    "dsigma_fmin=[]\n",
    "for i in range(50):\n",
    "    x_fmin_tmp, f_min_tmp = mfsm.get_min_constrained(mf_model, xlow, xhigh, x_labels)\n",
    "    #plotting.draw_moderator_configuration(x_fmin_tmp)\n",
    "    dx_fmin.append(x_fmin_tmp)\n",
    "    dy_fmin.append(f_min_tmp)\n",
    "    dsigma_fmin.append(mfsm.evaluate_model_uncertainty(x_fmin_tmp,mf_model,2))\n",
    "    print(f\"minimum found at {x_fmin_tmp} with f = {f_min_tmp} {mfsm.evaluate_model(x_fmin_tmp,mf_model,2)} +- {mfsm.evaluate_model_uncertainty(x_fmin_tmp,mf_model,2)} and a reduction of {mfsm.evaluate_model([0.,0.,0.,0.,0.],mf_model,2)/f_min_tmp}\")\n",
    "\n",
    "    if f_min_tmp <= f_min:\n",
    "        x_fmin = x_fmin_tmp\n",
    "        f_min = f_min_tmp\n",
    "\n",
    "    itmp=201+i\n",
    "    sim.print_geant4_macro(x_fmin_tmp,f\"{itmp}\",mode=\"LF\",version=f\"{version}\")\n",
    "\n",
    "\n",
    "print(f\"minimum found at {x_fmin} with f = {f_min} and a reduction of {mfsm.evaluate_model([0.,0.,0.,0.,0.], mf_model,2)/f_min}\")\n",
    "print(\"high fidelity\", mfsm.evaluate_model(x_fmin, mf_model,2), mfsm.evaluate_model([x_fmin[0],plotting.get_outer_radius(x_fmin)-plotting.get_inner_radius(x_fmin),360,0.,4.],mf_model,2))\n",
    "print(\"medium fidelity\", mfsm.evaluate_model(x_fmin, mf_model,1), mfsm.evaluate_model([x_fmin[0],plotting.get_outer_radius(x_fmin)-plotting.get_inner_radius(x_fmin),360,0.,10.],mf_model,1))\n",
    "plotting.draw_moderator_configuration(x_fmin)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "legend",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
