{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3",
   "language": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import sys\n",
    "import json\n",
    "\n",
    "# Work from the directory obtained by dropping the last 10 characters of the launch\n",
    "# directory (presumably the notebook sub-folder and its separator -- TODO confirm).\n",
    "# It is computed only once per kernel: after the chdir below, os.getcwd() already is\n",
    "# that directory, so re-running this cell would otherwise strip 10 more characters.\n",
    "if 'user_path' not in globals():\n",
    "    launch_dir = os.getcwd()\n",
    "    user_path = launch_dir[:len(launch_dir)-10]\n",
    "os.chdir(user_path)\n",
    "\n",
    "from leaspy import Leaspy, AlgorithmSettings,IndividualParameters, Data, Dataset\n",
    "\n",
    "# Have a look at this script to see how the models are trained in cross-validation.\n",
    "from leaspy.utils.resampling.Calibrate.calibration import update_b_resampling\n",
    "\n",
    "# Have a look at ComparaisonPrediction to see how the predictions are performed.\n",
    "# NOTE(review): star import -- `PersoSpecial` (and presumably `all_res`) used further down\n",
    "# are not defined anywhere else in this notebook, so they are expected to come from here.\n",
    "from leaspy.utils.posterior_analysis.ComparaisonPrediction import *"
   ]
  },
  {
   "source": [
    "This notebook is useful for launching simulations. The models used in the papers are already available and can be inspected in the notebooks Ailzheimer_cohort_results."
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "# Data"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "MMSE_TOT                   1.000000\nMES_MRI_HIPPOCAMPUS_ICV    0.956098\nMES_MRI_VENTRICLES_ICV     1.000000\nMES_CSF_ABETA42            1.000000\ndtype: float64\nMMSE_TOT                   0.000000\nMES_MRI_HIPPOCAMPUS_ICV    0.000000\nMES_MRI_VENTRICLES_ICV     0.073118\nMES_CSF_ABETA42            0.228223\ndtype: float64\n(909, 4)\n"
     ]
    }
   ],
   "source": [
    "# Locations: inputs are read from <user_path>/data, outputs are written under user_path.\n",
    "os.chdir(user_path)\n",
    "current_directory = output_directory = user_path\n",
    "input_directory = os.path.join(current_directory, 'data')\n",
    "\n",
    "# Load the cohort table with IDs kept as strings, indexed by (ID, TIME).\n",
    "cohort_csv = os.path.join(input_directory, 'Processed_Aileihmer_cohort.csv')\n",
    "df = pd.read_csv(cohort_csv, dtype={'ID':str}).set_index(['ID','TIME'])\n",
    "\n",
    "# Quick sanity check: per-column maxima, per-column minima, then the table shape.\n",
    "for summary in (df.max(), df.min(), df.shape):\n",
    "    print(summary)"
   ]
  },
  {
   "source": [
    "# Calibration on 5 folds"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "### The following cells need the data to be available in order to run."
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model to train: \"linearb\" is the Geodesics Bending model.\n",
    "leaspy_model = \"linearb\"\n",
    "source_dimension = 2\n",
    "\n",
    "\n",
    "def leaspy_factory(i):\n",
    "    \"\"\"Return a new Leaspy instance of type `leaspy_model` with `source_dimension` sources.\n",
    "\n",
    "    The argument `i` is accepted but not used.\n",
    "    \"\"\"\n",
    "    fresh_model = Leaspy(leaspy_model)\n",
    "    fresh_model.model.load_hyperparameters({'source_dimension': source_dimension})\n",
    "    return fresh_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calibration algorithm settings\n",
    "seed = 0\n",
    "n_iter = 10000\n",
    "algo_settings = AlgorithmSettings(\n",
    "    'mcmc_saem',\n",
    "    n_iter=n_iter,\n",
    "    seed=seed,\n",
    "    loss=\"MSE_diag_noise\",\n",
    "    sampler_ind=\"FastGibbs\",\n",
    "    sampler_pop=\"FastGibbs\",\n",
    ")\n",
    "\n",
    "# Personalization settings (same seed as above)\n",
    "personalize_algorithm = \"scipy_minimize\"\n",
    "n_iter_personalize = 100\n",
    "perso_settings = AlgorithmSettings(\n",
    "    personalize_algorithm,\n",
    "    seed=seed,\n",
    "    n_iter=n_iter_personalize,\n",
    ")\n",
    "\n",
    "# Meta settings\n",
    "meta_settings = {\n",
    "    \"kernel_name\": \"gaussian\",\n",
    "    \"sigma\": 0.24,\n",
    "    \"nb_compose\": 6,\n",
    "    \"nb_compose_succ\": 1,\n",
    "    'iter_in_fit': 200,\n",
    "    'iter_out_fit': 300,\n",
    "    'init_b': 'logistic',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Resampling scheme: a seeded K-fold split, repeated `n_rep` times.\n",
    "from sklearn.model_selection import RepeatedKFold\n",
    "\n",
    "n_folds, n_rep, seed = 5, 1, 0\n",
    "skf = RepeatedKFold(\n",
    "    n_splits=n_folds,\n",
    "    n_repeats=n_rep,\n",
    "    random_state=seed,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder that receives the calibration outputs: <output_directory>/<experiment_folder>/calibrate\n",
    "experiment_folder = \"Experiment_Ailzheimer_cohort\"\n",
    "\n",
    "path_exp = os.path.join(output_directory, experiment_folder)\n",
    "path_output_calibrate = os.path.join(path_exp, \"calibrate\")\n",
    "# exist_ok=True replaces the exists()/makedirs() pair: no check-then-create race,\n",
    "# and the cell can be re-run safely.\n",
    "os.makedirs(path_output_calibrate, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Launch the cross-validated calibration with the model factory, settings and folds defined above.\n",
    "update_b_resampling(\n",
    "    df,\n",
    "    leaspy_factory,\n",
    "    algo_settings,\n",
    "    perso_settings,\n",
    "    meta_settings,\n",
    "    skf,\n",
    "    path_output_calibrate,\n",
    "    n_jobs=5,\n",
    "    setup_extra={'DEBUG':True},\n",
    ")"
   ]
  },
  {
   "source": [
    "# Predictions"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "If you want to launch your predictions separately from the inspection of results, run the following cells. This takes almost no time if the random effects associated with the predictions have already been estimated."
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Experiment folder handed to the prediction calls.\n",
    "# NOTE(review): this is spelled differently from the folder used in the calibration section\n",
    "# (\"Experiment_Ailzheimer_cohort\") -- confirm that reading from a different folder is intended.\n",
    "experiment_folder = name = \"Experiment_Ailzeihmer_cohort\"\n",
    "path_exp = os.path.join(output_directory, experiment_folder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Personalization settings shared by the prediction and imputation calls\n",
    "personalize_algorithm = \"scipy_minimize\"\n",
    "n_iter_personalize, seed = 100, 0\n",
    "algo_settings_personalize = AlgorithmSettings(\n",
    "    personalize_algorithm,\n",
    "    seed=seed,\n",
    "    n_iter=n_iter_personalize,\n",
    ")\n",
    "\n",
    "# Future-visit predictions (\"pred_future\") over the 5 folds with the GB model.\n",
    "# Presumably each patient is personalized on the first visits and the last one is predicted\n",
    "# -- TODO confirm against PersoSpecial.\n",
    "# NOTE(review): `all_res` is not defined in this notebook; presumably it is provided by the\n",
    "# star import of ComparaisonPrediction -- verify before a fresh Restart & Run All.\n",
    "Pred_test_GB = PersoSpecial(\n",
    "    df,\n",
    "    path_exp,\n",
    "    \"pred_future\",\n",
    "    algo_settings_personalize,\n",
    "    all_res,\n",
    "    th=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "logistic\n",
      "logistic\n",
      "logistic\n",
      "logistic\n",
      "logistic\n"
     ]
    }
   ],
   "source": [
    "# Same future-visit predictions over the 5 folds, but with the first step of GB only\n",
    "# (kernel_sec=True, n_comp=0).\n",
    "Pred_test_DCM = PersoSpecial(\n",
    "    df,\n",
    "    path_exp,\n",
    "    \"pred_future\",\n",
    "    algo_settings_personalize,\n",
    "    all_res,\n",
    "    th=1,\n",
    "    kernel_sec=True,\n",
    "    n_comp=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imputation task (\"imputation\", imputation=True) with the GB model.\n",
    "Imp_test_GB = PersoSpecial(\n",
    "    df,\n",
    "    path_exp,\n",
    "    \"imputation\",\n",
    "    algo_settings_personalize,\n",
    "    all_res,\n",
    "    th=1,\n",
    "    imputation=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "logistic\n",
      "logistic\n",
      "logistic\n",
      "logistic\n",
      "logistic\n"
     ]
    }
   ],
   "source": [
    "# Imputation task (\"imputation\", imputation=True) with the first step of GB only\n",
    "# (kernel_sec=True, n_comp=0).\n",
    "Imp_test_DCM = PersoSpecial(\n",
    "    df,\n",
    "    path_exp,\n",
    "    \"imputation\",\n",
    "    algo_settings_personalize,\n",
    "    all_res,\n",
    "    th=1,\n",
    "    imputation=True,\n",
    "    kernel_sec=True,\n",
    "    n_comp=0,\n",
    ")"
   ]
  }
 ]
}