{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.024409,
     "end_time": "2020-02-03T01:43:56.828789",
     "exception": false,
     "start_time": "2020-02-03T01:43:56.804380",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# MLP Clustering Significance Stability over Multiple Trainings - Different #Clusters\n",
    "\n",
    "### Before Running on Perceptron\n",
    "\n",
    "Go to `src/train_nn.py` to line 46, and change the path `./models/` to `/scratch/<your username>/models`\n",
    "\n",
    "Do not forget to undo this change!\n",
    "\n",
    "DO NOT COMMIT THIS CHANGE TO GITHUB!!!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "N_CLUSTERS = 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.048552,
     "end_time": "2020-02-03T01:43:56.913093",
     "exception": false,
     "start_time": "2020-02-03T01:43:56.864541",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 3.419212,
     "end_time": "2020-02-03T01:44:00.354270",
     "exception": false,
     "start_time": "2020-02-03T01:43:56.935058",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "import random\n",
    "import subprocess\n",
    "from multiprocessing import Pool\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pylab as plt\n",
    "import pandas as pd\n",
    "from tqdm import tqdm, trange\n",
    "from IPython import display\n",
    "\n",
    "from src.visualization import draw_mlp_clustering_report, run_double_spectral_cluster, run_spectral_cluster\n",
    "from src.utils import get_weights_paths, build_clustering_results, cohen_d_stats\n",
    "from src.experiment_tagging import get_model_path, MODEL_TAG_LOOKUP\n",
    "# from src.spectral_cluster_model import SHUFFLE_METHODS\n",
    "SHUFFLE_METHODS = ['layer', 'layer_nonzero']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.045065,
     "end_time": "2020-02-03T01:44:00.423583",
     "exception": false,
     "start_time": "2020-02-03T01:44:00.378518",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "N_TRAINED_MODELS = 10\n",
    "\n",
    "# we ar doing 320 experiments (=32 models x 10 times),\n",
    "# and let's have expectation of false-positive <= 1\n",
    "# so we can set n_shuffles to 320, and then p-values < 1/401\n",
    "# if we make it too large, let see, 1000 shuffles...\n",
    "# ... we might reduce the statistical power (given H1, the probability to reject it incorrectly)!\n",
    "N_SHUFFLES = 320\n",
    "\n",
    "N_GPUS = 4\n",
    "\n",
    "TAGS_COMMANDS = {\n",
    "    'MNIST': 'python -m src.train_nn with mlp_config dataset_name=mnist',\n",
    "    'CIFAR10': 'python -m src.train_nn with mlp_config dataset_name=cifar10 pruning_epochs=40',\n",
    "#    'LINE': 'python -m src.train_nn with mlp_config dataset_name=line',\n",
    "    'FASHION': 'python -m src.train_nn with mlp_config dataset_name=fashion',\n",
    "    'MNIST+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist with_dropout=True',\n",
    "    'CIFAR10+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=cifar10 epochs=100 pruning_epochs=40 with_dropout=True dropout_rate=0.2',\n",
    "#    'LINE+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line with_dropout=True',\n",
    "    'FASHION+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=fashion with_dropout=True',\n",
    "#    'LINE-MNIST': 'python -m src.train_nn with mlp_config dataset_name=line-mnist',\n",
    "#    'LINE-CIFAR10': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10 epochs=30 pruning_epochs=40',\n",
    "#    'MNIST-CIFAR10': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10 epochs=30 pruning_epochs=40',\n",
    "#    'LINE-MNIST-SEPARATED': 'python -m src.train_nn with mlp_config dataset_name=line-mnist-separated',\n",
    "#    'LINE-CIFAR10-SEPARATED': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10-separated epochs=30 pruning_epochs=40',\n",
    "#    'MNIST-CIFAR10-SEPARATED': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10-separated epochs=30 pruning_epochs=40',\n",
    "#    'LINE-MNIST+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-mnist with_dropout=True',\n",
    "#    'LINE-CIFAR10+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10 epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',\n",
    "#    'MNIST-CIFAR10+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10 epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',\n",
    "#    'LINE-MNIST-SEPARATED+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-mnist-separated with_dropout=True',\n",
    "#    'LINE-CIFAR10-SEPARATED+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10-separated epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',\n",
    "#    'MNIST-CIFAR10-SEPARATED+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10-separated epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',\n",
    "#    'RANDOM': 'python -m src.train_nn with mlp_config dataset_name=random',\n",
    "#    'RANDOM+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=random with_dropout=True',\n",
    "#    'MNIST-x1.5-EPOCHS': 'python -m src.train_nn with mlp_config dataset_name=mnist epochs=30',\n",
    "#    'MNIST-x1.5-EPOCHS+DROPOUT':'python -m src.train_nn with mlp_config dataset_name=mnist epochs=30 with_dropout=True',\n",
    "#    'MNIST-x2-EPOCHS':'python -m src.train_nn with mlp_config dataset_name=mnist epochs=40',\n",
    "#    'MNIST-x2-EPOCHS+DROPOUT':'python -m src.train_nn with mlp_config dataset_name=mnist epochs=40 with_dropout=True',\n",
    "#    'MNIST-x10-EPOCHS': 'python -m src.train_nn with mlp_config dataset_name=mnist epochs=200',\n",
    "#    'MNIST-x10-EPOCHS+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist epochs=200 with_dropout=True',\n",
    "#    'RANDOM-x50-EPOCHS': 'python -m src.train_nn with mlp_config dataset_name=random epochs=1000',\n",
    "#    'RANDOM-x50-EPOCHS+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=random epochs=1000 with_dropout=True',\n",
    "#    'RANDOM-OVERFITTING': 'python -m src.train_nn with mlp_config dataset_name=random epochs=100 pruning_epochs=100 shuffle=False n_train=3000',\n",
    "#    'RANDOM-OVERFITTING+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=random epochs=100 pruning_epochs=100 shuffle=False n_train=3000 with_dropout=True'\n",
    "}\n",
    "DATASETS_TAGS = [command.split()[5][13:] for command in TAGS_COMMANDS.values()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.032854,
     "end_time": "2020-02-03T01:44:00.477690",
     "exception": false,
     "start_time": "2020-02-03T01:44:00.444836",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "if False:\n",
    "    def train_model_multiply(command, n_models_per_command, gpu_id=None):    \n",
    "        for _ in range(n_models_per_command):\n",
    "            actual_command = f'cd .. && {command}'#' > /dev/null 2>&1'\n",
    "            print(actual_command)\n",
    "            subprocess.run(actual_command, shell=True,\n",
    "                           env={'CUDA_VISIBLE_DEVICES': str(gpu_id)})\n",
    "\n",
    "\n",
    "    def train_models(commands, n_models_per_command, gpu_id=None):\n",
    "        for command in commands:\n",
    "            train_model_multiply(command, n_models_per_command, gpu_id)\n",
    "\n",
    "            commands = list(TAGS_COMMANDS.values())\n",
    "    random.shuffle(commands)\n",
    "\n",
    "    assert len(commands) % N_GPUS == 0\n",
    "    n_commands_per_gpu = len(commands) // N_GPUS\n",
    "\n",
    "    with Pool(N_GPUS) as p:\n",
    "\n",
    "        p.starmap(train_models,\n",
    "                 [(commands[gpu_id*n_commands_per_gpu : (gpu_id+1)*n_commands_per_gpu], N_TRAINED_MODELS, gpu_id)\n",
    "                 for gpu_id in range(N_GPUS)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 19954.263436,
     "end_time": "2020-02-03T07:16:34.805488",
     "exception": false,
     "start_time": "2020-02-03T01:44:00.542052",
     "status": "completed"
    },
    "scrolled": false,
    "tags": []
   },
   "outputs": [],
   "source": [
    "results = {}\n",
    "\n",
    "for shuffle_method in SHUFFLE_METHODS:\n",
    "    results[shuffle_method] = {}\n",
    "    \n",
    "    for (model_tag, _), dataset_tag in zip(TAGS_COMMANDS.items(), tqdm(DATASETS_TAGS)):\n",
    "        model_paths = get_model_path(model_tag, filter_='all')\n",
    "\n",
    "\n",
    "        results[shuffle_method][model_tag] = [run_spectral_cluster(path / f'{dataset_tag}-mlp-pruned-weights.pckl',\n",
    "                                               n_samples=N_SHUFFLES,\n",
    "                                               shuffle_method=shuffle_method,\n",
    "                                               n_clusters=N_CLUSTERS)\n",
    "            for path in tqdm(model_paths[-N_TRAINED_MODELS:])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_spectral_cluster?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_all_models_per_shuffle_method_df(single_shuffle_method_results):\n",
    "    model_dfs = []\n",
    "\n",
    "    for model_tag, model_results in single_shuffle_method_results.items():\n",
    "        _, metrics  = zip(*model_results)\n",
    "        model_dfs.append(pd.DataFrame(metrics).assign(model=model_tag))\n",
    "\n",
    "    df = pd.concat(model_dfs, ignore_index=True)\n",
    "\n",
    "    df['is_sig'] = np.isclose(df['percentile'], 1 / (N_SHUFFLES + 1))\n",
    "    \n",
    "    return df\n",
    "\n",
    "layer_df = build_all_models_per_shuffle_method_df(results['layer'])\n",
    "layer_nonzero_df = build_all_models_per_shuffle_method_df(results['layer_nonzero'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "layer_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "layer_nonzero_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_DATA_COLUMNS = ['model',\n",
    "                      'train_acc', 'train_loss', 'test_acc', 'test_loss',\n",
    "                      'ncut', 'ave_in_out',\n",
    "                      'n_samples']\n",
    "\n",
    "assert layer_df[MODEL_DATA_COLUMNS].equals(layer_nonzero_df[MODEL_DATA_COLUMNS])\n",
    "\n",
    "SHUFFLING_DATA_COLUMNS = ['mean', 'percentile', 'stdev', 'z_score', 'is_sig']\n",
    "\n",
    "two_shuffling_method_df = pd.merge(layer_df[SHUFFLING_DATA_COLUMNS],\n",
    "                                     layer_nonzero_df[SHUFFLING_DATA_COLUMNS],\n",
    "                                     left_index=True, right_index=True,\n",
    "                                     suffixes=('_layer', '_layer_nonzero'))\n",
    "\n",
    "assert layer_df[MODEL_DATA_COLUMNS].equals(layer_nonzero_df[MODEL_DATA_COLUMNS])\n",
    "\n",
    "df = pd.merge(layer_df[MODEL_DATA_COLUMNS], two_shuffling_method_df,\n",
    "              left_index=True, right_index=True,)\n",
    "\n",
    "df['cohen_d'] = df.apply(lambda r: cohen_d_stats(r['mean_layer'], r['stdev_layer'], r['n_samples'],\n",
    "                                 r['mean_layer_nonzero'], r['stdev_layer_nonzero'], r['n_samples']),\n",
    "                                axis=1)\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df = pd.read_csv('../results/mlp-clustering-stability-two-shuffling-methods-all-samples.csv' index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.582793,
     "end_time": "2020-02-03T07:16:37.128490",
     "exception": false,
     "start_time": "2020-02-03T07:16:36.545697",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df_grpby_model = df.groupby('model')\n",
    "\n",
    "TWO_SHUFFLE_STATS_COLUMNS = [f'{stat}_{shuffle_method}'\n",
    "                             for stat in ['mean', 'stdev', 'z_score', 'percentile', 'is_sig']\n",
    "                             for shuffle_method in SHUFFLE_METHODS]\n",
    "\n",
    "statistics_df = pd.concat([(df_grpby_model[['ncut'] + TWO_SHUFFLE_STATS_COLUMNS]\n",
    "                             .agg(['mean', 'std'])),\n",
    "                            df_grpby_model.size().rename('n_models'),\n",
    "                            (df_grpby_model[['train_acc', 'test_acc']]\n",
    "                             .agg('mean'))],\n",
    "                           axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "statistics_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.583035,
     "end_time": "2020-02-03T07:16:38.268118",
     "exception": false,
     "start_time": "2020-02-03T07:16:37.685083",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# TODO: ask for a code review\n",
    "# std = np.sqrt(np.mean(x**2) - np.mean(x)**2)\n",
    "\n",
    "def calc_overall_stdev(row, suffix=''):\n",
    "    return np.sqrt(                 # 5. apply root square to get the SD from the variance\n",
    "        (np.mean(                   # 3. divide by the number of trained model\n",
    "            (row[f'stdev{suffix}']**2)       # 1. cancel the overall square root\n",
    "             + row[f'mean{suffix}']**2))     # 2. add the mean and get the sum or squares\n",
    "        - np.mean(row[f'mean{suffix}'])**2)  # 4. substrct the square of the overall mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.594153,
     "end_time": "2020-02-03T07:16:39.407408",
     "exception": false,
     "start_time": "2020-02-03T07:16:38.813255",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "SHUFFLE_METHOD_SUFFIX = ['_layer', '_layer_nonzero']\n",
    "for suffix in SHUFFLE_METHOD_SUFFIX:\n",
    "    statistics_df[f'overall_std{suffix}'] = df_grpby_model.apply(calc_overall_stdev, suffix)\n",
    "\n",
    "for suffix in SHUFFLE_METHOD_SUFFIX:\n",
    "    statistics_df[f'is_stable{suffix}'] = (statistics_df[(f'is_sig{suffix}', 'mean')] == 1)\n",
    "\n",
    "statistics_df = statistics_df.loc[TAGS_COMMANDS.keys(), :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "statistics_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.562647,
     "end_time": "2020-02-03T07:16:40.557627",
     "exception": false,
     "start_time": "2020-02-03T07:16:39.994980",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df.to_csv(f'../results/mlp-clustering-stability-two-shuffling-methods-all-samples-K-{N_CLUSTERS}.csv')\n",
    "statistics_df.to_csv(f'../results/mlp-clustering-stability-two-shuffling-methods-statistics-K-{N_CLUSTERS}.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.533611,
     "end_time": "2020-02-03T07:16:41.645976",
     "exception": false,
     "start_time": "2020-02-03T07:16:41.112365",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### Model Stability Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.61662,
     "end_time": "2020-02-03T07:16:42.844817",
     "exception": false,
     "start_time": "2020-02-03T07:16:42.228197",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "statistics_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.551861,
     "end_time": "2020-02-03T07:16:43.956798",
     "exception": false,
     "start_time": "2020-02-03T07:16:43.404937",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### Appendix - All Samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.592219,
     "end_time": "2020-02-03T07:16:45.103143",
     "exception": false,
     "start_time": "2020-02-03T07:16:44.510924",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Unpruned results without shuffling\n",
    "#### TODO: Refactor\n",
    "- [ ] Combine with the unpruned table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if False:\n",
    "    unpruned_results = {}\n",
    "\n",
    "    for (model_tag, _), dataset_tag in zip(TAGS_COMMANDS.items(), tqdm(DATASETS_TAGS)):\n",
    "        model_paths = get_model_path(model_tag, filter_='all')\n",
    "        unpruned_results[model_tag] = [run_spectral_cluster(path / f'{dataset_tag}-mlp-unpruned-weights.pckl',\n",
    "                                                   with_shuffle=False)\n",
    "                for path in tqdm(model_paths[-N_TRAINED_MODELS:])]\n",
    "\n",
    "    unpruned_model_dfs = []\n",
    "\n",
    "    for model_tag, model_results in unpruned_results.items():\n",
    "        _, metrics  = zip(*model_results)\n",
    "        unpruned_model_dfs.append(pd.DataFrame(metrics).assign(model=model_tag))\n",
    "\n",
    "    unpruned_df = pd.concat(unpruned_model_dfs, ignore_index=True)\n",
    "\n",
    "    unpruned_df_grpby_model = unpruned_df.groupby('model')\n",
    "\n",
    "\n",
    "    unpruned_statistics_df = pd.concat([(unpruned_df_grpby_model[['ncut']]\n",
    "                                 .agg(['mean', 'std'])),\n",
    "                                unpruned_df_grpby_model.size().rename('n_models'),\n",
    "                                (unpruned_df_grpby_model[['train_acc', 'test_acc']]\n",
    "                                 .agg('mean'))],\n",
    "                               axis=1)\n",
    "\n",
    "    unpruned_statistics_df = unpruned_statistics_df.loc[TAGS_COMMANDS.keys(), :]\n",
    "\n",
    "    unpruned_statistics_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# unpruned_df.to_csv(f'../results/mlp-clustering-stability-all-samples-unpruned-K-{N_CLUSTERS}.csv')\n",
    "# unpruned_statistics_df.to_csv(f'../results/mlp-clustering-stability-statistics-unpruned-K-{N_CLUSTERS}.csv')"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "papermill": {
   "duration": 19970.819594,
   "end_time": "2020-02-03T07:16:46.797933",
   "environment_variables": {},
   "exception": null,
   "input_path": "./notebooks/mlp-clustering-stability.ipynb",
   "output_path": "./notebooks/mlp-clustering-stability.ipynb",
   "parameters": {},
   "start_time": "2020-02-03T01:43:55.978339",
   "version": "1.1.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
