{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b69ad6d8",
   "metadata": {
    "papermill": {
     "duration": 1.320243,
     "end_time": "2023-03-11T01:28:09.700990",
     "exception": false,
     "start_time": "2023-03-11T01:28:08.380747",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import hashlib\n",
    "import os\n",
    "from typing import Optional\n",
    "\n",
    "import matplotlib.dates as mdates\n",
    "import matplotlib_inline\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "sns.set(style=\"whitegrid\")\n",
    "matplotlib_inline.backend_inline.set_matplotlib_formats(\"svg\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9aa4725",
   "metadata": {
    "papermill": {
     "duration": 0.003665,
     "end_time": "2023-03-11T01:28:09.710157",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.706492",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Read in data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d6bbe16",
   "metadata": {
    "papermill": {
     "duration": 0.009798,
     "end_time": "2023-03-11T01:28:09.723176",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.713378",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# The data directory is a sibling of the current working directory.\n",
    "parent_dir = os.path.dirname(os.getcwd())\n",
    "data_dir = os.path.join(parent_dir, \"tutorial_performance_data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27fc1ed0",
   "metadata": {
    "papermill": {
     "duration": 0.018815,
     "end_time": "2023-03-11T01:28:09.745289",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.726474",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Load the raw measurements and derive the helper columns in a single pass.\n",
    "df = pd.read_csv(os.path.join(data_dir, \"all_data.csv\")).assign(\n",
    "    # Truncated tutorial name, used as the legend label in per-tutorial plots.\n",
    "    short_name=lambda d: d[\"name\"].map(lambda name: name[:21]),\n",
    "    memory_increase=lambda d: d[\"max_mem\"] - d[\"start_mem\"],\n",
    "    datetime=lambda d: pd.to_datetime(d[\"datetime\"]),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efdae2c8",
   "metadata": {
    "papermill": {
     "duration": 0.034196,
     "end_time": "2023-03-11T01:28:09.783748",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.749552",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Mean of the success flag, i.e. the share of rows whose tutorial ran successfully.\n",
    "df[\"ran_successfully\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ea8893e",
   "metadata": {
    "papermill": {
     "duration": 0.068403,
     "end_time": "2023-03-11T01:28:09.855503",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.787100",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Tutorial name for rows that ran successfully, empty string otherwise.\n",
    "df[\"name_if_ran\"] = df[\"name\"].where(df[\"ran_successfully\"].astype(bool), \"\")\n",
    "\n",
    "\n",
    "def _stable_hash(names: pd.Series) -> int:\n",
    "    \"\"\"Return a reproducible integer digest of the sorted, concatenated names.\n",
    "\n",
    "    The built-in ``hash`` is salted per interpreter process for strings, so it\n",
    "    would give the same set of tutorials a different id on every execution.\n",
    "    \"\"\"\n",
    "    digest = hashlib.sha256(\"\".join(names.sort_values().values).encode(\"utf-8\"))\n",
    "    # 15 hex digits = 60 bits, which fits in a signed 64-bit integer column.\n",
    "    return int(digest.hexdigest()[:15], 16)\n",
    "\n",
    "\n",
    "# Identifies runs where the same set of tutorials completed\n",
    "# Used to make sure we don't compare aggregated data from\n",
    "# runs where different tutorials were run\n",
    "df[\"set_of_tutorials_run_hash\"] = df.groupby(\"fname\")[\"name_if_ran\"].transform(\n",
    "    _stable_hash\n",
    ")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d69f9341",
   "metadata": {
    "papermill": {
     "duration": 0.004354,
     "end_time": "2023-03-11T01:28:09.864331",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.859977",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Plot performance metrics over time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "147e0d13",
   "metadata": {
    "papermill": {
     "duration": 0.029072,
     "end_time": "2023-03-11T01:28:09.897575",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.868503",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# One row per (mode, set of completed tutorials): the number of distinct runs\n",
    "# that produced it and the latest timestamp among them.\n",
    "per_hash_data = (\n",
    "    df.groupby([\"mode\", \"set_of_tutorials_run_hash\"])\n",
    "    .agg(\n",
    "        n_runs=(\"fname\", lambda fnames: fnames.nunique(dropna=False)),\n",
    "        most_recent_run=(\"datetime\", \"max\"),\n",
    "    )\n",
    "    .reset_index()\n",
    "    # loosen this as more data comes in\n",
    "    .assign(\n",
    "        keep=lambda d: (d[\"n_runs\"] > 2) & (d[\"most_recent_run\"] > \"2023-02-24\")\n",
    "    )\n",
    ")\n",
    "per_hash_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "afc21602",
   "metadata": {
    "papermill": {
     "duration": 0.014062,
     "end_time": "2023-03-11T01:28:09.916398",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.902336",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Key columns of the groups flagged with `keep`.\n",
    "hashes_to_plot = per_hash_data[per_hash_data[\"keep\"]][\n",
    "    [\"mode\", \"set_of_tutorials_run_hash\"]\n",
    "]\n",
    "hashes_to_plot.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6eeccf2a",
   "metadata": {
    "papermill": {
     "duration": 0.029361,
     "end_time": "2023-03-11T01:28:09.949763",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.920402",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Named aggregations that collapse the per-tutorial rows of a run into one row.\n",
    "run_level_aggregations = {\n",
    "    \"mode\": pd.NamedAgg(column=\"mode\", aggfunc=\"first\"),\n",
    "    \"set_of_tutorials_run_hash\": pd.NamedAgg(\n",
    "        column=\"set_of_tutorials_run_hash\", aggfunc=\"first\"\n",
    "    ),\n",
    "    \"datetime\": pd.NamedAgg(column=\"datetime\", aggfunc=\"first\"),\n",
    "    \"total_runtime\": pd.NamedAgg(column=\"runtime\", aggfunc=\"sum\"),\n",
    "    \"slowest_runtime\": pd.NamedAgg(column=\"runtime\", aggfunc=\"max\"),\n",
    "    \"avg_memory\": pd.NamedAgg(column=\"memory_increase\", aggfunc=\"mean\"),\n",
    "    \"max_memory\": pd.NamedAgg(column=\"memory_increase\", aggfunc=\"max\"),\n",
    "}\n",
    "\n",
    "# Only rows whose (mode, hash) pair was selected for plotting survive the merge.\n",
    "time_series_data_wide = (\n",
    "    df.merge(hashes_to_plot).groupby(\"fname\").agg(**run_level_aggregations)\n",
    ")\n",
    "\n",
    "time_series_data_wide.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65da77f8",
   "metadata": {
    "papermill": {
     "duration": 0.016922,
     "end_time": "2023-03-11T01:28:09.971783",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.954861",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Reshape to one row per (run, metric) so that each metric can get its own facet.\n",
    "time_series_data_long = time_series_data_wide.melt(\n",
    "    id_vars=[\"mode\", \"set_of_tutorials_run_hash\", \"datetime\"],\n",
    "    value_vars=[\"total_runtime\", \"slowest_runtime\", \"avg_memory\", \"max_memory\"],\n",
    "    var_name=\"metric\",\n",
    ")\n",
    "time_series_data_long.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ce5df1d",
   "metadata": {
    "papermill": {
     "duration": 0.013417,
     "end_time": "2023-03-11T01:28:09.990303",
     "exception": false,
     "start_time": "2023-03-11T01:28:09.976886",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def make_time_series_plot(\n",
    "    time_series_data_long: pd.DataFrame, mode: str\n",
    ") -> Optional[sns.FacetGrid]:\n",
    "    \"\"\"Plot every aggregated metric over time for the runs of one mode.\n",
    "\n",
    "    Draws one facet per value of the ``metric`` column (two facets per row,\n",
    "    independent y-axes) and, within each facet, one line with markers per\n",
    "    ``set_of_tutorials_run_hash``.\n",
    "\n",
    "    Args:\n",
    "        time_series_data_long: Long-format data with the columns ``mode``,\n",
    "            ``metric``, ``datetime``, ``value`` and\n",
    "            ``set_of_tutorials_run_hash``.\n",
    "        mode: Value of the ``mode`` column to plot.\n",
    "\n",
    "    Returns:\n",
    "        The ``FacetGrid`` holding the plot, or ``None`` (after printing a\n",
    "        message) when there are no rows for ``mode``.\n",
    "    \"\"\"\n",
    "    keep_data = time_series_data_long[\n",
    "        time_series_data_long[\"mode\"] == mode\n",
    "    ].sort_values(\"datetime\")\n",
    "    if keep_data.empty:\n",
    "        print(f\"No {mode} data.\")\n",
    "        return None\n",
    "    g = sns.FacetGrid(\n",
    "        data=keep_data,\n",
    "        col=\"metric\",\n",
    "        col_wrap=2,\n",
    "        aspect=1.5,\n",
    "        sharey=False,\n",
    "        hue=\"set_of_tutorials_run_hash\",\n",
    "        palette=\"viridis\",\n",
    "    )\n",
    "    g.map(plt.plot, \"datetime\", \"value\")\n",
    "    g.map(plt.scatter, \"datetime\", \"value\")\n",
    "    g.add_legend()\n",
    "    g.fig.suptitle(mode)\n",
    "    # Adjust the grid's own figure rather than whichever figure pyplot\n",
    "    # currently considers active.\n",
    "    g.fig.subplots_adjust(top=0.9)\n",
    "    for ax in g.axes.flatten():\n",
    "        ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%m-%d\"))\n",
    "    return g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0c4a69f",
   "metadata": {
    "papermill": {
     "duration": 2.435719,
     "end_time": "2023-03-11T01:28:12.449302",
     "exception": false,
     "start_time": "2023-03-11T01:28:10.013583",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "make_time_series_plot(time_series_data_long, \"standard\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9398afb",
   "metadata": {
    "papermill": {
     "duration": 2.282553,
     "end_time": "2023-03-11T01:28:14.738616",
     "exception": false,
     "start_time": "2023-03-11T01:28:12.456063",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "make_time_series_plot(time_series_data_long, \"smoke-test\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "135a9d98",
   "metadata": {
    "papermill": {
     "duration": 0.010817,
     "end_time": "2023-03-11T01:28:16.834143",
     "exception": false,
     "start_time": "2023-03-11T01:28:16.823326",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Per-tutorial time series plots and histograms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "189e0730",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look-back window, in days, for the per-tutorial time series plots.\n",
    "n_days = 7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7055a7df",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_per_tutorial_ts_plot(mode: str, metric: str):\n",
    "    \"\"\"Plot ``metric`` per tutorial over the most recent ``n_days`` days.\n",
    "\n",
    "    Reads the notebook-level ``df`` and ``n_days``. Only successful rows of\n",
    "    ``mode`` dated within ``n_days`` days of the latest timestamp in ``df``\n",
    "    are used. Tutorials are ranked by their largest ``metric`` value in that\n",
    "    window and drawn five per subplot, largest first.\n",
    "\n",
    "    Args:\n",
    "        mode: Value of the ``mode`` column to plot.\n",
    "        metric: Name of the column of ``df`` to plot.\n",
    "    \"\"\"\n",
    "    idx = (\n",
    "        df[\"ran_successfully\"]\n",
    "        & (df[\"mode\"] == mode)\n",
    "        & ((df.datetime.max() - df.datetime).dt.days < n_days)\n",
    "    )\n",
    "    tutorials_for_ts_plot = df[idx]\n",
    "    if tutorials_for_ts_plot.empty:\n",
    "        # With no rows the group count below is NaN and `plt.subplots` fails.\n",
    "        print(f\"No successful {mode} runs in the last {n_days} days.\")\n",
    "        return\n",
    "    max_metric = tutorials_for_ts_plot.groupby(\"short_name\")[[metric]].max()\n",
    "    # argsort of argsort yields ranks; negating makes rank 0 the largest value.\n",
    "    max_metric[\"rank\"] = np.argsort(np.argsort(-max_metric[metric].values))\n",
    "    tutorials_for_ts_plot = tutorials_for_ts_plot.merge(\n",
    "        max_metric[[\"rank\"]].reset_index(), on=\"short_name\"\n",
    "    )\n",
    "    tutorials_for_ts_plot[\"group\"] = tutorials_for_ts_plot[\"rank\"] // 5\n",
    "\n",
    "    n_groups = int(tutorials_for_ts_plot[\"group\"].max()) + 1\n",
    "    # squeeze=False always returns a 2D array of axes; by default a single\n",
    "    # subplot comes back as a bare Axes, which cannot be iterated over.\n",
    "    fig, axes = plt.subplots(\n",
    "        n_groups, sharex=True, figsize=(5, 10), squeeze=False\n",
    "    )\n",
    "    for i, ax in enumerate(axes.flatten()):\n",
    "        to_plot = tutorials_for_ts_plot.loc[lambda x: x[\"group\"] == i]\n",
    "        # Legend entries follow the ranking, largest metric first.\n",
    "        hue_order = (\n",
    "            to_plot[[\"short_name\", \"rank\"]]\n",
    "            .drop_duplicates()\n",
    "            .sort_values(\"rank\")[\"short_name\"]\n",
    "        )\n",
    "        sns.lineplot(\n",
    "            data=to_plot,\n",
    "            x=\"datetime\",\n",
    "            y=metric,\n",
    "            ax=ax,\n",
    "            hue=\"short_name\",\n",
    "            hue_order=hue_order,\n",
    "        )\n",
    "        sns.scatterplot(\n",
    "            data=to_plot,\n",
    "            x=\"datetime\",\n",
    "            y=metric,\n",
    "            ax=ax,\n",
    "            hue=\"short_name\",\n",
    "            hue_order=hue_order,\n",
    "            legend=False,\n",
    "        )\n",
    "        ax.legend(bbox_to_anchor=(1, 1))\n",
    "        ax.set_xticks([])\n",
    "        ax.set_xlabel(None)\n",
    "        ax.set_ylabel(None)\n",
    "    sns.despine()\n",
    "    most_recent = df.datetime.max().date().strftime(\"%m-%d\")\n",
    "    fig.suptitle(f\"{metric} over the {n_days} days up to {most_recent}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2e694cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "make_per_tutorial_ts_plot(\"standard\", \"runtime\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc664a08",
   "metadata": {},
   "outputs": [],
   "source": [
    "make_per_tutorial_ts_plot(\"standard\", \"memory_increase\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83a98134",
   "metadata": {},
   "outputs": [],
   "source": [
    "make_per_tutorial_ts_plot(\"smoke-test\", \"runtime\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1218cda",
   "metadata": {},
   "outputs": [],
   "source": [
    "make_per_tutorial_ts_plot(\"smoke-test\", \"memory_increase\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "447b2089",
   "metadata": {
    "papermill": {
     "duration": 0.05912,
     "end_time": "2023-03-11T01:29:23.713755",
     "exception": false,
     "start_time": "2023-03-11T01:29:23.654635",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Compare tutorials against each other"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4b22312",
   "metadata": {
    "papermill": {
     "duration": 0.090399,
     "end_time": "2023-03-11T01:29:23.860249",
     "exception": false,
     "start_time": "2023-03-11T01:29:23.769850",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Rows whose timestamp equals the latest timestamp seen for their mode.\n",
    "last_run_data = df.loc[\n",
    "    lambda d: d[\"datetime\"] == d.groupby(\"mode\")[\"datetime\"].transform(\"max\")\n",
    "].sort_values(\"memory_increase\")\n",
    "# Long format: one row per (mode, tutorial, metric) for faceted plotting.\n",
    "last_run_data_long = last_run_data.melt(\n",
    "    id_vars=[\"mode\", \"name\"],\n",
    "    value_vars=[\"runtime\", \"memory_increase\"],\n",
    "    var_name=\"metric\",\n",
    ")\n",
    "last_run_data_long.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbfff91e",
   "metadata": {
    "papermill": {
     "duration": 1.887023,
     "end_time": "2023-03-11T01:29:25.806900",
     "exception": false,
     "start_time": "2023-03-11T01:29:23.919877",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Horizontal bars: one row of panels per mode, one column of panels per metric.\n",
    "g = sns.catplot(\n",
    "    data=last_run_data_long,\n",
    "    kind=\"bar\",\n",
    "    orient=\"h\",\n",
    "    x=\"value\",\n",
    "    y=\"name\",\n",
    "    row=\"mode\",\n",
    "    col=\"metric\",\n",
    "    sharex=False,\n",
    "    color=\"k\",\n",
    ")\n",
    "g.fig.suptitle(\"Metrics on the most recent run in this mode\")\n",
    "# Leave head-room above the panels for the figure-level title.\n",
    "g.fig.subplots_adjust(top=0.9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eadf86d0",
   "metadata": {
    "papermill": {
     "duration": 0.069851,
     "end_time": "2023-03-11T01:29:25.941969",
     "exception": false,
     "start_time": "2023-03-11T01:29:25.872118",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 80.132411,
   "end_time": "2023-03-11T01:29:26.649938",
   "environment_variables": {},
   "exception": null,
   "input_path": "/Users/santorella/repos/botorch/notebooks/tutorials_performance_tracking.ipynb",
   "output_path": "/Users/santorella/repos/botorch/notebooks/tutorials_performance_tracking.ipynb",
   "parameters": {},
   "start_time": "2023-03-11T01:28:06.517527",
   "version": "2.4.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
