{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_files = os.listdir(\"benchmarking\")\n",
    "all_results = []\n",
    "\n",
    "#for rfile in result_files:\n",
    "#    df = pd.read_csv(os.path.join(\"benchmarking\", rfile))\n",
    "#    ds = rfile[:rfile.index(\"0.2\")-1]\n",
    "#    df[\"dataset\"] = ds\n",
    "#    all_results.append(df)\n",
    "\n",
    "#discard the first warmup run\n",
    "for rfile in result_files:\n",
    "    df = pd.read_csv(os.path.join(\"benchmarking\", rfile))\n",
    "    ds = rfile[:rfile.index(\"0.2\")-1]\n",
    "    df[\"dataset\"] = ds\n",
    "    all_results.append(df.loc[1:])\n",
    "\n",
    "res_df = pd.concat(all_results, ignore_index=True)\n",
    "res_df[\"efficiency\"] = res_df[\"efficiency\"].apply(lambda x: float(x[len(\"tensor(\"):-1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res_df.groupby([\"dataset\", \"method\"])[\"efficiency\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# exclude weird extra value by taking the middle three\n",
    "#res_df = res_df.groupby([\"dataset\", \"method\"])[\"runtime\"].apply(lambda x: x.drop([x.idxmax(), x.idxmin()])).reset_index()\n",
    "\n",
    "#def group_result(x):\n",
    "#    x_mean = x.mean()\n",
    "#    x_max = x.max()\n",
    "#    x_min = x.min()\n",
    "#    x_dev = max(x_mean - x_min, x_max - x_mean)\n",
    "#    return f\"{x_mean:.2f} $\\pm$ {x_dev:.2f}\"\n",
    "#results_table = res_df.groupby([\"dataset\", \"method\"])[\"runtime\"].apply(group_result).reset_index().pivot(index=\"dataset\", columns=\"method\", values=\"runtime\")\n",
    "\n",
    "def group_results(x):\n",
    "    x_mean = x.mean()\n",
    "    x_std = x.std()\n",
    "    return f\"{x_mean:.2f} $\\pm$ {1.96 * x_std:.2f}\"\n",
    "\n",
    "results_table = res_df.groupby([\"dataset\", \"method\"])[\"runtime\"].apply(group_results).reset_index().pivot(index=\"dataset\", columns=\"method\", values=\"runtime\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(results_table.reindex([\"CiteSeer\", \"Amazon_Photos\", \"Cora\", \"PubMed\", \"Coauthor_CS\", \"Coauthor_Physics\"]).to_latex())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res_df.groupby([\"dataset\", \"method\"])[\"runtime\"].mean().reset_index().groupby(\"dataset\").apply(lambda x: x[x.method==\"baseline\"][\"runtime\"].item()/x[x.method==\"cache+batch\"][\"runtime\"].item())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "avoirpp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
