{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def setup():\n",
    "    \"\"\"Create the figure output directories and return the per-method line styles.\n",
    "\n",
    "    Returns:\n",
    "        dict: maps a method key (e.g. 'retraining', 'bmt') to the keyword\n",
    "        arguments (label, linestyle, color, linewidth) used when plotting it.\n",
    "    \"\"\"\n",
    "    # The plotting cells save under \"outputs/\" and savefig does not create\n",
    "    # missing directories; \"plots\" is still created for backward compatibility.\n",
    "    for directory in (\"plots\", \"outputs\"):\n",
    "        os.makedirs(directory, exist_ok=True)\n",
    "    linewidth = 3\n",
    "\n",
    "    styles = {\n",
    "        'retraining': {'label': 'RfS', 'linestyle': '-', 'color': 'tab:blue', 'linewidth': linewidth},\n",
    "        'mmt': {'label': 'MMT', 'linestyle': '-', 'color': 'tab:green', 'linewidth': linewidth},\n",
    "        'bmt': {'label': 'BMT', 'linestyle': '-', 'color': 'goldenrod', 'linewidth': linewidth},\n",
    "        'standalone': {'label': 'Standalone', 'linestyle': '-', 'color': 'tab:red', 'linewidth': linewidth},\n",
    "        'greedy': {'label': 'Greedy', 'linestyle': '-', 'color': 'tab:orange', 'linewidth': linewidth}\n",
    "    }\n",
    "    return styles\n",
    "\n",
    "plot_styles = setup()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot(ax, x, mean, std, **kwargs):\n",
    "    \"\"\"Draw the mean curve on `ax` and shade one std above and below it.\n",
    "\n",
    "    Extra keyword arguments are passed straight to `ax.plot`; the shaded band\n",
    "    reuses the `color` keyword (None when absent) as its face color.\n",
    "    \"\"\"\n",
    "    band_color = kwargs[\"color\"] if \"color\" in kwargs else None\n",
    "    ax.plot(x, mean, **kwargs)\n",
    "    lower, upper = mean - std, mean + std\n",
    "    ax.fill_between(x, lower, upper, alpha=.4, facecolor=band_color)\n",
    "\n",
    "def plot_markers(ax, x, mean, **kwargs):\n",
    "    \"\"\"Scatter the points (x, mean) on `ax`, colored by the optional `color` keyword.\"\"\"\n",
    "    marker_color = kwargs[\"color\"] if \"color\" in kwargs else None\n",
    "    ax.scatter(x, mean, c=marker_color, linewidths=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MNIST dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_data():\n",
    "    \"\"\"Load the per-seed MNIST stats files and aggregate them per unlearning method.\n",
    "\n",
    "    Returns:\n",
    "        dict: {method: {\"mean\": DataFrame, \"std\": DataFrame}}; both frames are\n",
    "        grouped by row position within each stats file. Methods for which no\n",
    "        stats file was found are reported and left out of the result.\n",
    "    \"\"\"\n",
    "    diff_seeds = [1, 2, 3, 4, 5]\n",
    "    unlearn_methods = [\"standalone\", \"retraining\", \"bmt\", \"mmt\"]\n",
    "\n",
    "    # aggregate results across different seeds\n",
    "    outputs = {}\n",
    "    for method in unlearn_methods:\n",
    "        res = []\n",
    "        for seed in diff_seeds:\n",
    "            path = f\"../main_results/sequential_unlearning/mnist/seed-{seed}/{method}/stats.json\"\n",
    "            if not os.path.exists(path):\n",
    "                print(f\"Can't load data from {path}\")\n",
    "                continue\n",
    "            # context manager so the file handle is closed right after parsing\n",
    "            with open(path, \"r\") as stats_file:\n",
    "                stats = pd.DataFrame(json.load(stats_file))  # dataframe for easier aggregation\n",
    "            stats[\"index\"] = range(len(stats))  # create index for grouping\n",
    "            res.append(stats)\n",
    "        if not res:\n",
    "            # pd.concat raises ValueError on an empty list; skip the method instead\n",
    "            print(f\"No stats found for {method}, skipping.\")\n",
    "            continue\n",
    "        res = pd.concat(res).groupby(\"index\")   # group by index across different seeds\n",
    "        outputs[method] = {\"mean\": res.mean(), \"std\": res.std()}\n",
    "\n",
    "    return outputs\n",
    "\n",
    "data = prepare_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot mean +/- std test accuracy per method for MNIST and save the figure.\n",
    "data = prepare_data()\n",
    "matplotlib.rcParams.update({'font.size': 18})\n",
    "fig, ax = plt.subplots(figsize=(7, 5))\n",
    "\n",
    "metrics = \"test_acc\"\n",
    "\n",
    "for method in data:\n",
    "    if method not in plot_styles:\n",
    "        # Copy before relabeling so the shared \"bmt\" entry in plot_styles is not mutated.\n",
    "        style = plot_styles[\"bmt\"].copy()\n",
    "        style.update({\"label\": method})\n",
    "    else:\n",
    "        style = plot_styles[method]\n",
    "    mean = data[method][\"mean\"][metrics]\n",
    "    std = data[method][\"std\"][metrics]\n",
    "    plot(ax, mean.index, mean, std, **style)\n",
    "\n",
    "ax.set_ylabel(\"Test Accuracy\", labelpad=15);\n",
    "ax.set_xlabel(\"Communication Rounds\", labelpad=15);\n",
    "\n",
    "fig.tight_layout()\n",
    "figure_name = \"outputs/mnist_sequential.png\"\n",
    "fig.legend(loc=\"lower left\", bbox_to_anchor=(0.15, 0.2, 0.97, 0))\n",
    "# savefig does not create missing directories, so make sure the target exists\n",
    "os.makedirs(os.path.dirname(figure_name), exist_ok=True)\n",
    "plt.savefig(figure_name, bbox_inches=\"tight\", dpi=300)\n",
    "\n",
    "# Optional: export the legend on its own (kept disabled).\n",
    "# legend = fig.legend(loc=\"upper center\", ncols=4, bbox_to_anchor=(0, 0.3, 1, 1)) \n",
    "# legend_fig = legend.figure\n",
    "# legend_fig.canvas.draw()\n",
    "# bbox = legend.get_window_extent().transformed(legend_fig.dpi_scale_trans.inverted())\n",
    "# legend_fig.savefig(\"outputs/legend_sequential.png\", dpi=\"figure\", bbox_inches=bbox)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### FashionMNIST dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_data():\n",
    "    \"\"\"Load the per-seed FashionMNIST stats files and aggregate them per unlearning method.\n",
    "\n",
    "    Returns:\n",
    "        dict: {method: {\"mean\": DataFrame, \"std\": DataFrame}}; both frames are\n",
    "        grouped by row position within each stats file. Methods for which no\n",
    "        stats file was found are reported and left out of the result.\n",
    "    \"\"\"\n",
    "    diff_seeds = [1, 2, 3, 4, 5]\n",
    "    unlearn_methods = [\"standalone\", \"retraining\", \"bmt\", \"mmt\"]\n",
    "\n",
    "    # aggregate results across different seeds\n",
    "    outputs = {}\n",
    "    for method in unlearn_methods:\n",
    "        res = []\n",
    "        for seed in diff_seeds:\n",
    "            path = f\"../main_results/sequential_unlearning/fashion_mnist/seed-{seed}/{method}/stats.json\"\n",
    "            if not os.path.exists(path):\n",
    "                print(f\"Can't load data from {path}\")\n",
    "                continue\n",
    "            # context manager so the file handle is closed right after parsing\n",
    "            with open(path, \"r\") as stats_file:\n",
    "                stats = pd.DataFrame(json.load(stats_file))  # dataframe for easier aggregation\n",
    "            stats[\"index\"] = range(len(stats))  # create index for grouping\n",
    "            res.append(stats)\n",
    "        if not res:\n",
    "            # pd.concat raises ValueError on an empty list; skip the method instead\n",
    "            print(f\"No stats found for {method}, skipping.\")\n",
    "            continue\n",
    "        res = pd.concat(res).groupby(\"index\")   # group by index across different seeds\n",
    "        outputs[method] = {\"mean\": res.mean(), \"std\": res.std()}\n",
    "\n",
    "    return outputs\n",
    "\n",
    "data = prepare_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot mean +/- std test accuracy per method for FashionMNIST and save the figure.\n",
    "data = prepare_data()\n",
    "matplotlib.rcParams.update({'font.size': 18})\n",
    "fig, ax = plt.subplots(figsize=(7, 5))\n",
    "\n",
    "metrics = \"test_acc\"\n",
    "ax.set_ylabel(\"Test Accuracy\", labelpad=15);\n",
    "\n",
    "for method in data:\n",
    "    if method not in plot_styles:\n",
    "        # Copy before relabeling so the shared \"bmt\" entry in plot_styles is not mutated.\n",
    "        style = plot_styles[\"bmt\"].copy()\n",
    "        style.update({\"label\": method})\n",
    "    else:\n",
    "        style = plot_styles[method]\n",
    "    mean = data[method][\"mean\"][metrics]\n",
    "    std = data[method][\"std\"][metrics]\n",
    "    x = mean.index\n",
    "    plot(ax, x, mean, std, markevery=80, **style)\n",
    "\n",
    "ax.set_xlabel(\"Communication Rounds\", labelpad=15);\n",
    "fig.legend(loc=\"lower left\", bbox_to_anchor=(0.15, 0.2, 0.97, 0))\n",
    "fig.tight_layout()\n",
    "figure_name = \"outputs/fashion_mnist_sequential.png\"\n",
    "# savefig does not create missing directories, so make sure the target exists\n",
    "os.makedirs(os.path.dirname(figure_name), exist_ok=True)\n",
    "plt.savefig(figure_name, bbox_inches=\"tight\", dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CIFAR-10 dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_data():\n",
    "    \"\"\"Load the per-seed CIFAR-10 stats files and aggregate them per unlearning method.\n",
    "\n",
    "    Returns:\n",
    "        dict: {method: {\"mean\": DataFrame, \"std\": DataFrame}}; both frames are\n",
    "        grouped by row position within each stats file. Methods for which no\n",
    "        stats file was found are reported and left out of the result.\n",
    "    \"\"\"\n",
    "    diff_seeds = [1, 2, 3, 4, 5]\n",
    "    unlearn_methods = [\"standalone\", \"retraining\", \"bmt\", \"mmt\"]\n",
    "\n",
    "    # aggregate results across different seeds\n",
    "    outputs = {}\n",
    "    for method in unlearn_methods:\n",
    "        res = []\n",
    "        for seed in diff_seeds:\n",
    "            path = f\"../main_results/sequential_unlearning/cifar10/seed-{seed}/{method}/stats.json\"\n",
    "            # Same missing-file handling as the other dataset loaders in this notebook.\n",
    "            if not os.path.exists(path):\n",
    "                print(f\"Can't load data from {path}\")\n",
    "                continue\n",
    "            print(f\"Loading stats from {path}\")\n",
    "            # context manager so the file handle is closed right after parsing\n",
    "            with open(path, \"r\") as stats_file:\n",
    "                stats = pd.DataFrame(json.load(stats_file))  # dataframe for easier aggregation\n",
    "            if \"standalone\" in method:\n",
    "                # back-fill missing entries with the next valid value in each column\n",
    "                stats = stats.bfill()\n",
    "            stats[\"index\"] = range(len(stats))  # create index for grouping\n",
    "            res.append(stats)\n",
    "        if not res:\n",
    "            # pd.concat raises ValueError on an empty list; skip the method instead\n",
    "            print(f\"No stats found for {method}, skipping.\")\n",
    "            continue\n",
    "        res = pd.concat(res).groupby(\"index\")   # group by index across different seeds\n",
    "        outputs[method] = {\"mean\": res.mean(), \"std\": res.std()}\n",
    "\n",
    "    return outputs\n",
    "\n",
    "data = prepare_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot mean +/- std test accuracy per method for CIFAR-10 and save the figure.\n",
    "data = prepare_data()\n",
    "matplotlib.rcParams.update({'font.size': 18})\n",
    "fig, ax = plt.subplots(figsize=(7, 5))\n",
    "\n",
    "metrics = \"test_acc\"\n",
    "ax.set_ylabel(\"Test Accuracy\", labelpad=15);\n",
    "for method in data:\n",
    "    if method not in plot_styles:\n",
    "        # Relabel a copy so the shared \"bmt\" entry in plot_styles stays untouched.\n",
    "        style = plot_styles[\"bmt\"].copy()\n",
    "        style.update({\"label\": method})\n",
    "    else:\n",
    "        style = plot_styles[method]\n",
    "    mean = data[method][\"mean\"][metrics]\n",
    "    std = data[method][\"std\"][metrics]\n",
    "    x = mean.index\n",
    "    plot(ax, x, mean, std, markevery=180, **style)\n",
    "\n",
    "ax.set_xlabel(\"Communication Rounds\", labelpad=15);\n",
    "fig.legend(loc=\"lower left\", bbox_to_anchor=(0.15, 0.2, 0.97, 0))\n",
    "fig.tight_layout()\n",
    "figure_name = \"outputs/cifar10_sequential.png\"\n",
    "# savefig does not create missing directories, so make sure the target exists\n",
    "os.makedirs(os.path.dirname(figure_name), exist_ok=True)\n",
    "plt.savefig(figure_name, bbox_inches=\"tight\", dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CIFAR-100 dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_data():\n",
    "    \"\"\"Load the per-seed CIFAR-100 stats files and aggregate them per unlearning method.\n",
    "\n",
    "    Returns:\n",
    "        dict: {method: {\"mean\": DataFrame, \"std\": DataFrame}}; both frames are\n",
    "        grouped by row position within each stats file. Methods for which no\n",
    "        stats file was found are reported and left out of the result.\n",
    "    \"\"\"\n",
    "    diff_seeds = [1, 2, 3, 4, 5]\n",
    "    unlearn_methods = [\"standalone\", \"retraining\", \"bmt\", \"mmt\"]\n",
    "\n",
    "    # aggregate results across different seeds\n",
    "    outputs = {}\n",
    "    for method in unlearn_methods:\n",
    "        res = []\n",
    "        for seed in diff_seeds:\n",
    "            path = f\"../main_results/sequential_unlearning/cifar100-500rounds/seed-{seed}/{method}/stats.json\"\n",
    "            # Same missing-file handling as the other dataset loaders in this notebook.\n",
    "            if not os.path.exists(path):\n",
    "                print(f\"Can't load data from {path}\")\n",
    "                continue\n",
    "            print(f\"Loading stats from {path}\")\n",
    "            # context manager so the file handle is closed right after parsing\n",
    "            with open(path, \"r\") as stats_file:\n",
    "                stats = pd.DataFrame(json.load(stats_file))  # dataframe for easier aggregation\n",
    "            stats[\"index\"] = range(len(stats))  # create index for grouping\n",
    "            res.append(stats)\n",
    "        if not res:\n",
    "            # pd.concat raises ValueError on an empty list; skip the method instead\n",
    "            print(f\"No stats found for {method}, skipping.\")\n",
    "            continue\n",
    "        res = pd.concat(res).groupby(\"index\")   # group by index across different seeds\n",
    "        outputs[method] = {\"mean\": res.mean(), \"std\": res.std()}\n",
    "\n",
    "    return outputs\n",
    "\n",
    "data = prepare_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot mean +/- std test accuracy per method for CIFAR-100 and save the figure.\n",
    "data = prepare_data()\n",
    "matplotlib.rcParams.update({'font.size': 18})\n",
    "fig, ax = plt.subplots(figsize=(7, 5))\n",
    "\n",
    "metrics = \"test_acc\"\n",
    "ax.set_ylabel(\"Test Accuracy\", labelpad=15);\n",
    "\n",
    "for method in data:\n",
    "    if method not in plot_styles:\n",
    "        # Relabel a copy so the shared \"bmt\" entry in plot_styles stays untouched.\n",
    "        style = plot_styles[\"bmt\"].copy()\n",
    "        style.update({\"label\": method})\n",
    "    else:\n",
    "        style = plot_styles[method]\n",
    "    mean = data[method][\"mean\"][metrics]\n",
    "    std = data[method][\"std\"][metrics]\n",
    "    x = mean.index\n",
    "    plot(ax, x, mean, std, markevery=180, **style)\n",
    "\n",
    "ax.set_xlabel(\"Communication Rounds\", labelpad=15);\n",
    "fig.legend(loc=\"lower left\", bbox_to_anchor=(0.15, 0.2, 0.97, 0))\n",
    "fig.tight_layout()\n",
    "figure_name = \"outputs/cifar100_sequential.png\"\n",
    "# savefig does not create missing directories, so make sure the target exists\n",
    "os.makedirs(os.path.dirname(figure_name), exist_ok=True)\n",
    "plt.savefig(figure_name, bbox_inches=\"tight\", dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### LLM datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = \"language-identification\"\n",
    "\n",
    "def prepare_data():\n",
    "    \"\"\"Load the per-seed stats files of the module-level `dataset` and aggregate them per method.\n",
    "\n",
    "    Returns:\n",
    "        dict: {method: {\"mean\": DataFrame, \"std\": DataFrame}}; both frames are\n",
    "        grouped by row position within each stats file. Methods for which no\n",
    "        stats file was found are reported and left out of the result.\n",
    "    \"\"\"\n",
    "    diff_seeds = [123, 224, 325, 426, 527]\n",
    "    unlearn_methods = [\"retraining\", \"bmt\", \"mmt\"]\n",
    "\n",
    "    # aggregate results across different seeds\n",
    "    outputs = {}\n",
    "    for method in unlearn_methods:\n",
    "        res = []\n",
    "        for seed in diff_seeds:\n",
    "            path = f\"../main_results/nlp/{dataset}/top8{method}{seed}/stats.json\"\n",
    "            if not os.path.exists(path):\n",
    "                print(f\"Can't load data from {path}\")\n",
    "                continue\n",
    "            # context manager so the file handle is closed right after parsing\n",
    "            with open(path, \"r\") as stats_file:\n",
    "                stats = pd.DataFrame(json.load(stats_file))  # dataframe for easier aggregation\n",
    "            stats[\"index\"] = range(len(stats))  # create index for grouping\n",
    "            res.append(stats)\n",
    "        if not res:\n",
    "            # pd.concat raises ValueError on an empty list; skip the method instead\n",
    "            print(f\"No stats found for {method}, skipping.\")\n",
    "            continue\n",
    "        res = pd.concat(res).groupby(\"index\")   # group by index across different seeds\n",
    "        outputs[method] = {\"mean\": res.mean(), \"std\": res.std()}\n",
    "\n",
    "    return outputs\n",
    "\n",
    "data = prepare_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot mean +/- std test accuracy per method for the current `dataset` and save the figure.\n",
    "data = prepare_data()\n",
    "fig, ax = plt.subplots(figsize=(7,5))\n",
    "\n",
    "metrics = \"test_acc\"\n",
    "ax.set_ylabel(\"Test Accuracy\", labelpad=15);\n",
    "\n",
    "for method in data:\n",
    "    if method not in plot_styles:\n",
    "        # Copy before relabeling so the shared \"bmt\" entry in plot_styles is not mutated.\n",
    "        style = plot_styles[\"bmt\"].copy()\n",
    "        style.update({\"label\": method})\n",
    "    else:\n",
    "        style = plot_styles[method]\n",
    "    mean = data[method][\"mean\"][metrics]\n",
    "    std = data[method][\"std\"][metrics]\n",
    "    x = mean.index\n",
    "    plot(ax, x, mean, std, markevery=80, **style)\n",
    "\n",
    "ax.set_xlabel(\"Communication Rounds\", labelpad=15);\n",
    "fig.legend(loc=\"lower right\", bbox_to_anchor=(0, 0.21, 0.97, 0))\n",
    "fig.tight_layout()\n",
    "figure_name = f\"outputs/{dataset}.png\"\n",
    "# savefig does not create missing directories, so make sure the target exists\n",
    "os.makedirs(os.path.dirname(figure_name), exist_ok=True)\n",
    "plt.savefig(figure_name, bbox_inches=\"tight\", dpi=300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = \"multilingual-sentiments\"\n",
    "\n",
    "def prepare_data():\n",
    "    \"\"\"Load the per-seed stats files of the module-level `dataset` and aggregate them per method.\n",
    "\n",
    "    Returns:\n",
    "        dict: {method: {\"mean\": DataFrame, \"std\": DataFrame}}; both frames are\n",
    "        grouped by row position within each stats file. Methods for which no\n",
    "        stats file was found are reported and left out of the result.\n",
    "    \"\"\"\n",
    "    diff_seeds = [123, 224, 325, 426, 527]\n",
    "    unlearn_methods = [\"retraining\", \"bmt\", \"mmt\"]\n",
    "\n",
    "    # aggregate results across different seeds\n",
    "    outputs = {}\n",
    "    for method in unlearn_methods:\n",
    "        res = []\n",
    "        for seed in diff_seeds:\n",
    "            path = f\"../main_results/nlp/{dataset}/top8{method}{seed}v3/stats.json\"\n",
    "            if not os.path.exists(path):\n",
    "                print(f\"Can't load data from {path}\")\n",
    "                continue\n",
    "            # context manager so the file handle is closed right after parsing\n",
    "            with open(path, \"r\") as stats_file:\n",
    "                stats = pd.DataFrame(json.load(stats_file))  # dataframe for easier aggregation\n",
    "            stats[\"index\"] = range(len(stats))  # create index for grouping\n",
    "            res.append(stats)\n",
    "        if not res:\n",
    "            # pd.concat raises ValueError on an empty list; skip the method instead\n",
    "            print(f\"No stats found for {method}, skipping.\")\n",
    "            continue\n",
    "        res = pd.concat(res).groupby(\"index\")   # group by index across different seeds\n",
    "        outputs[method] = {\"mean\": res.mean(), \"std\": res.std()}\n",
    "\n",
    "    return outputs\n",
    "\n",
    "data = prepare_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot mean +/- std test accuracy per method for the current `dataset` and save the figure.\n",
    "data = prepare_data()\n",
    "fig, ax = plt.subplots(figsize=(7,5))\n",
    "\n",
    "metrics = \"test_acc\"\n",
    "ax.set_ylabel(\"Test Accuracy\", labelpad=15);\n",
    "\n",
    "for method in data:\n",
    "    if method not in plot_styles:\n",
    "        # Copy before relabeling so the shared \"bmt\" entry in plot_styles is not mutated.\n",
    "        style = plot_styles[\"bmt\"].copy()\n",
    "        style.update({\"label\": method})\n",
    "    else:\n",
    "        style = plot_styles[method]\n",
    "    mean = data[method][\"mean\"][metrics]\n",
    "    std = data[method][\"std\"][metrics]\n",
    "    x = mean.index\n",
    "    plot(ax, x, mean, std, markevery=80, **style)\n",
    "\n",
    "ax.set_xlabel(\"Communication Rounds\", labelpad=15);\n",
    "fig.legend(loc=\"lower right\", bbox_to_anchor=(0, 0.21, 0.97, 0))\n",
    "fig.tight_layout()\n",
    "figure_name = f\"outputs/{dataset}.png\"\n",
    "# savefig does not create missing directories, so make sure the target exists\n",
    "os.makedirs(os.path.dirname(figure_name), exist_ok=True)\n",
    "plt.savefig(figure_name, bbox_inches=\"tight\", dpi=300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "unlearning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
