{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "823baf2d",
   "metadata": {},
   "source": [
    "### Initialize Hydra"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06003632",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from hydra import compose, initialize\n",
    "\n",
    "# Model config directory: keep exactly one CONFIG_PATH line active.\n",
    "CONFIG_PATH = \"configs/Llama-3.2-3B-Instruct\"\n",
    "# CONFIG_PATH = \"configs/Qwen3-4B\"\n",
    "# CONFIG_PATH = \"configs/Meta-Llama-3-8B\"\n",
    "# CONFIG_PATH = \"configs/Llama-3.2-1B-Instruct\"\n",
    "\n",
    "# Config name composed from CONFIG_PATH: keep exactly one CONFIG_NAME line active.\n",
    "CONFIG_NAME = \"unlearn_1024\"\n",
    "# CONFIG_NAME = \"finetune_1024\"\n",
    "\n",
    "# Compose the config once; the cells below all read their settings from `cfg`.\n",
    "with initialize(config_path=CONFIG_PATH, version_base=None):\n",
    "    cfg = compose(config_name=CONFIG_NAME)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6606bcb3",
   "metadata": {},
   "source": [
    "### Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4db7dc1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_scatter(X, Y):\n",
    "    \"\"\"Scatter-plot points coloured by binary label and print a short summary.\n",
    "\n",
    "    Rows of ``X`` where ``Y == 0`` are drawn in blue and rows where ``Y == 1``\n",
    "    are drawn in red. Only ``X[:, 0]`` and ``X[:, 1]`` are plotted.\n",
    "\n",
    "    Args:\n",
    "        X: array of points, indexed as ``X[mask]`` and ``X[:, 0]`` / ``X[:, 1]``.\n",
    "        Y: label array compared against 0 and 1 to build row masks for ``X``.\n",
    "    \"\"\"\n",
    "    x_blue = X[Y == 0]\n",
    "    x_red = X[Y == 1]\n",
    "\n",
    "    _, ax = plt.subplots(figsize=(10, 7))\n",
    "\n",
    "    # Labels are kept on both series so that enabling ax.legend() works as-is;\n",
    "    # the legend and title are currently switched off.\n",
    "    ax.scatter(x_blue[:, 0], x_blue[:, 1], color='blue', label='y = 0')\n",
    "    ax.scatter(x_red[:, 0], x_red[:, 1], color='red', label='y = 1')\n",
    "\n",
    "    ax.set_xlabel('X-coordinate 1')\n",
    "    ax.set_ylabel('X-coordinate 2')\n",
    "    ax.grid(True)\n",
    "\n",
    "    plt.show()\n",
    "\n",
    "    # Summary of what was plotted. Red is the y=1 subset and blue the y=0\n",
    "    # subset, matching the masks and colours above.\n",
    "    print(f\"Shape of x: {X.shape}\")\n",
    "    print(f\"Shape of y: {Y.shape}\")\n",
    "    print(f\"Number of red points (y=1): {len(x_red)}\")\n",
    "    print(f\"Number of blue points (y=0): {len(x_blue)}\")\n",
    "\n",
    "# For every mode with a Points.npz on disk, plot the full, retain and unlearn sets.\n",
    "for mode in cfg.model.modes:\n",
    "    points_path = f\"data/{cfg.sample_size}/{mode}/Points.npz\"\n",
    "    if not os.path.exists(points_path):\n",
    "        continue  # modes without a points file are skipped\n",
    "\n",
    "    # np.load on an .npz returns an NpzFile that holds the archive open;\n",
    "    # the context manager closes it once the three plots are drawn.\n",
    "    with np.load(points_path) as points:\n",
    "        for x_key, y_key in [(\"X\", \"Y\"), (\"X_retain\", \"Y_retain\"), (\"X_unlearn\", \"Y_unlearn\")]:\n",
    "            plot_scatter(points[x_key], points[y_key])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4400bd48",
   "metadata": {},
   "source": [
    "### Loss and accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b25ea0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Line colour per mode; every mode in cfg.model.modes must have an entry here.\n",
    "palette = {\"linear\": \"orange\", \"rectangle\": \"green\", \"random\": \"blue\", \"circle\": \"purple\", \"star\": \"brown\"}\n",
    "model_short_name = cfg.model.base_model.split(\"/\")[-1]\n",
    "\n",
    "for unlearn_size in cfg.model.unlearn_dataset_size:\n",
    "    results_dir = f\"results/{model_short_name}/{cfg.sample_size}/unlearn_dz_{unlearn_size}\"\n",
    "    # One suffix per configured checkpoint step, plus \"\" for the un-suffixed files.\n",
    "    path_suffixes = [f\"-step{step}\" for step in cfg.model.checkpoints[unlearn_size]] + [\"\"]\n",
    "\n",
    "    for path_suffix in path_suffixes:\n",
    "        # Named `split` rather than `type`: a top-level loop variable stays bound\n",
    "        # after the cell runs, and `type` would shadow the builtin in later cells.\n",
    "        for split in [\"retain\", \"unlearn\"]:\n",
    "            for mode in cfg.model.modes:\n",
    "                loss_path = f\"{results_dir}/{mode}/{split}_loss{path_suffix}.npy\"\n",
    "                acc_path = f\"{results_dir}/{mode}/{split}_acc{path_suffix}.npy\"\n",
    "\n",
    "                # Loss (dashed). The x values are multiples of cfg.model.eval_every, starting at 0.\n",
    "                if os.path.exists(loss_path):\n",
    "                    loss = np.load(loss_path)\n",
    "                    x = np.arange(0, cfg.model.eval_every * len(loss), cfg.model.eval_every)\n",
    "                    plt.plot(x, loss, label=f\"{mode}/{split} loss\", color=palette[mode], linestyle='--')\n",
    "\n",
    "                    plt.title(f\"Unlearn dataset size: {unlearn_size}\")\n",
    "                    plt.legend()\n",
    "                    plt.grid(True)\n",
    "\n",
    "                # Accuracy (solid), drawn on the same axes as the loss curves.\n",
    "                if os.path.exists(acc_path):\n",
    "                    acc = np.load(acc_path)\n",
    "                    x = np.arange(0, cfg.model.eval_every * len(acc), cfg.model.eval_every)\n",
    "                    plt.plot(x, acc, label=f\"{split.upper()} task\", color=palette[mode], linestyle='-')\n",
    "\n",
    "                    plt.legend()\n",
    "                    plt.grid(True)\n",
    "\n",
    "            # One figure per (unlearn size, checkpoint suffix, split), overlaying all modes.\n",
    "            plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "77225746",
   "metadata": {},
   "source": [
    "### Decision boundary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b1b49bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_short_name = cfg.model.base_model.split(\"/\")[-1]\n",
    "for mode in cfg.model.modes:\n",
    "    for unlearn_size in cfg.model.unlearn_dataset_size:\n",
    "        for step in cfg.decision_boundary.checkpoints[unlearn_size]:\n",
    "            decision_boundary_path = f\"results/{model_short_name}/{cfg.sample_size}/unlearn_dz_{unlearn_size}/{mode}/decision_boundary-step{step}.npy\"\n",
    "            if not os.path.exists(decision_boundary_path):\n",
    "                continue  # checkpoints without a saved boundary are skipped\n",
    "\n",
    "            step_sz = cfg.decision_boundary.grid_step_size\n",
    "            grid_width = cfg.decision_boundary.grid_width\n",
    "            grid_height = cfg.decision_boundary.grid_height\n",
    "            x_1 = np.arange(0, grid_width, step_sz)\n",
    "            x_2 = np.arange(0, grid_height, step_sz)\n",
    "\n",
    "            # Reshape with the axis lengths rather than grid // step: the two differ\n",
    "            # when the grid size is not a multiple of the step, and contourf needs\n",
    "            # Z to have shape (len(x_2), len(x_1)).\n",
    "            # NOTE(review): assumes the saved values are ordered with x_1 as the\n",
    "            # slow axis, as the original reshape did - confirm against the writer.\n",
    "            y = np.load(decision_boundary_path)\n",
    "            y = y.reshape((len(x_1), len(x_2)))\n",
    "            y_t = np.transpose(y)\n",
    "\n",
    "            # Plot the decision boundary for the loaded predictions.\n",
    "            fig, ax = plt.subplots(figsize=(10, 8.5))\n",
    "            ax.contourf(x_1, x_2, y_t, levels=20, cmap=\"coolwarm\", alpha=0.5)\n",
    "            ax.set_xlim(x_1.min(), x_1.max())\n",
    "            ax.set_ylim(x_2.min(), x_2.max())\n",
    "            ax.tick_params(axis=\"x\", labelsize=15)\n",
    "            ax.tick_params(axis=\"y\", labelsize=15)\n",
    "            ax.set_title(f\"{mode} decision boundary with unlearn dz of {unlearn_size} at step {step}\", fontsize=24)\n",
    "            ax.set_xlabel(\"Feature 1\", fontsize=26)\n",
    "            ax.set_ylabel(\"Feature 2\", fontsize=26)\n",
    "\n",
    "            # Show each figure as it is produced instead of accumulating open figures.\n",
    "            plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
