{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(\"../experimental_data/250M_losses.csv\")\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(\"../experimental_data/250M_losses.csv\")\n",
    "\n",
    "x = data[\"update_step\"]\n",
    "\n",
    "# pleasant-puddle-358 true configs/llama_250m.json\n",
    "y_250M_lora = data[\"pleasant-puddle-358 - loss\"]\n",
    "y_250M_full = data[\"grateful-wildflower-277 - loss\"]\n",
    "y_71M_full = data[\"volcanic-sun-327 - loss\"]\n",
    "\n",
    "plt.plot(x, y_250M_lora, label=\"250M LoRA\")\n",
    "plt.plot(x, y_250M_full, label=\"250M Full\")\n",
    "plt.plot(x, y_71M_full, label=\"71M Full\")\n",
    "plt.legend()\n",
    "plt.xlabel(\"Update Step\")\n",
    "plt.ylabel(\"Loss\")\n",
    "plt.title(\"Loss vs Update Step\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[743]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[(data.update_step < 1000) & (data.update_step > 743)][\"grateful-wildflower-277 - loss\"].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "\n",
    "# Setting the overall aesthetics.\n",
    "sns.set_theme(style=\"whitegrid\")\n",
    "\n",
    "data = pd.read_csv(\"../experimental_data/250M_losses.csv\")\n",
    "# delete 744th row\n",
    "data = data.drop([742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752])\n",
    "window = 50  # set the window size that suits your data\n",
    "data_smooth = data.rolling(window).mean()\n",
    "\n",
    "x = data_smooth[\"update_step\"]\n",
    "\n",
    "# pleasant-puddle-358 true configs/llama_250m.json\n",
    "y_250M_lora = data_smooth[\"pleasant-puddle-358 - loss\"]\n",
    "y_250M_full = data_smooth[\"grateful-wildflower-277 - loss\"]\n",
    "y_71M_full = data_smooth[\"volcanic-sun-327 - loss\"]\n",
    "\n",
    "# Create a dataframe\n",
    "df = pd.DataFrame({\n",
    "    \"250M\": y_250M_full,\n",
    "    \"Step\": x,\n",
    "    \"250M PaRaLight\": y_250M_lora,\n",
    "    \"71M (trainable-parameter-equivalent)\": y_71M_full\n",
    "})\n",
    "\n",
    "# Melt the dataframe to have a column for models and their corresponding loss\n",
    "df_melt = df.melt('Step', var_name='Models', value_name='Loss')\n",
    "\n",
    "# Create the plot using seaborn\n",
    "plt.figure(figsize=(12, 6), dpi=150)  # Set the figure size\n",
    "loss_plot = sns.lineplot(x='Step', y='Loss', hue='Models', data=df_melt, alpha=0.8, linewidth=2)\n",
    "\n",
    "# Setting the title and labels\n",
    "# plt.title('Loss vs Update Step', fontsize=20)\n",
    "plt.xlabel('Update Step', fontsize=15)\n",
    "plt.ylabel('Loss', fontsize=15)\n",
    "\n",
    "# Set the fontsize and location of the legend\n",
    "plt.legend(fontsize='x-large', title_fontsize='40', loc='upper right')\n",
    "\n",
    "# add vertical line at 5K\n",
    "plt.axvline(x=5000, color='r', linestyle='--')\n",
    "# add text saying \"Enable PaRaLight\"\n",
    "plt.text(5100, 4.1, 'Enable PaRaLight', fontsize=15)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "\n",
    "# Setting the overall aesthetics.\n",
    "sns.set_theme(style=\"whitegrid\")\n",
    "\n",
    "data = pd.read_csv(\"../experimental_data/250M_losses.csv\")\n",
    "# delete rows 742 to 752\n",
    "data = data.drop(list(range(742, 753)))\n",
    "window = 50  # set the window size that suits your data\n",
    "data_smooth = data.rolling(window).mean()\n",
    "\n",
    "x = data_smooth[\"update_step\"]\n",
    "\n",
    "# pleasant-puddle-358 true configs/llama_250m.json\n",
    "y_250M_lora = data_smooth[\"pleasant-puddle-358 - loss\"]\n",
    "y_250M_full = data_smooth[\"grateful-wildflower-277 - loss\"]\n",
    "y_71M_full = data_smooth[\"volcanic-sun-327 - loss\"]\n",
    "\n",
    "# Create a dataframe\n",
    "df = pd.DataFrame({\n",
    "    \"250M\": y_250M_full,\n",
    "    \"Step\": x,\n",
    "    \"250M PaRaLight\": y_250M_lora,\n",
    "    \"71M (trainable-parameter-equivalent)\": y_71M_full\n",
    "})\n",
    "\n",
    "# Melt the dataframe to have a column for models and their corresponding loss\n",
    "df_melt = df.melt('Step', var_name='Models', value_name='Loss')\n",
    "\n",
    "# Create the plot using seaborn\n",
    "fig, ax1 = plt.subplots(figsize=(10, 5), dpi=150)  # Set the figure size\n",
    "\n",
    "color = 'black'\n",
    "ax1.set_xlabel('Update Step', fontsize=15)\n",
    "ax1.set_ylabel('Loss', color=color, fontsize=15)\n",
    "loss_plot = sns.lineplot(x='Step', y='Loss', hue='Models', data=df_melt, alpha=0.8, linewidth=2, ax=ax1)\n",
    "ax1.tick_params(axis='y', labelcolor=color)\n",
    "\n",
    "# Set the fontsize and location of the legend\n",
    "# alpha 1.0\n",
    "ax1.legend(fontsize='x-large', title_fontsize='40', loc='upper right', bbox_to_anchor=(1, 0.95))\n",
    "\n",
    "# add vertical line at 5K\n",
    "ax1.axvline(x=5000, color='r', linestyle='--')\n",
    "# add text saying \"Enable PaRaLight\"\n",
    "ax1.text(5100, 4.3, 'Enable PaRaLight', fontsize=15)\n",
    "\n",
    "# instantiate a second axes that shares the same x-axis\n",
    "ax2 = ax1.twinx()  \n",
    "\n",
    "# black\n",
    "color = 'black'\n",
    "ax2.set_ylabel('Trainable Parameters', color=color, fontsize=15)  # we already handled the x-label with ax1\n",
    "ax2.plot(df[\"Step\"], [250] * df.shape[0], color=\"b\", label='250M Parameters')\n",
    "ax2.plot(df[\"Step\"], ([None] * 5000) + [98] * (df.shape[0] - 5000), color=color, label='98M Parameters (PaRaLight)')\n",
    "ax2.plot(df[\"Step\"], [71] * df.shape[0], color=color, label='71M Parameters')\n",
    "ax2.tick_params(axis='y', labelcolor=color)\n",
    "\n",
    "fig.tight_layout()  # otherwise the right y-label is slightly clipped\n",
    "\n",
    "# logy\n",
    "ax1.set_yscale('log')\n",
    "# ax2.set_yscale('log')\n",
    "\n",
    "# limit x 0 20K\n",
    "ax1.set_xlim([0, 20000])\n",
    "ax2.set_xlim([0, 20000])\n",
    "\n",
    "# lim ax2 y 0 300\n",
    "ax2.set_ylim([0, 300])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import matplotlib.gridspec as gridspec\n",
    "\n",
    "# Setting the overall aesthetics.\n",
    "sns.set_theme(style=\"whitegrid\")\n",
    "\n",
    "data = pd.read_csv(\"../experimental_data/250M_losses.csv\")\n",
    "window = 50  # set the window size that suits your data\n",
    "data_smooth = data.rolling(window).mean()\n",
    "\n",
    "data_smooth2 = data.rolling(10).mean()\n",
    "\n",
    "x = data_smooth[\"update_step\"]\n",
    "\n",
    "# pleasant-puddle-358 true configs/llama_250m.json\n",
    "y_250M_lora = data_smooth[\"pleasant-puddle-358 - loss\"]\n",
    "y_250M_full = data_smooth[\"grateful-wildflower-277 - loss\"]\n",
    "y_71M_full = data_smooth[\"volcanic-sun-327 - loss\"]\n",
    "y_99M_full = data_smooth2[\"silvery-paper-370 - loss\"]\n",
    "\n",
    "# Create a dataframe\n",
    "df = pd.DataFrame({\n",
    "    \"250M\": y_250M_full,\n",
    "    \"Step\": x,\n",
    "    \"250M ReLoRA\\n(99M trainable)\": y_250M_lora,\n",
    "    # \"71M\": y_71M_full,\n",
    "    \"99M\": y_99M_full,\n",
    "})\n",
    "\n",
    "# Melt the dataframe to have a column for models and their corresponding loss\n",
    "df_melt = df.melt('Step', var_name='Models', value_name='Loss')\n",
    "\n",
    "# Create the subplots\n",
    "fig = plt.figure(figsize=(10, 4), dpi=150)\n",
    "# reduce space between first and second row\n",
    "gs = gridspec.GridSpec(2, 1, height_ratios=[2, 0.6])  # 0 height for the empty plot to serve as a spacer\n",
    "ax1 = plt.subplot(gs[0])\n",
    "ax2 = plt.subplot(gs[1])\n",
    "\n",
    "# Plot the loss\n",
    "loss_plot = sns.lineplot(x='Step', y='Loss', hue='Models', data=df_melt, alpha=0.8, linewidth=2, ax=ax1)\n",
    "\n",
    "# Setting the title and labels for loss plot\n",
    "ax1.set_ylabel('Loss', fontsize=13)\n",
    "# ax1.set_yscale('log')  # log scale for loss\n",
    "ax1.set_xlim([0, 20000])  # limit x-axis\n",
    "ax1.set_ylim([3, 5])  # limit x-axis\n",
    "ax1.legend(fontsize='medium', title_fontsize='30', loc='upper right')#, bbox_to_anchor=(1, 0.95))  # Set the fontsize and location of the legend\n",
    "ax1.axvline(x=5000, color='r', linestyle='--')  # add vertical line at 5K\n",
    "# ax1.text(5100, 4.3, 'Enable PaRaLight', fontsize=13)  # add text saying \"Enable PaRaLight\"\n",
    "\n",
    "# remove x-label\n",
    "ax1.set_xlabel('')\n",
    "\n",
    "# Plot the trainable parameters\n",
    "# ax2.plot(df[\"Step\"], ([250] * 5000) + ([None] * (df.shape[0] - 500), color=\"b\", label='250M Parameters')\n",
    "# ax2.plot(df[\"Step\"], ([None] * 5000) + [98] * (df.shape[0] - 5000), color=\"black\", label='98M Parameters (PaRaLight)')\n",
    "# ax2.plot(df[\"Step\"], [71] * df.shape[0], color=\"black\", label='71M Parameters')\n",
    "ax2.plot(df[\"Step\"], ([250] * 5000) + [98] * (df.shape[0] - 5000), label='98M Parameters (PaRaLight)',\n",
    "         linewidth=3)\n",
    "ax2.axvline(x=5000, color='r', linestyle='--')  # add vertical line at 5K\n",
    "# add ticks\n",
    "ax2.set_yticks([0, 100, 250])\n",
    "\n",
    "# Setting the labels for trainable parameters plot\n",
    "ax2.set_xlabel('Step', fontsize=13)\n",
    "ax2.set_ylabel('Trainable Params', fontsize=13)\n",
    "ax2.set_ylim([0, 300])  # limit y-axis\n",
    "\n",
    "# Set the fontsize and location of the legend\n",
    "# ax2.legend(fontsize='x-large', title_fontsize='40', loc='upper right', bbox_to_anchor=(1, 0.95))\n",
    "\n",
    "ax1.set_xlim([0, 20000])\n",
    "ax2.set_xlim([0, 20000])\n",
    "\n",
    "plt.tight_layout()  # Adjust layout to ensure non-overlapping\n",
    "plt.show\n",
    "\n",
    "# save pdf\n",
    "fig.savefig('250M_loss.pdf', bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
