{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# How to call the script that uses this data\n",
    "\n",
    "The code cell below writes `metrics.csv`; pass that file to the plotting script through `--direct_csv`:\n",
    "\n",
    "`python plotting/cost_perf.py --direct_csv metrics.csv --output_dir plots --show_x_label --show_title --title \"GPT-2 (14M) on SlimPajama\"`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build metrics.csv: the final validation loss from each training log, stored\n",
    "# next to the hardware metrics read from the CSV below.\n",
    "N_RUNS = 10  # number of training-log files read; also the number of metric rows kept\n",
    "\n",
    "random.seed(420)\n",
    "# NOTE(review): `a` is not referenced anywhere in the visible code; presumably it is\n",
    "# meant to be interpolated into the log path below -- confirm before removing it.\n",
    "a = random.randint(0, 120)\n",
    "\n",
    "val_losses = []\n",
    "for n in range(1, N_RUNS + 1):\n",
    "    # NOTE(review): this placeholder does not depend on `n`, so as written every\n",
    "    # iteration reads the same file; substitute the per-run path here.\n",
    "    file_name = \"<path to the training log parquet files>\"\n",
    "    df = pd.read_parquet(file_name)\n",
    "    # Keep the last non-missing entry of the \"Validation Loss\" column.\n",
    "    val_losses.append(df[\"Validation Loss\"].dropna().to_numpy()[-1])\n",
    "\n",
    "# Keep only the first N_RUNS rows so they line up one-to-one with val_losses.\n",
    "metrics = pd.read_csv(\n",
    "    \"<file with hardware metrics for architecture and batch size>\"\n",
    ").head(N_RUNS)\n",
    "if len(metrics) != len(val_losses):\n",
    "    # Fail with an explicit message instead of pandas' generic length-mismatch error.\n",
    "    raise ValueError(\n",
    "        f\"expected {len(val_losses)} hardware-metric rows, found {len(metrics)}\"\n",
    "    )\n",
    "metrics[\"val_loss\"] = val_losses\n",
    "metrics[[\"n_trainable\", \"val_loss\", \"median_loop_time\"]].to_csv(\"metrics.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
