{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e355b99d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from lib_project.notebook import setup_notebook\n",
    "setup_notebook(\"../../../../\")\n",
    "               \n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "acbd71c6",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "from lib_project.visualization import with_paper_style\n",
    "from defs import BASE_FIGURE_DIR\n",
    "from experiments.memorability.repeated_strings import results as res_util"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1206ca3-0050-4a98-9e42-355d6aeaab2c",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Shuffled Substrings: Substring Length\n",
    "\n",
    "Numbers in parenthesis denote the number of occurrences for each unique substring."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f47e4e10-76c4-413f-8af3-f4121d57dce8",
   "metadata": {},
   "source": [
    "## 1024 tokens, 26 token alphabet, 16 unique substrings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ec675f2-0cb1-4810-bec6-36cd28168fe0",
   "metadata": {},
   "outputs": [],
   "source": [
    "seed_ids = list(range(3))\n",
    "results = {\n",
    "    f\"SSL {substr_len}\": res_util.load(\n",
    "        f\"pyt-1b_a-26_sl-{substr_len}_ns-16\",\n",
    "        seed_ids,\n",
    "    )\n",
    "    for substr_len in [4, 8, 16, 32, 64]\n",
    "}\n",
    "res_util.show_results(results, \"Substring Length\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18994950-f9b1-4f8a-a06b-0b74e40b0346",
   "metadata": {},
   "source": [
    "## 1024 tokens, 7 token alphabet, 16 unique substrings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "828cccaf-9698-43bc-a472-c38174e132d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "seed_ids = list(range(3))\n",
    "results = {\n",
    "    f\"SSL {substr_len}\": res_util.load(\n",
    "        f\"pyt-1b_a-7_sl-{substr_len}_ns-16\",\n",
    "        seed_ids,\n",
    "    )\n",
    "    for substr_len in [4, 8, 16, 32, 64]\n",
    "}\n",
    "res_util.show_results(results, \"Substring Length\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "df4e1d6e-b8d2-4340-b86c-56888d5d493e",
   "metadata": {},
   "source": [
    "## 1024 tokens, 2 token alphabet, 16 unique substrings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe271e6c-df91-4d50-8129-7cbf6b667386",
   "metadata": {},
   "outputs": [],
   "source": [
    "seed_ids = list(range(3))\n",
    "results = {\n",
    "    f\"SSL {substr_len}\": res_util.load(\n",
    "        f\"pyt-1b_a-2_sl-{substr_len}_ns-16\",\n",
    "        seed_ids,\n",
    "    )\n",
    "    for substr_len in [4, 8, 16, 32, 64]\n",
    "}\n",
    "res_util.show_results(results, \"Substring Length\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4840399b-5edd-4901-ad46-abbb35d770b4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3341cec-0464-4638-a04a-ba103bad739c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Upload\n",
    "res_util.publish(\"num_unique_substrings\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de626a7a-9492-4fe0-ad94-b525e91d1c84",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
