{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e355b99d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Notebook bootstrap: setup_notebook is handed a path four directory levels up.\n",
    "# NOTE(review): presumably this is the repository root and the call makes the\n",
    "# project-local imports in the next cell resolvable - confirm in lib_project.notebook.\n",
    "from lib_project.notebook import setup_notebook\n",
    "setup_notebook(\"../../../../\")\n",
    "               \n",
    "# IPython autoreload in mode 2: modules are reloaded before each cell executes,\n",
    "# so edits to imported project code are picked up without restarting the kernel.\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "acbd71c6",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "from lib_project.visualization import with_paper_style\n",
    "from defs import BASE_FIGURE_DIR\n",
    "from experiments.memorability.repeated_strings import results as res_util"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1206ca3-0050-4a98-9e42-355d6aeaab2c",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Shuffled Substrings: Number of unique substrings\n",
    "\n",
    "Numbers in parentheses denote the number of occurrences of each unique substring."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f47e4e10-76c4-413f-8af3-f4121d57dce8",
   "metadata": {},
   "source": [
    "## 1024 tokens, 26 token alphabet, substring length 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ec675f2-0cb1-4810-bec6-36cd28168fe0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed ids 0, 1 and 2 are requested for every configuration loaded below.\n",
    "seed_ids = [*range(3)]\n",
    "\n",
    "# One entry per unique-substring count, keyed by an \"NUS <count>\" label.\n",
    "results = {\n",
    "    f\"NUS {count}\": res_util.load(f\"pyt-1b_a-26_sl-8_ns-{count}\", seed_ids)\n",
    "    for count in (8, 16, 32, 64, 128)\n",
    "}\n",
    "\n",
    "res_util.show_results(results, \"# Unique Substrings\", show_remaining=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "000efb31-1415-48a8-9c86-e464de209443",
   "metadata": {},
   "source": [
    "## 1024 tokens, 7 token alphabet, substring length 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "830aa2ad-153d-493f-8792-e395bc791403",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed ids 0, 1 and 2 are requested for every configuration loaded below.\n",
    "seed_ids = [*range(3)]\n",
    "\n",
    "# One entry per unique-substring count, keyed by an \"NUS <count>\" label.\n",
    "results = {\n",
    "    f\"NUS {count}\": res_util.load(f\"pyt-1b_a-7_sl-8_ns-{count}\", seed_ids)\n",
    "    for count in (8, 16, 32, 64, 128)\n",
    "}\n",
    "\n",
    "res_util.show_results(results, \"# Unique Substrings\", show_remaining=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aa7fe72a-b6c4-4c9c-a17a-614db94fb534",
   "metadata": {},
   "source": [
    "## 1024 tokens, 2 token alphabet, substring length 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00926e26-7483-413c-b74a-023277e054fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed ids 0, 1 and 2 are requested for every configuration loaded below.\n",
    "seed_ids = [*range(3)]\n",
    "\n",
    "# One entry per unique-substring count, keyed by an \"NUS <count>\" label.\n",
    "results = {\n",
    "    f\"NUS {count}\": res_util.load(f\"pyt-1b_a-2_sl-8_ns-{count}\", seed_ids)\n",
    "    for count in (8, 16, 32, 64, 128)\n",
    "}\n",
    "\n",
    "res_util.show_results(results, \"# Unique Substrings\", show_remaining=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3341cec-0464-4638-a04a-ba103bad739c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Upload: publish under the name \"num_unique_substrings\".\n",
    "# NOTE(review): what gets published and where is decided inside res_util.publish;\n",
    "# presumably this has an external side effect, so run it deliberately - confirm there.\n",
    "res_util.publish(\"num_unique_substrings\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de626a7a-9492-4fe0-ad94-b525e91d1c84",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
