{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BC1MPgj8iIXX",
        "outputId": "42b3b57c-aabe-483a-8318-64b60e480a36"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (2.14.4)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.0.2)\n",
            "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (18.1.0)\n",
            "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.7)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets) (2.2.2)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.32.3)\n",
            "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (4.67.1)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n",
            "Requirement already satisfied: multiprocess in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.15)\n",
            "Requirement already satisfied: fsspec>=2021.11.1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2021.11.1->datasets) (2025.3.2)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.15)\n",
            "Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.31.2)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (6.0.2)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.6.1)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.3.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.6.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.4.3)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.3.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.20.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.18.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.13.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets) (3.4.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets) (2.4.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->datasets) (2025.4.26)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2.9.0.post0)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
            "Collecting minference\n",
            "  Downloading minference-0.1.5.post1.tar.gz (181 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.0/182.0 kB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: transformers>=4.37.0 in /usr/local/lib/python3.11/dist-packages (from minference) (4.51.3)\n",
            "Requirement already satisfied: accelerate in /usr/local/lib/python3.11/dist-packages (from minference) (1.6.0)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from minference) (2.6.0+cu124)\n",
            "Requirement already satisfied: triton in /usr/local/lib/python3.11/dist-packages (from minference) (3.2.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (3.18.0)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (0.31.2)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (2.0.2)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (6.0.2)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (2024.11.6)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (2.32.3)\n",
            "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (0.21.1)\n",
            "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (0.5.3)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.37.0->minference) (4.67.1)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate->minference) (5.9.5)\n",
            "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch->minference) (4.13.2)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->minference) (3.4.2)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch->minference) (3.1.6)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch->minference) (2025.3.2)\n",
            "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->minference)\n",
            "  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->minference)\n",
            "  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->minference)\n",
            "  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->minference)\n",
            "  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->minference)\n",
            "  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->minference)\n",
            "  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-curand-cu12==10.3.5.147 (from torch->minference)\n",
            "  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch->minference)\n",
            "  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch->minference)\n",
            "  Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch->minference) (0.6.2)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch->minference) (2.21.5)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->minference) (12.4.127)\n",
            "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch->minference)\n",
            "  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch->minference) (1.13.1)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch->minference) (1.3.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch->minference) (3.0.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.37.0->minference) (3.4.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.37.0->minference) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.37.0->minference) (2.4.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers>=4.37.0->minference) (2025.4.26)\n",
            "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m127.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m96.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m60.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m43.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m111.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: minference\n",
            "  Building wheel for minference (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for minference: filename=minference-0.1.5.post1-cp311-cp311-linux_x86_64.whl size=2883810 sha256=bf23e18011b533d3cfa0e8198e19655fa56a9a79993c73b4492fd25abc6d15e3\n",
            "  Stored in directory: /root/.cache/pip/wheels/91/0b/f3/18448119bcb2903945859a65b06b9cb971f5a1623dc197f456\n",
            "Successfully built minference\n",
            "Installing collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, minference\n",
            "  Attempting uninstall: nvidia-nvjitlink-cu12\n",
            "    Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n",
            "    Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-curand-cu12\n",
            "    Found existing installation: nvidia-curand-cu12 10.3.6.82\n",
            "    Uninstalling nvidia-curand-cu12-10.3.6.82:\n",
            "      Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n",
            "  Attempting uninstall: nvidia-cufft-cu12\n",
            "    Found existing installation: nvidia-cufft-cu12 11.2.3.61\n",
            "    Uninstalling nvidia-cufft-cu12-11.2.3.61:\n",
            "      Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n",
            "  Attempting uninstall: nvidia-cuda-runtime-cu12\n",
            "    Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n",
            "    Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-cuda-nvrtc-cu12\n",
            "    Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n",
            "    Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-cuda-cupti-cu12\n",
            "    Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n",
            "    Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-cublas-cu12\n",
            "    Found existing installation: nvidia-cublas-cu12 12.5.3.2\n",
            "    Uninstalling nvidia-cublas-cu12-12.5.3.2:\n",
            "      Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n",
            "  Attempting uninstall: nvidia-cusparse-cu12\n",
            "    Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n",
            "    Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n",
            "      Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n",
            "  Attempting uninstall: nvidia-cudnn-cu12\n",
            "    Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n",
            "    Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n",
            "      Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n",
            "  Attempting uninstall: nvidia-cusolver-cu12\n",
            "    Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n",
            "    Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n",
            "      Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n",
            "Successfully installed minference-0.1.5.post1 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127\n",
            "Collecting fuzzywuzzy\n",
            "  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)\n",
            "Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)\n",
            "Installing collected packages: fuzzywuzzy\n",
            "Successfully installed fuzzywuzzy-0.18.0\n",
            "Collecting rouge\n",
            "  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.11/dist-packages (from rouge) (1.17.0)\n",
            "Downloading rouge-1.0.1-py3-none-any.whl (13 kB)\n",
            "Installing collected packages: rouge\n",
            "Successfully installed rouge-1.0.1\n",
            "Collecting flash_attn\n",
            "  Downloading flash_attn-2.7.4.post1.tar.gz (6.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from flash_attn) (2.6.0+cu124)\n",
            "Requirement already satisfied: einops in /usr/local/lib/python3.11/dist-packages (from flash_attn) (0.8.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (3.18.0)\n",
            "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (4.13.2)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (3.4.2)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (3.1.6)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (2025.3.2)\n",
            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (9.1.0.70)\n",
            "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (12.4.5.8)\n",
            "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (11.2.1.3)\n",
            "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (10.3.5.147)\n",
            "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (11.6.1.9)\n",
            "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (12.3.1.170)\n",
            "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (0.6.2)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (2.21.5)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (12.4.127)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (12.4.127)\n",
            "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (3.2.0)\n",
            "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch->flash_attn) (1.13.1)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch->flash_attn) (1.3.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch->flash_attn) (3.0.2)\n",
            "Building wheels for collected packages: flash_attn\n",
            "  Building wheel for flash_attn (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for flash_attn: filename=flash_attn-2.7.4.post1-cp311-cp311-linux_x86_64.whl size=187831595 sha256=58853b28a5a926cae14402bfd8d4d93a45ebf8f9e79533f37ab09d0d77a99c05\n",
            "  Stored in directory: /root/.cache/pip/wheels/3d/88/d8/284b89f56af7d5bf366b10d6b8e251ac8a7c7bf3f04203fb4f\n",
            "Successfully built flash_attn\n",
            "Installing collected packages: flash_attn\n",
            "Successfully installed flash_attn-2.7.4.post1\n"
          ]
        }
      ],
      "source": [
        "# install dependencies\n",
        "# if gpu out of ram then restart session\n",
        "!pip install datasets\n",
        "!pip install minference\n",
        "!pip install fuzzywuzzy\n",
        "!pip install rouge\n",
        "!pip install flash_attn"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "meaOXIvdNu11",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1730a9ef-b674-49c3-c3da-8f01d3c6d01e"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|\n",
            "    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|\n",
            "    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|\n",
            "    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|\n",
            "    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|\n",
            "\n",
            "    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
            "Enter your token (input will not be visible): \n",
            "Add token as git credential? (Y/n) Y\n",
            "Token is valid (permission: write).\n",
            "The token `llama_token` has been saved to /root/.cache/huggingface/stored_tokens\n",
            "\u001b[1m\u001b[31mCannot authenticate through git-credential as no helper is defined on your machine.\n",
            "You might have to re-authenticate when pushing to the Hugging Face Hub.\n",
            "Run the following command in your terminal in case you want to set the 'store' credential helper as default.\n",
            "\n",
            "git config --global credential.helper store\n",
            "\n",
            "Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.\u001b[0m\n",
            "Token has not been saved to git credential helper.\n",
            "Your token has been saved to /root/.cache/huggingface/token\n",
            "Login successful.\n",
            "The current active token is: `llama_token`\n"
          ]
        }
      ],
      "source": [
        "# login to hugginface account using access token, make sure you have access to llama and mistral models\n",
        "!huggingface-cli login"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#install dependencies\n",
        "!pip install tiktoken\n",
        "!pip install --upgrade datasets fsspec\n",
        "!rm -rf ~/.cache/huggingface/datasets"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ZYIaUgfY7m1c",
        "outputId": "3af0c938-143c-406c-b2ef-dbf740f2a857"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: tiktoken in /usr/local/lib/python3.11/dist-packages (0.9.0)\n",
            "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/dist-packages (from tiktoken) (2024.11.6)\n",
            "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.11/dist-packages (from tiktoken) (2.32.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->tiktoken) (3.4.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->tiktoken) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->tiktoken) (2.4.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->tiktoken) (2025.4.26)\n",
            "Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (2.14.4)\n",
            "Collecting datasets\n",
            "  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (2025.3.2)\n",
            "Collecting fsspec\n",
            "  Downloading fsspec-2025.5.0-py3-none-any.whl.metadata (11 kB)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets) (3.18.0)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.0.2)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (18.1.0)\n",
            "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.7)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets) (2.2.2)\n",
            "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.32.3)\n",
            "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.11/dist-packages (from datasets) (4.67.1)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n",
            "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.15)\n",
            "  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)\n",
            "Requirement already satisfied: huggingface-hub>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.31.2)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (6.0.2)\n",
            "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (3.11.15)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.24.0->datasets) (4.13.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.4.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2.4.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2025.4.26)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2.9.0.post0)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2.6.1)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.3.2)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (25.3.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.6.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (6.4.3)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (0.3.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.20.0)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
            "Downloading datasets-3.6.0-py3-none-any.whl (491 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m491.5/491.5 kB\u001b[0m \u001b[31m36.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m193.6/193.6 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: fsspec, datasets\n",
            "  Attempting uninstall: fsspec\n",
            "    Found existing installation: fsspec 2025.3.2\n",
            "    Uninstalling fsspec-2025.3.2:\n",
            "      Successfully uninstalled fsspec-2025.3.2\n",
            "  Attempting uninstall: datasets\n",
            "    Found existing installation: datasets 2.14.4\n",
            "    Uninstalling datasets-2.14.4:\n",
            "      Successfully uninstalled datasets-2.14.4\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2025.3.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed datasets-3.6.0 fsspec-2025.3.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Single layer attention approximation BalanceKV"
      ],
      "metadata": {
        "id": "F0YQ1esT06dg"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "id": "az6R2Nb7wTli"
      },
      "outputs": [],
      "source": [
        "## next, execute this cell\n",
        "\n",
        "import time\n",
        "import math\n",
        "import re\n",
        "import string\n",
        "from collections import Counter\n",
        "import torch\n",
        "import transformers\n",
        "from typing import List, Optional\n",
        "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
        "from transformers.models.llama.modeling_llama import apply_rotary_pos_emb, repeat_kv\n",
        "from datasets import load_dataset\n",
        "from flash_attn import flash_attn_func\n",
        "\n",
        "\n",
        "\n",
        "DATASET2PROMPT = {\"triviaqa\": \"Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\\n\\n{context}\\n\\n{input}\"}\n",
        "DATASET2MAXLEN = {\"triviaqa\": 32,  }\n",
        "\n",
        "\n",
        "def avg_stddev(data):\n",
        "    average = sum(data) / len(data)\n",
        "    variance = sum((x - average) ** 2 for x in data) / len(data)\n",
        "    std_dev = math.sqrt(variance)\n",
        "    return average,std_dev\n",
        "\n",
        "\n",
        "def normalize_answer(s):\n",
        "    \"\"\"Lower text and remove punctuation, articles and extra whitespace.\"\"\"\n",
        "\n",
        "    def remove_articles(text):\n",
        "        return re.sub(r\"\\b(a|an|the)\\b\", \" \", text)\n",
        "\n",
        "    def white_space_fix(text):\n",
        "        return \" \".join(text.split())\n",
        "\n",
        "    def remove_punc(text):\n",
        "        exclude = set(string.punctuation)\n",
        "        return \"\".join(ch for ch in text if ch not in exclude)\n",
        "\n",
        "    def lower(text):\n",
        "        return text.lower()\n",
        "\n",
        "    return white_space_fix(remove_articles(remove_punc(lower(s))))\n",
        "\n",
        "\n",
        "def truncate_input(input: list, max_length: int, manner=\"middle\"):\n",
        "    if max_length < 0:\n",
        "        return input\n",
        "    if len(input) <= max_length:\n",
        "        return input\n",
        "    if manner == \"middle\":\n",
        "        split = max_length // 2\n",
        "        return input[0:split] + input[-split:]\n",
        "    else:\n",
        "        return None\n",
        "\n",
        "\n",
        "def truncate_by_tokens(input, tok, max_tokens, manner: str = \"middle\"):\n",
        "    tokens = tok.encode(input)\n",
        "    len_before = len(tokens)\n",
        "    tokens = truncate_input(tokens, max_length=max_tokens, manner=manner)\n",
        "    len_after = len(tokens)\n",
        "    assert len_after <= len_before\n",
        "    assert len_after <= max_tokens or max_tokens < 0\n",
        "    return tokens\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "def indexing(key, sort_idx, block_size, value=None):\n",
        "  indices = sort_idx.unsqueeze(-1).expand(-1, -1, -1, key.shape[-1])\n",
        "  new_n = math.ceil(sort_idx.shape[-1] / block_size) * block_size\n",
        "  if new_n < sort_idx.shape[-1]:\n",
        "    import pdb; pdb.set_trace();\n",
        "  out_key = torch.nn.functional.pad(key.gather(2, indices), (0,0,0,new_n-sort_idx.shape[-1]), mode='constant', value=0.)\n",
        "  out_value = None\n",
        "  if value is not None:\n",
        "    out_value = torch.nn.functional.pad(value.gather(2, indices), (0,0,0,new_n-sort_idx.shape[-1]), mode='constant', value=0.)\n",
        "  return out_key, out_value\n",
        "\n",
        "\n",
        "def indexing(key, sort_idx, block_size, value=None):\n",
        "  indices = sort_idx.unsqueeze(-1).expand(-1, -1, -1, key.shape[-1])\n",
        "  new_n = math.ceil(sort_idx.shape[-1] / block_size) * block_size\n",
        "  if new_n < sort_idx.shape[-1]:\n",
        "    import pdb; pdb.set_trace();\n",
        "  out_key = torch.nn.functional.pad(key.gather(2, indices), (0,0,0,new_n-sort_idx.shape[-1]), mode='constant', value=0.)\n",
        "  out_value = None\n",
        "  if value is not None:\n",
        "    out_value = torch.nn.functional.pad(value.gather(2, indices), (0,0,0,new_n-sort_idx.shape[-1]), mode='constant', value=0.)\n",
        "  return out_key, out_value\n",
        "\n",
        "def balanced_walk(key, rng, gamma_, temp_, beta_, itrs, block_size, value=None, sort_idx=None, query=None):\n",
        "    b, h, n, d = key.shape\n",
        "    if type(gamma_) != list:\n",
        "        gamma_ = [gamma_] * itrs\n",
        "    const_denom = 0.025 # change this to 0.00 to change the kernel back\n",
        "\n",
        "    if type(block_size) != list:\n",
        "        block_size = [block_size] * itrs\n",
        "    weight_idx = None\n",
        "    for t in range(itrs): #write range(1, itrs) to check everything still works\n",
        "        if sort_idx is not None:\n",
        "            key_sorted, value_sorted = indexing(key, sort_idx, block_size[t], value)\n",
        "            key_sorted = key_sorted.view(b, h, -1, block_size[t], d)\n",
        "            if value is not None:\n",
        "                weight_idx_padded = torch.nn.functional.pad(weight_idx, (0, math.ceil(n / block_size[t]) * block_size[t] - weight_idx.shape[-1]))\n",
        "                value_sorted = value_sorted*weight_idx_padded.unsqueeze(-1)\n",
        "                value_sorted = value_sorted.view(b, h, -1, block_size[t], d)\n",
        "        else:\n",
        "            new_n = math.ceil(n / block_size[t]) * block_size[t]\n",
        "            key_sorted = torch.nn.functional.pad(key, (0,0,0,new_n-n), mode='constant', value=0.).view(b, h, -1, block_size[t], d)\n",
        "            value_sorted = None\n",
        "            if value is not None:\n",
        "                value_sorted = torch.nn.functional.pad(value, (0,0,0,new_n-n), mode='constant', value=0.).view(b, h, -1, block_size[t], d)\n",
        "\n",
        "        normal_keys = key_sorted - torch.mean(key_sorted, dim=-2, keepdim=True)\n",
        "\n",
        "        if query is not None:\n",
        "            query_key_correlation = torch.softmax(torch.einsum('b h n d,b h s m d->b h s n m',query[:,::4,:,:],normal_keys),dim=-1).mean(-2,keepdim=True)\n",
        "            kernel_ = query_key_correlation*query_key_correlation.transpose(-1,-2)\n",
        "        else:\n",
        "            kernel_ = torch.exp(temp_ * torch.einsum('...nd,...sd->...ns', normal_keys, normal_keys)/math.sqrt(d) - beta_)\n",
        "        if value is not None:\n",
        "            kernel_ *= (1e-8 + torch.einsum('...nd,...sd->...ns', value_sorted, value_sorted)+const_denom)\n",
        "\n",
        "        signs = torch.zeros(kernel_.shape[:4], dtype=torch.int16, device=kernel_.device)\n",
        "        signs[:, :, :, 0] = 1\n",
        "        rand_tensor = torch.rand(signs.shape, generator=rng, device=key.device)\n",
        "\n",
        "        for i in range(1, kernel_.shape[3]):\n",
        "            partial_inner_prod = (kernel_[:, :, :, i, :] * signs).sum(dim=-1)\n",
        "            samp_prb = 0.5 - gamma_[t] * partial_inner_prod\n",
        "            signs[:, :, :, i] = 2 * (rand_tensor[:, :, :, i] < samp_prb) - 1\n",
        "\n",
        "        signs = signs.view(b, h, -1)[:, :, :n]\n",
        "\n",
        "        if signs.shape[-1]==0: # simply to deal with n==0\n",
        "            sort_idx = signs[:, :, :0]\n",
        "            weigth_idx = signs[:, :, :0]\n",
        "            break\n",
        "        cumsum_neg = (signs == -1).cumsum(dim=-1)\n",
        "        cumsum_pos = (signs == 1).cumsum(dim=-1)\n",
        "\n",
        "        c_neg = torch.argmax((cumsum_neg == n//2).to(torch.int64), dim=-1) # Shape (b, h)\n",
        "        c_pos = torch.argmax((cumsum_pos == n//2).to(torch.int64), dim=-1) # Shape (b, h)\n",
        "        c = torch.maximum(c_neg, c_pos)\n",
        "\n",
        "        c = c.to(signs.device)\n",
        "\n",
        "        weight = signs\n",
        "\n",
        "        # Create an index tensor `[0, 1, ..., n-1]` for comparison\n",
        "        indices = torch.arange(signs.shape[2], device=signs.device).view(1, 1, -1)\n",
        "        # Set all values after `c[a, b]` to `1`\n",
        "        mask_after_c = indices > c.unsqueeze(-1)  # True for all d > c[a, b]\n",
        "        weight[mask_after_c] = torch.abs(weight[mask_after_c])  # Set those indices to `1`\n",
        "        # Identify where `signs[a, b, c[a, b]] == 1`\n",
        "        mask_flip_needed = (signs.gather(2, c.unsqueeze(-1)) == 1).squeeze(-1)\n",
        "        # Create mask for all indices `<= c[a, b]`\n",
        "        mask_before_c = indices <= c.unsqueeze(-1)\n",
        "        weight[mask_before_c] *= 2\n",
        "        # Apply flipping only when `signs[a, b, c] == 1`\n",
        "        flip_mask = mask_before_c & mask_flip_needed.unsqueeze(-1)\n",
        "        weight[flip_mask] *= -1  # Flip selected values\n",
        "\n",
        "        weight_argsort = torch.argsort(-weight, dim=-1, stable=True)\n",
        "\n",
        "        n = n//2\n",
        "        if sort_idx is None:\n",
        "            sort_idx = weight_argsort[:, :, :n]\n",
        "            weight_idx = weight.gather(-1, weight_argsort[:, :, :n])\n",
        "        else:\n",
        "            sort_idx = sort_idx.gather(2, weight_argsort[:, :, :n])\n",
        "            weigth_idx_1 = weight.gather(-1, weight_argsort[:, :, :n])\n",
        "            weight_idx = weight_idx.gather(-1, weight_argsort[:, :, :n])\n",
        "            weight_idx = weight_idx*weigth_idx_1\n",
        "\n",
        "    return sort_idx, weight_idx\n",
        "def snap_kv(query_states, key_states, value_states, capacity_option='default', window_size=32, pooling='avgpool', kernel_size=5, topk_num=100):\n",
        "\n",
        "    seq_len = key_states.shape[2]\n",
        "    if capacity_option in ['default', 'two_stage_bw']:\n",
        "        max_capacity_prompt = max(int(seq_len * 3.875 / 64), window_size+4)\n",
        "\n",
        "    if query_states.shape[1] != key_states.shape[1]:\n",
        "        num_key_value_groups = query_states.shape[1] // key_states.shape[1]\n",
        "        kk = repeat_kv(key_states, num_key_value_groups)\n",
        "        vv = repeat_kv(value_states, num_key_value_groups)\n",
        "    else:\n",
        "        kk = key_states\n",
        "        vv = value_states\n",
        "\n",
        "    dim = query_states.shape[-1]\n",
        "\n",
        "    attn_weights = query_states[..., -window_size :, :] @ kk.transpose(2,3) / dim**0.5\n",
        "    mask = torch.full((window_size, window_size), torch.finfo(attn_weights.dtype).min, device=attn_weights.device)\n",
        "    mask_cond = torch.arange(mask.size(-1), device=attn_weights.device)\n",
        "    mask.masked_fill_(mask_cond < (mask_cond + 1).view(mask.size(-1), 1), 0)\n",
        "    mask = mask.to(attn_weights.device)\n",
        "    attention_mask = mask[None, None, :, :]\n",
        "\n",
        "    attn_weights[:, :, -window_size:, -window_size:] += attention_mask\n",
        "\n",
        "    attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)\n",
        "    attn_weights_sum = attn_weights[:, :, -window_size :, : -window_size].sum(dim=-2)\n",
        "    if pooling == \"avgpool\":\n",
        "        attn_cache = torch.nn.functional.avg_pool1d(attn_weights_sum, kernel_size=kernel_size, padding=kernel_size // 2, stride=1)\n",
        "    elif pooling == \"maxpool\":\n",
        "        attn_cache = torch.nn.functional.max_pool1d(attn_weights_sum, kernel_size=kernel_size, padding=kernel_size // 2, stride=1)\n",
        "    else:\n",
        "        raise ValueError(\"Pooling method not supported\")\n",
        "    if capacity_option == 'two_stage_bw':\n",
        "        indices = attn_cache.topk(2*(max_capacity_prompt - window_size), dim=-1).indices\n",
        "    else:\n",
        "        indices = attn_cache.topk(topk_num, dim=-1).indices\n",
        "    return indices\n",
        "    # indices = indices.unsqueeze(-1).expand(-1, -1, -1, dim)\n",
        "    # k_past_compress = kk[:, :, : -window_size, :].gather(dim=2, index=indices)\n",
        "    # v_past_compress = vv[:, :, : -window_size, :].gather(dim=2, index=indices)\n",
        "    # k_cur = kk[:, :, -window_size :, :]\n",
        "    # v_cur = vv[:, :, -window_size :, :]\n",
        "    # if capacity_option == 'two_stage_bw':\n",
        "    #     return k_past_compress, k_cur, v_past_compress, v_cur\n",
        "    # return torch.cat([k_past_compress, k_cur], dim=2), torch.cat([v_past_compress, v_cur], dim=2)\n",
        "\n",
        "\n",
        "\n",
        "def manual_forward_llama(\n",
        "    model,\n",
        "    input_ids,\n",
        "    kv_cache=None, position_ids=None, cache_position=None, num_logits_to_keep=0,\n",
        "    kv_type='bw',\n",
        "    unif = False,\n",
        "    layer = 0,\n",
        "    **kwargs\n",
        "):\n",
        "    balancing_alg = balanced_walk\n",
        "    hh = model.model.embed_tokens(input_ids)\n",
        "    if position_ids is None:\n",
        "        position_ids = torch.arange(len(input_ids[0]), device=input_ids.device).unsqueeze(0)\n",
        "    if int(transformers.__version__.split(\".\")[1]) >= 48:\n",
        "        position_embeddings = model.model.rotary_emb(hh, position_ids)\n",
        "\n",
        "    output_attn = None\n",
        "    for i, decoder_layer in enumerate(model.model.layers):\n",
        "        # hh = decoder_layer(hh, position_ids=position_ids)[0]\n",
        "        res = hh.detach().clone()\n",
        "        hh = decoder_layer.input_layernorm(hh)\n",
        "\n",
        "        # h1, _, kv = decoder_layer.self_attn(hh, position_ids=position_ids, use_cache=False)\n",
        "        # <===\n",
        "        q_len = hh.shape[1]\n",
        "        kv_len = q_len\n",
        "        qq = decoder_layer.self_attn.q_proj(hh).reshape(1, q_len, -1, 128).transpose(1, 2)\n",
        "        kk = decoder_layer.self_attn.k_proj(hh).reshape(1, kv_len, 8, 128).transpose(1, 2)\n",
        "        vv = decoder_layer.self_attn.v_proj(hh).reshape(1, kv_len, 8, 128).transpose(1, 2)\n",
        "\n",
        "        if int(transformers.__version__.split(\".\")[1]) >= 48:\n",
        "            cos, sin = position_embeddings\n",
        "        else:\n",
        "            cos, sin = decoder_layer.self_attn.rotary_emb(vv, position_ids)\n",
        "\n",
        "        qq, kk = apply_rotary_pos_emb(qq, kk, cos, sin)\n",
        "        d = qq.shape[-1]\n",
        "\n",
        "        if q_len > 1:\n",
        "            attn_output = flash_attn_func(qq.transpose(1,2), kk.transpose(1,2), vv.transpose(1,2), causal=True)\n",
        "        if i==layer:\n",
        "\n",
        "          rng = model.config.rng\n",
        "          gamma = model.config.gamma\n",
        "          temp = model.config.temp\n",
        "          beta = model.config.beta\n",
        "          itrs = model.config.itrs\n",
        "          block_size = model.config.block_size\n",
        "          recent_size = model.config.recent_size\n",
        "\n",
        "          kk_first = kk[:,:,:recent_size,:]\n",
        "          vv_first = vv[:,:,:recent_size,:]\n",
        "          kk_old = kk[:,:,recent_size:-recent_size,:]\n",
        "          vv_old = vv[:,:,recent_size:-recent_size,:]\n",
        "          kk_recent = kk[:,:,-recent_size:,:]\n",
        "          vv_recent = vv[:,:,-recent_size:,:]\n",
        "\n",
        "          start_time = time.time()\n",
        "          #start_time_bw = time.time()\n",
        "          sort_idx_bw, weight_idx_bw = balanced_walk(key=kk_old, value= vv_old,rng=rng, gamma_=gamma, temp_=temp, beta_=beta, itrs=itrs, block_size=block_size)\n",
        "          #sort_idx_unif, weight_idx_unif = balancing_alg(key=kk_old, value= vv_old,rng=rng, gamma_=[0.0,0.0,0.0,0.0], temp_=temp, beta_=beta, itrs=itrs, block_size=block_size,unif=unif)\n",
        "          #end_time_bw = time.time()\n",
        "          #total_time_bw = end_time_bw - start_time_bw\n",
        "\n",
        "          #start_time_other = time.time()\n",
        "          bsz, n_heads, _, dim = kk.shape\n",
        "          n_centers_bw = sort_idx_bw.shape[-1]\n",
        "\n",
        "          kk_old_bw = torch.gather(kk_old, 2, sort_idx_bw.unsqueeze(-1).expand(bsz, n_heads, n_centers_bw, dim))\n",
        "          vv_old_bw = torch.gather(vv_old, 2, sort_idx_bw.unsqueeze(-1).expand(bsz, n_heads, n_centers_bw, dim))\n",
        "\n",
        "          if weight_idx_bw != None:#simply to deal with n==0\n",
        "            #if balancing_alg != balanced_walk:\n",
        "            weight_idx_bw_num = weight_idx_bw/2**(itrs)\n",
        "            #else:\n",
        "              #weight_idx_bw_num = weight_idx_bw\n",
        "            vv_old_bw_num = vv_old_bw*weight_idx_bw_num.unsqueeze(-1)\n",
        "            vv_old_bw_num = (vv_old_bw_num).to(torch.bfloat16)\n",
        "          else:\n",
        "            vv_old_bw_num = vv_old_bw\n",
        "\n",
        "          kk_selected_bw_num = torch.cat((kk_first,torch.cat([kk_old_bw]*(2**(itrs)),dim=2),kk_recent),dim=2)\n",
        "          vv_selected_bw_num = torch.cat((vv_first,torch.cat([vv_old_bw_num]*(2**(itrs)),dim=2),vv_recent),dim=2)\n",
        "\n",
        "          kk_selected_bw = torch.cat((kk_first, torch.cat([kk_old_bw]*(2**(itrs)),dim=2) , kk_recent),dim=2)\n",
        "          vv_selected_bw = torch.cat((vv_first, torch.cat([vv_old_bw]*(2**(itrs)),dim=2), vv_recent),dim=2)\n",
        "\n",
        "\n",
        "          if weight_idx_bw == None: #simply to deal with n==0\n",
        "             qq_latest = qq[:,:,-recent_size:,:]\n",
        "             attn_output_bw = flash_attn_func(qq_latest.transpose(1,2), kk_selected_bw_num.transpose(1,2), vv_selected_bw_num.transpose(1,2), causal=True)\n",
        "             end_time = time.time()\n",
        "             total_time = end_time - start_time\n",
        "             attn_output_exact = flash_attn_func(qq_latest.transpose(1,2), kk.transpose(1,2), vv.transpose(1,2), causal=True)\n",
        "             print(f\"{attn_output_bw}, {attn_output_bw}, {attn_output_bw}, {attn_output_exact} the loop happened\")\n",
        "             return attn_output_bw, attn_output_bw, total_time\n",
        "\n",
        "          weight_idx_bw =  torch.cat([weight_idx_bw_num]*(2**(itrs)),dim=2)\n",
        "          weight_idx_first = torch.ones(kk_first.shape[:3], dtype=torch.int16, device=weight_idx_bw.device)\n",
        "          weight_idx_recent = torch.ones(kk_recent.shape[:3], dtype=torch.int16, device=weight_idx_bw.device)\n",
        "          #end_time_other = time.time()\n",
        "          #total_time_other = end_time_other - start_time_other\n",
        "\n",
        "          #n_centers_unif = sort_idx_unif.shape[-1]\n",
        "\n",
        "          #kk_old_unif = torch.gather(kk_old, 2, sort_idx_unif.unsqueeze(-1).expand(bsz, n_heads, n_centers_unif, dim))\n",
        "          #vv_old_unif = torch.gather(vv_old, 2, sort_idx_unif.unsqueeze(-1).expand(bsz, n_heads, n_centers_unif, dim))\n",
        "\n",
        "          #if balancing_alg != balanced_walk:\n",
        "          #  weight_idx_unif = weight_idx_unif/2**(itrs)\n",
        "          #else:\n",
        "          #  weight_idx_unif = weight_idx_unif\n",
        "\n",
        "          #vv_old_unif = vv_old_unif*weight_idx_unif.unsqueeze(-1)\n",
        "          #vv_old_unif = (vv_old_unif).to(torch.bfloat16)\n",
        "\n",
        "\n",
        "          #kk_selected_unif = torch.cat((kk_first,torch.cat([kk_old_unif]*(2**(itrs)),dim=2),kk_recent),dim=2)\n",
        "          #vv_selected_unif = torch.cat((vv_first,torch.cat([vv_old_unif]*(2**(itrs)),dim=2),vv_recent),dim=2)\n",
        "\n",
        "          qq_latest = qq[:,:,-recent_size:,:]\n",
        "\n",
        "\n",
        "          #end_time_other = time.time()\n",
        "          #total_time_other = end_time_other - start_time_other\n",
        "\n",
        "          attn_output_bw = flash_attn_func(qq_latest.transpose(1,2), kk_selected_bw_num.transpose(1,2), vv_selected_bw_num.transpose(1,2), causal=True)\n",
        "          end_time=time.time()\n",
        "          total_time = end_time - start_time\n",
        "          #attn_output_unif = flash_attn_func(qq_latest.transpose(1,2), kk_selected_unif.transpose(1,2), vv_selected_unif.transpose(1,2), causal=True)\n",
        "          attn_output_exact = flash_attn_func(qq_latest.transpose(1,2), kk.transpose(1,2), vv.transpose(1,2), causal=True)\n",
        "\n",
        "          return attn_output_bw,attn_output_exact, total_time\n",
        "          #, total_time_bw, total_time_other\n",
        "\n",
        "\n",
        "\n",
        "        attn_output = attn_output.contiguous().view(qq.shape[0], qq.shape[2], -1)\n",
        "        hh = decoder_layer.self_attn.o_proj(attn_output)\n",
        "        # ===>\n",
        "        hh = res + hh\n",
        "\n",
        "        res = hh.detach().clone()\n",
        "        hh = decoder_layer.post_attention_layernorm(hh)\n",
        "        hh = decoder_layer.mlp(hh)\n",
        "        hh = res + hh\n",
        "\n",
        "\n",
        "\n",
        "@torch.no_grad()\n",
        "def greedy_generate(self, input_ids, max_new_tokens, eos_token_id=128009, kv_type=\"bw\", layer = 0, unif = False,**kwargs):\n",
        "    position_ids = torch.arange(input_ids.shape[-1], device=input_ids.device).unsqueeze(0)\n",
        "    attn_bw,attn_exact,_ = manual_forward_llama(self, input_ids, position_ids=position_ids, num_logits_to_keep=1,layer=layer, kv_type=kv_type,unif=unif)\n",
        "\n",
        "\n",
        "    return attn_bw,attn_exact\n",
        "\n",
        "\n",
        "def main(gamma_1=4,gamma_2 = 4 ,itrs = 2,layer=0,temp=0.1,model_name = \"meta-llama/Llama-3.1-8B-Instruct\"):\n",
        "\n",
        "    layer = layer -1\n",
        "    model_name = model_name\n",
        "    print(f\"model : {model_name}\")\n",
        "\n",
        "    # Load the model and tokenizer\n",
        "    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)\n",
        "    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=True)\n",
        "    model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto', torch_dtype=torch.bfloat16, _attn_implementation='flash_attention_2')\n",
        "    model = model.eval().requires_grad_(False)\n",
        "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
        "    model = model.to(device)\n",
        "    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
        "\n",
        "\n",
        "    model.model.config.gamma = [gamma_1,gamma_2,gamma_2,gamma_2]\n",
        "    model.model.config.beta = 0.\n",
        "    model.model.config.temp = temp\n",
        "    model.model.config.block_size = [256,256,256,256]\n",
        "    model.model.config.itrs = itrs\n",
        "    model.model.config.recent_size = 256\n",
        "    seeds = [42,7,8,17,98,64,27,32,81,44]\n",
        "    dataset = \"triviaqa\"\n",
        "    print(f\"dataset: {dataset}\")\n",
        "    prompt_format = DATASET2PROMPT[dataset]\n",
        "    maxlen = DATASET2MAXLEN[dataset]\n",
        "\n",
        "    examples = load_dataset('THUDM/LongBench', f\"{dataset}_e\", split='test')\n",
        "    kv_type = \"bw\"\n",
        "    max_input_length = 100_000\n",
        "    errors_bw_global = []\n",
        "    errors_snapkv_global = []\n",
        "    for i,seed in enumerate(seeds):\n",
        "        print(f\"current seed number:{i+1}\")\n",
        "        model.model.config.rng = torch.Generator('cuda').manual_seed(seed)\n",
        "        errors_bw = []\n",
        "        errors_snapkv = []\n",
        "\n",
        "\n",
        "        for i, eg in enumerate(examples):\n",
        "\n",
        "\n",
        "          # input_text = prompt_format.format(**eg)\n",
        "          input_text = prompt_format.format(**eg)\n",
        "          msgs = [dict(role=\"system\", content=input_text)]\n",
        "          input_tokens = tokenizer.apply_chat_template(msgs, add_generation_prompt=True)\n",
        "\n",
        "\n",
        "          input_tokens = truncate_by_tokens(input_text, tokenizer, max_input_length)\n",
        "          input_tensors = {\"input_ids\": torch.tensor(input_tokens).unsqueeze(0).to(device)}\n",
        "          seq_len = len(input_tokens)\n",
        "\n",
        "          terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(\"<|eot_id|>\")]\n",
        "          attn_bw,exact_attn= greedy_generate(model, input_tensors['input_ids'], max_new_tokens=maxlen, eos_token_id=terminators, kv_type=\"bw\",layer = layer, return_dict_in_generate=True)\n",
        "          # prompt = template.replace('$DOC$', context.strip()).replace('$Q$', item['question'].strip()).replace('$C_A$', item['choice_A'].strip()).replace('$C_B$', item['choice_B'].strip()).replace('$C_C$', item['choice_C'].strip()).replace('$C_D$', item['choice_D'].strip())\n",
        "          # input_ids = tokenizer.encode(prompt)\n",
        "          # if len(input_ids) > max_len:\n",
        "          #   input_ids = input_ids[:max_len//2] + input_ids[-max_len//2:]\n",
        "          #   prompt = tokenizer.decode(input_ids, skip_special_tokens=True)\n",
        "          # msgs = [dict(role=\"system\", content=input_text)]\n",
        "          # input_tokens = tokenizer.apply_chat_template(msgs, add_generation_prompt=True)\n",
        "\n",
        "\n",
        "          # input_tokens = truncate_by_tokens(input_text, tokenizer, max_input_length)\n",
        "          # input_tensors = {\"input_ids\": torch.tensor(input_tokens).unsqueeze(0).to(model.device)}\n",
        "          # seq_len = len(input_tokens)\n",
        "\n",
        "          # terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(\"<|eot_id|>\")]\n",
        "          # attn_bw,exact_attn = greedy_generate(model, input_tensors['input_ids'], max_new_tokens=maxlen, eos_token_id=terminators, kv_type=\"bw\",layer = layer, return_dict_in_generate=True)\n",
        "          denominator = torch.clamp(exact_attn, min=1e-8)\n",
        "\n",
        "          error_bw = (torch.norm(input = attn_bw - exact_attn, dim=-1)/torch.norm(input = denominator,dim=-1)).mean()\n",
        "          # error_unif = (torch.norm(input = attn_unif - exact_attn, dim=-1)/torch.norm(input = denominator, dim=-1)).mean()\n",
        "\n",
        "          errors_bw.append(error_bw.to(device='cpu').item())\n",
        "          # errors_unif.append(error_unif.to(device='cpu').item())\n",
        "\n",
        "\n",
        "\n",
        "          if (i%100==0 and i>0) or i==len(examples)-1:\n",
        "            print(f\"   layer:{layer}, seed:{seed} | examples processed:{i}/{len(examples)}| avg bw error so far:{sum(errors_bw)/len(errors_bw)} \")\n",
        "\n",
        "          torch.cuda.empty_cache()\n",
        "\n",
        "        torch.cuda.empty_cache()\n",
        "\n",
        "        errors_bw_global.append(sum(errors_bw)/len(errors_bw))\n",
        "\n",
        "\n",
        "    return errors_bw_global"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "aXfD1TdDNp0F",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "86ae518814844bdf8b1a3534441eece7",
            "afc675d486ee4ad1ba659f0d4b920d89",
            "ce84c4db5488462999dae33742653cf6",
            "d23f4feefe62453887c1c2541e3e4941",
            "68f3d17985b248b891a971fab3317747",
            "f1a4f12e9c824736a0ccddde6b99c5f9",
            "d534992e98994cb6bd7111c1d6f670d6",
            "c2f79fec3eb84d389b4cd364b92c56c5",
            "90232c78b22f489b83eb2fca0a946ff3",
            "42ad19c70226458b92cd058a1083e1d7",
            "1c20b868083547cd844eaa304d65aa0d",
            "b2f7dcb3d5e4471bbc331dc746508a90",
            "c598a16f35994714b0fc44560c460783",
            "86c1211f02fb482d8d02b7f84a303b63",
            "a263de88100145ff9904645650dcc347",
            "e9695522cdbb4bdabf62e1e56c2fe743",
            "90ac52eef181498bbc475cb8c6aaf1df",
            "1f362bdc7cd542cfb4d435b530ff5fab",
            "c4417d91d0cd4a7b8c2f94e3d5f98779",
            "f3c6924c34ae416d8f9ba6e3e10d0977",
            "27c4e3ab2eb64f8faaf626b5b8d23aae",
            "0c8fe1abbffd4d4291ff93b10a0bb8b4",
            "7f5b752e3bb242f1a32cef3bbe2eb37c",
            "49e64a7c31884f67b9ab7a0ea2b53bf8",
            "b2a0ab02df6444a58e1c212f28ae2a99",
            "47fd6f3d0eac4df38c3af40c231e5340",
            "bfc4e04f0c844c00ae7f99772b73e0a5",
            "a958cd8e8841446691784f51812d551c",
            "03570376b0604e43b070a451343ffb41",
            "70d6dcf1f41242308fecf9e75f98d0aa",
            "27c5bae8b3514749b5e070a4357e8b9b",
            "a692df7644c9461280e4728e5bf7c465",
            "56413ba2c670489fb7855b1ce3ca7083",
            "a4ae7393abe1497a9bb5433a7591debc",
            "ab6f631c0ef24cdfbf6a184026440e1e",
            "ab9b481712e94df2b62a0fe1ba42b705",
            "3e17dad00ba9409788a68f069dbecf4c",
            "665b55d95e65482e863f194d237c44c3",
            "6b752c52601d4c5285040e168c27371b",
            "4d296bf86e4b4c92a526ea4ff59251cf",
            "f3a0a202bacf4350b21a7df6d286fa42",
            "94704493739449578701dea4e0571cce",
            "07601aa2489c4d378b84e9061502e80a",
            "0e71cf74746f4ceb922ece272f461a16",
            "b083d7656f7040b3bc4bbf1d98800de3",
            "9bd02fc74325439f8605e5bee07683c5",
            "f9f98cdf0af34e61a27f952383a60864",
            "66688b73e69c4cc18ad92a7175cb040e",
            "cb2a7a923807429189877bc6cffbcd73",
            "a3ad5255434641179d044795284139d6",
            "818e1932af134ed183d66e737bb0eb9b",
            "9f06ea276e2d46228f9d1eca64b28fde",
            "1b95aa5decfa4e4d81a2f7e7a08624da",
            "0db3a9eafa684dd9822fe5de71bb3ea8",
            "faf833c5fc4641418d5900a69c8b3894",
            "295de9a6e4d64230a2187c553f14ec77",
            "aab0619e346c494cb289bdff69ac118c",
            "7efe7a2a124b48fb9b3f8ee7cf58e5a0",
            "4ae4a1400296437c977b4235dfd1f5cb",
            "17bb593523cc468e9c06150141d3fea9",
            "01bf667168f845bf81cb04aebe65b42a",
            "4a2c303f2f394ba3b898e3643d1948de",
            "fb0731ea21d241afa5cccac9e2b8cc18",
            "097e1b68723b4c5bbe234dfc54aef04f",
            "c43f3c6bdf9a429fa6c8fe47723a2b67",
            "4e58b4c716be414eb5082543be0bbe23",
            "574082d265d54e4098a1f3f9a8d70759",
            "9721cd90a9b64893a1c0195832ea5451",
            "1b701c1c9048450087d888dd6132a108",
            "b4882ca766db4399b8798cbf2e465826",
            "8acc4627b1f4401796011ac31f78cfc6",
            "505dc4a6268d4c0eadaea276e7379bd8",
            "7cdedbdc24454bd68d08c7546c6ec61c",
            "c5227e56d16c4073b6c77559b7a0b5a8",
            "8b06a1cfdc6b4f829d2d24a0458d48e2",
            "7d5043d50db04c37808a96fabb0c0928",
            "7f4ddbc153534762a2e3616c12b12a8b",
            "c9a1efba05554b3290cf9edef59c008c",
            "a54c04da38fd4b8fad734e26c1a5989c",
            "8a66570b2fc7449eab0e503080050650",
            "a1aca4abc96842ba82f8bd24ae4aaab7",
            "a947cd8aa4504c4b8f79fc1aa14c5bb6",
            "62725c82633440a6b528109a1130dd25",
            "3ea7d339bbee4dbca60a7f559b9be193",
            "226e77efaa134957a123a5c1febe490a",
            "b170d4594ef543c88f7f0736f5b876de",
            "5c2021323c50444a8afe8eb1b49de312",
            "39dd25c4b18a4c80bbf68123f1b9288a",
            "8495818676064133a71a6bb7364847bd",
            "874d959573fe435e807eaa22ceed07c4",
            "8fbce067d69c41d3b18395e19cba328a",
            "be68ee509e1741449092f072a9e59371",
            "86894c8c493540e696ab143ed9ea9ff7",
            "677d5a4e2f034ae68452f01752b17ebf",
            "51378a600b2147acba37747cdaa3481f",
            "6b1cdd60cd60426f8ca5c5f817798ba9",
            "3caf3593ea5943a7bd763d37fa62742e",
            "e17d1fa4f4d74edfa2a5a6f32c3cfeed",
            "5e2e83452cea4438a81e002403826ec8",
            "f74f519578f44d24a42d83670c34df1c",
            "e65b76b2e87f4d598323909139e2e75b",
            "b3e895b949b845ab9ed900365bec0764",
            "58ba6da7e7014e758227d1e0e502d915",
            "ae00020ecf244272ae1912e464547ea9",
            "d219f982869a4b9d8d82a7a2dfa27251",
            "2928e5436c5e433bb8a0491ae1961558",
            "3f1a936372d4460ca31b5401dc3f4b98",
            "2d4753d9f84248109563092b895829f2",
            "8a9492e28b144c0d968a3f2ccfc6a7cf",
            "67947638d6f949bf9bd13a3839672f1a",
            "d4b9ac92513f4a9899a2184400874f3a",
            "7131e0a6504345cfa649f1340b6e9f60",
            "5b517b24c4aa4aa1b9cf2718d8e0a3d7",
            "4341096f9a98493f8c3bd0a7128f6fa3",
            "3c2de4e33abd4836a547d983e2bac06a",
            "69bb9900aaac4fd3af6723368db8c743",
            "a1f65d6a914a4540b02621d9fe09f5ca",
            "833416f174bb45eaa68526164ff50c10",
            "b50996d61d614f2fab9087a724b7d3e0",
            "86aa8ea394a8452e8b518be433c32f04",
            "3d73b8ec92484125a191f7417f9aa8b8",
            "11918012e9704ca7b904181c695dc007",
            "392c5f899c304b10813acf3db5d7641c",
            "39c9fd99e0304873b0d6e3f8e0848be6",
            "66ecab5e05104aef80362413f83af065",
            "37c008154ddc463bb20e905a55a9c954",
            "e13dd4fbc1e148b38ff57272044ef999",
            "95e73d576c654c4da40048bee55fa2e9",
            "e65f8543efd84feab027c496b1ca04a9",
            "980a714a26004a9689d4dca73192aa23",
            "a21d8a7092b843418ddd31a2b5a57351",
            "e1f65a9d0b0f4bf4a5ebb118c4c25049",
            "f7cd2a5919244306a52805c91061ca2e",
            "d784cdde54c841f4b54c416617b09b81",
            "bff746c9773b408aab6b5342dab8dc2e",
            "e8004e7e344b4e6dadb4c4505806ef85",
            "252703c6725b4010abd62e16e82999ae",
            "568226c3e2034a35998ade135eadc86a",
            "93c2cfc859d0429cac14f40af378f60a",
            "c042ff878b944ba4aa9fc033bf6dd668",
            "5337b1d0309d41ed91335084decf2a3f",
            "6ccf96f07d2f464885398b7df961855c",
            "8ccd144cfac946399c8a7104b88783aa",
            "5eea305d379b4cb9a76e80102ac05fec",
            "8d4394de58e9433aa4986aa8ec2f7b52",
            "f9d251449a2e423d83f77fa772490be0",
            "b53e1217703346a0b9dcd41459f576a6",
            "7eb5328e3a23430dba9286041a61ba39",
            "a525e4158699491bafc9affda5e8ac73",
            "add785073d3a47829f4c612a89101b10",
            "5f66a15bb6fb45b1baaf1eafdf159282",
            "32443ce5ad13479dbd8e7e44efd6ff99",
            "78df30c6a21446f78658fb1f7c1504de",
            "963061783a884f3f865591a6353af48f",
            "33066273f55a4eb6bdbff3ab3458129e",
            "3b22f55de0f544759b47b791158dfaf9",
            "8934b933ddf74ba0879c146a0335095a",
            "1ef52e732e494292be94d08decb85d31",
            "1de8686035d6448493f0aa6c9a095434",
            "5547b747ffe64fb0a77666e4e628351f",
            "92db9b13d1204e6786230d93a7d7a5a3",
            "b74b5ef8d16e4f56881c2ad333b0dd35",
            "66b8ba28178c4aaf92af1cc973dd8c68",
            "47bdc6eb2c894051986a45d2ec56bc29",
            "5a54311cccce4a3d92df18fed613d98d",
            "c5ad03b120df4f97a903bc2e2a722ef7",
            "120a5eaefafc4337839677e206762135",
            "e975ee69d0f64eeea545a6aae904a6e3",
            "0317d06127194e18a9064136457bf22d",
            "a1f9f544bd504bbab01d40a00792eaeb",
            "cece8b10c58d4fbca2943ad37060aebd",
            "fcb34ef4cf004d5d8ed36ccc73e8bb34",
            "bb2114f01cc14e08966885001212c2c9",
            "73a9a3bc472d46d1a0bb7f4e5371565a",
            "16421c047ad64a8ca0823e9ce14ccd46",
            "eaebcf8b23fa4eea949b6c6e584ac2dd",
            "dbd05f613f354b5da202fab4ea576f62",
            "fee3b6dec4a4463a8a08959d96a611c5",
            "d9ad557efd0046a48b094d227f584deb",
            "c7891fd0b6594dd79c74f75f87ca2768",
            "0cd479ec1bd7475c95f525a8078ba68c",
            "4694ed7bade64c009a1116eb3d894762",
            "8e9f3f5d44d74682a63f6658f3ccd464",
            "827192b696c3431bb1de7c345c335aa2",
            "13dc472bd34c46328f7dd6ac871c1bd8",
            "8ca5b5fdac004d2c824ca20b951d85e3",
            "5afc7114e1cf4177bde260bbae50fd4d"
          ]
        },
        "outputId": "7a987355-e429-4b53-9835-7f7f9d14e560"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "model : meta-llama/Llama-3.1-8B-Instruct\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/tokenization_auto.py:898: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n",
            "  warnings.warn(\n",
            "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "86ae518814844bdf8b1a3534441eece7"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "b2f7dcb3d5e4471bbc331dc746508a90"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "7f5b752e3bb242f1a32cef3bbe2eb37c"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py:476: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "a4ae7393abe1497a9bb5433a7591debc"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "b083d7656f7040b3bc4bbf1d98800de3"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "295de9a6e4d64230a2187c553f14ec77"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n",
            "WARNING:huggingface_hub.file_download:Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n",
            "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n",
            "WARNING:huggingface_hub.file_download:Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n",
            "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n",
            "WARNING:huggingface_hub.file_download:Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n",
            "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n",
            "WARNING:huggingface_hub.file_download:Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "574082d265d54e4098a1f3f9a8d70759"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "c9a1efba05554b3290cf9edef59c008c"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "8495818676064133a71a6bb7364847bd"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f74f519578f44d24a42d83670c34df1c"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "d4b9ac92513f4a9899a2184400874f3a"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "11918012e9704ca7b904181c695dc007"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f7cd2a5919244306a52805c91061ca2e"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "dataset: triviaqa\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "README.md:   0%|          | 0.00/16.0k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "5eea305d379b4cb9a76e80102ac05fec"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "LongBench.py:   0%|          | 0.00/3.98k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "33066273f55a4eb6bdbff3ab3458129e"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "0000.parquet:   0%|          | 0.00/7.54M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "c5ad03b120df4f97a903bc2e2a722ef7"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Generating test split:   0%|          | 0/300 [00:00<?, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "dbd05f613f354b5da202fab4ea576f62"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "current seed number:1\n",
            "   layer:0, seed:42 | examples processed:100/300| avg bw error so far:0.11844446163366337 \n",
            "   layer:0, seed:42 | examples processed:200/300| avg bw error so far:0.11091272154850747 \n",
            "   layer:0, seed:42 | examples processed:299/300| avg bw error so far:0.10387369791666666 \n",
            "current seed number:2\n",
            "   layer:0, seed:7 | examples processed:100/300| avg bw error so far:0.11855323715965346 \n",
            "   layer:0, seed:7 | examples processed:200/300| avg bw error so far:0.1105009619869403 \n",
            "   layer:0, seed:7 | examples processed:299/300| avg bw error so far:0.10361246744791666 \n",
            "current seed number:3\n",
            "   layer:0, seed:8 | examples processed:100/300| avg bw error so far:0.11829701036509901 \n"
          ]
        },
        {
          "output_type": "error",
          "ename": "KeyboardInterrupt",
          "evalue": "",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-5-f39dabe6657f>\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0mitrs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mbw\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mmain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgamma_1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgamma_2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mitrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mitrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m \u001b[0mbw_avg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mbw_stddev\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mavg_stddev\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0;31m# unif_avg,unif_stddev = avg_stddev(unif)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-4-9ad48c60572f>\u001b[0m in \u001b[0;36mmain\u001b[0;34m(gamma_1, gamma_2, itrs, layer, temp, model_name)\u001b[0m\n\u001b[1;32m    441\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    442\u001b[0m           \u001b[0mterminators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meos_token_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_tokens_to_ids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"<|eot_id|>\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 443\u001b[0;31m           \u001b[0mattn_bw\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mexact_attn\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mgreedy_generate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_tensors\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'input_ids'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_new_tokens\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmaxlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meos_token_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mterminators\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkv_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"bw\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_dict_in_generate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    444\u001b[0m           \u001b[0;31m# prompt = template.replace('$DOC$', context.strip()).replace('$Q$', item['question'].strip()).replace('$C_A$', item['choice_A'].strip()).replace('$C_B$', item['choice_B'].strip()).replace('$C_C$', item['choice_C'].strip()).replace('$C_D$', item['choice_D'].strip())\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    445\u001b[0m           \u001b[0;31m# input_ids = tokenizer.encode(prompt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py\u001b[0m in \u001b[0;36mdecorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    114\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    115\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mctx_factory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-4-9ad48c60572f>\u001b[0m in \u001b[0;36mgreedy_generate\u001b[0;34m(self, input_ids, max_new_tokens, eos_token_id, kv_type, layer, unif, **kwargs)\u001b[0m\n\u001b[1;32m    381\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgreedy_generate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_new_tokens\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meos_token_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m128009\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkv_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"bw\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlayer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munif\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    382\u001b[0m     \u001b[0mposition_ids\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdevice\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munsqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 383\u001b[0;31m     \u001b[0mattn_bw\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mattn_exact\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmanual_forward_llama\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mposition_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mposition_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_logits_to_keep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkv_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkv_type\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    384\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    385\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-4-9ad48c60572f>\u001b[0m in \u001b[0;36mmanual_forward_llama\u001b[0;34m(model, input_ids, kv_cache, position_ids, cache_position, num_logits_to_keep, kv_type, unif, layer, **kwargs)\u001b[0m\n\u001b[1;32m    288\u001b[0m           \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    289\u001b[0m           \u001b[0;31m#start_time_bw = time.time()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m           \u001b[0msort_idx_bw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight_idx_bw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbalanced_walk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkk_old\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mvv_old\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrng\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgamma_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgamma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtemp_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbeta_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mitrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mblock_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mblock_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    291\u001b[0m           \u001b[0;31m#sort_idx_unif, weight_idx_unif = balancing_alg(key=kk_old, value= vv_old,rng=rng, gamma_=[0.0,0.0,0.0,0.0], temp_=temp, beta_=beta, itrs=itrs, block_size=block_size,unif=unif)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    292\u001b[0m           \u001b[0;31m#end_time_bw = time.time()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-4-9ad48c60572f>\u001b[0m in \u001b[0;36mbalanced_walk\u001b[0;34m(key, rng, gamma_, temp_, beta_, itrs, block_size, value, sort_idx, query)\u001b[0m\n\u001b[1;32m    132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    133\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkernel_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m             \u001b[0mpartial_inner_prod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mkernel_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0msigns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    135\u001b[0m             \u001b[0msamp_prb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.5\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mgamma_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mpartial_inner_prod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    136\u001b[0m             \u001b[0msigns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mrand_tensor\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0msamp_prb\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
          ]
        }
      ],
      "source": [
        "#specify layer out of 1,2,5 below,\n",
        "#and for compression rate x in [0.5,0.25,0.125,0.0625], specify compression rate parameter itrs = log_2(1/x). For eg for compression rate 0.5, itrs = log_2(1/0.5) = 1\n",
        "#then execute cell to obtain relative errors for BalanceKV (denoted by bw) for layer and compression rate for llama\n",
        "model_name = \"meta-llama/Llama-3.1-8B-Instruct\"\n",
        "#model_name = \"mistralai/Ministral-8B-Instruct-2410\" #change to this model name for mistral\n",
        "layer = 1\n",
        "itrs = 1\n",
        "torch.cuda.empty_cache()\n",
        "bw= main(gamma_1=4.0,gamma_2=4.0,itrs=itrs,layer=layer,model_name=model_name)\n",
        "bw_avg,bw_stddev = avg_stddev(bw)\n",
        "print(f\"Average BW: {bw_avg}| Standard Deviation BW: {bw_stddev}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Single layer attention approximation for Uniform sampling"
      ],
      "metadata": {
        "id": "A6Gh19DsyIWp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "## next, execute this cell\n",
        "\n",
        "import time\n",
        "import math\n",
        "import re\n",
        "import string\n",
        "from collections import Counter\n",
        "import torch\n",
        "from typing import List, Optional\n",
        "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
        "from transformers.models.llama.modeling_llama import apply_rotary_pos_emb, repeat_kv\n",
        "from datasets import load_dataset\n",
        "from flash_attn import flash_attn_func\n",
        "\n",
        "\n",
        "\n",
        "DATASET2PROMPT = {\"triviaqa\": \"Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\\n\\n{context}\\n\\n{input}\"}\n",
        "DATASET2MAXLEN = {\"triviaqa\": 32,  }\n",
        "\n",
        "\n",
        "def avg_stddev(data):\n",
        "    average = sum(data) / len(data)\n",
        "    variance = sum((x - average) ** 2 for x in data) / len(data)\n",
        "    std_dev = math.sqrt(variance)\n",
        "    return average,std_dev\n",
        "\n",
        "\n",
        "def normalize_answer(s):\n",
        "    \"\"\"Lower text and remove punctuation, articles and extra whitespace.\"\"\"\n",
        "\n",
        "    def remove_articles(text):\n",
        "        return re.sub(r\"\\b(a|an|the)\\b\", \" \", text)\n",
        "\n",
        "    def white_space_fix(text):\n",
        "        return \" \".join(text.split())\n",
        "\n",
        "    def remove_punc(text):\n",
        "        exclude = set(string.punctuation)\n",
        "        return \"\".join(ch for ch in text if ch not in exclude)\n",
        "\n",
        "    def lower(text):\n",
        "        return text.lower()\n",
        "\n",
        "    return white_space_fix(remove_articles(remove_punc(lower(s))))\n",
        "\n",
        "\n",
        "def truncate_input(input: list, max_length: int, manner=\"middle\"):\n",
        "    if max_length < 0:\n",
        "        return input\n",
        "    if len(input) <= max_length:\n",
        "        return input\n",
        "    if manner == \"middle\":\n",
        "        split = max_length // 2\n",
        "        return input[0:split] + input[-split:]\n",
        "    else:\n",
        "        return None\n",
        "\n",
        "\n",
        "def truncate_by_tokens(input, tok, max_tokens, manner: str = \"middle\"):\n",
        "    tokens = tok.encode(input)\n",
        "    len_before = len(tokens)\n",
        "    tokens = truncate_input(tokens, max_length=max_tokens, manner=manner)\n",
        "    len_after = len(tokens)\n",
        "    assert len_after <= len_before\n",
        "    assert len_after <= max_tokens or max_tokens < 0\n",
        "    return tokens\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "def indexing(key, sort_idx, block_size, value=None):\n",
        "  indices = sort_idx.unsqueeze(-1).expand(-1, -1, -1, key.shape[-1])\n",
        "  new_n = math.ceil(sort_idx.shape[-1] / block_size) * block_size\n",
        "  if new_n < sort_idx.shape[-1]:\n",
        "    import pdb; pdb.set_trace();\n",
        "  out_key = torch.nn.functional.pad(key.gather(2, indices), (0,0,0,new_n-sort_idx.shape[-1]), mode='constant', value=0.)\n",
        "  out_value = None\n",
        "  if value is not None:\n",
        "    out_value = torch.nn.functional.pad(value.gather(2, indices), (0,0,0,new_n-sort_idx.shape[-1]), mode='constant', value=0.)\n",
        "  return out_key, out_value\n",
        "\n",
        "\n",
        "def balanced_walk(key, rng, gamma_, temp_, beta_, itrs, block_size,unif, value=None, sort_idx=None,query=None):\n",
        "  b, h, n, d = key.shape\n",
        "\n",
        "  for t in range(itrs):\n",
        "    if sort_idx is not None:\n",
        "      key_sorted, value_sorted = indexing(key, sort_idx, block_size[t], value)\n",
        "      key_sorted = key_sorted.view(b, h, -1, block_size[t], d)\n",
        "      if value is not None:\n",
        "        value_sorted = value_sorted.view(b, h, -1, block_size[t], d)\n",
        "    else:\n",
        "      new_n = math.ceil(n / block_size[t]) * block_size[t]\n",
        "      key_sorted = torch.nn.functional.pad(key, (0,0,0,new_n-n), mode='constant', value=0.).view(b, h, -1, block_size[t], d)\n",
        "      value_sorted = None\n",
        "      if value is not None:\n",
        "        value_sorted = torch.nn.functional.pad(value, (0,0,0,new_n-n), mode='constant', value=0.).view(b, h, -1, block_size[t], d)\n",
        "\n",
        "    normal_keys = key_sorted - torch.mean(key_sorted, dim=-2, keepdim=True)\n",
        "    if query is not None:\n",
        "      query_key_correlation = torch.softmax(torch.einsum('b h n d,b h s m d->b h s n m',query[:,::4,:,:],normal_keys),dim=-1).mean(-2,keepdim=True)\n",
        "      kernel_ = query_key_correlation*query_key_correlation.transpose(-1,-2)\n",
        "    else:\n",
        "      kernel_ = torch.exp(temp_ * torch.einsum('...nd,...sd->...ns', normal_keys, normal_keys)/math.sqrt(d) - beta_)\n",
        "    if value is not None:\n",
        "\n",
        "      kernel_ *= (1e-8 + torch.einsum('...nd,...sd->...ns', value_sorted, value_sorted))\n",
        "\n",
        "    signs = torch.zeros(kernel_.shape[:4], dtype=torch.int16, device=kernel_.device)\n",
        "    signs[:, :, :, 0] = 1\n",
        "    partial_quad_form = kernel_[:, :, :, 0, 0].detach().clone()\n",
        "    rand_tensor = torch.rand(signs.shape, generator=rng, device=key.device)\n",
        "    for i in range(1, kernel_.shape[3]):\n",
        "      partial_inner_prod = (kernel_[:, :, :, i, :] * signs).sum(dim=-1)\n",
        "      samp_prb = 0.5 - gamma_[t] * partial_inner_prod\n",
        "\n",
        "\n",
        "      signs[:, :, :, i] = 2 * (rand_tensor[:, :, :, i] < samp_prb) - 1\n",
        "      partial_quad_form += (2 * signs[:, :, :, i] * partial_inner_prod + kernel_[:, :, :, i, i])\n",
        "\n",
        "    signs = signs.view(b, h, -1)[:, :, :n]\n",
        "\n",
        "    signs_argsort = torch.argsort(signs, dim=-1, stable=True)\n",
        "\n",
        "    n = n//2\n",
        "    if sort_idx is None:\n",
        "      sort_idx = signs_argsort[:, :, :n]\n",
        "    else:\n",
        "      sort_idx = sort_idx.gather(2, signs_argsort[:, :, :n])\n",
        "\n",
        "  return sort_idx\n",
        "\n",
        "\n",
        "\n",
        "def manual_forward_llama(\n",
        "    model,\n",
        "    input_ids,\n",
        "    kv_cache=None, position_ids=None, cache_position=None, num_logits_to_keep=0,\n",
        "    kv_type='bw',\n",
        "    unif = False,\n",
        "    layer = 0,\n",
        "    **kwargs\n",
        "):\n",
        "    hh = model.model.embed_tokens(input_ids)\n",
        "    if position_ids is None:\n",
        "        position_ids = torch.arange(len(input_ids[0]), device=input_ids.device).unsqueeze(0)\n",
        "    if int(transformers.__version__.split(\".\")[1]) >= 48:\n",
        "        position_embeddings = model.model.rotary_emb(hh, position_ids)\n",
        "\n",
        "    output_attn = None\n",
        "    for i, decoder_layer in enumerate(model.model.layers):\n",
        "        # hh = decoder_layer(hh, position_ids=position_ids)[0]\n",
        "        res = hh.detach().clone()\n",
        "        hh = decoder_layer.input_layernorm(hh)\n",
        "\n",
        "        # h1, _, kv = decoder_layer.self_attn(hh, position_ids=position_ids, use_cache=False)\n",
        "        # <===\n",
        "        q_len = hh.shape[1]\n",
        "        kv_len = q_len\n",
        "        qq = decoder_layer.self_attn.q_proj(hh).reshape(1, q_len, -1, 128).transpose(1, 2)\n",
        "        kk = decoder_layer.self_attn.k_proj(hh).reshape(1, kv_len, 8, 128).transpose(1, 2)\n",
        "        vv = decoder_layer.self_attn.v_proj(hh).reshape(1, kv_len, 8, 128).transpose(1, 2)\n",
        "\n",
        "        if int(transformers.__version__.split(\".\")[1]) >= 48:\n",
        "            cos, sin = position_embeddings\n",
        "        else:\n",
        "            cos, sin = decoder_layer.self_attn.rotary_emb(vv, position_ids)\n",
        "        qq, kk = apply_rotary_pos_emb(qq, kk, cos, sin)\n",
        "        d = qq.shape[-1]\n",
        "\n",
        "        if q_len > 1:\n",
        "            attn_output = flash_attn_func(qq.transpose(1,2), kk.transpose(1,2), vv.transpose(1,2), causal=True)\n",
        "        if i==layer:\n",
        "          rng = model.config.rng\n",
        "          gamma = model.config.gamma\n",
        "          temp = model.config.temp\n",
        "          beta = model.config.beta\n",
        "          itrs = model.config.itrs\n",
        "          block_size = model.config.block_size\n",
        "          recent_size = model.config.recent_size\n",
        "\n",
        "\n",
        "          kk_first = kk[:,:,:recent_size,:]\n",
        "          vv_first = vv[:,:,:recent_size,:]\n",
        "          kk_old = kk[:,:,recent_size:-recent_size,:]\n",
        "          vv_old = vv[:,:,recent_size:-recent_size,:]\n",
        "          kk_recent = kk[:,:,-recent_size:,:]\n",
        "          vv_recent = vv[:,:,-recent_size:,:]\n",
        "\n",
        "\n",
        "          sort_idx_bw = balanced_walk(key=kk_old, value= vv_old,rng=rng, gamma_=gamma, temp_=temp, beta_=beta, itrs=itrs, block_size=block_size,unif=unif)\n",
        "          sort_idx_unif = balanced_walk(key=kk_old, value= vv_old,rng=rng, gamma_=[0.0,0.0,0.0,0.0], temp_=temp, beta_=beta, itrs=itrs, block_size=block_size,unif=unif)\n",
        "\n",
        "\n",
        "          bsz, n_heads, _, dim = kk.shape\n",
        "          n_centers_bw = sort_idx_bw.shape[-1]\n",
        "          kk_old_bw = torch.gather(kk_old, 2, sort_idx_bw.unsqueeze(-1).expand(bsz, n_heads, n_centers_bw, dim))\n",
        "          vv_old_bw = torch.gather(vv_old, 2, sort_idx_bw.unsqueeze(-1).expand(bsz, n_heads, n_centers_bw, dim))\n",
        "\n",
        "\n",
        "          kk_selected_bw = torch.cat((kk_first,torch.cat([kk_old_bw]*(2**itrs),dim=2),kk_recent),dim=2)\n",
        "          vv_selected_bw = torch.cat((vv_first,torch.cat([vv_old_bw]*(2**itrs),dim=2),vv_recent),dim=2)\n",
        "\n",
        "          n_centers_unif = sort_idx_unif.shape[-1]\n",
        "\n",
        "          kk_old_unif = torch.gather(kk_old, 2, sort_idx_unif.unsqueeze(-1).expand(bsz, n_heads, n_centers_unif, dim))\n",
        "          vv_old_unif = torch.gather(vv_old, 2, sort_idx_unif.unsqueeze(-1).expand(bsz, n_heads, n_centers_unif, dim))\n",
        "\n",
        "          kk_selected_unif = torch.cat((kk_first,torch.cat([kk_old_unif]*(2**itrs),dim=2),kk_recent),dim=2)\n",
        "          vv_selected_unif = torch.cat((vv_first,torch.cat([vv_old_unif]*(2**itrs),dim=2),vv_recent),dim=2)\n",
        "\n",
        "          qq_latest = qq[:,:,-recent_size:,:]\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "          attn_output_bw = flash_attn_func(qq_latest.transpose(1,2), kk_selected_bw.transpose(1,2), vv_selected_bw.transpose(1,2), causal=True)\n",
        "          attn_output_unif = flash_attn_func(qq_latest.transpose(1,2), kk_selected_unif.transpose(1,2), vv_selected_unif.transpose(1,2), causal=True)\n",
        "          attn_output_exact = flash_attn_func(qq_latest.transpose(1,2), kk.transpose(1,2), vv.transpose(1,2), causal=True)\n",
        "\n",
        "          return attn_output_bw, attn_output_unif,attn_output_exact\n",
        "\n",
        "\n",
        "\n",
        "        attn_output = attn_output.contiguous().view(qq.shape[0], qq.shape[2], -1)\n",
        "        hh = decoder_layer.self_attn.o_proj(attn_output)\n",
        "        # ===>\n",
        "        hh = res + hh\n",
        "\n",
        "        res = hh.detach().clone()\n",
        "        hh = decoder_layer.post_attention_layernorm(hh)\n",
        "        hh = decoder_layer.mlp(hh)\n",
        "        hh = res + hh\n",
        "\n",
        "\n",
        "@torch.no_grad()\n",
        "def greedy_generate(self, input_ids, max_new_tokens, eos_token_id=128009, kv_type=\"bw\", layer = 0, unif = False,**kwargs):\n",
        "    position_ids = torch.arange(input_ids.shape[-1], device=input_ids.device).unsqueeze(0)\n",
        "    attn_bw,attn_unif,attn_exact = manual_forward_llama(self, input_ids, position_ids=position_ids, num_logits_to_keep=1,layer=layer, kv_type=kv_type,unif=unif)\n",
        "\n",
        "\n",
        "    return attn_bw,attn_unif,attn_exact\n",
        "\n",
        "\n",
        "def main(gamma_1=4,gamma_2 = 4 ,itrs = 2,layer=0,temp=0.1,model_name = \"meta-llama/Llama-3.1-8B-Instruct\"):\n",
        "\n",
        "    layer = layer -1\n",
        "    model_name = model_name\n",
        "    print(f\"model : {model_name}\")\n",
        "\n",
        "    # Load the model and tokenizer\n",
        "    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)\n",
        "    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=True)\n",
        "    model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto', torch_dtype=torch.bfloat16, _attn_implementation='flash_attention_2')\n",
        "    model = model.eval().requires_grad_(False)\n",
        "    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
        "\n",
        "\n",
        "    model.model.config.gamma = [gamma_1,gamma_2,gamma_2,gamma_2]\n",
        "    model.model.config.beta = 0.\n",
        "    model.model.config.temp = temp\n",
        "    model.model.config.block_size = [256,256,256,256]\n",
        "    model.model.config.itrs = itrs\n",
        "    model.model.config.recent_size = 256\n",
        "    seeds = [42,7,8,17,98,64,27,32,81,44]\n",
        "    dataset = \"triviaqa\"\n",
        "    print(f\"dataset: {dataset}\")\n",
        "    prompt_format = DATASET2PROMPT[dataset]\n",
        "    maxlen = DATASET2MAXLEN[dataset]\n",
        "\n",
        "    examples = load_dataset('THUDM/LongBench', f\"{dataset}_e\", split='test')\n",
        "    kv_type = \"bw\"\n",
        "    max_input_length = 100_000\n",
        "    errors_bw_global = []\n",
        "    errors_unif_global = []\n",
        "    for i,seed in enumerate(seeds):\n",
        "        print(f\"current seed number:{i+1}\")\n",
        "        model.model.config.rng = torch.Generator('cuda').manual_seed(seed)\n",
        "        errors_bw = []\n",
        "        errors_unif = []\n",
        "\n",
        "\n",
        "        for i, eg in enumerate(examples):\n",
        "\n",
        "\n",
        "          input_text = prompt_format.format(**eg)\n",
        "          msgs = [dict(role=\"system\", content=input_text)]\n",
        "          input_tokens = tokenizer.apply_chat_template(msgs, add_generation_prompt=True)\n",
        "\n",
        "\n",
        "          input_tokens = truncate_by_tokens(input_text, tokenizer, max_input_length)\n",
        "          input_tensors = {\"input_ids\": torch.tensor(input_tokens).unsqueeze(0).to(model.device)}\n",
        "          seq_len = len(input_tokens)\n",
        "\n",
        "          terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(\"<|eot_id|>\")]\n",
        "          attn_bw,attn_unif,exact_attn = greedy_generate(model, input_tensors['input_ids'], max_new_tokens=maxlen, eos_token_id=terminators, kv_type=\"bw\",layer = layer, return_dict_in_generate=True)\n",
        "          denominator = torch.clamp(exact_attn, min=1e-8)\n",
        "\n",
        "          error_bw = (torch.norm(input = attn_bw - exact_attn, dim=-1)/torch.norm(input = denominator,dim=-1)).mean()\n",
        "          error_unif = (torch.norm(input = attn_unif - exact_attn, dim=-1)/torch.norm(input = denominator, dim=-1)).mean()\n",
        "\n",
        "          errors_bw.append(error_bw.to(device='cpu').item())\n",
        "          errors_unif.append(error_unif.to(device='cpu').item())\n",
        "\n",
        "\n",
        "\n",
        "          if (i%100==0 and i>0) or i==len(examples)-1:\n",
        "            print(f\"   layer:{layer}, seed:{seed} | examples processed:{i}/{len(examples)}| avg unif error so far:{sum(errors_unif)/len(errors_unif)}\")\n",
        "\n",
        "          torch.cuda.empty_cache()\n",
        "\n",
        "        torch.cuda.empty_cache()\n",
        "\n",
        "        errors_bw_global.append(sum(errors_bw)/len(errors_bw))\n",
        "        errors_unif_global.append(sum(errors_unif)/len(errors_unif))\n",
        "\n",
        "    return errors_unif_global"
      ],
      "metadata": {
        "id": "5F3Jrjdhy_PW"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#specify layer out of 1,2,5 below,\n",
        "#and for compression rate x in [0.5,0.25,0.125,0.0625], specify compression rate parameter itrs = log_2(1/x). For eg for compression rate 0.5, itrs = log_2(1/0.5) = 1\n",
        "#then execute cell to obtain relative errors for Unif sampling for layer and compression rate for llama\n",
        "model_name = \"meta-llama/Llama-3.1-8B-Instruct\"\n",
        "#model_name = \"mistralai/Ministral-8B-Instruct-2410\" #change to this model name for mistral\n",
        "layer = 1\n",
        "itrs = 1\n",
        "torch.cuda.empty_cache()\n",
        "unif= main(gamma_1=4.0,gamma_2=4.0,itrs=itrs,layer=layer,model_name=model_name)\n",
        "unif_avg,unif_stddev = avg_stddev(unif)\n",
        "print(f\"Average unif: {unif_avg}| Standard Deviation unif: {unif_stddev}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 661,
          "referenced_widgets": [
            "e161dfa23dac4f7f9bf61a422ed66e09",
            "3e2d68fe62854e66bdb5c2c9b2d40bef",
            "b05d852edef24fe2819e51f4f369ca6c",
            "d87d1e6a3b8a405cba3fea657d4871cf",
            "83e01c9991904b7da3af4645b3d933cc",
            "a7728853eaa840d9be94182c3de104c4",
            "d64f4454cb664d53885c11145ce25b90",
            "affc393e4a884f1d97b8474ba97aa247",
            "585088b551bf4750bc5eb19543c7edcd",
            "1586b03eb2224bc9b627f8ccb198ff50",
            "3a307a556eb14aceb2598510072420c9",
            "613c2bd584834df2af3a0f96b3dfea02",
            "1458e6d5c4ee4678819a86be61f24967",
            "951e9ec7d4d84b8d8b62b8afb56f1f62",
            "cf2cc703b56347709c688708520432ea",
            "cc7f97c0d14f4a8db9ce97262f6e47d4",
            "e8ec95f512cd48dfa28d1b6b3677cebe",
            "f93a8569a79c44918d6b59c27b2a8121",
            "f6b6c12d3ea14f519fe07ddfd3d7cb6f",
            "2879b284175f49c3ab81ed063f9bc57e",
            "33b1a021e40248f79cb264cf596d5d3b",
            "7c755b5c36504cc38e8729799bf37ff0"
          ]
        },
        "id": "wSl2xX2QzBzm",
        "outputId": "c8f5d763-cd06-4644-c864-6ddd80eeb1a1"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "model : meta-llama/Llama-3.1-8B-Instruct\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "e161dfa23dac4f7f9bf61a422ed66e09"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "613c2bd584834df2af3a0f96b3dfea02"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "dataset: triviaqa\n",
            "current seed number:1\n",
            "   layer:0, seed:42 | examples processed:100/300| avg unif error so far:0.16768351639851486\n",
            "   layer:0, seed:42 | examples processed:200/300| avg unif error so far:0.15785574082711443\n",
            "   layer:0, seed:42 | examples processed:299/300| avg unif error so far:0.148583984375\n",
            "current seed number:2\n",
            "   layer:0, seed:7 | examples processed:100/300| avg unif error so far:0.16836034189356436\n",
            "   layer:0, seed:7 | examples processed:200/300| avg unif error so far:0.1579942086442786\n",
            "   layer:0, seed:7 | examples processed:299/300| avg unif error so far:0.1485888671875\n",
            "current seed number:3\n"
          ]
        },
        {
          "output_type": "error",
          "ename": "KeyboardInterrupt",
          "evalue": "",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-7-692bb6b0b767>\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0mitrs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0munif\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mmain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgamma_1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgamma_2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mitrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mitrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m \u001b[0munif_avg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0munif_stddev\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mavg_stddev\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Average unif: {unif_avg}| Standard Deviation unif: {unif_stddev}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-6-ab6db1a147de>\u001b[0m in \u001b[0;36mmain\u001b[0;34m(gamma_1, gamma_2, itrs, layer, temp, model_name)\u001b[0m\n\u001b[1;32m    294\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    295\u001b[0m           \u001b[0mterminators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meos_token_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_tokens_to_ids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"<|eot_id|>\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m           \u001b[0mattn_bw\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mattn_unif\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mexact_attn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgreedy_generate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_tensors\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'input_ids'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_new_tokens\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmaxlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meos_token_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mterminators\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkv_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"bw\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_dict_in_generate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    297\u001b[0m           \u001b[0mdenominator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclamp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexact_attn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1e-8\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    298\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py\u001b[0m in \u001b[0;36mdecorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    114\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    115\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mctx_factory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-6-ab6db1a147de>\u001b[0m in \u001b[0;36mgreedy_generate\u001b[0;34m(self, input_ids, max_new_tokens, eos_token_id, kv_type, layer, unif, **kwargs)\u001b[0m\n\u001b[1;32m    237\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgreedy_generate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_new_tokens\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meos_token_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m128009\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkv_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"bw\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlayer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munif\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    238\u001b[0m     \u001b[0mposition_ids\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdevice\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munsqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 239\u001b[0;31m     \u001b[0mattn_bw\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mattn_unif\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mattn_exact\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmanual_forward_llama\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mposition_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mposition_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_logits_to_keep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkv_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkv_type\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-6-ab6db1a147de>\u001b[0m in \u001b[0;36mmanual_forward_llama\u001b[0;34m(model, input_ids, kv_cache, position_ids, cache_position, num_logits_to_keep, kv_type, unif, layer, **kwargs)\u001b[0m\n\u001b[1;32m    190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    191\u001b[0m           \u001b[0msort_idx_bw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbalanced_walk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkk_old\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mvv_old\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrng\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgamma_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgamma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtemp_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbeta_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mitrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mblock_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mblock_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 192\u001b[0;31m           \u001b[0msort_idx_unif\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbalanced_walk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkk_old\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mvv_old\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrng\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgamma_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtemp_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbeta_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mitrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mblock_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mblock_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munif\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    193\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-6-ab6db1a147de>\u001b[0m in \u001b[0;36mbalanced_walk\u001b[0;34m(key, rng, gamma_, temp_, beta_, itrs, block_size, unif, value, sort_idx, query)\u001b[0m\n\u001b[1;32m    117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m       \u001b[0msigns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mrand_tensor\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0msamp_prb\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m       \u001b[0mpartial_quad_form\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0msigns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mpartial_inner_prod\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mkernel_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    121\u001b[0m     \u001b[0msigns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msigns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
          ]
        }
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "A100",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "86ae518814844bdf8b1a3534441eece7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_afc675d486ee4ad1ba659f0d4b920d89",
              "IPY_MODEL_ce84c4db5488462999dae33742653cf6",
              "IPY_MODEL_d23f4feefe62453887c1c2541e3e4941"
            ],
            "layout": "IPY_MODEL_68f3d17985b248b891a971fab3317747"
          }
        },
        "afc675d486ee4ad1ba659f0d4b920d89": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f1a4f12e9c824736a0ccddde6b99c5f9",
            "placeholder": "​",
            "style": "IPY_MODEL_d534992e98994cb6bd7111c1d6f670d6",
            "value": "tokenizer_config.json: 100%"
          }
        },
        "ce84c4db5488462999dae33742653cf6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c2f79fec3eb84d389b4cd364b92c56c5",
            "max": 55351,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_90232c78b22f489b83eb2fca0a946ff3",
            "value": 55351
          }
        },
        "d23f4feefe62453887c1c2541e3e4941": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_42ad19c70226458b92cd058a1083e1d7",
            "placeholder": "​",
            "style": "IPY_MODEL_1c20b868083547cd844eaa304d65aa0d",
            "value": " 55.4k/55.4k [00:00&lt;00:00, 4.75MB/s]"
          }
        },
        "68f3d17985b248b891a971fab3317747": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f1a4f12e9c824736a0ccddde6b99c5f9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d534992e98994cb6bd7111c1d6f670d6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c2f79fec3eb84d389b4cd364b92c56c5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "90232c78b22f489b83eb2fca0a946ff3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "42ad19c70226458b92cd058a1083e1d7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1c20b868083547cd844eaa304d65aa0d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b2f7dcb3d5e4471bbc331dc746508a90": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c598a16f35994714b0fc44560c460783",
              "IPY_MODEL_86c1211f02fb482d8d02b7f84a303b63",
              "IPY_MODEL_a263de88100145ff9904645650dcc347"
            ],
            "layout": "IPY_MODEL_e9695522cdbb4bdabf62e1e56c2fe743"
          }
        },
        "c598a16f35994714b0fc44560c460783": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_90ac52eef181498bbc475cb8c6aaf1df",
            "placeholder": "​",
            "style": "IPY_MODEL_1f362bdc7cd542cfb4d435b530ff5fab",
            "value": "tokenizer.json: 100%"
          }
        },
        "86c1211f02fb482d8d02b7f84a303b63": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c4417d91d0cd4a7b8c2f94e3d5f98779",
            "max": 9085657,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f3c6924c34ae416d8f9ba6e3e10d0977",
            "value": 9085657
          }
        },
        "a263de88100145ff9904645650dcc347": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_27c4e3ab2eb64f8faaf626b5b8d23aae",
            "placeholder": "​",
            "style": "IPY_MODEL_0c8fe1abbffd4d4291ff93b10a0bb8b4",
            "value": " 9.09M/9.09M [00:00&lt;00:00, 18.6MB/s]"
          }
        },
        "e9695522cdbb4bdabf62e1e56c2fe743": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "90ac52eef181498bbc475cb8c6aaf1df": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1f362bdc7cd542cfb4d435b530ff5fab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c4417d91d0cd4a7b8c2f94e3d5f98779": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f3c6924c34ae416d8f9ba6e3e10d0977": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "27c4e3ab2eb64f8faaf626b5b8d23aae": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0c8fe1abbffd4d4291ff93b10a0bb8b4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7f5b752e3bb242f1a32cef3bbe2eb37c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_49e64a7c31884f67b9ab7a0ea2b53bf8",
              "IPY_MODEL_b2a0ab02df6444a58e1c212f28ae2a99",
              "IPY_MODEL_47fd6f3d0eac4df38c3af40c231e5340"
            ],
            "layout": "IPY_MODEL_bfc4e04f0c844c00ae7f99772b73e0a5"
          }
        },
        "49e64a7c31884f67b9ab7a0ea2b53bf8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a958cd8e8841446691784f51812d551c",
            "placeholder": "​",
            "style": "IPY_MODEL_03570376b0604e43b070a451343ffb41",
            "value": "special_tokens_map.json: 100%"
          }
        },
        "b2a0ab02df6444a58e1c212f28ae2a99": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_70d6dcf1f41242308fecf9e75f98d0aa",
            "max": 296,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_27c5bae8b3514749b5e070a4357e8b9b",
            "value": 296
          }
        },
        "47fd6f3d0eac4df38c3af40c231e5340": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a692df7644c9461280e4728e5bf7c465",
            "placeholder": "​",
            "style": "IPY_MODEL_56413ba2c670489fb7855b1ce3ca7083",
            "value": " 296/296 [00:00&lt;00:00, 33.4kB/s]"
          }
        },
        "bfc4e04f0c844c00ae7f99772b73e0a5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a958cd8e8841446691784f51812d551c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "03570376b0604e43b070a451343ffb41": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "70d6dcf1f41242308fecf9e75f98d0aa": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "27c5bae8b3514749b5e070a4357e8b9b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "a692df7644c9461280e4728e5bf7c465": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "56413ba2c670489fb7855b1ce3ca7083": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a4ae7393abe1497a9bb5433a7591debc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ab6f631c0ef24cdfbf6a184026440e1e",
              "IPY_MODEL_ab9b481712e94df2b62a0fe1ba42b705",
              "IPY_MODEL_3e17dad00ba9409788a68f069dbecf4c"
            ],
            "layout": "IPY_MODEL_665b55d95e65482e863f194d237c44c3"
          }
        },
        "ab6f631c0ef24cdfbf6a184026440e1e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6b752c52601d4c5285040e168c27371b",
            "placeholder": "​",
            "style": "IPY_MODEL_4d296bf86e4b4c92a526ea4ff59251cf",
            "value": "config.json: 100%"
          }
        },
        "ab9b481712e94df2b62a0fe1ba42b705": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f3a0a202bacf4350b21a7df6d286fa42",
            "max": 855,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_94704493739449578701dea4e0571cce",
            "value": 855
          }
        },
        "3e17dad00ba9409788a68f069dbecf4c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_07601aa2489c4d378b84e9061502e80a",
            "placeholder": "​",
            "style": "IPY_MODEL_0e71cf74746f4ceb922ece272f461a16",
            "value": " 855/855 [00:00&lt;00:00, 108kB/s]"
          }
        },
        "665b55d95e65482e863f194d237c44c3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6b752c52601d4c5285040e168c27371b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4d296bf86e4b4c92a526ea4ff59251cf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f3a0a202bacf4350b21a7df6d286fa42": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "94704493739449578701dea4e0571cce": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "07601aa2489c4d378b84e9061502e80a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0e71cf74746f4ceb922ece272f461a16": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b083d7656f7040b3bc4bbf1d98800de3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9bd02fc74325439f8605e5bee07683c5",
              "IPY_MODEL_f9f98cdf0af34e61a27f952383a60864",
              "IPY_MODEL_66688b73e69c4cc18ad92a7175cb040e"
            ],
            "layout": "IPY_MODEL_cb2a7a923807429189877bc6cffbcd73"
          }
        },
        "9bd02fc74325439f8605e5bee07683c5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a3ad5255434641179d044795284139d6",
            "placeholder": "​",
            "style": "IPY_MODEL_818e1932af134ed183d66e737bb0eb9b",
            "value": "model.safetensors.index.json: 100%"
          }
        },
        "f9f98cdf0af34e61a27f952383a60864": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9f06ea276e2d46228f9d1eca64b28fde",
            "max": 23950,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_1b95aa5decfa4e4d81a2f7e7a08624da",
            "value": 23950
          }
        },
        "66688b73e69c4cc18ad92a7175cb040e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0db3a9eafa684dd9822fe5de71bb3ea8",
            "placeholder": "​",
            "style": "IPY_MODEL_faf833c5fc4641418d5900a69c8b3894",
            "value": " 23.9k/23.9k [00:00&lt;00:00, 2.97MB/s]"
          }
        },
        "cb2a7a923807429189877bc6cffbcd73": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a3ad5255434641179d044795284139d6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "818e1932af134ed183d66e737bb0eb9b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9f06ea276e2d46228f9d1eca64b28fde": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1b95aa5decfa4e4d81a2f7e7a08624da": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "0db3a9eafa684dd9822fe5de71bb3ea8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "faf833c5fc4641418d5900a69c8b3894": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "295de9a6e4d64230a2187c553f14ec77": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_aab0619e346c494cb289bdff69ac118c",
              "IPY_MODEL_7efe7a2a124b48fb9b3f8ee7cf58e5a0",
              "IPY_MODEL_4ae4a1400296437c977b4235dfd1f5cb"
            ],
            "layout": "IPY_MODEL_17bb593523cc468e9c06150141d3fea9"
          }
        },
        "aab0619e346c494cb289bdff69ac118c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_01bf667168f845bf81cb04aebe65b42a",
            "placeholder": "​",
            "style": "IPY_MODEL_4a2c303f2f394ba3b898e3643d1948de",
            "value": "Fetching 4 files: 100%"
          }
        },
        "7efe7a2a124b48fb9b3f8ee7cf58e5a0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fb0731ea21d241afa5cccac9e2b8cc18",
            "max": 4,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_097e1b68723b4c5bbe234dfc54aef04f",
            "value": 4
          }
        },
        "4ae4a1400296437c977b4235dfd1f5cb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c43f3c6bdf9a429fa6c8fe47723a2b67",
            "placeholder": "​",
            "style": "IPY_MODEL_4e58b4c716be414eb5082543be0bbe23",
            "value": " 4/4 [00:41&lt;00:00, 41.93s/it]"
          }
        },
        "17bb593523cc468e9c06150141d3fea9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "01bf667168f845bf81cb04aebe65b42a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4a2c303f2f394ba3b898e3643d1948de": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "fb0731ea21d241afa5cccac9e2b8cc18": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "097e1b68723b4c5bbe234dfc54aef04f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "c43f3c6bdf9a429fa6c8fe47723a2b67": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4e58b4c716be414eb5082543be0bbe23": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "574082d265d54e4098a1f3f9a8d70759": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9721cd90a9b64893a1c0195832ea5451",
              "IPY_MODEL_1b701c1c9048450087d888dd6132a108",
              "IPY_MODEL_b4882ca766db4399b8798cbf2e465826"
            ],
            "layout": "IPY_MODEL_8acc4627b1f4401796011ac31f78cfc6"
          }
        },
        "9721cd90a9b64893a1c0195832ea5451": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_505dc4a6268d4c0eadaea276e7379bd8",
            "placeholder": "​",
            "style": "IPY_MODEL_7cdedbdc24454bd68d08c7546c6ec61c",
            "value": "model-00004-of-00004.safetensors: 100%"
          }
        },
        "1b701c1c9048450087d888dd6132a108": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c5227e56d16c4073b6c77559b7a0b5a8",
            "max": 1168138808,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_8b06a1cfdc6b4f829d2d24a0458d48e2",
            "value": 1168138808
          }
        },
        "b4882ca766db4399b8798cbf2e465826": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7d5043d50db04c37808a96fabb0c0928",
            "placeholder": "​",
            "style": "IPY_MODEL_7f4ddbc153534762a2e3616c12b12a8b",
            "value": " 1.17G/1.17G [00:09&lt;00:00, 124MB/s]"
          }
        },
        "8acc4627b1f4401796011ac31f78cfc6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "505dc4a6268d4c0eadaea276e7379bd8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7cdedbdc24454bd68d08c7546c6ec61c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c5227e56d16c4073b6c77559b7a0b5a8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8b06a1cfdc6b4f829d2d24a0458d48e2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "7d5043d50db04c37808a96fabb0c0928": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7f4ddbc153534762a2e3616c12b12a8b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c9a1efba05554b3290cf9edef59c008c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_a54c04da38fd4b8fad734e26c1a5989c",
              "IPY_MODEL_8a66570b2fc7449eab0e503080050650",
              "IPY_MODEL_a1aca4abc96842ba82f8bd24ae4aaab7"
            ],
            "layout": "IPY_MODEL_a947cd8aa4504c4b8f79fc1aa14c5bb6"
          }
        },
        "a54c04da38fd4b8fad734e26c1a5989c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_62725c82633440a6b528109a1130dd25",
            "placeholder": "​",
            "style": "IPY_MODEL_3ea7d339bbee4dbca60a7f559b9be193",
            "value": "model-00002-of-00004.safetensors: 100%"
          }
        },
        "8a66570b2fc7449eab0e503080050650": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_226e77efaa134957a123a5c1febe490a",
            "max": 4999802720,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_b170d4594ef543c88f7f0736f5b876de",
            "value": 4999802720
          }
        },
        "a1aca4abc96842ba82f8bd24ae4aaab7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5c2021323c50444a8afe8eb1b49de312",
            "placeholder": "​",
            "style": "IPY_MODEL_39dd25c4b18a4c80bbf68123f1b9288a",
            "value": " 5.00G/5.00G [00:40&lt;00:00, 177MB/s]"
          }
        },
        "a947cd8aa4504c4b8f79fc1aa14c5bb6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "62725c82633440a6b528109a1130dd25": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3ea7d339bbee4dbca60a7f559b9be193": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "226e77efaa134957a123a5c1febe490a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b170d4594ef543c88f7f0736f5b876de": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "5c2021323c50444a8afe8eb1b49de312": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "39dd25c4b18a4c80bbf68123f1b9288a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "8495818676064133a71a6bb7364847bd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_874d959573fe435e807eaa22ceed07c4",
              "IPY_MODEL_8fbce067d69c41d3b18395e19cba328a",
              "IPY_MODEL_be68ee509e1741449092f072a9e59371"
            ],
            "layout": "IPY_MODEL_86894c8c493540e696ab143ed9ea9ff7"
          }
        },
        "874d959573fe435e807eaa22ceed07c4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_677d5a4e2f034ae68452f01752b17ebf",
            "placeholder": "​",
            "style": "IPY_MODEL_51378a600b2147acba37747cdaa3481f",
            "value": "model-00003-of-00004.safetensors: 100%"
          }
        },
        "8fbce067d69c41d3b18395e19cba328a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6b1cdd60cd60426f8ca5c5f817798ba9",
            "max": 4915916176,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_3caf3593ea5943a7bd763d37fa62742e",
            "value": 4915916176
          }
        },
        "be68ee509e1741449092f072a9e59371": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e17d1fa4f4d74edfa2a5a6f32c3cfeed",
            "placeholder": "​",
            "style": "IPY_MODEL_5e2e83452cea4438a81e002403826ec8",
            "value": " 4.92G/4.92G [00:41&lt;00:00, 204MB/s]"
          }
        },
        "86894c8c493540e696ab143ed9ea9ff7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "677d5a4e2f034ae68452f01752b17ebf": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "51378a600b2147acba37747cdaa3481f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "6b1cdd60cd60426f8ca5c5f817798ba9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3caf3593ea5943a7bd763d37fa62742e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e17d1fa4f4d74edfa2a5a6f32c3cfeed": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5e2e83452cea4438a81e002403826ec8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f74f519578f44d24a42d83670c34df1c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e65b76b2e87f4d598323909139e2e75b",
              "IPY_MODEL_b3e895b949b845ab9ed900365bec0764",
              "IPY_MODEL_58ba6da7e7014e758227d1e0e502d915"
            ],
            "layout": "IPY_MODEL_ae00020ecf244272ae1912e464547ea9"
          }
        },
        "e65b76b2e87f4d598323909139e2e75b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d219f982869a4b9d8d82a7a2dfa27251",
            "placeholder": "​",
            "style": "IPY_MODEL_2928e5436c5e433bb8a0491ae1961558",
            "value": "model-00001-of-00004.safetensors: 100%"
          }
        },
        "b3e895b949b845ab9ed900365bec0764": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3f1a936372d4460ca31b5401dc3f4b98",
            "max": 4976698672,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2d4753d9f84248109563092b895829f2",
            "value": 4976698672
          }
        },
        "58ba6da7e7014e758227d1e0e502d915": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8a9492e28b144c0d968a3f2ccfc6a7cf",
            "placeholder": "​",
            "style": "IPY_MODEL_67947638d6f949bf9bd13a3839672f1a",
            "value": " 4.98G/4.98G [00:41&lt;00:00, 230MB/s]"
          }
        },
        "ae00020ecf244272ae1912e464547ea9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d219f982869a4b9d8d82a7a2dfa27251": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2928e5436c5e433bb8a0491ae1961558": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "3f1a936372d4460ca31b5401dc3f4b98": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2d4753d9f84248109563092b895829f2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "8a9492e28b144c0d968a3f2ccfc6a7cf": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "67947638d6f949bf9bd13a3839672f1a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d4b9ac92513f4a9899a2184400874f3a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_7131e0a6504345cfa649f1340b6e9f60",
              "IPY_MODEL_5b517b24c4aa4aa1b9cf2718d8e0a3d7",
              "IPY_MODEL_4341096f9a98493f8c3bd0a7128f6fa3"
            ],
            "layout": "IPY_MODEL_3c2de4e33abd4836a547d983e2bac06a"
          }
        },
        "7131e0a6504345cfa649f1340b6e9f60": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_69bb9900aaac4fd3af6723368db8c743",
            "placeholder": "​",
            "style": "IPY_MODEL_a1f65d6a914a4540b02621d9fe09f5ca",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "5b517b24c4aa4aa1b9cf2718d8e0a3d7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_833416f174bb45eaa68526164ff50c10",
            "max": 4,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_b50996d61d614f2fab9087a724b7d3e0",
            "value": 4
          }
        },
        "4341096f9a98493f8c3bd0a7128f6fa3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_86aa8ea394a8452e8b518be433c32f04",
            "placeholder": "​",
            "style": "IPY_MODEL_3d73b8ec92484125a191f7417f9aa8b8",
            "value": " 4/4 [00:04&lt;00:00,  1.10it/s]"
          }
        },
        "3c2de4e33abd4836a547d983e2bac06a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "69bb9900aaac4fd3af6723368db8c743": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a1f65d6a914a4540b02621d9fe09f5ca": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "833416f174bb45eaa68526164ff50c10": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b50996d61d614f2fab9087a724b7d3e0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "86aa8ea394a8452e8b518be433c32f04": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3d73b8ec92484125a191f7417f9aa8b8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "11918012e9704ca7b904181c695dc007": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_392c5f899c304b10813acf3db5d7641c",
              "IPY_MODEL_39c9fd99e0304873b0d6e3f8e0848be6",
              "IPY_MODEL_66ecab5e05104aef80362413f83af065"
            ],
            "layout": "IPY_MODEL_37c008154ddc463bb20e905a55a9c954"
          }
        },
        "392c5f899c304b10813acf3db5d7641c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e13dd4fbc1e148b38ff57272044ef999",
            "placeholder": "​",
            "style": "IPY_MODEL_95e73d576c654c4da40048bee55fa2e9",
            "value": "generation_config.json: 100%"
          }
        },
        "39c9fd99e0304873b0d6e3f8e0848be6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e65f8543efd84feab027c496b1ca04a9",
            "max": 184,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_980a714a26004a9689d4dca73192aa23",
            "value": 184
          }
        },
        "66ecab5e05104aef80362413f83af065": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a21d8a7092b843418ddd31a2b5a57351",
            "placeholder": "​",
            "style": "IPY_MODEL_e1f65a9d0b0f4bf4a5ebb118c4c25049",
            "value": " 184/184 [00:00&lt;00:00, 24.1kB/s]"
          }
        },
        "37c008154ddc463bb20e905a55a9c954": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e13dd4fbc1e148b38ff57272044ef999": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "95e73d576c654c4da40048bee55fa2e9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e65f8543efd84feab027c496b1ca04a9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "980a714a26004a9689d4dca73192aa23": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "a21d8a7092b843418ddd31a2b5a57351": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e1f65a9d0b0f4bf4a5ebb118c4c25049": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f7cd2a5919244306a52805c91061ca2e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_d784cdde54c841f4b54c416617b09b81",
              "IPY_MODEL_bff746c9773b408aab6b5342dab8dc2e",
              "IPY_MODEL_e8004e7e344b4e6dadb4c4505806ef85"
            ],
            "layout": "IPY_MODEL_252703c6725b4010abd62e16e82999ae"
          }
        },
        "d784cdde54c841f4b54c416617b09b81": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_568226c3e2034a35998ade135eadc86a",
            "placeholder": "​",
            "style": "IPY_MODEL_93c2cfc859d0429cac14f40af378f60a",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "bff746c9773b408aab6b5342dab8dc2e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c042ff878b944ba4aa9fc033bf6dd668",
            "max": 4,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_5337b1d0309d41ed91335084decf2a3f",
            "value": 4
          }
        },
        "e8004e7e344b4e6dadb4c4505806ef85": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6ccf96f07d2f464885398b7df961855c",
            "placeholder": "​",
            "style": "IPY_MODEL_8ccd144cfac946399c8a7104b88783aa",
            "value": " 4/4 [00:05&lt;00:00,  1.08s/it]"
          }
        },
        "252703c6725b4010abd62e16e82999ae": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "568226c3e2034a35998ade135eadc86a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "93c2cfc859d0429cac14f40af378f60a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c042ff878b944ba4aa9fc033bf6dd668": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5337b1d0309d41ed91335084decf2a3f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "6ccf96f07d2f464885398b7df961855c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8ccd144cfac946399c8a7104b88783aa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5eea305d379b4cb9a76e80102ac05fec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_8d4394de58e9433aa4986aa8ec2f7b52",
              "IPY_MODEL_f9d251449a2e423d83f77fa772490be0",
              "IPY_MODEL_b53e1217703346a0b9dcd41459f576a6"
            ],
            "layout": "IPY_MODEL_7eb5328e3a23430dba9286041a61ba39"
          }
        },
        "8d4394de58e9433aa4986aa8ec2f7b52": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a525e4158699491bafc9affda5e8ac73",
            "placeholder": "​",
            "style": "IPY_MODEL_add785073d3a47829f4c612a89101b10",
            "value": "README.md: 100%"
          }
        },
        "f9d251449a2e423d83f77fa772490be0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5f66a15bb6fb45b1baaf1eafdf159282",
            "max": 16038,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_32443ce5ad13479dbd8e7e44efd6ff99",
            "value": 16038
          }
        },
        "b53e1217703346a0b9dcd41459f576a6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_78df30c6a21446f78658fb1f7c1504de",
            "placeholder": "​",
            "style": "IPY_MODEL_963061783a884f3f865591a6353af48f",
            "value": " 16.0k/16.0k [00:00&lt;00:00, 1.91MB/s]"
          }
        },
        "7eb5328e3a23430dba9286041a61ba39": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a525e4158699491bafc9affda5e8ac73": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "add785073d3a47829f4c612a89101b10": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5f66a15bb6fb45b1baaf1eafdf159282": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "32443ce5ad13479dbd8e7e44efd6ff99": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "78df30c6a21446f78658fb1f7c1504de": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "963061783a884f3f865591a6353af48f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "33066273f55a4eb6bdbff3ab3458129e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_3b22f55de0f544759b47b791158dfaf9",
              "IPY_MODEL_8934b933ddf74ba0879c146a0335095a",
              "IPY_MODEL_1ef52e732e494292be94d08decb85d31"
            ],
            "layout": "IPY_MODEL_1de8686035d6448493f0aa6c9a095434"
          }
        },
        "3b22f55de0f544759b47b791158dfaf9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5547b747ffe64fb0a77666e4e628351f",
            "placeholder": "​",
            "style": "IPY_MODEL_92db9b13d1204e6786230d93a7d7a5a3",
            "value": "LongBench.py: 100%"
          }
        },
        "8934b933ddf74ba0879c146a0335095a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b74b5ef8d16e4f56881c2ad333b0dd35",
            "max": 3982,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_66b8ba28178c4aaf92af1cc973dd8c68",
            "value": 3982
          }
        },
        "1ef52e732e494292be94d08decb85d31": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_47bdc6eb2c894051986a45d2ec56bc29",
            "placeholder": "​",
            "style": "IPY_MODEL_5a54311cccce4a3d92df18fed613d98d",
            "value": " 3.98k/3.98k [00:00&lt;00:00, 526kB/s]"
          }
        },
        "1de8686035d6448493f0aa6c9a095434": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5547b747ffe64fb0a77666e4e628351f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "92db9b13d1204e6786230d93a7d7a5a3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b74b5ef8d16e4f56881c2ad333b0dd35": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "66b8ba28178c4aaf92af1cc973dd8c68": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "47bdc6eb2c894051986a45d2ec56bc29": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5a54311cccce4a3d92df18fed613d98d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c5ad03b120df4f97a903bc2e2a722ef7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_120a5eaefafc4337839677e206762135",
              "IPY_MODEL_e975ee69d0f64eeea545a6aae904a6e3",
              "IPY_MODEL_0317d06127194e18a9064136457bf22d"
            ],
            "layout": "IPY_MODEL_a1f9f544bd504bbab01d40a00792eaeb"
          }
        },
        "120a5eaefafc4337839677e206762135": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_cece8b10c58d4fbca2943ad37060aebd",
            "placeholder": "​",
            "style": "IPY_MODEL_fcb34ef4cf004d5d8ed36ccc73e8bb34",
            "value": "0000.parquet: 100%"
          }
        },
        "e975ee69d0f64eeea545a6aae904a6e3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bb2114f01cc14e08966885001212c2c9",
            "max": 7539367,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_73a9a3bc472d46d1a0bb7f4e5371565a",
            "value": 7539367
          }
        },
        "0317d06127194e18a9064136457bf22d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_16421c047ad64a8ca0823e9ce14ccd46",
            "placeholder": "​",
            "style": "IPY_MODEL_eaebcf8b23fa4eea949b6c6e584ac2dd",
            "value": " 7.54M/7.54M [00:00&lt;00:00, 95.6MB/s]"
          }
        },
        "a1f9f544bd504bbab01d40a00792eaeb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cece8b10c58d4fbca2943ad37060aebd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fcb34ef4cf004d5d8ed36ccc73e8bb34": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "bb2114f01cc14e08966885001212c2c9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "73a9a3bc472d46d1a0bb7f4e5371565a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "16421c047ad64a8ca0823e9ce14ccd46": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "eaebcf8b23fa4eea949b6c6e584ac2dd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "dbd05f613f354b5da202fab4ea576f62": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_fee3b6dec4a4463a8a08959d96a611c5",
              "IPY_MODEL_d9ad557efd0046a48b094d227f584deb",
              "IPY_MODEL_c7891fd0b6594dd79c74f75f87ca2768"
            ],
            "layout": "IPY_MODEL_0cd479ec1bd7475c95f525a8078ba68c"
          }
        },
        "fee3b6dec4a4463a8a08959d96a611c5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4694ed7bade64c009a1116eb3d894762",
            "placeholder": "​",
            "style": "IPY_MODEL_8e9f3f5d44d74682a63f6658f3ccd464",
            "value": "Generating test split: 100%"
          }
        },
        "d9ad557efd0046a48b094d227f584deb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_827192b696c3431bb1de7c345c335aa2",
            "max": 300,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_13dc472bd34c46328f7dd6ac871c1bd8",
            "value": 300
          }
        },
        "c7891fd0b6594dd79c74f75f87ca2768": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8ca5b5fdac004d2c824ca20b951d85e3",
            "placeholder": "​",
            "style": "IPY_MODEL_5afc7114e1cf4177bde260bbae50fd4d",
            "value": " 300/300 [00:00&lt;00:00, 2783.44 examples/s]"
          }
        },
        "0cd479ec1bd7475c95f525a8078ba68c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4694ed7bade64c009a1116eb3d894762": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8e9f3f5d44d74682a63f6658f3ccd464": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "827192b696c3431bb1de7c345c335aa2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "13dc472bd34c46328f7dd6ac871c1bd8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "8ca5b5fdac004d2c824ca20b951d85e3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5afc7114e1cf4177bde260bbae50fd4d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e161dfa23dac4f7f9bf61a422ed66e09": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_3e2d68fe62854e66bdb5c2c9b2d40bef",
              "IPY_MODEL_b05d852edef24fe2819e51f4f369ca6c",
              "IPY_MODEL_d87d1e6a3b8a405cba3fea657d4871cf"
            ],
            "layout": "IPY_MODEL_83e01c9991904b7da3af4645b3d933cc"
          }
        },
        "3e2d68fe62854e66bdb5c2c9b2d40bef": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a7728853eaa840d9be94182c3de104c4",
            "placeholder": "​",
            "style": "IPY_MODEL_d64f4454cb664d53885c11145ce25b90",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "b05d852edef24fe2819e51f4f369ca6c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_affc393e4a884f1d97b8474ba97aa247",
            "max": 4,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_585088b551bf4750bc5eb19543c7edcd",
            "value": 4
          }
        },
        "d87d1e6a3b8a405cba3fea657d4871cf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1586b03eb2224bc9b627f8ccb198ff50",
            "placeholder": "​",
            "style": "IPY_MODEL_3a307a556eb14aceb2598510072420c9",
            "value": " 4/4 [00:04&lt;00:00,  1.07it/s]"
          }
        },
        "83e01c9991904b7da3af4645b3d933cc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a7728853eaa840d9be94182c3de104c4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d64f4454cb664d53885c11145ce25b90": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "affc393e4a884f1d97b8474ba97aa247": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "585088b551bf4750bc5eb19543c7edcd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "1586b03eb2224bc9b627f8ccb198ff50": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3a307a556eb14aceb2598510072420c9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "613c2bd584834df2af3a0f96b3dfea02": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_1458e6d5c4ee4678819a86be61f24967",
              "IPY_MODEL_951e9ec7d4d84b8d8b62b8afb56f1f62",
              "IPY_MODEL_cf2cc703b56347709c688708520432ea"
            ],
            "layout": "IPY_MODEL_cc7f97c0d14f4a8db9ce97262f6e47d4"
          }
        },
        "1458e6d5c4ee4678819a86be61f24967": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e8ec95f512cd48dfa28d1b6b3677cebe",
            "placeholder": "​",
            "style": "IPY_MODEL_f93a8569a79c44918d6b59c27b2a8121",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "951e9ec7d4d84b8d8b62b8afb56f1f62": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f6b6c12d3ea14f519fe07ddfd3d7cb6f",
            "max": 4,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2879b284175f49c3ab81ed063f9bc57e",
            "value": 4
          }
        },
        "cf2cc703b56347709c688708520432ea": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_33b1a021e40248f79cb264cf596d5d3b",
            "placeholder": "​",
            "style": "IPY_MODEL_7c755b5c36504cc38e8729799bf37ff0",
            "value": " 4/4 [00:04&lt;00:00,  1.04it/s]"
          }
        },
        "cc7f97c0d14f4a8db9ce97262f6e47d4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e8ec95f512cd48dfa28d1b6b3677cebe": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f93a8569a79c44918d6b59c27b2a8121": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f6b6c12d3ea14f519fe07ddfd3d7cb6f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2879b284175f49c3ab81ed063f9bc57e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "33b1a021e40248f79cb264cf596d5d3b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7c755b5c36504cc38e8729799bf37ff0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}