{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/anonymous/hate_scaling/blob/main/code/4_Walkthrough_Pysentimiento_400M_2Ben.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "7shuKM4oOsu4"
   },
   "source": [
    "# GOAL: The goal of this notebook is to calculate the NSFW scores and Pysentimiento hate speech scores for a sample of the LAION-2B En dataset\n",
    "\n",
    "TLDR:\n",
    "\n",
    "- Step-1: Download the LAION datasets\n",
    "\n",
    "- Step-2: Download the data-assets from \n",
    "[here](https://anonymous.edu/assets/data/papers/hate_detect_laion_400m_2B-en.zip) and unzip them into a local directory ```./hate_detect_laion_400m_2B-en```\n",
    "This should consist of 641 files (detailed below)\n",
    "\n",
    "- Step-3: Download the summary data-frame from [here](https://raw.githubusercontent.com/anonymous/hate_scaling/main/data/nlp_hate/df_summary_filewise_400M_2B.csv) that allows one to contextualize and index the data-assets from Step-2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "7JLfChaQ747e"
   },
   "source": [
    "# 0: Standard imports and mounting the directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "M1u7E22Uz4pj",
    "outputId": "8eacf4a0-a01f-4fff-a787-3693e556c5d5"
   },
   "outputs": [],
   "source": [
    "from psutil import virtual_memory\n",
    "# Make sure to run it on a high-memory instance\n",
    "ram_gb = virtual_memory().total / 1e9\n",
    "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
    "\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "from pysentimiento import create_analyzer\n",
    "# from tqdm import tqdm_notebook as tqdm\n",
    "from tqdm.notebook import tqdm\n",
    "import csv\n",
    "#%matplotlib inline\n",
    "\n",
    "from scipy.linalg import block_diag\n",
    "#import seaborn as sns\n",
    "# Numpy aesthetics\n",
    "np.set_printoptions(suppress=True)\n",
    "from collections import Counter\n",
    "#from IPython.display import set_matplotlib_formats\n",
    "#set_matplotlib_formats('retina')\n",
    "\n",
    "import itertools\n",
    "%precision 6\n",
    "#############################################\n",
    "import sys\n",
    "import importlib\n",
    "importlib.reload(sys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import clip\n",
    "from PIL import Image\n",
    "import requests\n",
    "from io import BytesIO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "gpus = tf.config.experimental.list_physical_devices('GPU')\n",
    "for gpu in gpus:\n",
    "    tf.config.experimental.set_memory_growth(gpu, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_safety_model(clip_model=\"ViT-L/14\"):\n",
    "    \"\"\"load the safety model\"\"\"\n",
    "    import autokeras as ak  # pylint: disable=import-outside-toplevel\n",
    "    from tensorflow.keras.models import load_model  # pylint: disable=import-outside-toplevel\n",
    "\n",
    "    cache_folder = \"./NSFW-cache\"\n",
    "\n",
    "    if clip_model == \"ViT-L/14\":\n",
    "        model_dir = cache_folder + \"/clip_autokeras_binary_nsfw\"\n",
    "        dim = 768\n",
    "    else:\n",
    "        raise ValueError(\"Unknown clip model\")\n",
    "    if not os.path.exists(model_dir):\n",
    "        os.makedirs(cache_folder, exist_ok=True)\n",
    "\n",
    "        from urllib.request import urlretrieve  # pylint: disable=import-outside-toplevel\n",
    "\n",
    "        path_to_zip_file = cache_folder + \"/clip_autokeras_binary_nsfw.zip\"\n",
    "        if clip_model == \"ViT-L/14\":\n",
    "            url_model = \"https://raw.githubusercontent.com/LAION-AI/CLIP-based-NSFW-Detector/main/clip_autokeras_binary_nsfw.zip\"\n",
    "        elif clip_model == \"ViT-B/32\":\n",
    "            url_model = (\n",
    "                \"https://raw.githubusercontent.com/LAION-AI/CLIP-based-NSFW-Detector/main/clip_autokeras_nsfw_b32.zip\"\n",
    "            )\n",
    "        else:\n",
    "            raise ValueError(\"Unknown model {}\".format(clip_model))  # pylint: disable=consider-using-f-string\n",
    "        urlretrieve(url_model, path_to_zip_file)\n",
    "        import zipfile  # pylint: disable=import-outside-toplevel\n",
    "\n",
    "        with zipfile.ZipFile(path_to_zip_file, \"r\") as zip_ref:\n",
    "            zip_ref.extractall(cache_folder)\n",
    "\n",
    "    loaded_model = load_model(model_dir, custom_objects=ak.CUSTOM_OBJECTS, compile=False)\n",
    "    \n",
    "    return loaded_model\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Fd5ofBKRRN02"
   },
   "source": [
    "# 1: Download the LAION datasets\n",
    "\n",
    "Source: https://laion.ai/laion-400-open-dataset/\n",
    "\n",
    "\n",
    "*We produced the dataset in several formats to address the various use cases*: \n",
    "- A 50GB url+caption metadata dataset in parquet files. This can be used to compute statistics and redownload part of the dataset\n",
    "- A 10TB webdataset with 256×256 images, captions and metadata. This is a full version of the dataset, that can be used directly for training\n",
    "- A 1TB set of the 400M text and image clip embeddings, useful to rebuild new knn indices\n",
    "- Two 4GB knn indices allowing to easily search in the dataset + two higher quality 16GB knn indices (running in the webdemo)\n",
    "URL and caption metadata dataset.\n",
    "\n",
    "We provide 32 parquet files of size around 1GB (total 50GB) with the image URLs, the associated texts and additional metadata in the following format:\n",
    "\n",
    "SAMPLE_ID | URL | TEXT | LICENSE | NSFW | similarity | WIDTH | HEIGHT\n",
    "\n",
    "where\n",
    "\n",
    "- SAMPLE_ID:   A unique identifier\n",
    "LICENSE:   If a Creative Commons License could be extracted from the image data, we name it here like e.g. “creativecommons.org/licenses/by-nc-sa/3.0/” – otherwise you’ll find it here a “?”\n",
    "- NSFW: CLIP had been used to estimate if the image has NSFW content. The estimation has been pretty conservative, reducing the number of false negatives at the cost of more false positives. Possible values are “UNLIKELY”, “UNSURE” and “NSFW”\n",
    "- similarity: Value of the cosine similarity between the text and image embedding\n",
    "- WIDTH and HEIGHT: image size as the image was embedded. Originals that were larger than 4K size were resized to 4K\n",
    "\n",
    "*This metadata dataset is best used to redownload the whole dataset or a subset of it. The img2dataset tool can be used to efficiently download such subsets*.\n",
    "\n",
    "Source of the parquet files:\n",
    "https://the-eye.eu/public/AI/cah/laion400m-met-release/laion400m-meta/\n",
    "\n",
    "\n",
    "```\n",
    "!wget http://3080.rom1504.fr/cah/cah_dataframe_unique/part-00000-4d76554c-2d66-4112-9420-0bb9d725a79d-c000.snappy.parquet\n",
    "!wget https://the-eye.eu/public/AI/cah/laion400m-met-release/laion400m-meta/part-00000-5b54c5d5-bbcf-484d-a2ce-0d6f73df1a36-c000.snappy.parquet\n",
    "!wget -m -np -c -U \"eye02\" -w 2 -R \"index.html*\" \"https://the-eye.eu/public/AI/cah/laion400m-met-release/laion400m-meta/\"\n",
    "\n",
    "# LAION-2B-En\n",
    "!git lfs install\n",
    "!git clone https://huggingface.co/datasets/laion/laion2B-en\n",
    "```\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "MyUreWG4H5In"
   },
   "source": [
    "After downloading the datasets, your dir-tree should look like:\n",
    "```\n",
    "the-eye.eu\n",
    "├── robots.txt\n",
    "└── public\n",
    "    └── AI\n",
    "        └── cah\n",
    "            └── laion400m-met-release\n",
    "                ├── laion400m-meta\n",
    "                │   ├── part-00000-5b54c5d5-bbcf-484d-a2ce-0d6f73df1a36-c000.snappy.parquet\n",
    "                │   ├── part-00001-5b54c5d5-bbcf-484d-a2ce-0d6f73df1a36-c000.snappy.parquet\n",
    "                │   ├──  ...\n",
    "\n",
    "```\n",
    "```\n",
    "LAION-2Ben\n",
    "├── laion2B-en\n",
    "│   ├── .git\n",
    "│   ├── .gitattributes\n",
    "│   ├── README.md\n",
    "│   ├── part-00026-5114fd87-297e-42b0-9d11-50f1df323dfa-c000.snappy.parquet\n",
    "│   ├── part-00056-5114fd87-297e-42b0-9d11-50f1df323dfa-c000.snappy.parquet\n",
    "│   ├──  ...\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "5Di9X0JW2sW3"
   },
   "source": [
    "# 2: Download the summary dataframe\n",
    "\n",
    "The two datasets combined have 160 parquet files.\n",
    "\n",
    "- LAION-400M is split into 32 parquet files\n",
    "- LAION-2B-En has 128 parquet files\n",
    "\n",
    "Now, let us download the summary dataframe that allows us to navigate the assets from [here](https://raw.githubusercontent.com/anonymous/hate_scaling/main/data/nlp_hate/df_summary_filewise_400M_2B.csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 424
    },
    "id": "gpyZIfqTSiK0",
    "outputId": "ad626409-aa5f-4b05-88a0-3da507174e5e"
   },
   "outputs": [],
   "source": [
    "url_summary='https://raw.githubusercontent.com/anonymous/hate_scaling/main/data/nlp_hate/df_summary_filewise_400M_2B.csv'\n",
    "df_parquet=pd.read_csv(url_summary)\n",
    "df_parquet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "kAelCG5HHATe",
    "outputId": "a3c8f561-8464-4ffc-8edd-5e45f4df3064"
   },
   "outputs": [],
   "source": [
    "parquet_list=df_parquet.file_loc.values\n",
    "df_parquet.groupby('dataset')['file_size_GB'].describe(), df_parquet.groupby('dataset')['file_size_GB'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4Lpht7vY0EWV"
   },
   "outputs": [],
   "source": [
    "#parquet_list_400m=parquet_list[0:32]\n",
    "parquet_list_2b=parquet_list[32:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "parquet_list_2b[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jSXfM3bh3c3t"
   },
   "outputs": [],
   "source": [
    "!pip install --quiet pytictoc\n",
    "from pytictoc import TicToc\n",
    "t = TicToc()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WvueEwdG0kW-"
   },
   "source": [
    "Now, let us look at how the _raw_ parquet files look like:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 254
    },
    "id": "0MiJZQjN0lVs",
    "outputId": "a5f10271-525e-4aa8-ee72-a1702d51431b"
   },
   "outputs": [],
   "source": [
    "t.tic()\n",
    "df_2B = pd.read_parquet(parquet_list_2b[0])\n",
    "print(df_2B.shape)\n",
    "t.toc()\n",
    "df_2B.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_2B['TEXT'] = df_2B['TEXT'].fillna('')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Calculate NSFW and hate speech scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "safety_model = load_safety_model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = \"cuda\"\n",
    "model, preprocess = clip.load(\"ViT-L/14\", device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalized(a, axis=-1, order=2):\n",
    "    import numpy as np  # pylint: disable=import-outside-toplevel\n",
    "    l2 = np.atleast_1d(np.linalg.norm(a, order, axis))\n",
    "    l2[l2 == 0] = 1\n",
    "    return a / np.expand_dims(l2, axis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "analyzer = create_analyzer(task=\"hate_speech\", lang=\"en\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_nsfw_hate_check(row):\n",
    "    image_url= row['URL']\n",
    "    alt_text = row['TEXT']\n",
    "    id_sample = row['SAMPLE_ID']\n",
    "    nsfw_value= np.nan\n",
    "    if id_sample not in existing_ids:\n",
    "        try:\n",
    "            response = requests.get(image_url, timeout=5) \n",
    "        except:\n",
    "            print('response error')\n",
    "        try:\n",
    "            img = preprocess(Image.open(BytesIO(response.content))).unsqueeze(0).to(device)\n",
    "            with torch.no_grad():\n",
    "                image_features = model.encode_image(img)\n",
    "                emb = np.asarray(normalized(image_features.detach().cpu()))\n",
    "                nsfw_value = safety_model.predict(emb)\n",
    "        except:\n",
    "            print('image error')\n",
    "        hate = analyzer.predict(alt_text).probas\n",
    "        csvwriter.writerow([id_sample, hate, nsfw_value])\n",
    "        csvfile.flush()\n",
    "    return(nsfw_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('nsfw/laion0.csv', 'w', newline='') as csvfile:\n",
    "    csvwriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)\n",
    "    df_2B['NSFW_VALUE'] = df_2B.apply(lambda row: get_nsfw_hate_check(row), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_2B_2.to_parquet('./nsfw/laion0.parquet')"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuClass": "premium",
   "gpuType": "T4",
   "include_colab_link": true,
   "machine_shape": "hm",
   "provenance": []
  },
  "gpuClass": "premium",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "1935dcca1939454ab027ce7adf492b20": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "34b439cd8b894de09de66ec09a072547": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_1935dcca1939454ab027ce7adf492b20",
      "placeholder": "​",
      "style": "IPY_MODEL_68392799cf964122bd3c9d2344ceea68",
      "value": "Map:  99%"
     }
    },
    "39a99ddce4a04ae2aab3b1216a4b2b7b": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "39b1ced9640a4a5b85801d472193bcf4": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": "hidden",
      "width": null
     }
    },
    "4e4dbfcee4c9485bae06ce518a5d4e2a": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_50b8b40a6a1b46f6aefa10bbf7febb74",
      "max": 100000,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_39a99ddce4a04ae2aab3b1216a4b2b7b",
      "value": 100000
     }
    },
    "50b8b40a6a1b46f6aefa10bbf7febb74": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "68392799cf964122bd3c9d2344ceea68": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "7f14e30db13941bdbc6f4ceb2e4f9ae9": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "84b705285e954374a84787e4ac3a6af7": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_ade7e024f9764c9693e457ff04acfe2d",
      "placeholder": "​",
      "style": "IPY_MODEL_7f14e30db13941bdbc6f4ceb2e4f9ae9",
      "value": " 99488/100000 [00:19&lt;00:00, 5588.43 examples/s]"
     }
    },
    "9eb6d2b5a8e04059b7fb60e0ae0ce7b3": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_34b439cd8b894de09de66ec09a072547",
       "IPY_MODEL_4e4dbfcee4c9485bae06ce518a5d4e2a",
       "IPY_MODEL_84b705285e954374a84787e4ac3a6af7"
      ],
      "layout": "IPY_MODEL_39b1ced9640a4a5b85801d472193bcf4"
     }
    },
    "ade7e024f9764c9693e457ff04acfe2d": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
