{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "35df4a98",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "if os.path.isdir(\"../notebooks/\"):\n",
    "    os.chdir(\"../badseeds/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9422d1b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "import preprocess"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f9a1b33c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# path to config json file containing paths to datasets. change if necessary\n",
    "CONFIG_PATH = \"../config.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5ba20439",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(CONFIG_PATH, \"r\") as f:\n",
    "    config = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a4a3c6e9",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████| 192577/192577 [00:00<00:00, 907520.12it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preliminary preprocessing complete; continuing with spacy\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████| 8888/8888 [03:21<00:00, 44.18it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preliminary preprocessing complete; continuing with spacy\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████| 28472/28472 [32:07<00:00, 14.77it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Removing all reviews with less than 20 chars and creating datastructure\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████| 3565378/3565378 [00:16<00:00, 209772.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Remove all books with fewer than 500 reviews\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████| 328145/328145 [00:00<00:00, 3320946.93it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sampling 500 random reviews per book\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████| 389/389 [00:00<00:00, 5496.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preprocessing all reviews now\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 14%|████                          | 484862/3565378 [00:03<00:19, 160357.58it/s]\n",
      "  0%|                                                 | 0/29364 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/29364 [00:01<9:01:24,  1.11s/it]\u001b[A\n",
      "  1%|▎                                     | 257/29364 [00:02<03:12, 151.15it/s]\u001b[A\n",
      "  2%|▋                                     | 513/29364 [00:02<02:17, 210.48it/s]\u001b[A\n",
      "  3%|▉                                     | 769/29364 [00:03<01:43, 276.28it/s]\u001b[A\n",
      "  3%|█▎                                   | 1025/29364 [00:04<01:33, 302.39it/s]\u001b[A\n",
      "  4%|█▌                                   | 1281/29364 [00:04<01:26, 323.86it/s]\u001b[A\n",
      "  5%|█▉                                   | 1537/29364 [00:05<01:28, 313.50it/s]\u001b[A\n",
      "  6%|██▏                                  | 1779/29364 [00:05<01:03, 431.71it/s]\u001b[A\n",
      "  6%|██▎                                  | 1879/29364 [00:06<01:19, 343.86it/s]\u001b[A\n",
      "  7%|██▌                                  | 2049/29364 [00:06<01:15, 360.98it/s]\u001b[A\n",
      "  8%|██▉                                  | 2305/29364 [00:07<01:12, 371.24it/s]\u001b[A\n",
      "  9%|███▏                                 | 2561/29364 [00:08<01:21, 328.97it/s]\u001b[A\n",
      " 10%|███▌                                 | 2791/29364 [00:08<00:59, 447.00it/s]\u001b[A\n",
      " 10%|███▋                                 | 2894/29364 [00:09<01:22, 319.23it/s]\u001b[A\n",
      " 10%|███▊                                 | 3073/29364 [00:09<01:14, 352.56it/s]\u001b[A\n",
      " 11%|████▏                                | 3329/29364 [00:10<01:17, 334.70it/s]\u001b[A\n",
      " 12%|████▌                                | 3582/29364 [00:10<00:54, 477.38it/s]\u001b[A\n",
      " 13%|████▋                                | 3698/29364 [00:11<01:03, 406.96it/s]\u001b[A\n",
      " 13%|████▊                                | 3841/29364 [00:11<01:08, 371.84it/s]\u001b[A\n",
      " 14%|█████▏                               | 4097/29364 [00:12<01:04, 390.57it/s]\u001b[A\n",
      " 15%|█████▍                               | 4353/29364 [00:13<01:13, 338.54it/s]\u001b[A\n",
      " 16%|█████▊                               | 4569/29364 [00:13<00:54, 452.72it/s]\u001b[A\n",
      " 16%|█████▉                               | 4670/29364 [00:13<01:09, 353.75it/s]\u001b[A\n",
      " 17%|██████▏                              | 4865/29364 [00:14<01:02, 389.21it/s]\u001b[A\n",
      " 17%|██████▍                              | 5121/29364 [00:15<01:12, 336.50it/s]\u001b[A\n",
      " 18%|██████▊                              | 5377/29364 [00:15<01:09, 344.23it/s]\u001b[A\n",
      " 19%|███████                              | 5633/29364 [00:16<01:07, 353.10it/s]\u001b[A\n",
      " 20%|███████▍                             | 5889/29364 [00:17<01:05, 358.61it/s]\u001b[A\n",
      " 21%|███████▋                             | 6145/29364 [00:17<01:03, 366.75it/s]\u001b[A\n",
      " 22%|████████                             | 6401/29364 [00:18<01:08, 337.67it/s]\u001b[A\n",
      " 14%|████                          | 484862/3565378 [00:22<00:19, 160357.58it/s]\u001b[A\n",
      " 24%|████████▋                            | 6913/29364 [00:20<01:13, 303.65it/s]\u001b[A\n",
      " 24%|█████████                            | 7152/29364 [00:20<00:54, 406.10it/s]\u001b[A\n",
      " 25%|█████████▏                           | 7248/29364 [00:21<01:05, 337.03it/s]\u001b[A\n",
      " 25%|█████████▎                           | 7425/29364 [00:21<01:06, 329.84it/s]\u001b[A\n",
      " 26%|█████████▋                           | 7681/29364 [00:22<01:06, 326.48it/s]\u001b[A\n",
      " 27%|██████████                           | 7937/29364 [00:23<01:00, 355.06it/s]\u001b[A\n",
      " 28%|██████████▎                          | 8193/29364 [00:23<00:57, 371.35it/s]\u001b[A\n",
      " 29%|██████████▋                          | 8449/29364 [00:24<00:59, 351.93it/s]\u001b[A\n",
      " 30%|██████████▉                          | 8705/29364 [00:25<00:54, 375.85it/s]\u001b[A\n",
      " 31%|███████████▎                         | 8961/29364 [00:25<00:52, 389.53it/s]\u001b[A\n",
      " 31%|███████████▌                         | 9217/29364 [00:26<00:52, 380.67it/s]\u001b[A\n",
      " 32%|███████████▉                         | 9473/29364 [00:26<00:46, 429.31it/s]\u001b[A\n",
      " 33%|████████████▎                        | 9729/29364 [00:27<00:48, 405.32it/s]\u001b[A\n",
      " 34%|████████████▌                        | 9985/29364 [00:28<00:50, 386.38it/s]\u001b[A\n",
      " 35%|████████████▌                       | 10241/29364 [00:29<00:49, 386.01it/s]\u001b[A\n",
      " 36%|████████████▊                       | 10497/29364 [00:30<00:54, 348.15it/s]\u001b[A\n",
      " 37%|█████████████▏                      | 10733/29364 [00:30<00:40, 459.22it/s]\u001b[A\n",
      " 37%|█████████████▎                      | 10830/29364 [00:30<00:46, 395.17it/s]\u001b[A\n",
      " 37%|█████████████▍                      | 11009/29364 [00:31<00:47, 389.44it/s]\u001b[A\n",
      " 38%|█████████████▊                      | 11265/29364 [00:31<00:44, 408.09it/s]\u001b[A\n",
      " 39%|██████████████                      | 11521/29364 [00:32<00:44, 401.24it/s]\u001b[A\n",
      " 40%|██████████████▍                     | 11777/29364 [00:33<00:55, 319.18it/s]\u001b[A\n",
      " 41%|██████████████▋                     | 12004/29364 [00:33<00:40, 426.22it/s]\u001b[A\n",
      " 41%|██████████████▊                     | 12103/29364 [00:34<00:54, 315.26it/s]\u001b[A\n",
      " 42%|███████████████                     | 12289/29364 [00:34<00:55, 308.70it/s]\u001b[A\n",
      " 43%|███████████████▍                    | 12545/29364 [00:35<00:48, 348.22it/s]\u001b[A\n",
      " 44%|███████████████▋                    | 12801/29364 [00:36<00:43, 380.53it/s]\u001b[A\n",
      " 44%|████████████████                    | 13057/29364 [00:36<00:41, 391.38it/s]\u001b[A\n",
      " 45%|████████████████▎                   | 13313/29364 [00:37<00:39, 411.27it/s]\u001b[A\n",
      " 46%|████████████████▋                   | 13569/29364 [00:37<00:41, 382.36it/s]\u001b[A\n",
      " 47%|████████████████▉                   | 13825/29364 [00:38<00:37, 415.00it/s]\u001b[A\n",
      " 48%|█████████████████▎                  | 14081/29364 [00:39<00:36, 416.01it/s]\u001b[A\n",
      " 49%|█████████████████▌                  | 14337/29364 [00:39<00:35, 422.65it/s]\u001b[A\n",
      " 50%|█████████████████▉                  | 14593/29364 [00:40<00:34, 422.98it/s]\u001b[A\n",
      " 51%|██████████████████▏                 | 14849/29364 [00:41<00:40, 359.54it/s]\u001b[A\n",
      " 51%|██████████████████▌                 | 15105/29364 [00:41<00:37, 383.98it/s]\u001b[A\n",
      " 52%|██████████████████▊                 | 15361/29364 [00:42<00:38, 368.21it/s]\u001b[A\n",
      " 53%|███████████████████▏                | 15617/29364 [00:43<00:33, 410.69it/s]\u001b[A\n",
      " 54%|███████████████████▍                | 15873/29364 [00:43<00:30, 444.32it/s]\u001b[A\n",
      " 55%|███████████████████▊                | 16129/29364 [00:43<00:27, 489.07it/s]\u001b[A\n",
      " 56%|████████████████████                | 16385/29364 [00:44<00:27, 476.18it/s]\u001b[A\n",
      " 57%|████████████████████▍               | 16641/29364 [00:45<00:28, 446.94it/s]\u001b[A\n",
      " 58%|████████████████████▋               | 16897/29364 [00:45<00:28, 444.16it/s]\u001b[A\n",
      " 58%|█████████████████████               | 17153/29364 [00:46<00:27, 437.82it/s]\u001b[A\n",
      " 59%|█████████████████████▎              | 17409/29364 [00:46<00:24, 481.14it/s]\u001b[A\n",
      " 60%|█████████████████████▋              | 17665/29364 [00:47<00:23, 505.52it/s]\u001b[A\n",
      " 61%|█████████████████████▉              | 17921/29364 [00:47<00:25, 447.03it/s]\u001b[A\n",
      " 62%|██████████████████████▎             | 18177/29364 [00:48<00:24, 461.87it/s]\u001b[A\n",
      " 63%|██████████████████████▌             | 18433/29364 [00:48<00:23, 463.55it/s]\u001b[A\n",
      " 64%|██████████████████████▉             | 18689/29364 [00:49<00:23, 450.53it/s]\u001b[A\n",
      " 65%|███████████████████████▏            | 18945/29364 [00:50<00:24, 431.36it/s]\u001b[A\n",
      " 65%|███████████████████████▌            | 19201/29364 [00:50<00:24, 409.74it/s]\u001b[A\n",
      " 66%|███████████████████████▊            | 19457/29364 [00:51<00:23, 425.81it/s]\u001b[A\n",
      " 67%|████████████████████████▏           | 19713/29364 [00:51<00:21, 440.30it/s]\u001b[A\n",
      " 68%|████████████████████████▍           | 19969/29364 [00:52<00:21, 437.55it/s]\u001b[A\n",
      " 69%|████████████████████████▊           | 20225/29364 [00:53<00:21, 434.25it/s]\u001b[A\n",
      " 70%|█████████████████████████           | 20481/29364 [00:54<00:25, 346.50it/s]\u001b[A\n",
      " 70%|█████████████████████████▍          | 20699/29364 [00:54<00:19, 446.79it/s]\u001b[A\n",
      " 71%|█████████████████████████▍          | 20791/29364 [00:54<00:22, 388.05it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 20993/29364 [00:55<00:25, 332.38it/s]\u001b[A\n",
      " 72%|██████████████████████████          | 21248/29364 [00:55<00:16, 479.93it/s]\u001b[A\n",
      " 73%|██████████████████████████▏         | 21365/29364 [00:56<00:19, 408.84it/s]\u001b[A\n",
      " 73%|██████████████████████████▎         | 21505/29364 [00:56<00:24, 316.66it/s]\u001b[A\n",
      " 74%|██████████████████████████▋         | 21761/29364 [00:57<00:21, 348.54it/s]\u001b[A\n",
      " 75%|██████████████████████████▉         | 22017/29364 [00:58<00:19, 377.84it/s]\u001b[A\n",
      " 76%|███████████████████████████▎        | 22273/29364 [00:58<00:17, 417.09it/s]\u001b[A\n",
      " 77%|███████████████████████████▌        | 22529/29364 [00:59<00:17, 383.72it/s]\u001b[A\n",
      " 78%|███████████████████████████▉        | 22785/29364 [00:59<00:16, 393.46it/s]\u001b[A\n",
      " 78%|████████████████████████████▏       | 23041/29364 [01:00<00:17, 367.33it/s]\u001b[A\n",
      " 79%|████████████████████████████▌       | 23297/29364 [01:01<00:18, 326.92it/s]\u001b[A\n",
      " 80%|████████████████████████████▊       | 23524/29364 [01:01<00:13, 430.36it/s]\u001b[A\n",
      " 80%|████████████████████████████▉       | 23619/29364 [01:02<00:15, 368.22it/s]\u001b[A\n",
      " 81%|█████████████████████████████▏      | 23809/29364 [01:02<00:15, 363.85it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 24065/29364 [01:03<00:14, 366.02it/s]\u001b[A\n",
      " 83%|█████████████████████████████▊      | 24321/29364 [01:04<00:12, 397.12it/s]\u001b[A\n",
      " 84%|██████████████████████████████▏     | 24577/29364 [01:04<00:12, 393.73it/s]\u001b[A\n",
      " 85%|██████████████████████████████▍     | 24833/29364 [01:05<00:11, 381.37it/s]\u001b[A\n",
      " 85%|██████████████████████████████▊     | 25089/29364 [01:06<00:10, 398.66it/s]\u001b[A\n",
      " 86%|███████████████████████████████     | 25345/29364 [01:06<00:09, 408.84it/s]\u001b[A\n",
      " 87%|███████████████████████████████▍    | 25601/29364 [01:07<00:08, 450.58it/s]\u001b[A\n",
      " 88%|███████████████████████████████▋    | 25857/29364 [01:08<00:10, 320.68it/s]\u001b[A\n",
      " 89%|███████████████████████████████▉    | 26073/29364 [01:08<00:07, 415.57it/s]\u001b[A\n",
      " 89%|████████████████████████████████    | 26166/29364 [01:09<00:10, 294.54it/s]\u001b[A\n",
      " 90%|████████████████████████████████▎   | 26369/29364 [01:09<00:09, 309.16it/s]\u001b[A\n",
      " 91%|████████████████████████████████▋   | 26625/29364 [01:10<00:08, 340.36it/s]\u001b[A\n",
      " 92%|████████████████████████████████▉   | 26881/29364 [01:11<00:06, 383.68it/s]\u001b[A\n",
      " 92%|█████████████████████████████████▎  | 27137/29364 [01:11<00:05, 400.84it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▌  | 27393/29364 [01:12<00:04, 432.66it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▉  | 27649/29364 [01:12<00:04, 410.39it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▏ | 27905/29364 [01:13<00:04, 362.83it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▌ | 28161/29364 [01:14<00:03, 362.34it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▊ | 28417/29364 [01:14<00:02, 391.24it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▏| 28673/29364 [01:15<00:01, 386.01it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▍| 28929/29364 [01:16<00:01, 410.39it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 29364/29364 [01:16<00:00, 382.83it/s]\u001b[A\n",
      " 14%|████▊                            | 515337/3565378 [01:21<52:53, 961.03it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_500000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|████████▎                     | 992596/3565378 [01:24<00:15, 163691.92it/s]\n",
      "  0%|                                                 | 0/28713 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/28713 [00:00<4:00:56,  1.99it/s]\u001b[A\n",
      "  1%|▎                                     | 257/28713 [00:01<01:51, 255.82it/s]\u001b[A\n",
      "  2%|▋                                     | 513/28713 [00:01<01:14, 377.62it/s]\u001b[A\n",
      "  3%|█                                     | 769/28713 [00:02<01:02, 443.74it/s]\u001b[A\n",
      "  4%|█▎                                   | 1025/28713 [00:02<01:07, 407.43it/s]\u001b[A\n",
      "  4%|█▋                                   | 1281/28713 [00:03<01:03, 434.22it/s]\u001b[A\n",
      "  5%|█▉                                   | 1537/28713 [00:03<00:59, 454.98it/s]\u001b[A\n",
      "  6%|██▎                                  | 1793/28713 [00:04<00:56, 475.35it/s]\u001b[A\n",
      "  7%|██▋                                  | 2049/28713 [00:04<00:54, 486.59it/s]\u001b[A\n",
      "  8%|██▉                                  | 2305/28713 [00:05<00:55, 478.34it/s]\u001b[A\n",
      "  9%|███▎                                 | 2561/28713 [00:05<00:51, 503.05it/s]\u001b[A\n",
      " 10%|███▋                                 | 2817/28713 [00:06<01:00, 427.00it/s]\u001b[A\n",
      " 11%|███▉                                 | 3073/28713 [00:07<00:58, 435.65it/s]\u001b[A\n",
      " 12%|████▎                                | 3329/28713 [00:07<00:56, 451.54it/s]\u001b[A\n",
      " 12%|████▌                                | 3585/28713 [00:08<01:04, 388.42it/s]\u001b[A\n",
      " 13%|████▉                                | 3828/28713 [00:08<00:48, 513.55it/s]\u001b[A\n",
      " 14%|█████                                | 3930/28713 [00:09<01:01, 406.25it/s]\u001b[A\n",
      " 14%|█████▎                               | 4097/28713 [00:09<01:04, 380.28it/s]\u001b[A\n",
      " 15%|█████▌                               | 4353/28713 [00:10<01:04, 378.22it/s]\u001b[A\n",
      " 16%|█████▉                               | 4609/28713 [00:11<01:06, 360.13it/s]\u001b[A\n",
      " 17%|██████▎                              | 4865/28713 [00:11<01:08, 350.25it/s]\u001b[A\n",
      " 18%|██████▌                              | 5116/28713 [00:12<00:49, 479.83it/s]\u001b[A\n",
      " 18%|██████▋                              | 5222/28713 [00:12<01:08, 341.95it/s]\u001b[A\n",
      " 19%|██████▉                              | 5377/28713 [00:13<01:11, 327.58it/s]\u001b[A\n",
      " 20%|███████▎                             | 5633/28713 [00:14<01:08, 337.53it/s]\u001b[A\n",
      " 21%|███████▌                             | 5889/28713 [00:14<00:59, 381.45it/s]\u001b[A\n",
      " 21%|███████▉                             | 6145/28713 [00:15<00:57, 390.91it/s]\u001b[A\n",
      " 22%|████████▏                            | 6401/28713 [00:16<01:18, 285.94it/s]\u001b[A\n",
      " 23%|████████▍                            | 6586/28713 [00:16<01:01, 362.11it/s]\u001b[A\n",
      " 23%|████████▌                            | 6670/28713 [00:18<01:39, 220.69it/s]\u001b[A\n",
      " 28%|████████▎                     | 992596/3565378 [01:42<00:15, 163691.92it/s]\u001b[A\n",
      " 24%|████████▉                            | 6969/28713 [00:18<01:19, 274.33it/s]\u001b[A\n",
      " 25%|█████████▏                           | 7169/28713 [00:19<01:09, 311.63it/s]\u001b[A\n",
      " 26%|█████████▌                           | 7425/28713 [00:19<00:59, 358.41it/s]\u001b[A\n",
      " 27%|█████████▉                           | 7681/28713 [00:20<00:55, 379.83it/s]\u001b[A\n",
      " 28%|██████████▏                          | 7937/28713 [00:20<00:52, 393.01it/s]\u001b[A\n",
      " 29%|██████████▌                          | 8193/28713 [00:21<00:56, 364.45it/s]\u001b[A\n",
      " 29%|██████████▉                          | 8449/28713 [00:22<01:01, 330.44it/s]\u001b[A\n",
      " 30%|███████████▏                         | 8672/28713 [00:22<00:46, 435.44it/s]\u001b[A\n",
      " 31%|███████████▎                         | 8767/28713 [00:23<00:53, 372.69it/s]\u001b[A\n",
      " 31%|███████████▌                         | 8961/28713 [00:23<00:56, 348.10it/s]\u001b[A\n",
      " 32%|███████████▉                         | 9217/28713 [00:24<00:49, 390.28it/s]\u001b[A\n",
      " 33%|████████████▏                        | 9473/28713 [00:25<00:52, 364.83it/s]\u001b[A\n",
      " 34%|████████████▌                        | 9716/28713 [00:25<00:38, 498.67it/s]\u001b[A\n",
      " 34%|████████████▋                        | 9824/28713 [00:25<00:47, 396.35it/s]\u001b[A\n",
      " 35%|████████████▊                        | 9985/28713 [00:26<00:49, 376.14it/s]\u001b[A\n",
      " 36%|████████████▊                       | 10241/28713 [00:26<00:46, 400.32it/s]\u001b[A\n",
      " 37%|█████████████▏                      | 10497/28713 [00:27<00:46, 391.68it/s]\u001b[A\n",
      " 37%|█████████████▍                      | 10753/28713 [00:28<00:46, 382.86it/s]\u001b[A\n",
      " 38%|█████████████▊                      | 11009/28713 [00:28<00:45, 389.10it/s]\u001b[A\n",
      " 39%|██████████████                      | 11265/28713 [00:29<00:39, 441.57it/s]\u001b[A\n",
      " 40%|██████████████▍                     | 11521/28713 [00:29<00:41, 419.17it/s]\u001b[A\n",
      " 41%|██████████████▊                     | 11777/28713 [00:30<00:40, 416.46it/s]\u001b[A\n",
      " 42%|███████████████                     | 12033/28713 [00:31<00:41, 401.33it/s]\u001b[A\n",
      " 43%|███████████████▍                    | 12289/28713 [00:31<00:38, 429.59it/s]\u001b[A\n",
      " 44%|███████████████▋                    | 12545/28713 [00:32<00:37, 436.53it/s]\u001b[A\n",
      " 45%|████████████████                    | 12801/28713 [00:32<00:36, 431.49it/s]\u001b[A\n",
      " 45%|████████████████▎                   | 13057/28713 [00:33<00:38, 406.29it/s]\u001b[A\n",
      " 46%|████████████████▋                   | 13313/28713 [00:34<00:36, 418.97it/s]\u001b[A\n",
      " 47%|█████████████████                   | 13569/28713 [00:34<00:33, 448.36it/s]\u001b[A\n",
      " 48%|█████████████████▎                  | 13825/28713 [00:35<00:32, 459.16it/s]\u001b[A\n",
      " 49%|█████████████████▋                  | 14081/28713 [00:35<00:33, 443.08it/s]\u001b[A\n",
      " 50%|█████████████████▉                  | 14337/28713 [00:36<00:35, 405.70it/s]\u001b[A\n",
      " 51%|██████████████████▎                 | 14593/28713 [00:37<00:36, 391.59it/s]\u001b[A\n",
      " 52%|██████████████████▌                 | 14849/28713 [00:37<00:34, 403.39it/s]\u001b[A\n",
      " 53%|██████████████████▉                 | 15105/28713 [00:38<00:34, 390.23it/s]\u001b[A\n",
      " 53%|███████████████████▎                | 15361/28713 [00:39<00:32, 410.61it/s]\u001b[A\n",
      " 54%|███████████████████▌                | 15617/28713 [00:39<00:34, 382.00it/s]\u001b[A\n",
      " 55%|███████████████████▉                | 15858/28713 [00:40<00:25, 503.87it/s]\u001b[A\n",
      " 56%|████████████████████                | 15958/28713 [00:40<00:32, 388.38it/s]\u001b[A\n",
      " 56%|████████████████████▏               | 16129/28713 [00:41<00:35, 356.32it/s]\u001b[A\n",
      " 57%|████████████████████▌               | 16385/28713 [00:41<00:30, 410.45it/s]\u001b[A\n",
      " 58%|████████████████████▊               | 16641/28713 [00:42<00:28, 429.66it/s]\u001b[A\n",
      " 59%|█████████████████████▏              | 16897/28713 [00:42<00:27, 422.78it/s]\u001b[A\n",
      " 60%|█████████████████████▌              | 17153/28713 [00:43<00:25, 450.59it/s]\u001b[A\n",
      " 61%|█████████████████████▊              | 17409/28713 [00:44<00:26, 430.06it/s]\u001b[A\n",
      " 62%|██████████████████████▏             | 17665/28713 [00:44<00:25, 425.24it/s]\u001b[A\n",
      " 62%|██████████████████████▍             | 17921/28713 [00:45<00:24, 444.98it/s]\u001b[A\n",
      " 63%|██████████████████████▊             | 18177/28713 [00:45<00:22, 462.17it/s]\u001b[A\n",
      " 64%|███████████████████████             | 18433/28713 [00:46<00:24, 423.69it/s]\u001b[A\n",
      " 65%|███████████████████████▍            | 18689/28713 [00:47<00:24, 411.72it/s]\u001b[A\n",
      " 66%|███████████████████████▊            | 18945/28713 [00:47<00:23, 420.98it/s]\u001b[A\n",
      " 67%|████████████████████████            | 19201/28713 [00:48<00:25, 377.74it/s]\u001b[A\n",
      " 68%|████████████████████████▍           | 19448/28713 [00:48<00:18, 502.34it/s]\u001b[A\n",
      " 68%|████████████████████████▌           | 19550/28713 [00:49<00:21, 416.67it/s]\u001b[A\n",
      " 69%|████████████████████████▋           | 19713/28713 [00:49<00:23, 375.67it/s]\u001b[A\n",
      " 70%|█████████████████████████           | 19969/28713 [00:50<00:21, 416.17it/s]\u001b[A\n",
      " 70%|█████████████████████████▎          | 20225/28713 [00:50<00:19, 439.80it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 20481/28713 [00:51<00:18, 436.89it/s]\u001b[A\n",
      " 72%|█████████████████████████▉          | 20737/28713 [00:51<00:17, 445.22it/s]\u001b[A\n",
      " 73%|██████████████████████████▎         | 20993/28713 [00:52<00:17, 437.70it/s]\u001b[A\n",
      " 74%|██████████████████████████▋         | 21249/28713 [00:53<00:20, 358.06it/s]\u001b[A\n",
      " 75%|██████████████████████████▉         | 21443/28713 [00:53<00:16, 449.98it/s]\u001b[A\n",
      " 75%|██████████████████████████▉         | 21531/28713 [00:53<00:19, 372.61it/s]\u001b[A\n",
      " 76%|███████████████████████████▎        | 21761/28713 [00:54<00:16, 425.91it/s]\u001b[A\n",
      " 77%|███████████████████████████▌        | 22017/28713 [00:55<00:16, 403.64it/s]\u001b[A\n",
      " 78%|███████████████████████████▉        | 22273/28713 [00:55<00:15, 407.75it/s]\u001b[A\n",
      " 78%|████████████████████████████▏       | 22529/28713 [00:56<00:16, 380.69it/s]\u001b[A\n",
      " 79%|████████████████████████████▌       | 22779/28713 [00:56<00:11, 517.01it/s]\u001b[A\n",
      " 80%|████████████████████████████▋       | 22887/28713 [00:57<00:14, 407.72it/s]\u001b[A\n",
      " 80%|████████████████████████████▉       | 23041/28713 [00:57<00:15, 369.79it/s]\u001b[A\n",
      " 81%|█████████████████████████████▏      | 23297/28713 [00:58<00:13, 396.25it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 23553/28713 [00:58<00:11, 445.25it/s]\u001b[A\n",
      " 83%|█████████████████████████████▊      | 23809/28713 [00:59<00:11, 445.45it/s]\u001b[A\n",
      " 84%|██████████████████████████████▏     | 24065/28713 [00:59<00:10, 460.54it/s]\u001b[A\n",
      " 85%|██████████████████████████████▍     | 24321/28713 [01:00<00:09, 467.03it/s]\u001b[A\n",
      " 86%|██████████████████████████████▊     | 24577/28713 [01:00<00:09, 428.52it/s]\u001b[A\n",
      " 86%|███████████████████████████████▏    | 24833/28713 [01:01<00:09, 404.69it/s]\u001b[A\n",
      " 87%|███████████████████████████████▍    | 25089/28713 [01:02<00:08, 422.15it/s]\u001b[A\n",
      " 88%|███████████████████████████████▊    | 25345/28713 [01:03<00:08, 380.42it/s]\u001b[A\n",
      " 89%|████████████████████████████████    | 25601/28713 [01:03<00:07, 402.21it/s]\u001b[A\n",
      " 90%|████████████████████████████████▍   | 25857/28713 [01:04<00:07, 402.16it/s]\u001b[A\n",
      " 91%|████████████████████████████████▋   | 26113/28713 [01:05<00:06, 375.30it/s]\u001b[A\n",
      " 92%|█████████████████████████████████   | 26369/28713 [01:05<00:06, 336.53it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▍  | 26625/28713 [01:06<00:05, 377.47it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▋  | 26881/28713 [01:07<00:04, 384.62it/s]\u001b[A\n",
      " 95%|██████████████████████████████████  | 27137/28713 [01:07<00:04, 373.93it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▎ | 27393/28713 [01:08<00:03, 370.69it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▋ | 27649/28713 [01:09<00:02, 394.70it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▉ | 27905/28713 [01:09<00:01, 419.58it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▎| 28161/28713 [01:10<00:01, 416.43it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▋| 28417/28713 [01:11<00:00, 359.39it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 28713/28713 [01:11<00:00, 402.43it/s]\u001b[A\n",
      " 28%|█████████                       | 1015185/3565378 [02:37<45:37, 931.69it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_1000000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████████████                 | 1489354/3565378 [02:40<00:12, 162216.86it/s]\n",
      "  0%|                                                 | 0/25176 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/25176 [00:00<5:18:30,  1.32it/s]\u001b[A\n",
      "  1%|▍                                     | 257/25176 [00:01<01:53, 219.77it/s]\u001b[A\n",
      "  2%|▊                                     | 513/25176 [00:01<01:17, 318.62it/s]\u001b[A\n",
      "  3%|█▏                                    | 769/25176 [00:02<01:14, 329.19it/s]\u001b[A\n",
      "  4%|█▌                                   | 1025/25176 [00:03<01:11, 338.14it/s]\u001b[A\n",
      "  5%|█▉                                   | 1281/25176 [00:03<01:02, 385.02it/s]\u001b[A\n",
      "  6%|██▎                                  | 1537/25176 [00:04<01:09, 340.78it/s]\u001b[A\n",
      "  7%|██▋                                  | 1793/25176 [00:05<01:08, 340.87it/s]\u001b[A\n",
      "  8%|███                                  | 2049/25176 [00:06<01:06, 348.12it/s]\u001b[A\n",
      "  9%|███▍                                 | 2305/25176 [00:06<01:04, 353.11it/s]\u001b[A\n",
      " 10%|███▊                                 | 2561/25176 [00:07<01:02, 359.51it/s]\u001b[A\n",
      " 11%|████▏                                | 2817/25176 [00:08<00:59, 377.35it/s]\u001b[A\n",
      " 12%|████▌                                | 3073/25176 [00:08<00:58, 379.55it/s]\u001b[A\n",
      " 13%|████▉                                | 3329/25176 [00:09<00:56, 389.59it/s]\u001b[A\n",
      " 14%|█████▎                               | 3585/25176 [00:10<00:53, 402.27it/s]\u001b[A\n",
      " 15%|█████▋                               | 3841/25176 [00:10<00:51, 416.91it/s]\u001b[A\n",
      " 16%|██████                               | 4097/25176 [00:11<00:50, 414.00it/s]\u001b[A\n",
      " 17%|██████▍                              | 4353/25176 [00:12<00:55, 374.44it/s]\u001b[A\n",
      " 42%|████████████                 | 1489354/3565378 [02:52<00:12, 162216.86it/s]\u001b[A\n",
      " 19%|███████▏                             | 4865/25176 [00:13<00:49, 407.29it/s]\u001b[A\n",
      " 20%|███████▌                             | 5121/25176 [00:13<00:49, 406.28it/s]\u001b[A\n",
      " 21%|███████▉                             | 5377/25176 [00:14<00:44, 440.72it/s]\u001b[A\n",
      " 22%|████████▎                            | 5633/25176 [00:15<00:46, 417.32it/s]\u001b[A\n",
      " 23%|████████▋                            | 5889/25176 [00:15<00:49, 391.07it/s]\u001b[A\n",
      " 24%|█████████                            | 6145/25176 [00:16<00:48, 392.39it/s]\u001b[A\n",
      " 25%|█████████▍                           | 6401/25176 [00:17<00:49, 376.12it/s]\u001b[A\n",
      " 26%|█████████▊                           | 6657/25176 [00:17<00:48, 382.95it/s]\u001b[A\n",
      " 27%|██████████▏                          | 6913/25176 [00:18<00:51, 356.11it/s]\u001b[A\n",
      " 28%|██████████▍                          | 7129/25176 [00:18<00:39, 457.45it/s]\u001b[A\n",
      " 29%|██████████▌                          | 7219/25176 [00:19<00:47, 375.34it/s]\u001b[A\n",
      " 29%|██████████▉                          | 7425/25176 [00:20<00:54, 323.46it/s]\u001b[A\n",
      " 30%|███████████▏                         | 7653/25176 [00:20<00:38, 450.08it/s]\u001b[A\n",
      " 31%|███████████▍                         | 7760/25176 [00:20<00:51, 335.26it/s]\u001b[A\n",
      " 32%|███████████▋                         | 7937/25176 [00:21<00:52, 328.10it/s]\u001b[A\n",
      " 33%|████████████                         | 8193/25176 [00:22<00:56, 301.67it/s]\u001b[A\n",
      " 33%|████████████▎                        | 8410/25176 [00:22<00:40, 414.42it/s]\u001b[A\n",
      " 34%|████████████▌                        | 8513/25176 [00:23<00:53, 313.64it/s]\u001b[A\n",
      " 35%|████████████▊                        | 8705/25176 [00:23<00:52, 314.13it/s]\u001b[A\n",
      " 36%|█████████████▏                       | 8961/25176 [00:24<00:48, 335.81it/s]\u001b[A\n",
      " 37%|█████████████▌                       | 9217/25176 [00:25<00:45, 353.50it/s]\u001b[A\n",
      " 38%|█████████████▉                       | 9473/25176 [00:25<00:41, 380.28it/s]\u001b[A\n",
      " 39%|██████████████▎                      | 9729/25176 [00:26<00:39, 386.83it/s]\u001b[A\n",
      " 40%|██████████████▋                      | 9985/25176 [00:27<00:38, 391.61it/s]\u001b[A\n",
      " 41%|██████████████▋                     | 10241/25176 [00:27<00:39, 382.74it/s]\u001b[A\n",
      " 42%|███████████████                     | 10497/25176 [00:28<00:36, 404.17it/s]\u001b[A\n",
      " 43%|███████████████▍                    | 10753/25176 [00:28<00:34, 417.20it/s]\u001b[A\n",
      " 44%|███████████████▋                    | 11009/25176 [00:29<00:31, 445.89it/s]\u001b[A\n",
      " 45%|████████████████                    | 11265/25176 [00:30<00:37, 369.78it/s]\u001b[A\n",
      " 46%|████████████████▍                   | 11512/25176 [00:30<00:27, 493.15it/s]\u001b[A\n",
      " 46%|████████████████▌                   | 11615/25176 [00:31<00:39, 342.62it/s]\u001b[A\n",
      " 47%|████████████████▊                   | 11777/25176 [00:31<00:41, 320.61it/s]\u001b[A\n",
      " 48%|█████████████████▏                  | 12033/25176 [00:32<00:37, 352.95it/s]\u001b[A\n",
      " 49%|█████████████████▌                  | 12289/25176 [00:32<00:32, 395.42it/s]\u001b[A\n",
      " 50%|█████████████████▉                  | 12545/25176 [00:33<00:27, 456.85it/s]\u001b[A\n",
      " 51%|██████████████████▎                 | 12801/25176 [00:33<00:27, 445.25it/s]\u001b[A\n",
      " 52%|██████████████████▋                 | 13057/25176 [00:34<00:29, 414.69it/s]\u001b[A\n",
      " 53%|███████████████████                 | 13313/25176 [00:35<00:28, 419.62it/s]\u001b[A\n",
      " 54%|███████████████████▍                | 13569/25176 [00:35<00:28, 414.32it/s]\u001b[A\n",
      " 55%|███████████████████▊                | 13825/25176 [00:36<00:26, 427.39it/s]\u001b[A\n",
      " 56%|████████████████████▏               | 14081/25176 [00:36<00:24, 455.04it/s]\u001b[A\n",
      " 57%|████████████████████▌               | 14337/25176 [00:37<00:23, 456.34it/s]\u001b[A\n",
      " 58%|████████████████████▊               | 14593/25176 [00:38<00:23, 451.78it/s]\u001b[A\n",
      " 59%|█████████████████████▏              | 14849/25176 [00:38<00:22, 454.65it/s]\u001b[A\n",
      " 60%|█████████████████████▌              | 15105/25176 [00:39<00:24, 405.29it/s]\u001b[A\n",
      " 61%|█████████████████████▉              | 15361/25176 [00:40<00:25, 381.51it/s]\u001b[A\n",
      " 62%|██████████████████████▎             | 15617/25176 [00:40<00:24, 389.31it/s]\u001b[A\n",
      " 63%|██████████████████████▋             | 15873/25176 [00:41<00:27, 341.12it/s]\u001b[A\n",
      " 64%|███████████████████████             | 16128/25176 [00:41<00:19, 460.51it/s]\u001b[A\n",
      " 64%|███████████████████████▏            | 16230/25176 [00:42<00:27, 323.90it/s]\u001b[A\n",
      " 65%|███████████████████████▍            | 16385/25176 [00:43<00:26, 335.26it/s]\u001b[A\n",
      " 66%|███████████████████████▊            | 16641/25176 [00:43<00:24, 344.28it/s]\u001b[A\n",
      " 67%|████████████████████████▏           | 16897/25176 [00:44<00:26, 315.36it/s]\u001b[A\n",
      " 68%|████████████████████████▍           | 17112/25176 [00:44<00:19, 419.19it/s]\u001b[A\n",
      " 68%|████████████████████████▌           | 17210/25176 [00:45<00:23, 342.88it/s]\u001b[A\n",
      " 69%|████████████████████████▉           | 17409/25176 [00:46<00:23, 325.00it/s]\u001b[A\n",
      " 70%|█████████████████████████▎          | 17665/25176 [00:46<00:21, 348.77it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 17921/25176 [00:47<00:20, 346.39it/s]\u001b[A\n",
      " 72%|█████████████████████████▉          | 18163/25176 [00:47<00:14, 475.59it/s]\u001b[A\n",
      " 73%|██████████████████████████          | 18269/25176 [00:48<00:18, 365.21it/s]\u001b[A\n",
      " 73%|██████████████████████████▎         | 18433/25176 [00:48<00:18, 374.52it/s]\u001b[A\n",
      " 74%|██████████████████████████▋         | 18689/25176 [00:49<00:16, 390.44it/s]\u001b[A\n",
      " 75%|███████████████████████████         | 18945/25176 [00:49<00:16, 379.46it/s]\u001b[A\n",
      " 76%|███████████████████████████▍        | 19201/25176 [00:50<00:16, 373.08it/s]\u001b[A\n",
      " 77%|███████████████████████████▊        | 19457/25176 [00:51<00:14, 396.54it/s]\u001b[A\n",
      " 78%|████████████████████████████▏       | 19713/25176 [00:51<00:13, 391.05it/s]\u001b[A\n",
      " 79%|████████████████████████████▌       | 19969/25176 [00:52<00:13, 374.53it/s]\u001b[A\n",
      " 80%|████████████████████████████▉       | 20225/25176 [00:53<00:14, 351.84it/s]\u001b[A\n",
      " 81%|█████████████████████████████▎      | 20477/25176 [00:53<00:09, 475.12it/s]\u001b[A\n",
      " 82%|█████████████████████████████▍      | 20580/25176 [00:54<00:13, 329.38it/s]\u001b[A\n",
      " 82%|█████████████████████████████▋      | 20737/25176 [00:55<00:14, 316.86it/s]\u001b[A\n",
      " 83%|██████████████████████████████      | 20993/25176 [00:55<00:13, 307.79it/s]\u001b[A\n",
      " 84%|██████████████████████████████▍     | 21249/25176 [00:56<00:12, 318.02it/s]\u001b[A\n",
      " 85%|██████████████████████████████▊     | 21505/25176 [00:57<00:11, 317.52it/s]\u001b[A\n",
      " 86%|███████████████████████████████     | 21761/25176 [00:58<00:10, 323.15it/s]\u001b[A\n",
      " 87%|███████████████████████████████▍    | 22017/25176 [00:58<00:09, 334.77it/s]\u001b[A\n",
      " 88%|███████████████████████████████▊    | 22273/25176 [00:59<00:08, 352.44it/s]\u001b[A\n",
      " 89%|████████████████████████████████▏   | 22529/25176 [01:00<00:07, 338.37it/s]\u001b[A\n",
      " 91%|████████████████████████████████▌   | 22785/25176 [01:00<00:06, 391.18it/s]\u001b[A\n",
      " 92%|████████████████████████████████▉   | 23041/25176 [01:01<00:06, 352.83it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▎  | 23297/25176 [01:02<00:05, 335.78it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▋  | 23542/25176 [01:02<00:03, 449.03it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▊  | 23642/25176 [01:03<00:04, 372.74it/s]\u001b[A\n",
      " 95%|██████████████████████████████████  | 23809/25176 [01:03<00:04, 318.96it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▍ | 24065/25176 [01:04<00:03, 303.48it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▋ | 24281/25176 [01:04<00:02, 410.26it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▊ | 24381/25176 [01:05<00:02, 295.85it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▏| 24577/25176 [01:06<00:01, 313.25it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▌| 24833/25176 [01:07<00:01, 308.46it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 25176/25176 [01:07<00:00, 373.57it/s]\u001b[A\n",
      " 43%|█████████████▏                 | 1515557/3565378 [03:48<33:05, 1032.64it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_1500000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|████████████████▏            | 1994939/3565378 [03:51<00:09, 159205.77it/s]\n",
      "  0%|                                                 | 0/26506 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/26506 [00:01<8:32:17,  1.16s/it]\u001b[A\n",
      "  1%|▎                                     | 229/26506 [00:01<01:44, 250.80it/s]\u001b[A\n",
      "  1%|▌                                     | 364/26506 [00:01<01:54, 228.95it/s]\u001b[A\n",
      "  2%|▋                                     | 513/26506 [00:02<01:44, 248.59it/s]\u001b[A\n",
      "  3%|█                                     | 769/26506 [00:02<01:16, 338.08it/s]\u001b[A\n",
      "  4%|█▍                                   | 1025/26506 [00:03<01:13, 348.76it/s]\u001b[A\n",
      "  5%|█▊                                   | 1281/26506 [00:04<01:12, 349.32it/s]\u001b[A\n",
      "  6%|██▏                                  | 1537/26506 [00:04<01:03, 391.16it/s]\u001b[A\n",
      "  7%|██▌                                  | 1793/26506 [00:05<01:02, 397.66it/s]\u001b[A\n",
      "  8%|██▊                                  | 2049/26506 [00:06<01:04, 377.44it/s]\u001b[A\n",
      "  9%|███▏                                 | 2305/26506 [00:06<01:01, 396.29it/s]\u001b[A\n",
      " 10%|███▌                                 | 2561/26506 [00:07<01:02, 384.92it/s]\u001b[A\n",
      " 11%|███▉                                 | 2817/26506 [00:08<01:02, 377.36it/s]\u001b[A\n",
      " 12%|████▎                                | 3073/26506 [00:08<00:59, 391.87it/s]\u001b[A\n",
      " 13%|████▋                                | 3329/26506 [00:09<00:58, 398.14it/s]\u001b[A\n",
      " 14%|█████                                | 3585/26506 [00:10<01:06, 343.92it/s]\u001b[A\n",
      " 56%|████████████████▏            | 1994939/3565378 [04:03<00:09, 159205.77it/s]\u001b[A\n",
      " 15%|█████▋                               | 4097/26506 [00:11<01:01, 366.52it/s]\u001b[A\n",
      " 16%|██████                               | 4353/26506 [00:12<00:55, 395.60it/s]\u001b[A\n",
      " 17%|██████▍                              | 4609/26506 [00:12<00:55, 397.66it/s]\u001b[A\n",
      " 18%|██████▊                              | 4865/26506 [00:13<00:56, 385.80it/s]\u001b[A\n",
      " 19%|███████▏                             | 5121/26506 [00:14<00:51, 412.31it/s]\u001b[A\n",
      " 20%|███████▌                             | 5377/26506 [00:14<00:55, 381.54it/s]\u001b[A\n",
      " 21%|███████▊                             | 5633/26506 [00:15<01:01, 341.98it/s]\u001b[A\n",
      " 22%|████████▏                            | 5863/26506 [00:15<00:46, 448.04it/s]\u001b[A\n",
      " 22%|████████▎                            | 5958/26506 [00:16<00:58, 350.40it/s]\u001b[A\n",
      " 23%|████████▌                            | 6145/26506 [00:17<00:58, 349.63it/s]\u001b[A\n",
      " 24%|████████▉                            | 6401/26506 [00:17<00:55, 362.49it/s]\u001b[A\n",
      " 25%|█████████▎                           | 6657/26506 [00:18<00:50, 389.95it/s]\u001b[A\n",
      " 26%|█████████▋                           | 6913/26506 [00:18<00:49, 399.55it/s]\u001b[A\n",
      " 27%|██████████                           | 7169/26506 [00:19<00:54, 357.88it/s]\u001b[A\n",
      " 28%|██████████▎                          | 7425/26506 [00:20<00:51, 369.63it/s]\u001b[A\n",
      " 29%|██████████▋                          | 7681/26506 [00:21<00:51, 368.45it/s]\u001b[A\n",
      " 30%|███████████                          | 7937/26506 [00:21<00:53, 348.23it/s]\u001b[A\n",
      " 31%|███████████▍                         | 8193/26506 [00:22<00:52, 351.32it/s]\u001b[A\n",
      " 32%|███████████▊                         | 8449/26506 [00:23<00:57, 316.63it/s]\u001b[A\n",
      " 33%|████████████▏                        | 8704/26506 [00:23<00:41, 429.85it/s]\u001b[A\n",
      " 33%|████████████▎                        | 8804/26506 [00:24<00:49, 355.07it/s]\u001b[A\n",
      " 34%|████████████▌                        | 8961/26506 [00:24<00:52, 333.46it/s]\u001b[A\n",
      " 35%|████████████▊                        | 9217/26506 [00:25<00:56, 305.45it/s]\u001b[A\n",
      " 36%|█████████████▏                       | 9473/26506 [00:26<00:51, 327.96it/s]\u001b[A\n",
      " 37%|█████████████▌                       | 9729/26506 [00:27<00:47, 349.83it/s]\u001b[A\n",
      " 38%|█████████████▉                       | 9985/26506 [00:27<00:42, 391.66it/s]\u001b[A\n",
      " 39%|█████████████▉                      | 10241/26506 [00:28<00:46, 347.23it/s]\u001b[A\n",
      " 39%|██████████████▏                     | 10469/26506 [00:28<00:35, 457.74it/s]\u001b[A\n",
      " 40%|██████████████▎                     | 10567/26506 [00:29<00:51, 307.34it/s]\u001b[A\n",
      " 41%|██████████████▌                     | 10753/26506 [00:30<00:52, 302.23it/s]\u001b[A\n",
      " 42%|██████████████▉                     | 11009/26506 [00:31<00:49, 312.19it/s]\u001b[A\n",
      " 42%|███████████████▎                    | 11265/26506 [00:31<00:47, 320.55it/s]\u001b[A\n",
      " 43%|███████████████▋                    | 11521/26506 [00:32<00:46, 324.73it/s]\u001b[A\n",
      " 44%|███████████████▉                    | 11777/26506 [00:33<00:43, 336.00it/s]\u001b[A\n",
      " 45%|████████████████▎                   | 12033/26506 [00:33<00:40, 361.04it/s]\u001b[A\n",
      " 46%|████████████████▋                   | 12289/26506 [00:34<00:37, 375.76it/s]\u001b[A\n",
      " 47%|█████████████████                   | 12545/26506 [00:35<00:37, 373.03it/s]\u001b[A\n",
      " 48%|█████████████████▍                  | 12801/26506 [00:35<00:38, 358.09it/s]\u001b[A\n",
      " 49%|█████████████████▋                  | 13057/26506 [00:36<00:36, 367.39it/s]\u001b[A\n",
      " 50%|██████████████████                  | 13313/26506 [00:37<00:36, 360.34it/s]\u001b[A\n",
      " 51%|██████████████████▍                 | 13569/26506 [00:37<00:35, 368.22it/s]\u001b[A\n",
      " 52%|██████████████████▊                 | 13825/26506 [00:38<00:33, 380.18it/s]\u001b[A\n",
      " 53%|███████████████████                 | 14081/26506 [00:39<00:32, 376.63it/s]\u001b[A\n",
      " 54%|███████████████████▍                | 14337/26506 [00:39<00:29, 406.14it/s]\u001b[A\n",
      " 55%|███████████████████▊                | 14593/26506 [00:40<00:30, 396.86it/s]\u001b[A\n",
      " 56%|████████████████████▏               | 14849/26506 [00:41<00:28, 407.88it/s]\u001b[A\n",
      " 57%|████████████████████▌               | 15105/26506 [00:41<00:26, 426.56it/s]\u001b[A\n",
      " 58%|████████████████████▊               | 15361/26506 [00:42<00:29, 383.89it/s]\u001b[A\n",
      " 59%|█████████████████████▏              | 15617/26506 [00:43<00:30, 355.79it/s]\u001b[A\n",
      " 60%|█████████████████████▌              | 15873/26506 [00:43<00:28, 369.69it/s]\u001b[A\n",
      " 61%|█████████████████████▉              | 16129/26506 [00:44<00:27, 374.46it/s]\u001b[A\n",
      " 62%|██████████████████████▎             | 16385/26506 [00:45<00:27, 362.59it/s]\u001b[A\n",
      " 63%|██████████████████████▌             | 16641/26506 [00:46<00:28, 344.14it/s]\u001b[A\n",
      " 64%|██████████████████████▉             | 16897/26506 [00:46<00:26, 356.47it/s]\u001b[A\n",
      " 65%|███████████████████████▎            | 17153/26506 [00:47<00:25, 370.40it/s]\u001b[A\n",
      " 66%|███████████████████████▋            | 17409/26506 [00:48<00:23, 387.43it/s]\u001b[A\n",
      " 67%|███████████████████████▉            | 17665/26506 [00:48<00:21, 408.58it/s]\u001b[A\n",
      " 68%|████████████████████████▎           | 17921/26506 [00:49<00:20, 418.35it/s]\u001b[A\n",
      " 69%|████████████████████████▋           | 18177/26506 [00:49<00:21, 378.98it/s]\u001b[A\n",
      " 70%|█████████████████████████           | 18433/26506 [00:51<00:24, 325.48it/s]\u001b[A\n",
      " 70%|█████████████████████████▎          | 18637/26506 [00:51<00:19, 413.96it/s]\u001b[A\n",
      " 71%|█████████████████████████▍          | 18723/26506 [00:51<00:23, 336.34it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 18945/26506 [00:52<00:22, 334.74it/s]\u001b[A\n",
      " 72%|██████████████████████████          | 19201/26506 [00:53<00:20, 356.01it/s]\u001b[A\n",
      " 73%|██████████████████████████▍         | 19457/26506 [00:53<00:19, 369.27it/s]\u001b[A\n",
      " 74%|██████████████████████████▊         | 19713/26506 [00:54<00:16, 400.88it/s]\u001b[A\n",
      " 75%|███████████████████████████         | 19969/26506 [00:54<00:16, 395.31it/s]\u001b[A\n",
      " 76%|███████████████████████████▍        | 20225/26506 [00:55<00:16, 370.50it/s]\u001b[A\n",
      " 77%|███████████████████████████▊        | 20481/26506 [00:56<00:16, 363.92it/s]\u001b[A\n",
      " 78%|████████████████████████████▏       | 20737/26506 [00:56<00:14, 386.52it/s]\u001b[A\n",
      " 79%|████████████████████████████▌       | 20993/26506 [00:57<00:13, 405.39it/s]\u001b[A\n",
      " 80%|████████████████████████████▊       | 21249/26506 [00:58<00:12, 426.00it/s]\u001b[A\n",
      " 81%|█████████████████████████████▏      | 21505/26506 [00:58<00:13, 375.99it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 21761/26506 [00:59<00:12, 384.11it/s]\u001b[A\n",
      " 83%|█████████████████████████████▉      | 22017/26506 [01:00<00:12, 350.03it/s]\u001b[A\n",
      " 84%|██████████████████████████████▎     | 22273/26506 [01:00<00:11, 381.64it/s]\u001b[A\n",
      " 85%|██████████████████████████████▌     | 22529/26506 [01:01<00:11, 348.55it/s]\u001b[A\n",
      " 86%|██████████████████████████████▉     | 22783/26506 [01:01<00:07, 469.45it/s]\u001b[A\n",
      " 86%|███████████████████████████████     | 22885/26506 [01:02<00:09, 369.26it/s]\u001b[A\n",
      " 87%|███████████████████████████████▎    | 23041/26506 [01:03<00:11, 302.75it/s]\u001b[A\n",
      " 88%|███████████████████████████████▋    | 23289/26506 [01:03<00:07, 439.73it/s]\u001b[A\n",
      " 88%|███████████████████████████████▊    | 23405/26506 [01:04<00:09, 317.57it/s]\u001b[A\n",
      " 89%|███████████████████████████████▉    | 23553/26506 [01:04<00:09, 310.58it/s]\u001b[A\n",
      " 90%|████████████████████████████████▎   | 23809/26506 [01:05<00:09, 296.77it/s]\u001b[A\n",
      " 91%|████████████████████████████████▋   | 24065/26506 [01:06<00:07, 335.16it/s]\u001b[A\n",
      " 92%|█████████████████████████████████   | 24321/26506 [01:06<00:06, 351.57it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▍  | 24577/26506 [01:07<00:05, 366.30it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▋  | 24833/26506 [01:08<00:04, 392.22it/s]\u001b[A\n",
      " 95%|██████████████████████████████████  | 25089/26506 [01:08<00:03, 428.71it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▍ | 25345/26506 [01:09<00:02, 387.12it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▊ | 25601/26506 [01:10<00:02, 322.23it/s]\u001b[A\n",
      " 97%|███████████████████████████████████ | 25809/26506 [01:10<00:01, 413.80it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▏| 25898/26506 [01:11<00:01, 335.43it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▍| 26113/26506 [01:11<00:01, 317.14it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 26506/26506 [01:12<00:00, 366.72it/s]\u001b[A\n",
      " 57%|██████████████████              | 2014653/3565378 [05:05<29:38, 871.97it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_2000000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 70%|████████████████████▎        | 2499426/3565378 [05:08<00:06, 159034.97it/s]\n",
      "  0%|                                                 | 0/24957 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/24957 [00:00<4:18:11,  1.61it/s]\u001b[A\n",
      "  1%|▍                                     | 257/24957 [00:01<01:49, 224.55it/s]\u001b[A\n",
      "  2%|▊                                     | 513/24957 [00:01<01:14, 329.30it/s]\u001b[A\n",
      "  3%|█▏                                    | 769/24957 [00:02<01:10, 344.83it/s]\u001b[A\n",
      "  4%|█▌                                   | 1025/24957 [00:03<01:14, 323.36it/s]\u001b[A\n",
      "  5%|█▉                                   | 1269/24957 [00:03<00:51, 463.81it/s]\u001b[A\n",
      "  6%|██                                   | 1374/24957 [00:03<01:00, 387.53it/s]\u001b[A\n",
      "  6%|██▎                                  | 1537/24957 [00:04<01:13, 320.13it/s]\u001b[A\n",
      "  7%|██▋                                  | 1793/24957 [00:05<01:10, 330.30it/s]\u001b[A\n",
      "  8%|███                                  | 2049/24957 [00:06<01:09, 331.48it/s]\u001b[A\n",
      "  9%|███▍                                 | 2305/24957 [00:07<01:21, 278.06it/s]\u001b[A\n",
      " 10%|███▋                                 | 2510/24957 [00:07<01:01, 366.00it/s]\u001b[A\n",
      " 10%|███▊                                 | 2599/24957 [00:08<01:12, 306.59it/s]\u001b[A\n",
      " 11%|████▏                                | 2817/24957 [00:08<01:11, 308.43it/s]\u001b[A\n",
      " 12%|████▌                                | 3073/24957 [00:09<01:04, 340.99it/s]\u001b[A\n",
      " 13%|████▉                                | 3329/24957 [00:10<01:07, 321.38it/s]\u001b[A\n",
      " 14%|█████▎                               | 3548/24957 [00:10<00:49, 429.22it/s]\u001b[A\n",
      " 15%|█████▍                               | 3646/24957 [00:10<00:59, 358.46it/s]\u001b[A\n",
      " 15%|█████▋                               | 3841/24957 [00:11<00:59, 356.95it/s]\u001b[A\n",
      " 16%|██████                               | 4097/24957 [00:11<00:52, 398.99it/s]\u001b[A\n",
      " 17%|██████▍                              | 4353/24957 [00:12<00:57, 358.67it/s]\u001b[A\n",
      " 18%|██████▊                              | 4609/24957 [00:13<00:54, 375.19it/s]\u001b[A\n",
      " 70%|████████████████████▎        | 2499426/3565378 [05:23<00:06, 159034.97it/s]\u001b[A\n",
      " 21%|███████▌                             | 5121/24957 [00:14<00:52, 376.95it/s]\u001b[A\n",
      " 22%|███████▉                             | 5377/24957 [00:15<00:53, 364.70it/s]\u001b[A\n",
      " 23%|████████▎                            | 5633/24957 [00:16<01:00, 320.51it/s]\u001b[A\n",
      " 24%|████████▋                            | 5875/24957 [00:16<00:44, 429.47it/s]\u001b[A\n",
      " 24%|████████▊                            | 5972/24957 [00:17<00:53, 352.03it/s]\u001b[A\n",
      " 25%|█████████                            | 6145/24957 [00:17<00:55, 336.06it/s]\u001b[A\n",
      " 26%|█████████▍                           | 6401/24957 [00:18<01:00, 308.82it/s]\u001b[A\n",
      " 27%|█████████▊                           | 6652/24957 [00:18<00:41, 437.86it/s]\u001b[A\n",
      " 27%|██████████                           | 6763/24957 [00:19<00:51, 355.22it/s]\u001b[A\n",
      " 28%|██████████▏                          | 6913/24957 [00:19<00:53, 336.46it/s]\u001b[A\n",
      " 29%|██████████▋                          | 7169/24957 [00:20<00:49, 358.93it/s]\u001b[A\n",
      " 30%|███████████                          | 7425/24957 [00:21<00:48, 360.12it/s]\u001b[A\n",
      " 31%|███████████▍                         | 7681/24957 [00:21<00:46, 368.56it/s]\u001b[A\n",
      " 32%|███████████▊                         | 7937/24957 [00:22<00:44, 382.63it/s]\u001b[A\n",
      " 33%|████████████▏                        | 8193/24957 [00:23<00:40, 417.36it/s]\u001b[A\n",
      " 34%|████████████▌                        | 8449/24957 [00:23<00:39, 420.11it/s]\u001b[A\n",
      " 35%|████████████▉                        | 8705/24957 [00:25<00:54, 298.02it/s]\u001b[A\n",
      " 36%|█████████████▏                       | 8892/24957 [00:25<00:42, 374.44it/s]\u001b[A\n",
      " 36%|█████████████▎                       | 8975/24957 [00:25<00:53, 299.81it/s]\u001b[A\n",
      " 37%|█████████████▋                       | 9217/24957 [00:26<00:46, 339.49it/s]\u001b[A\n",
      " 38%|██████████████                       | 9473/24957 [00:26<00:42, 360.34it/s]\u001b[A\n",
      " 39%|██████████████▍                      | 9729/24957 [00:27<00:41, 369.62it/s]\u001b[A\n",
      " 40%|██████████████▊                      | 9985/24957 [00:28<00:37, 402.28it/s]\u001b[A\n",
      " 41%|██████████████▊                     | 10241/24957 [00:28<00:37, 389.70it/s]\u001b[A\n",
      " 42%|███████████████▏                    | 10497/24957 [00:29<00:35, 405.22it/s]\u001b[A\n",
      " 43%|███████████████▌                    | 10753/24957 [00:30<00:34, 411.29it/s]\u001b[A\n",
      " 44%|███████████████▉                    | 11009/24957 [00:30<00:32, 427.46it/s]\u001b[A\n",
      " 45%|████████████████▏                   | 11265/24957 [00:31<00:31, 440.21it/s]\u001b[A\n",
      " 46%|████████████████▌                   | 11521/24957 [00:31<00:32, 415.05it/s]\u001b[A\n",
      " 47%|████████████████▉                   | 11777/24957 [00:32<00:29, 441.63it/s]\u001b[A\n",
      " 48%|█████████████████▎                  | 12033/24957 [00:33<00:32, 400.15it/s]\u001b[A\n",
      " 49%|█████████████████▋                  | 12289/24957 [00:33<00:30, 417.54it/s]\u001b[A\n",
      " 50%|██████████████████                  | 12545/24957 [00:34<00:31, 388.33it/s]\u001b[A\n",
      " 51%|██████████████████▍                 | 12801/24957 [00:35<00:30, 400.37it/s]\u001b[A\n",
      " 52%|██████████████████▊                 | 13057/24957 [00:35<00:29, 409.58it/s]\u001b[A\n",
      " 53%|███████████████████▏                | 13313/24957 [00:36<00:27, 422.76it/s]\u001b[A\n",
      " 54%|███████████████████▌                | 13569/24957 [00:37<00:29, 380.72it/s]\u001b[A\n",
      " 55%|███████████████████▉                | 13822/24957 [00:37<00:21, 509.55it/s]\u001b[A\n",
      " 56%|████████████████████                | 13926/24957 [00:37<00:29, 373.67it/s]\u001b[A\n",
      " 56%|████████████████████▎               | 14081/24957 [00:38<00:36, 297.20it/s]\u001b[A\n",
      " 57%|████████████████████▋               | 14337/24957 [00:39<00:35, 297.55it/s]\u001b[A\n",
      " 58%|█████████████████████               | 14570/24957 [00:39<00:25, 412.41it/s]\u001b[A\n",
      " 59%|█████████████████████▏              | 14674/24957 [00:40<00:31, 328.05it/s]\u001b[A\n",
      " 59%|█████████████████████▍              | 14849/24957 [00:40<00:32, 313.05it/s]\u001b[A\n",
      " 61%|█████████████████████▊              | 15105/24957 [00:41<00:28, 340.46it/s]\u001b[A\n",
      " 62%|██████████████████████▏             | 15361/24957 [00:42<00:29, 328.72it/s]\u001b[A\n",
      " 63%|██████████████████████▌             | 15609/24957 [00:42<00:20, 458.51it/s]\u001b[A\n",
      " 63%|██████████████████████▋             | 15718/24957 [00:43<00:27, 332.21it/s]\u001b[A\n",
      " 64%|██████████████████████▉             | 15873/24957 [00:44<00:33, 273.64it/s]\u001b[A\n",
      " 65%|███████████████████████▎            | 16129/24957 [00:44<00:30, 292.92it/s]\u001b[A\n",
      " 66%|███████████████████████▋            | 16385/24957 [00:45<00:30, 279.70it/s]\u001b[A\n",
      " 67%|███████████████████████▉            | 16624/24957 [00:45<00:21, 389.05it/s]\u001b[A\n",
      " 67%|████████████████████████▏           | 16727/24957 [00:46<00:26, 310.35it/s]\u001b[A\n",
      " 68%|████████████████████████▎           | 16897/24957 [00:47<00:27, 291.45it/s]\u001b[A\n",
      " 69%|████████████████████████▋           | 17153/24957 [00:48<00:26, 295.27it/s]\u001b[A\n",
      " 70%|█████████████████████████           | 17409/24957 [00:48<00:24, 305.39it/s]\u001b[A\n",
      " 71%|█████████████████████████▍          | 17664/24957 [00:49<00:16, 431.28it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 17773/24957 [00:50<00:26, 275.66it/s]\u001b[A\n",
      " 72%|█████████████████████████▊          | 17921/24957 [00:50<00:26, 267.70it/s]\u001b[A\n",
      " 73%|██████████████████████████▏         | 18177/24957 [00:51<00:24, 282.44it/s]\u001b[A\n",
      " 74%|██████████████████████████▌         | 18433/24957 [00:52<00:20, 313.81it/s]\u001b[A\n",
      " 75%|██████████████████████████▉         | 18689/24957 [00:53<00:19, 314.37it/s]\u001b[A\n",
      " 76%|███████████████████████████▎        | 18945/24957 [00:53<00:18, 325.54it/s]\u001b[A\n",
      " 77%|███████████████████████████▋        | 19201/24957 [00:54<00:17, 330.16it/s]\u001b[A\n",
      " 78%|████████████████████████████        | 19457/24957 [00:55<00:17, 320.40it/s]\u001b[A\n",
      " 79%|████████████████████████████▍       | 19713/24957 [00:56<00:16, 312.82it/s]\u001b[A\n",
      " 80%|████████████████████████████▊       | 19957/24957 [00:56<00:11, 421.16it/s]\u001b[A\n",
      " 80%|████████████████████████████▉       | 20055/24957 [00:56<00:14, 328.63it/s]\u001b[A\n",
      " 81%|█████████████████████████████▏      | 20225/24957 [00:57<00:16, 282.98it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 20481/24957 [00:58<00:13, 321.10it/s]\u001b[A\n",
      " 83%|█████████████████████████████▉      | 20737/24957 [00:58<00:11, 365.56it/s]\u001b[A\n",
      " 84%|██████████████████████████████▎     | 20993/24957 [00:59<00:10, 384.00it/s]\u001b[A\n",
      " 85%|██████████████████████████████▋     | 21249/24957 [01:00<00:09, 381.12it/s]\u001b[A\n",
      " 86%|███████████████████████████████     | 21505/24957 [01:00<00:08, 384.83it/s]\u001b[A\n",
      " 87%|███████████████████████████████▍    | 21761/24957 [01:01<00:07, 399.71it/s]\u001b[A\n",
      " 88%|███████████████████████████████▊    | 22017/24957 [01:02<00:07, 408.71it/s]\u001b[A\n",
      " 89%|████████████████████████████████▏   | 22273/24957 [01:02<00:06, 433.42it/s]\u001b[A\n",
      " 90%|████████████████████████████████▍   | 22529/24957 [01:03<00:06, 381.90it/s]\u001b[A\n",
      " 91%|████████████████████████████████▊   | 22785/24957 [01:03<00:05, 400.11it/s]\u001b[A\n",
      " 92%|█████████████████████████████████▏  | 23041/24957 [01:04<00:04, 408.99it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▌  | 23297/24957 [01:05<00:04, 391.50it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▉  | 23553/24957 [01:06<00:03, 368.84it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▎ | 23809/24957 [01:06<00:03, 380.16it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▋ | 24065/24957 [01:07<00:02, 396.86it/s]\u001b[A\n",
      " 97%|███████████████████████████████████ | 24321/24957 [01:08<00:01, 372.23it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▍| 24577/24957 [01:09<00:01, 328.95it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▊| 24811/24957 [01:09<00:00, 433.96it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 24957/24957 [01:09<00:00, 359.40it/s]\u001b[A\n",
      " 71%|██████████████████████▌         | 2514813/3565378 [06:19<20:46, 842.59it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_2500000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████████████████████▎    | 2988342/3565378 [06:22<00:03, 147747.20it/s]\n",
      "  0%|                                                 | 0/26288 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/26288 [00:00<3:58:55,  1.83it/s]\u001b[A\n",
      "  1%|▎                                     | 257/26288 [00:01<01:45, 246.03it/s]\u001b[A\n",
      "  2%|▋                                     | 513/26288 [00:01<01:19, 323.92it/s]\u001b[A\n",
      "  3%|█                                     | 769/26288 [00:02<01:24, 302.32it/s]\u001b[A\n",
      "  4%|█▍                                   | 1025/26288 [00:03<01:14, 340.51it/s]\u001b[A\n",
      "  5%|█▊                                   | 1281/26288 [00:04<01:10, 353.02it/s]\u001b[A\n",
      "  6%|██▏                                  | 1537/26288 [00:04<01:12, 343.57it/s]\u001b[A\n",
      "  7%|██▌                                  | 1790/26288 [00:04<00:51, 477.30it/s]\u001b[A\n",
      "  7%|██▋                                  | 1895/26288 [00:05<01:04, 378.58it/s]\u001b[A\n",
      "  8%|██▉                                  | 2049/26288 [00:06<01:23, 289.67it/s]\u001b[A\n",
      "  9%|███▏                                 | 2298/26288 [00:06<00:55, 429.42it/s]\u001b[A\n",
      "  9%|███▍                                 | 2415/26288 [00:06<01:02, 381.16it/s]\u001b[A\n",
      " 10%|███▌                                 | 2561/26288 [00:07<01:10, 337.93it/s]\u001b[A\n",
      " 11%|███▉                                 | 2817/26288 [00:08<01:16, 307.06it/s]\u001b[A\n",
      " 12%|████▎                                | 3050/26288 [00:08<00:53, 435.43it/s]\u001b[A\n",
      " 12%|████▍                                | 3160/26288 [00:09<01:21, 282.66it/s]\u001b[A\n",
      " 84%|████████████████████████▎    | 2988342/3565378 [06:33<00:03, 147747.20it/s]\u001b[A\n",
      " 14%|█████                                | 3585/26288 [00:11<01:29, 254.65it/s]\u001b[A\n",
      " 15%|█████▍                               | 3821/26288 [00:11<01:01, 363.89it/s]\u001b[A\n",
      " 15%|█████▌                               | 3928/26288 [00:12<01:13, 306.08it/s]\u001b[A\n",
      " 16%|█████▊                               | 4097/26288 [00:12<01:12, 305.63it/s]\u001b[A\n",
      " 17%|██████▏                              | 4353/26288 [00:13<01:10, 312.08it/s]\u001b[A\n",
      " 18%|██████▍                              | 4609/26288 [00:14<01:05, 331.70it/s]\u001b[A\n",
      " 19%|██████▊                              | 4865/26288 [00:14<01:01, 346.51it/s]\u001b[A\n",
      " 19%|███████▏                             | 5121/26288 [00:15<00:55, 378.91it/s]\u001b[A\n",
      " 20%|███████▌                             | 5377/26288 [00:16<01:01, 340.12it/s]\u001b[A\n",
      " 21%|███████▉                             | 5633/26288 [00:17<01:02, 330.30it/s]\u001b[A\n",
      " 22%|████████▎                            | 5889/26288 [00:17<01:00, 334.43it/s]\u001b[A\n",
      " 23%|████████▋                            | 6145/26288 [00:18<01:00, 331.42it/s]\u001b[A\n",
      " 24%|█████████                            | 6401/26288 [00:19<01:12, 272.67it/s]\u001b[A\n",
      " 25%|█████████▎                           | 6635/26288 [00:20<00:53, 363.98it/s]\u001b[A\n",
      " 26%|█████████▍                           | 6727/26288 [00:21<01:15, 257.56it/s]\u001b[A\n",
      " 26%|█████████▋                           | 6913/26288 [00:21<01:11, 269.43it/s]\u001b[A\n",
      " 27%|██████████                           | 7169/26288 [00:22<01:15, 253.40it/s]\u001b[A\n",
      " 28%|██████████▍                          | 7390/26288 [00:22<00:54, 347.67it/s]\u001b[A\n",
      " 28%|██████████▌                          | 7488/26288 [00:23<01:09, 270.14it/s]\u001b[A\n",
      " 29%|██████████▊                          | 7681/26288 [00:24<01:11, 261.79it/s]\u001b[A\n",
      " 30%|███████████▏                         | 7937/26288 [00:25<01:00, 301.69it/s]\u001b[A\n",
      " 31%|███████████▌                         | 8193/26288 [00:25<00:54, 332.63it/s]\u001b[A\n",
      " 32%|███████████▉                         | 8449/26288 [00:26<00:49, 360.89it/s]\u001b[A\n",
      " 33%|████████████▎                        | 8705/26288 [00:26<00:45, 385.83it/s]\u001b[A\n",
      " 34%|████████████▌                        | 8961/26288 [00:27<00:47, 361.72it/s]\u001b[A\n",
      " 35%|████████████▉                        | 9217/26288 [00:28<00:47, 358.60it/s]\u001b[A\n",
      " 36%|█████████████▎                       | 9473/26288 [00:28<00:42, 397.84it/s]\u001b[A\n",
      " 37%|█████████████▋                       | 9729/26288 [00:29<00:47, 345.28it/s]\u001b[A\n",
      " 38%|█████████████▉                       | 9941/26288 [00:29<00:36, 443.73it/s]\u001b[A\n",
      " 38%|█████████████▋                      | 10032/26288 [00:30<00:44, 361.33it/s]\u001b[A\n",
      " 39%|██████████████                      | 10241/26288 [00:31<00:45, 351.54it/s]\u001b[A\n",
      " 40%|██████████████▍                     | 10497/26288 [00:31<00:41, 378.79it/s]\u001b[A\n",
      " 41%|██████████████▋                     | 10753/26288 [00:32<00:45, 344.93it/s]\u001b[A\n",
      " 42%|███████████████                     | 11009/26288 [00:33<00:47, 321.22it/s]\u001b[A\n",
      " 43%|███████████████▍                    | 11265/26288 [00:34<00:46, 320.21it/s]\u001b[A\n",
      " 44%|███████████████▊                    | 11521/26288 [00:34<00:44, 330.36it/s]\u001b[A\n",
      " 45%|████████████████▏                   | 11777/26288 [00:35<00:44, 329.74it/s]\u001b[A\n",
      " 46%|████████████████▍                   | 12033/26288 [00:36<00:43, 326.31it/s]\u001b[A\n",
      " 47%|████████████████▊                   | 12289/26288 [00:37<00:38, 360.91it/s]\u001b[A\n",
      " 48%|█████████████████▏                  | 12545/26288 [00:37<00:36, 379.02it/s]\u001b[A\n",
      " 49%|█████████████████▌                  | 12801/26288 [00:38<00:36, 369.54it/s]\u001b[A\n",
      " 50%|█████████████████▉                  | 13057/26288 [00:39<00:37, 350.51it/s]\u001b[A\n",
      " 51%|██████████████████▏                 | 13313/26288 [00:39<00:34, 372.83it/s]\u001b[A\n",
      " 52%|██████████████████▌                 | 13569/26288 [00:40<00:35, 359.98it/s]\u001b[A\n",
      " 53%|██████████████████▉                 | 13825/26288 [00:41<00:38, 326.78it/s]\u001b[A\n",
      " 54%|███████████████████▎                | 14081/26288 [00:42<00:39, 308.25it/s]\u001b[A\n",
      " 55%|███████████████████▌                | 14327/26288 [00:42<00:28, 413.50it/s]\u001b[A\n",
      " 55%|███████████████████▊                | 14422/26288 [00:43<00:36, 326.14it/s]\u001b[A\n",
      " 56%|███████████████████▉                | 14593/26288 [00:43<00:37, 314.90it/s]\u001b[A\n",
      " 56%|████████████████████▎               | 14849/26288 [00:44<00:37, 303.68it/s]\u001b[A\n",
      " 57%|████████████████████▋               | 15105/26288 [00:45<00:34, 327.66it/s]\u001b[A\n",
      " 58%|█████████████████████               | 15361/26288 [00:45<00:29, 371.04it/s]\u001b[A\n",
      " 59%|█████████████████████▍              | 15617/26288 [00:46<00:28, 378.53it/s]\u001b[A\n",
      " 60%|█████████████████████▋              | 15873/26288 [00:47<00:28, 365.64it/s]\u001b[A\n",
      " 61%|██████████████████████              | 16129/26288 [00:47<00:26, 380.97it/s]\u001b[A\n",
      " 62%|██████████████████████▍             | 16385/26288 [00:48<00:26, 374.13it/s]\u001b[A\n",
      " 63%|██████████████████████▊             | 16641/26288 [00:49<00:24, 390.36it/s]\u001b[A\n",
      " 64%|███████████████████████▏            | 16897/26288 [00:50<00:26, 354.25it/s]\u001b[A\n",
      " 65%|███████████████████████▍            | 17141/26288 [00:50<00:19, 472.06it/s]\u001b[A\n",
      " 66%|███████████████████████▌            | 17241/26288 [00:50<00:22, 406.31it/s]\u001b[A\n",
      " 66%|███████████████████████▊            | 17409/26288 [00:51<00:23, 383.45it/s]\u001b[A\n",
      " 67%|████████████████████████▏           | 17665/26288 [00:51<00:22, 387.50it/s]\u001b[A\n",
      " 68%|████████████████████████▌           | 17921/26288 [00:52<00:21, 385.03it/s]\u001b[A\n",
      " 69%|████████████████████████▉           | 18177/26288 [00:53<00:23, 346.77it/s]\u001b[A\n",
      " 70%|█████████████████████████▏          | 18433/26288 [00:54<00:21, 358.03it/s]\u001b[A\n",
      " 71%|█████████████████████████▌          | 18689/26288 [00:54<00:20, 368.50it/s]\u001b[A\n",
      " 72%|█████████████████████████▉          | 18945/26288 [00:55<00:20, 357.44it/s]\u001b[A\n",
      " 73%|██████████████████████████▎         | 19201/26288 [00:56<00:18, 380.55it/s]\u001b[A\n",
      " 74%|██████████████████████████▋         | 19457/26288 [00:56<00:18, 369.69it/s]\u001b[A\n",
      " 75%|██████████████████████████▉         | 19713/26288 [00:57<00:16, 393.13it/s]\u001b[A\n",
      " 76%|███████████████████████████▎        | 19969/26288 [00:57<00:15, 406.65it/s]\u001b[A\n",
      " 77%|███████████████████████████▋        | 20225/26288 [00:58<00:14, 430.92it/s]\u001b[A\n",
      " 78%|████████████████████████████        | 20481/26288 [00:59<00:14, 394.13it/s]\u001b[A\n",
      " 79%|████████████████████████████▍       | 20737/26288 [01:00<00:15, 355.58it/s]\u001b[A\n",
      " 80%|████████████████████████████▋       | 20993/26288 [01:00<00:14, 357.55it/s]\u001b[A\n",
      " 81%|█████████████████████████████       | 21249/26288 [01:01<00:14, 348.20it/s]\u001b[A\n",
      " 82%|█████████████████████████████▍      | 21505/26288 [01:02<00:13, 361.42it/s]\u001b[A\n",
      " 83%|█████████████████████████████▊      | 21761/26288 [01:02<00:12, 365.96it/s]\u001b[A\n",
      " 84%|██████████████████████████████▏     | 22017/26288 [01:03<00:11, 380.35it/s]\u001b[A\n",
      " 85%|██████████████████████████████▌     | 22273/26288 [01:04<00:11, 336.49it/s]\u001b[A\n",
      " 86%|██████████████████████████████▊     | 22529/26288 [01:05<00:10, 365.28it/s]\u001b[A\n",
      " 87%|███████████████████████████████▏    | 22785/26288 [01:05<00:09, 377.70it/s]\u001b[A\n",
      " 88%|███████████████████████████████▌    | 23041/26288 [01:06<00:07, 411.83it/s]\u001b[A\n",
      " 89%|███████████████████████████████▉    | 23297/26288 [01:06<00:08, 366.95it/s]\u001b[A\n",
      " 90%|████████████████████████████████▎   | 23553/26288 [01:07<00:07, 345.86it/s]\u001b[A\n",
      " 91%|████████████████████████████████▌   | 23809/26288 [01:08<00:07, 312.62it/s]\u001b[A\n",
      " 92%|████████████████████████████████▉   | 24060/26288 [01:08<00:05, 421.75it/s]\u001b[A\n",
      " 92%|█████████████████████████████████   | 24158/26288 [01:09<00:06, 330.45it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▎  | 24321/26288 [01:10<00:06, 287.58it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▋  | 24577/26288 [01:10<00:04, 343.00it/s]\u001b[A\n",
      " 94%|██████████████████████████████████  | 24833/26288 [01:11<00:04, 331.67it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▎ | 25089/26288 [01:12<00:03, 329.03it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▋ | 25345/26288 [01:13<00:02, 341.33it/s]\u001b[A\n",
      " 97%|███████████████████████████████████ | 25601/26288 [01:13<00:01, 363.06it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▍| 25857/26288 [01:14<00:01, 390.38it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 26288/26288 [01:15<00:00, 349.63it/s]\u001b[A\n",
      " 85%|███████████████████████████     | 3013805/3565378 [07:39<10:29, 876.30it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_3000000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|████████████████████████████▍| 3489538/3565378 [07:42<00:00, 156497.56it/s]\n",
      "  0%|                                                 | 0/28342 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/28342 [00:00<4:01:20,  1.96it/s]\u001b[A\n",
      "  1%|▎                                     | 257/28342 [00:01<01:35, 293.42it/s]\u001b[A\n",
      "  2%|▋                                     | 513/28342 [00:01<01:05, 423.75it/s]\u001b[A\n",
      "  3%|█                                     | 769/28342 [00:02<01:12, 381.74it/s]\u001b[A\n",
      "  4%|█▎                                   | 1025/28342 [00:03<01:29, 304.04it/s]\u001b[A\n",
      "  4%|█▋                                   | 1264/28342 [00:03<01:02, 434.90it/s]\u001b[A\n",
      "  5%|█▊                                   | 1368/28342 [00:04<01:22, 325.60it/s]\u001b[A\n",
      "  5%|██                                   | 1537/28342 [00:05<01:50, 242.05it/s]\u001b[A\n",
      "  6%|██▎                                  | 1763/28342 [00:05<01:14, 355.21it/s]\u001b[A\n",
      "  7%|██▍                                  | 1872/28342 [00:05<01:23, 315.77it/s]\u001b[A\n",
      "  7%|██▋                                  | 2049/28342 [00:06<01:22, 319.64it/s]\u001b[A\n",
      "  8%|███                                  | 2305/28342 [00:06<01:14, 350.05it/s]\u001b[A\n",
      "  9%|███▎                                 | 2561/28342 [00:07<01:09, 372.77it/s]\u001b[A\n",
      " 10%|███▋                                 | 2817/28342 [00:08<01:07, 377.84it/s]\u001b[A\n",
      " 11%|████                                 | 3073/28342 [00:08<01:02, 403.14it/s]\u001b[A\n",
      " 12%|████▎                                | 3329/28342 [00:09<01:01, 408.69it/s]\u001b[A\n",
      " 13%|████▋                                | 3585/28342 [00:10<01:02, 399.17it/s]\u001b[A\n",
      " 98%|████████████████████████████▍| 3489538/3565378 [07:53<00:00, 156497.56it/s]\u001b[A\n",
      " 14%|█████▎                               | 4097/28342 [00:11<01:06, 364.68it/s]\u001b[A\n",
      " 15%|█████▋                               | 4353/28342 [00:12<01:04, 369.39it/s]\u001b[A\n",
      " 16%|██████                               | 4609/28342 [00:12<01:04, 365.68it/s]\u001b[A\n",
      " 17%|██████▎                              | 4865/28342 [00:13<01:09, 340.02it/s]\u001b[A\n",
      " 18%|██████▋                              | 5121/28342 [00:14<01:04, 361.81it/s]\u001b[A\n",
      " 19%|███████                              | 5377/28342 [00:15<01:04, 355.95it/s]\u001b[A\n",
      " 20%|███████▎                             | 5633/28342 [00:15<01:01, 368.07it/s]\u001b[A\n",
      " 21%|███████▋                             | 5889/28342 [00:16<00:56, 400.75it/s]\u001b[A\n",
      " 22%|████████                             | 6145/28342 [00:17<01:03, 350.16it/s]\u001b[A\n",
      " 23%|████████▎                            | 6401/28342 [00:17<01:00, 360.39it/s]\u001b[A\n",
      " 23%|████████▋                            | 6657/28342 [00:18<01:00, 360.79it/s]\u001b[A\n",
      " 24%|█████████                            | 6913/28342 [00:19<01:05, 325.86it/s]\u001b[A\n",
      " 25%|█████████▎                           | 7169/28342 [00:20<01:01, 345.28it/s]\u001b[A\n",
      " 26%|█████████▋                           | 7425/28342 [00:20<00:59, 350.38it/s]\u001b[A\n",
      " 27%|██████████                           | 7681/28342 [00:21<01:04, 322.29it/s]\u001b[A\n",
      " 28%|██████████▎                          | 7937/28342 [00:22<00:59, 340.84it/s]\u001b[A\n",
      " 29%|██████████▋                          | 8193/28342 [00:23<01:05, 309.81it/s]\u001b[A\n",
      " 30%|██████████▉                          | 8397/28342 [00:23<00:50, 395.11it/s]\u001b[A\n",
      " 30%|███████████                          | 8481/28342 [00:24<00:59, 333.33it/s]\u001b[A\n",
      " 31%|███████████▎                         | 8705/28342 [00:24<01:02, 311.83it/s]\u001b[A\n",
      " 32%|███████████▋                         | 8961/28342 [00:25<00:56, 341.84it/s]\u001b[A\n",
      " 33%|████████████                         | 9217/28342 [00:26<00:52, 366.70it/s]\u001b[A\n",
      " 33%|████████████▎                        | 9473/28342 [00:26<00:47, 401.45it/s]\u001b[A\n",
      " 34%|████████████▋                        | 9729/28342 [00:27<00:48, 383.74it/s]\u001b[A\n",
      " 35%|█████████████                        | 9985/28342 [00:28<00:48, 376.38it/s]\u001b[A\n",
      " 36%|█████████████                       | 10241/28342 [00:28<00:46, 392.42it/s]\u001b[A\n",
      " 37%|█████████████▎                      | 10497/28342 [00:29<00:42, 415.41it/s]\u001b[A\n",
      " 38%|█████████████▋                      | 10753/28342 [00:29<00:40, 436.60it/s]\u001b[A\n",
      " 39%|█████████████▉                      | 11009/28342 [00:30<00:44, 387.05it/s]\u001b[A\n",
      " 40%|██████████████▎                     | 11265/28342 [00:31<00:42, 404.57it/s]\u001b[A\n",
      " 41%|██████████████▋                     | 11521/28342 [00:32<00:46, 364.68it/s]\u001b[A\n",
      " 42%|██████████████▉                     | 11777/28342 [00:32<00:44, 368.80it/s]\u001b[A\n",
      " 42%|███████████████▎                    | 12033/28342 [00:33<00:40, 403.90it/s]\u001b[A\n",
      " 43%|███████████████▌                    | 12289/28342 [00:33<00:36, 437.06it/s]\u001b[A\n",
      " 44%|███████████████▉                    | 12545/28342 [00:34<00:35, 442.45it/s]\u001b[A\n",
      " 45%|████████████████▎                   | 12801/28342 [00:34<00:34, 453.42it/s]\u001b[A\n",
      " 46%|████████████████▌                   | 13057/28342 [00:35<00:38, 401.04it/s]\u001b[A\n",
      " 47%|████████████████▉                   | 13313/28342 [00:36<00:35, 420.74it/s]\u001b[A\n",
      " 48%|█████████████████▏                  | 13569/28342 [00:36<00:34, 433.81it/s]\u001b[A\n",
      " 49%|█████████████████▌                  | 13825/28342 [00:37<00:37, 383.70it/s]\u001b[A\n",
      " 50%|█████████████████▉                  | 14079/28342 [00:37<00:27, 513.81it/s]\u001b[A\n",
      " 50%|██████████████████                  | 14184/28342 [00:38<00:39, 359.36it/s]\u001b[A\n",
      " 51%|██████████████████▏                 | 14337/28342 [00:39<00:50, 274.89it/s]\u001b[A\n",
      " 51%|██████████████████▌                 | 14593/28342 [00:40<00:49, 278.16it/s]\u001b[A\n",
      " 52%|██████████████████▊                 | 14849/28342 [00:41<00:48, 275.92it/s]\u001b[A\n",
      " 53%|███████████████████▏                | 15096/28342 [00:41<00:34, 385.36it/s]\u001b[A\n",
      " 54%|███████████████████▎                | 15200/28342 [00:41<00:40, 322.50it/s]\u001b[A\n",
      " 54%|███████████████████▌                | 15361/28342 [00:43<00:52, 247.21it/s]\u001b[A\n",
      " 55%|███████████████████▊                | 15606/28342 [00:43<00:34, 367.32it/s]\u001b[A\n",
      " 55%|███████████████████▉                | 15721/28342 [00:43<00:40, 310.95it/s]\u001b[A\n",
      " 56%|████████████████████▏               | 15873/28342 [00:44<00:38, 319.77it/s]\u001b[A\n",
      " 57%|████████████████████▍               | 16129/28342 [00:44<00:37, 322.80it/s]\u001b[A\n",
      " 58%|████████████████████▊               | 16385/28342 [00:45<00:32, 366.79it/s]\u001b[A\n",
      " 59%|█████████████████████▏              | 16641/28342 [00:46<00:30, 385.31it/s]\u001b[A\n",
      " 60%|█████████████████████▍              | 16897/28342 [00:46<00:29, 383.73it/s]\u001b[A\n",
      " 61%|█████████████████████▊              | 17153/28342 [00:47<00:28, 389.27it/s]\u001b[A\n",
      " 61%|██████████████████████              | 17409/28342 [00:48<00:33, 327.53it/s]\u001b[A\n",
      " 62%|██████████████████████▍             | 17654/28342 [00:48<00:24, 442.21it/s]\u001b[A\n",
      " 63%|██████████████████████▌             | 17755/28342 [00:49<00:29, 361.83it/s]\u001b[A\n",
      " 63%|██████████████████████▊             | 17921/28342 [00:49<00:30, 340.74it/s]\u001b[A\n",
      " 64%|███████████████████████             | 18177/28342 [00:50<00:27, 374.81it/s]\u001b[A\n",
      " 65%|███████████████████████▍            | 18433/28342 [00:50<00:26, 378.60it/s]\u001b[A\n",
      " 66%|███████████████████████▋            | 18689/28342 [00:51<00:23, 406.58it/s]\u001b[A\n",
      " 67%|████████████████████████            | 18945/28342 [00:52<00:27, 342.24it/s]\u001b[A\n",
      " 68%|████████████████████████▍           | 19195/28342 [00:52<00:19, 466.21it/s]\u001b[A\n",
      " 68%|████████████████████████▌           | 19300/28342 [00:53<00:25, 351.15it/s]\u001b[A\n",
      " 69%|████████████████████████▋           | 19457/28342 [00:53<00:26, 341.07it/s]\u001b[A\n",
      " 70%|█████████████████████████           | 19713/28342 [00:54<00:22, 382.60it/s]\u001b[A\n",
      " 70%|█████████████████████████▎          | 19969/28342 [00:54<00:19, 430.68it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 20225/28342 [00:55<00:18, 438.88it/s]\u001b[A\n",
      " 72%|██████████████████████████          | 20481/28342 [00:56<00:22, 345.96it/s]\u001b[A\n",
      " 73%|██████████████████████████▎         | 20726/28342 [00:56<00:16, 468.16it/s]\u001b[A\n",
      " 73%|██████████████████████████▍         | 20831/28342 [00:57<00:20, 363.29it/s]\u001b[A\n",
      " 74%|██████████████████████████▋         | 20993/28342 [00:57<00:25, 287.14it/s]\u001b[A\n",
      " 75%|██████████████████████████▉         | 21222/28342 [00:58<00:17, 409.62it/s]\u001b[A\n",
      " 75%|███████████████████████████         | 21333/28342 [00:58<00:19, 351.10it/s]\u001b[A\n",
      " 76%|███████████████████████████▎        | 21505/28342 [00:59<00:19, 342.11it/s]\u001b[A\n",
      " 77%|███████████████████████████▋        | 21761/28342 [00:59<00:16, 397.69it/s]\u001b[A\n",
      " 78%|███████████████████████████▉        | 22017/28342 [01:00<00:13, 452.28it/s]\u001b[A\n",
      " 79%|████████████████████████████▎       | 22273/28342 [01:00<00:14, 413.66it/s]\u001b[A\n",
      " 79%|████████████████████████████▌       | 22529/28342 [01:01<00:13, 432.61it/s]\u001b[A\n",
      " 80%|████████████████████████████▉       | 22785/28342 [01:01<00:13, 423.62it/s]\u001b[A\n",
      " 81%|█████████████████████████████▎      | 23041/28342 [01:02<00:13, 400.44it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 23297/28342 [01:03<00:11, 425.59it/s]\u001b[A\n",
      " 83%|█████████████████████████████▉      | 23553/28342 [01:03<00:11, 408.94it/s]\u001b[A\n",
      " 84%|██████████████████████████████▏     | 23809/28342 [01:04<00:10, 419.90it/s]\u001b[A\n",
      " 85%|██████████████████████████████▌     | 24065/28342 [01:04<00:09, 431.97it/s]\u001b[A\n",
      " 86%|██████████████████████████████▉     | 24321/28342 [01:05<00:09, 443.69it/s]\u001b[A\n",
      " 87%|███████████████████████████████▏    | 24577/28342 [01:06<00:08, 458.05it/s]\u001b[A\n",
      " 88%|███████████████████████████████▌    | 24833/28342 [01:06<00:07, 492.60it/s]\u001b[A\n",
      " 89%|███████████████████████████████▊    | 25089/28342 [01:06<00:05, 551.93it/s]\u001b[A\n",
      " 89%|████████████████████████████████▏   | 25345/28342 [01:07<00:04, 599.72it/s]\u001b[A\n",
      " 90%|████████████████████████████████▌   | 25601/28342 [01:07<00:04, 613.67it/s]\u001b[A\n",
      " 91%|████████████████████████████████▊   | 25857/28342 [01:07<00:03, 630.85it/s]\u001b[A\n",
      " 92%|█████████████████████████████████▏  | 26113/28342 [01:08<00:03, 638.25it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▍  | 26369/28342 [01:08<00:03, 611.21it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▊  | 26625/28342 [01:09<00:02, 631.80it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▏ | 26881/28342 [01:09<00:02, 661.93it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▍ | 27137/28342 [01:09<00:01, 686.17it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▊ | 27393/28342 [01:10<00:01, 663.27it/s]\u001b[A\n",
      " 98%|███████████████████████████████████ | 27649/28342 [01:10<00:01, 659.28it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▍| 27905/28342 [01:10<00:00, 655.59it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 28342/28342 [01:11<00:00, 397.44it/s]\u001b[A\n",
      " 99%|███████████████████████████████▌| 3513203/3565378 [08:55<00:56, 926.03it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_3500000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████▉| 3557769/3565378 [08:55<00:02, 2739.27it/s]\n",
      "  0%|                                                  | 0/5154 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                          | 1/5154 [00:00<30:29,  2.82it/s]\u001b[A\n",
      "  5%|█▉                                     | 257/5154 [00:00<00:11, 410.64it/s]\u001b[A\n",
      " 10%|███▉                                   | 513/5154 [00:01<00:09, 507.77it/s]\u001b[A\n",
      " 15%|█████▊                                 | 769/5154 [00:01<00:08, 499.50it/s]\u001b[A\n",
      " 20%|███████▌                              | 1025/5154 [00:02<00:08, 497.83it/s]\u001b[A\n",
      " 25%|█████████▍                            | 1281/5154 [00:02<00:07, 553.19it/s]\u001b[A\n",
      " 30%|███████████▎                          | 1537/5154 [00:03<00:06, 559.27it/s]\u001b[A\n",
      " 35%|█████████████▏                        | 1793/5154 [00:03<00:05, 579.93it/s]\u001b[A\n",
      " 40%|███████████████                       | 2049/5154 [00:03<00:05, 546.39it/s]\u001b[A\n",
      " 45%|████████████████▉                     | 2305/5154 [00:04<00:04, 594.73it/s]\u001b[A\n",
      " 50%|██████████████████▉                   | 2561/5154 [00:04<00:04, 626.26it/s]\u001b[A\n",
      " 55%|████████████████████▊                 | 2817/5154 [00:05<00:03, 630.31it/s]\u001b[A\n",
      " 60%|██████████████████████▋               | 3073/5154 [00:05<00:03, 605.24it/s]\u001b[A\n",
      " 65%|████████████████████████▌             | 3329/5154 [00:05<00:03, 604.36it/s]\u001b[A\n",
      " 70%|██████████████████████████▍           | 3585/5154 [00:06<00:02, 600.44it/s]\u001b[A\n",
      " 75%|████████████████████████████▎         | 3841/5154 [00:06<00:02, 573.58it/s]\u001b[A\n",
      " 79%|██████████████████████████████▏       | 4097/5154 [00:07<00:01, 542.65it/s]\u001b[A\n",
      " 84%|████████████████████████████████      | 4353/5154 [00:07<00:01, 548.08it/s]\u001b[A\n",
      " 89%|█████████████████████████████████▉    | 4609/5154 [00:08<00:00, 556.83it/s]\u001b[A\n",
      " 94%|███████████████████████████████████▊  | 4865/5154 [00:08<00:00, 604.71it/s]\u001b[A\n",
      "100%|██████████████████████████████████████| 5154/5154 [00:08<00:00, 588.57it/s]\u001b[A\n",
      "100%|███████████████████████████████| 3565378/3565378 [09:04<00:00, 6547.15it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/romance/goodreads_reviews_romance_3565377.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Removing all reviews with less than 20 chars and creating datastructure\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████| 2066193/2066193 [00:09<00:00, 222068.18it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Remove all books with fewer than 500 reviews\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████| 297045/297045 [00:00<00:00, 3570345.43it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sampling 500 random reviews per book\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████| 270/270 [00:00<00:00, 5102.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preprocessing all reviews now\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 24%|███████▏                      | 498842/2066193 [00:03<00:09, 160355.60it/s]\n",
      "  0%|                                                 | 0/33381 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/33381 [00:00<5:07:24,  1.81it/s]\u001b[A\n",
      "  1%|▎                                     | 257/33381 [00:01<02:16, 243.27it/s]\u001b[A\n",
      "  2%|▌                                     | 513/33381 [00:01<01:38, 333.67it/s]\u001b[A\n",
      "  2%|▉                                     | 769/33381 [00:02<01:25, 381.89it/s]\u001b[A\n",
      "  3%|█▏                                   | 1025/33381 [00:02<01:19, 405.58it/s]\u001b[A\n",
      "  4%|█▍                                   | 1281/33381 [00:03<01:13, 434.98it/s]\u001b[A\n",
      "  5%|█▋                                   | 1537/33381 [00:03<01:11, 446.55it/s]\u001b[A\n",
      "  5%|█▉                                   | 1793/33381 [00:04<01:10, 445.80it/s]\u001b[A\n",
      "  6%|██▎                                  | 2049/33381 [00:05<01:08, 460.29it/s]\u001b[A\n",
      "  7%|██▌                                  | 2305/33381 [00:05<01:05, 474.94it/s]\u001b[A\n",
      "  8%|██▊                                  | 2561/33381 [00:06<01:10, 434.21it/s]\u001b[A\n",
      "  8%|███                                  | 2817/33381 [00:06<01:06, 458.77it/s]\u001b[A\n",
      "  9%|███▍                                 | 3073/33381 [00:07<01:05, 464.32it/s]\u001b[A\n",
      " 10%|███▋                                 | 3329/33381 [00:07<01:04, 464.79it/s]\u001b[A\n",
      " 11%|███▉                                 | 3585/33381 [00:08<01:01, 485.91it/s]\u001b[A\n",
      " 12%|████▎                                | 3841/33381 [00:08<01:05, 447.95it/s]\u001b[A\n",
      " 12%|████▌                                | 4097/33381 [00:09<01:03, 464.37it/s]\u001b[A\n",
      " 13%|████▊                                | 4353/33381 [00:10<01:01, 472.24it/s]\u001b[A\n",
      " 14%|█████                                | 4609/33381 [00:10<01:04, 447.23it/s]\u001b[A\n",
      " 15%|█████▍                               | 4865/33381 [00:11<01:04, 439.64it/s]\u001b[A\n",
      " 15%|█████▋                               | 5121/33381 [00:11<01:01, 456.93it/s]\u001b[A\n",
      " 16%|█████▉                               | 5377/33381 [00:12<01:05, 430.15it/s]\u001b[A\n",
      " 17%|██████▏                              | 5633/33381 [00:13<01:05, 426.19it/s]\u001b[A\n",
      " 18%|██████▌                              | 5889/33381 [00:13<01:06, 414.88it/s]\u001b[A\n",
      " 18%|██████▊                              | 6145/33381 [00:14<01:01, 442.61it/s]\u001b[A\n",
      " 24%|███████▏                      | 498842/2066193 [00:18<00:09, 160355.60it/s]\u001b[A\n",
      " 20%|███████▍                             | 6657/33381 [00:15<00:58, 458.95it/s]\u001b[A\n",
      " 21%|███████▋                             | 6913/33381 [00:15<00:54, 481.90it/s]\u001b[A\n",
      " 21%|███████▉                             | 7169/33381 [00:16<00:53, 485.48it/s]\u001b[A\n",
      " 22%|████████▏                            | 7425/33381 [00:16<00:53, 488.50it/s]\u001b[A\n",
      " 23%|████████▌                            | 7681/33381 [00:17<00:54, 467.38it/s]\u001b[A\n",
      " 24%|████████▊                            | 7937/33381 [00:18<00:58, 433.95it/s]\u001b[A\n",
      " 25%|█████████                            | 8193/33381 [00:18<00:54, 464.12it/s]\u001b[A\n",
      " 25%|█████████▎                           | 8449/33381 [00:19<00:53, 467.32it/s]\u001b[A\n",
      " 26%|█████████▋                           | 8705/33381 [00:19<00:51, 477.93it/s]\u001b[A\n",
      " 27%|█████████▉                           | 8961/33381 [00:20<00:57, 427.22it/s]\u001b[A\n",
      " 28%|██████████▏                          | 9217/33381 [00:20<00:52, 457.21it/s]\u001b[A\n",
      " 28%|██████████▌                          | 9473/33381 [00:21<00:52, 452.73it/s]\u001b[A\n",
      " 29%|██████████▊                          | 9729/33381 [00:21<00:50, 471.17it/s]\u001b[A\n",
      " 30%|███████████                          | 9985/33381 [00:22<00:50, 466.03it/s]\u001b[A\n",
      " 31%|███████████                         | 10241/33381 [00:22<00:49, 468.92it/s]\u001b[A\n",
      " 31%|███████████▎                        | 10497/33381 [00:23<00:47, 477.28it/s]\u001b[A\n",
      " 32%|███████████▌                        | 10753/33381 [00:24<00:47, 472.65it/s]\u001b[A\n",
      " 33%|███████████▊                        | 11009/33381 [00:24<00:48, 459.99it/s]\u001b[A\n",
      " 34%|████████████▏                       | 11265/33381 [00:25<00:48, 454.61it/s]\u001b[A\n",
      " 35%|████████████▍                       | 11521/33381 [00:25<00:47, 462.90it/s]\u001b[A\n",
      " 35%|████████████▋                       | 11777/33381 [00:26<00:48, 444.69it/s]\u001b[A\n",
      " 36%|████████████▉                       | 12033/33381 [00:26<00:46, 456.66it/s]\u001b[A\n",
      " 37%|█████████████▎                      | 12289/33381 [00:27<00:49, 430.18it/s]\u001b[A\n",
      " 38%|█████████████▌                      | 12545/33381 [00:28<00:47, 436.08it/s]\u001b[A\n",
      " 38%|█████████████▊                      | 12801/33381 [00:28<00:50, 404.42it/s]\u001b[A\n",
      " 39%|██████████████                      | 13057/33381 [00:29<00:47, 430.65it/s]\u001b[A\n",
      " 40%|██████████████▎                     | 13313/33381 [00:29<00:43, 460.96it/s]\u001b[A\n",
      " 41%|██████████████▋                     | 13569/33381 [00:30<00:43, 458.01it/s]\u001b[A\n",
      " 41%|██████████████▉                     | 13825/33381 [00:30<00:42, 456.95it/s]\u001b[A\n",
      " 42%|███████████████▏                    | 14081/33381 [00:31<00:44, 432.96it/s]\u001b[A\n",
      " 43%|███████████████▍                    | 14337/33381 [00:32<00:44, 430.97it/s]\u001b[A\n",
      " 44%|███████████████▋                    | 14593/33381 [00:32<00:45, 415.27it/s]\u001b[A\n",
      " 44%|████████████████                    | 14849/33381 [00:33<00:41, 443.88it/s]\u001b[A\n",
      " 45%|████████████████▎                   | 15105/33381 [00:34<00:42, 432.12it/s]\u001b[A\n",
      " 46%|████████████████▌                   | 15361/33381 [00:34<00:41, 437.40it/s]\u001b[A\n",
      " 47%|████████████████▊                   | 15617/33381 [00:35<00:43, 409.31it/s]\u001b[A\n",
      " 48%|█████████████████                   | 15873/33381 [00:35<00:39, 443.24it/s]\u001b[A\n",
      " 48%|█████████████████▍                  | 16129/33381 [00:36<00:39, 436.32it/s]\u001b[A\n",
      " 49%|█████████████████▋                  | 16385/33381 [00:37<00:40, 418.07it/s]\u001b[A\n",
      " 50%|█████████████████▉                  | 16641/33381 [00:37<00:39, 428.42it/s]\u001b[A\n",
      " 51%|██████████████████▏                 | 16897/33381 [00:38<00:38, 427.22it/s]\u001b[A\n",
      " 51%|██████████████████▍                 | 17153/33381 [00:38<00:37, 438.36it/s]\u001b[A\n",
      " 52%|██████████████████▊                 | 17409/33381 [00:39<00:36, 437.61it/s]\u001b[A\n",
      " 53%|███████████████████                 | 17665/33381 [00:40<00:37, 418.44it/s]\u001b[A\n",
      " 54%|███████████████████▎                | 17921/33381 [00:40<00:39, 391.89it/s]\u001b[A\n",
      " 54%|███████████████████▌                | 18177/33381 [00:41<00:36, 411.53it/s]\u001b[A\n",
      " 55%|███████████████████▉                | 18433/33381 [00:41<00:36, 414.81it/s]\u001b[A\n",
      " 56%|████████████████████▏               | 18689/33381 [00:42<00:34, 426.72it/s]\u001b[A\n",
      " 57%|████████████████████▍               | 18945/33381 [00:43<00:34, 422.38it/s]\u001b[A\n",
      " 58%|████████████████████▋               | 19201/33381 [00:43<00:31, 444.85it/s]\u001b[A\n",
      " 58%|████████████████████▉               | 19457/33381 [00:44<00:32, 432.96it/s]\u001b[A\n",
      " 59%|█████████████████████▎              | 19713/33381 [00:44<00:32, 426.87it/s]\u001b[A\n",
      " 60%|█████████████████████▌              | 19969/33381 [00:45<00:32, 412.31it/s]\u001b[A\n",
      " 61%|█████████████████████▊              | 20225/33381 [00:46<00:31, 422.85it/s]\u001b[A\n",
      " 61%|██████████████████████              | 20481/33381 [00:46<00:31, 412.74it/s]\u001b[A\n",
      " 62%|██████████████████████▎             | 20737/33381 [00:47<00:28, 449.52it/s]\u001b[A\n",
      " 63%|██████████████████████▋             | 20993/33381 [00:47<00:30, 406.90it/s]\u001b[A\n",
      " 64%|██████████████████████▉             | 21249/33381 [00:48<00:28, 429.29it/s]\u001b[A\n",
      " 64%|███████████████████████▏            | 21505/33381 [00:49<00:27, 433.16it/s]\u001b[A\n",
      " 65%|███████████████████████▍            | 21761/33381 [00:49<00:28, 407.59it/s]\u001b[A\n",
      " 66%|███████████████████████▋            | 22017/33381 [00:50<00:30, 369.42it/s]\u001b[A\n",
      " 67%|████████████████████████            | 22259/33381 [00:50<00:22, 488.81it/s]\u001b[A\n",
      " 67%|████████████████████████            | 22359/33381 [00:51<00:26, 408.57it/s]\u001b[A\n",
      " 67%|████████████████████████▎           | 22529/33381 [00:51<00:30, 360.65it/s]\u001b[A\n",
      " 68%|████████████████████████▌           | 22785/33381 [00:52<00:28, 365.82it/s]\u001b[A\n",
      " 69%|████████████████████████▊           | 23041/33381 [00:53<00:28, 360.05it/s]\u001b[A\n",
      " 70%|█████████████████████████           | 23297/33381 [00:54<00:28, 357.17it/s]\u001b[A\n",
      " 71%|█████████████████████████▍          | 23553/33381 [00:54<00:25, 389.38it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 23809/33381 [00:55<00:22, 418.86it/s]\u001b[A\n",
      " 72%|█████████████████████████▉          | 24065/33381 [00:55<00:25, 365.00it/s]\u001b[A\n",
      " 73%|██████████████████████████▏         | 24321/33381 [00:56<00:23, 391.43it/s]\u001b[A\n",
      " 74%|██████████████████████████▌         | 24577/33381 [00:57<00:21, 411.37it/s]\u001b[A\n",
      " 74%|██████████████████████████▊         | 24833/33381 [00:57<00:20, 423.72it/s]\u001b[A\n",
      " 75%|███████████████████████████         | 25089/33381 [00:58<00:19, 436.31it/s]\u001b[A\n",
      " 76%|███████████████████████████▎        | 25345/33381 [00:58<00:17, 461.96it/s]\u001b[A\n",
      " 77%|███████████████████████████▌        | 25601/33381 [00:59<00:16, 465.26it/s]\u001b[A\n",
      " 77%|███████████████████████████▉        | 25857/33381 [00:59<00:15, 471.54it/s]\u001b[A\n",
      " 78%|████████████████████████████▏       | 26113/33381 [01:00<00:15, 460.20it/s]\u001b[A\n",
      " 79%|████████████████████████████▍       | 26369/33381 [01:00<00:15, 443.46it/s]\u001b[A\n",
      " 80%|████████████████████████████▋       | 26625/33381 [01:01<00:15, 436.84it/s]\u001b[A\n",
      " 81%|████████████████████████████▉       | 26881/33381 [01:02<00:17, 364.15it/s]\u001b[A\n",
      " 81%|█████████████████████████████▎      | 27137/33381 [01:03<00:15, 393.75it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 27393/33381 [01:03<00:15, 395.73it/s]\u001b[A\n",
      " 83%|█████████████████████████████▊      | 27649/33381 [01:04<00:15, 380.74it/s]\u001b[A\n",
      " 84%|██████████████████████████████      | 27905/33381 [01:04<00:13, 407.20it/s]\u001b[A\n",
      " 84%|██████████████████████████████▎     | 28161/33381 [01:05<00:12, 419.93it/s]\u001b[A\n",
      " 85%|██████████████████████████████▋     | 28417/33381 [01:06<00:11, 430.34it/s]\u001b[A\n",
      " 86%|██████████████████████████████▉     | 28673/33381 [01:06<00:10, 430.69it/s]\u001b[A\n",
      " 87%|███████████████████████████████▏    | 28929/33381 [01:07<00:10, 430.67it/s]\u001b[A\n",
      " 87%|███████████████████████████████▍    | 29185/33381 [01:07<00:09, 435.76it/s]\u001b[A\n",
      " 88%|███████████████████████████████▊    | 29441/33381 [01:08<00:09, 432.92it/s]\u001b[A\n",
      " 89%|████████████████████████████████    | 29697/33381 [01:08<00:08, 437.73it/s]\u001b[A\n",
      " 90%|████████████████████████████████▎   | 29953/33381 [01:09<00:07, 434.14it/s]\u001b[A\n",
      " 90%|████████████████████████████████▌   | 30209/33381 [01:10<00:06, 456.28it/s]\u001b[A\n",
      " 91%|████████████████████████████████▊   | 30465/33381 [01:10<00:06, 455.21it/s]\u001b[A\n",
      " 92%|█████████████████████████████████▏  | 30721/33381 [01:11<00:05, 481.89it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▍  | 30977/33381 [01:11<00:05, 418.53it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▋  | 31233/33381 [01:12<00:04, 439.35it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▉  | 31489/33381 [01:13<00:04, 421.88it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▏ | 31745/33381 [01:13<00:04, 396.01it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▌ | 32001/33381 [01:14<00:03, 373.01it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▊ | 32257/33381 [01:15<00:02, 409.77it/s]\u001b[A\n",
      " 97%|███████████████████████████████████ | 32513/33381 [01:15<00:02, 404.72it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▎| 32769/33381 [01:16<00:01, 421.00it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▌| 33025/33381 [01:16<00:00, 439.55it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 33381/33381 [01:17<00:00, 432.81it/s]\u001b[A\n",
      " 25%|████████▏                        | 514662/2066193 [01:21<34:11, 756.25it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/history_biography/goodreads_reviews_history_biography_500000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 48%|██████████████▍               | 998150/2066193 [01:25<00:07, 149457.57it/s]\n",
      "  0%|                                                 | 0/31701 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/31701 [00:00<4:49:31,  1.82it/s]\u001b[A\n",
      "  1%|▎                                     | 257/31701 [00:01<01:53, 276.78it/s]\u001b[A\n",
      "  2%|▌                                     | 513/31701 [00:01<01:25, 364.91it/s]\u001b[A\n",
      "  2%|▉                                     | 769/31701 [00:02<01:17, 396.96it/s]\u001b[A\n",
      "  3%|█▏                                   | 1025/31701 [00:02<01:13, 417.69it/s]\u001b[A\n",
      "  4%|█▍                                   | 1281/31701 [00:03<01:04, 468.30it/s]\u001b[A\n",
      "  5%|█▊                                   | 1537/31701 [00:03<01:03, 476.61it/s]\u001b[A\n",
      "  6%|██                                   | 1793/31701 [00:04<01:02, 476.78it/s]\u001b[A\n",
      "  6%|██▍                                  | 2049/31701 [00:04<01:05, 453.66it/s]\u001b[A\n",
      "  7%|██▋                                  | 2305/31701 [00:05<01:07, 436.32it/s]\u001b[A\n",
      "  8%|██▉                                  | 2561/31701 [00:06<01:06, 436.89it/s]\u001b[A\n",
      "  9%|███▎                                 | 2817/31701 [00:06<01:07, 427.90it/s]\u001b[A\n",
      " 10%|███▌                                 | 3073/31701 [00:07<01:03, 450.88it/s]\u001b[A\n",
      " 11%|███▉                                 | 3329/31701 [00:07<01:06, 423.71it/s]\u001b[A\n",
      " 11%|████▏                                | 3585/31701 [00:08<01:05, 432.19it/s]\u001b[A\n",
      " 12%|████▍                                | 3841/31701 [00:08<00:59, 469.11it/s]\u001b[A\n",
      " 13%|████▊                                | 4097/31701 [00:09<00:57, 480.26it/s]\u001b[A\n",
      " 14%|█████                                | 4353/31701 [00:10<01:01, 444.12it/s]\u001b[A\n",
      " 15%|█████▍                               | 4609/31701 [00:10<00:58, 463.25it/s]\u001b[A\n",
      " 15%|█████▋                               | 4865/31701 [00:11<00:55, 482.44it/s]\u001b[A\n",
      " 16%|█████▉                               | 5121/31701 [00:11<00:52, 509.95it/s]\u001b[A\n",
      " 17%|██████▎                              | 5377/31701 [00:12<00:54, 483.66it/s]\u001b[A\n",
      " 48%|██████████████▍               | 998150/2066193 [01:38<00:07, 149457.57it/s]\u001b[A\n",
      " 19%|██████▊                              | 5889/31701 [00:13<00:54, 469.38it/s]\u001b[A\n",
      " 19%|███████▏                             | 6145/31701 [00:13<00:58, 435.36it/s]\u001b[A\n",
      " 20%|███████▍                             | 6401/31701 [00:14<00:57, 437.53it/s]\u001b[A\n",
      " 21%|███████▊                             | 6657/31701 [00:15<00:57, 437.03it/s]\u001b[A\n",
      " 22%|████████                             | 6913/31701 [00:15<00:56, 442.03it/s]\u001b[A\n",
      " 23%|████████▎                            | 7169/31701 [00:16<00:55, 444.36it/s]\u001b[A\n",
      " 23%|████████▋                            | 7425/31701 [00:16<00:56, 429.44it/s]\u001b[A\n",
      " 24%|████████▉                            | 7681/31701 [00:17<00:53, 448.74it/s]\u001b[A\n",
      " 25%|█████████▎                           | 7937/31701 [00:18<00:55, 424.91it/s]\u001b[A\n",
      " 26%|█████████▌                           | 8193/31701 [00:18<00:54, 434.03it/s]\u001b[A\n",
      " 27%|█████████▊                           | 8449/31701 [00:19<00:51, 453.19it/s]\u001b[A\n",
      " 27%|██████████▏                          | 8705/31701 [00:19<00:47, 480.10it/s]\u001b[A\n",
      " 28%|██████████▍                          | 8961/31701 [00:20<00:47, 480.46it/s]\u001b[A\n",
      " 29%|██████████▊                          | 9217/31701 [00:20<00:47, 473.41it/s]\u001b[A\n",
      " 30%|███████████                          | 9473/31701 [00:21<00:48, 460.35it/s]\u001b[A\n",
      " 31%|███████████▎                         | 9729/31701 [00:21<00:47, 464.11it/s]\u001b[A\n",
      " 31%|███████████▋                         | 9985/31701 [00:22<00:48, 445.23it/s]\u001b[A\n",
      " 32%|███████████▋                        | 10241/31701 [00:22<00:48, 440.25it/s]\u001b[A\n",
      " 33%|███████████▉                        | 10497/31701 [00:23<00:48, 434.73it/s]\u001b[A\n",
      " 34%|████████████▏                       | 10753/31701 [00:24<00:50, 416.01it/s]\u001b[A\n",
      " 35%|████████████▌                       | 11009/31701 [00:24<00:46, 447.71it/s]\u001b[A\n",
      " 36%|████████████▊                       | 11265/31701 [00:25<00:43, 472.39it/s]\u001b[A\n",
      " 36%|█████████████                       | 11521/31701 [00:25<00:43, 462.40it/s]\u001b[A\n",
      " 37%|█████████████▎                      | 11777/31701 [00:26<00:43, 455.91it/s]\u001b[A\n",
      " 38%|█████████████▋                      | 12033/31701 [00:26<00:41, 471.60it/s]\u001b[A\n",
      " 39%|█████████████▉                      | 12289/31701 [00:27<00:41, 471.72it/s]\u001b[A\n",
      " 40%|██████████████▏                     | 12545/31701 [00:27<00:41, 465.46it/s]\u001b[A\n",
      " 40%|██████████████▌                     | 12801/31701 [00:28<00:43, 435.41it/s]\u001b[A\n",
      " 41%|██████████████▊                     | 13057/31701 [00:29<00:43, 427.90it/s]\u001b[A\n",
      " 42%|███████████████                     | 13313/31701 [00:29<00:44, 413.76it/s]\u001b[A\n",
      " 43%|███████████████▍                    | 13569/31701 [00:30<00:43, 419.00it/s]\u001b[A\n",
      " 44%|███████████████▋                    | 13825/31701 [00:31<00:42, 416.64it/s]\u001b[A\n",
      " 44%|███████████████▉                    | 14081/31701 [00:31<00:42, 417.59it/s]\u001b[A\n",
      " 45%|████████████████▎                   | 14337/31701 [00:32<00:39, 436.86it/s]\u001b[A\n",
      " 46%|████████████████▌                   | 14593/31701 [00:32<00:40, 423.01it/s]\u001b[A\n",
      " 47%|████████████████▊                   | 14849/31701 [00:33<00:39, 424.85it/s]\u001b[A\n",
      " 48%|█████████████████▏                  | 15105/31701 [00:34<00:36, 448.54it/s]\u001b[A\n",
      " 48%|█████████████████▍                  | 15361/31701 [00:34<00:34, 474.18it/s]\u001b[A\n",
      " 49%|█████████████████▋                  | 15617/31701 [00:35<00:33, 482.89it/s]\u001b[A\n",
      " 50%|██████████████████                  | 15873/31701 [00:35<00:30, 515.23it/s]\u001b[A\n",
      " 51%|██████████████████▎                 | 16129/31701 [00:35<00:31, 499.81it/s]\u001b[A\n",
      " 52%|██████████████████▌                 | 16385/31701 [00:36<00:30, 499.91it/s]\u001b[A\n",
      " 52%|██████████████████▉                 | 16641/31701 [00:37<00:30, 499.88it/s]\u001b[A\n",
      " 53%|███████████████████▏                | 16897/31701 [00:37<00:32, 462.43it/s]\u001b[A\n",
      " 54%|███████████████████▍                | 17153/31701 [00:38<00:29, 486.48it/s]\u001b[A\n",
      " 55%|███████████████████▊                | 17409/31701 [00:38<00:33, 426.08it/s]\u001b[A\n",
      " 56%|████████████████████                | 17665/31701 [00:39<00:30, 458.13it/s]\u001b[A\n",
      " 57%|████████████████████▎               | 17921/31701 [00:39<00:30, 446.39it/s]\u001b[A\n",
      " 57%|████████████████████▋               | 18177/31701 [00:40<00:29, 456.02it/s]\u001b[A\n",
      " 58%|████████████████████▉               | 18433/31701 [00:41<00:31, 424.32it/s]\u001b[A\n",
      " 59%|█████████████████████▏              | 18689/31701 [00:41<00:30, 429.04it/s]\u001b[A\n",
      " 60%|█████████████████████▌              | 18945/31701 [00:42<00:27, 461.25it/s]\u001b[A\n",
      " 61%|█████████████████████▊              | 19201/31701 [00:43<00:30, 414.19it/s]\u001b[A\n",
      " 61%|██████████████████████              | 19457/31701 [00:43<00:30, 395.56it/s]\u001b[A\n",
      " 62%|██████████████████████▍             | 19713/31701 [00:44<00:29, 406.33it/s]\u001b[A\n",
      " 63%|██████████████████████▋             | 19969/31701 [00:44<00:27, 428.93it/s]\u001b[A\n",
      " 64%|██████████████████████▉             | 20225/31701 [00:45<00:25, 450.23it/s]\u001b[A\n",
      " 65%|███████████████████████▎            | 20481/31701 [00:45<00:24, 454.56it/s]\u001b[A\n",
      " 65%|███████████████████████▌            | 20737/31701 [00:46<00:26, 414.99it/s]\u001b[A\n",
      " 66%|███████████████████████▊            | 20993/31701 [00:47<00:24, 434.75it/s]\u001b[A\n",
      " 67%|████████████████████████▏           | 21249/31701 [00:47<00:23, 438.86it/s]\u001b[A\n",
      " 68%|████████████████████████▍           | 21505/31701 [00:48<00:23, 426.07it/s]\u001b[A\n",
      " 69%|████████████████████████▋           | 21761/31701 [00:49<00:24, 404.71it/s]\u001b[A\n",
      " 69%|█████████████████████████           | 22017/31701 [00:49<00:22, 431.52it/s]\u001b[A\n",
      " 70%|█████████████████████████▎          | 22273/31701 [00:50<00:20, 454.33it/s]\u001b[A\n",
      " 71%|█████████████████████████▌          | 22529/31701 [00:50<00:21, 420.83it/s]\u001b[A\n",
      " 72%|█████████████████████████▊          | 22785/31701 [00:51<00:20, 440.55it/s]\u001b[A\n",
      " 73%|██████████████████████████▏         | 23041/31701 [00:51<00:19, 445.57it/s]\u001b[A\n",
      " 73%|██████████████████████████▍         | 23297/31701 [00:52<00:19, 436.47it/s]\u001b[A\n",
      " 74%|██████████████████████████▋         | 23553/31701 [00:52<00:18, 450.60it/s]\u001b[A\n",
      " 75%|███████████████████████████         | 23809/31701 [00:53<00:17, 452.78it/s]\u001b[A\n",
      " 76%|███████████████████████████▎        | 24065/31701 [00:54<00:16, 463.36it/s]\u001b[A\n",
      " 77%|███████████████████████████▌        | 24321/31701 [00:54<00:15, 471.56it/s]\u001b[A\n",
      " 78%|███████████████████████████▉        | 24577/31701 [00:55<00:14, 487.49it/s]\u001b[A\n",
      " 78%|████████████████████████████▏       | 24833/31701 [00:55<00:14, 475.01it/s]\u001b[A\n",
      " 79%|████████████████████████████▍       | 25089/31701 [00:56<00:14, 467.87it/s]\u001b[A\n",
      " 80%|████████████████████████████▊       | 25345/31701 [00:56<00:15, 422.89it/s]\u001b[A\n",
      " 81%|█████████████████████████████       | 25601/31701 [00:57<00:14, 418.75it/s]\u001b[A\n",
      " 82%|█████████████████████████████▎      | 25857/31701 [00:58<00:13, 419.48it/s]\u001b[A\n",
      " 82%|█████████████████████████████▋      | 26113/31701 [00:58<00:13, 402.29it/s]\u001b[A\n",
      " 83%|█████████████████████████████▉      | 26369/31701 [00:59<00:13, 402.62it/s]\u001b[A\n",
      " 84%|██████████████████████████████▏     | 26625/31701 [01:00<00:11, 424.20it/s]\u001b[A\n",
      " 85%|██████████████████████████████▌     | 26881/31701 [01:00<00:10, 443.08it/s]\u001b[A\n",
      " 86%|██████████████████████████████▊     | 27137/31701 [01:01<00:10, 445.99it/s]\u001b[A\n",
      " 86%|███████████████████████████████     | 27393/31701 [01:01<00:09, 458.22it/s]\u001b[A\n",
      " 87%|███████████████████████████████▍    | 27649/31701 [01:02<00:08, 472.80it/s]\u001b[A\n",
      " 88%|███████████████████████████████▋    | 27905/31701 [01:02<00:08, 454.38it/s]\u001b[A\n",
      " 89%|███████████████████████████████▉    | 28161/31701 [01:03<00:08, 398.72it/s]\u001b[A\n",
      " 90%|████████████████████████████████▎   | 28417/31701 [01:04<00:07, 417.33it/s]\u001b[A\n",
      " 90%|████████████████████████████████▌   | 28673/31701 [01:04<00:06, 439.58it/s]\u001b[A\n",
      " 91%|████████████████████████████████▊   | 28929/31701 [01:05<00:06, 443.65it/s]\u001b[A\n",
      " 92%|█████████████████████████████████▏  | 29185/31701 [01:05<00:05, 457.00it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▍  | 29441/31701 [01:06<00:05, 428.34it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▋  | 29697/31701 [01:06<00:04, 440.04it/s]\u001b[A\n",
      " 94%|██████████████████████████████████  | 29953/31701 [01:07<00:03, 445.04it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▎ | 30209/31701 [01:08<00:03, 434.61it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▌ | 30465/31701 [01:08<00:02, 422.23it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▉ | 30721/31701 [01:09<00:02, 426.09it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▏| 30977/31701 [01:10<00:01, 395.22it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▍| 31233/31701 [01:10<00:01, 416.64it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 31701/31701 [01:11<00:00, 445.47it/s]\u001b[A\n",
      " 49%|███████████████▋                | 1014632/2066193 [02:37<21:53, 800.65it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/history_biography/goodreads_reviews_history_biography_1000000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 72%|████████████████████▊        | 1485151/2066193 [02:40<00:03, 159649.82it/s]\n",
      "  0%|                                                 | 0/31225 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/31225 [00:00<4:48:39,  1.80it/s]\u001b[A\n",
      "  1%|▎                                     | 257/31225 [00:01<01:53, 271.83it/s]\u001b[A\n",
      "  2%|▌                                     | 513/31225 [00:01<01:32, 332.45it/s]\u001b[A\n",
      "  2%|▉                                     | 769/31225 [00:02<01:18, 387.10it/s]\u001b[A\n",
      "  3%|█▏                                   | 1025/31225 [00:02<01:17, 389.58it/s]\u001b[A\n",
      "  4%|█▌                                   | 1281/31225 [00:03<01:12, 411.10it/s]\u001b[A\n",
      "  5%|█▊                                   | 1537/31225 [00:04<01:13, 406.32it/s]\u001b[A\n",
      "  6%|██                                   | 1793/31225 [00:04<01:09, 422.61it/s]\u001b[A\n",
      "  7%|██▍                                  | 2049/31225 [00:05<01:05, 447.96it/s]\u001b[A\n",
      "  7%|██▋                                  | 2305/31225 [00:05<01:03, 452.41it/s]\u001b[A\n",
      "  8%|███                                  | 2561/31225 [00:06<00:59, 479.54it/s]\u001b[A\n",
      "  9%|███▎                                 | 2817/31225 [00:06<00:59, 475.46it/s]\u001b[A\n",
      " 10%|███▋                                 | 3073/31225 [00:07<01:05, 432.91it/s]\u001b[A\n",
      " 11%|███▉                                 | 3329/31225 [00:08<01:04, 434.72it/s]\u001b[A\n",
      " 11%|████▏                                | 3585/31225 [00:08<01:03, 435.97it/s]\u001b[A\n",
      " 12%|████▌                                | 3841/31225 [00:09<01:04, 423.39it/s]\u001b[A\n",
      " 13%|████▊                                | 4097/31225 [00:09<01:01, 443.71it/s]\u001b[A\n",
      " 14%|█████▏                               | 4353/31225 [00:10<00:57, 468.47it/s]\u001b[A\n",
      " 15%|█████▍                               | 4609/31225 [00:10<00:57, 464.82it/s]\u001b[A\n",
      " 16%|█████▊                               | 4865/31225 [00:11<00:56, 468.06it/s]\u001b[A\n",
      " 16%|██████                               | 5121/31225 [00:11<00:57, 452.72it/s]\u001b[A\n",
      " 17%|██████▎                              | 5377/31225 [00:12<00:55, 463.43it/s]\u001b[A\n",
      " 18%|██████▋                              | 5633/31225 [00:12<00:53, 479.50it/s]\u001b[A\n",
      " 19%|██████▉                              | 5889/31225 [00:13<00:55, 452.93it/s]\u001b[A\n",
      " 20%|███████▎                             | 6145/31225 [00:14<00:53, 470.98it/s]\u001b[A\n",
      " 20%|███████▌                             | 6401/31225 [00:14<00:56, 436.15it/s]\u001b[A\n",
      " 21%|███████▉                             | 6657/31225 [00:15<00:51, 473.86it/s]\u001b[A\n",
      " 22%|████████▏                            | 6913/31225 [00:15<00:50, 486.02it/s]\u001b[A\n",
      " 23%|████████▍                            | 7169/31225 [00:16<00:49, 482.51it/s]\u001b[A\n",
      " 72%|████████████████████▊        | 1485151/2066193 [02:58<00:03, 159649.82it/s]\u001b[A\n",
      " 25%|█████████                            | 7681/31225 [00:17<00:50, 461.83it/s]\u001b[A\n",
      " 25%|█████████▍                           | 7937/31225 [00:18<00:55, 421.94it/s]\u001b[A\n",
      " 26%|█████████▋                           | 8193/31225 [00:18<00:55, 417.81it/s]\u001b[A\n",
      " 27%|██████████                           | 8449/31225 [00:19<00:50, 447.91it/s]\u001b[A\n",
      " 28%|██████████▎                          | 8705/31225 [00:19<00:50, 446.92it/s]\u001b[A\n",
      " 29%|██████████▌                          | 8961/31225 [00:20<00:48, 456.70it/s]\u001b[A\n",
      " 30%|██████████▉                          | 9217/31225 [00:20<00:49, 448.39it/s]\u001b[A\n",
      " 30%|███████████▏                         | 9473/31225 [00:21<00:46, 469.12it/s]\u001b[A\n",
      " 31%|███████████▌                         | 9729/31225 [00:22<00:45, 469.72it/s]\u001b[A\n",
      " 32%|███████████▊                         | 9985/31225 [00:22<00:45, 468.64it/s]\u001b[A\n",
      " 33%|███████████▊                        | 10241/31225 [00:23<00:45, 459.60it/s]\u001b[A\n",
      " 34%|████████████                        | 10497/31225 [00:23<00:45, 457.22it/s]\u001b[A\n",
      " 34%|████████████▍                       | 10753/31225 [00:24<00:46, 441.64it/s]\u001b[A\n",
      " 35%|████████████▋                       | 11009/31225 [00:24<00:43, 459.73it/s]\u001b[A\n",
      " 36%|████████████▉                       | 11265/31225 [00:25<00:42, 471.66it/s]\u001b[A\n",
      " 37%|█████████████▎                      | 11521/31225 [00:25<00:42, 468.23it/s]\u001b[A\n",
      " 38%|█████████████▌                      | 11777/31225 [00:26<00:43, 448.83it/s]\u001b[A\n",
      " 39%|█████████████▊                      | 12033/31225 [00:27<00:43, 443.28it/s]\u001b[A\n",
      " 39%|██████████████▏                     | 12289/31225 [00:27<00:46, 405.13it/s]\u001b[A\n",
      " 40%|██████████████▍                     | 12545/31225 [00:28<00:45, 409.71it/s]\u001b[A\n",
      " 41%|██████████████▊                     | 12801/31225 [00:29<00:42, 431.27it/s]\u001b[A\n",
      " 42%|███████████████                     | 13057/31225 [00:29<00:40, 445.81it/s]\u001b[A\n",
      " 43%|███████████████▎                    | 13313/31225 [00:30<00:41, 429.72it/s]\u001b[A\n",
      " 43%|███████████████▋                    | 13569/31225 [00:30<00:39, 443.25it/s]\u001b[A\n",
      " 44%|███████████████▉                    | 13825/31225 [00:31<00:40, 434.50it/s]\u001b[A\n",
      " 45%|████████████████▏                   | 14081/31225 [00:32<00:41, 415.95it/s]\u001b[A\n",
      " 46%|████████████████▌                   | 14337/31225 [00:32<00:39, 427.16it/s]\u001b[A\n",
      " 47%|████████████████▊                   | 14593/31225 [00:33<00:37, 445.69it/s]\u001b[A\n",
      " 48%|█████████████████                   | 14849/31225 [00:33<00:39, 417.76it/s]\u001b[A\n",
      " 48%|█████████████████▍                  | 15105/31225 [00:34<00:41, 388.07it/s]\u001b[A\n",
      " 49%|█████████████████▋                  | 15361/31225 [00:35<00:39, 397.21it/s]\u001b[A\n",
      " 50%|██████████████████                  | 15617/31225 [00:35<00:37, 418.61it/s]\u001b[A\n",
      " 51%|██████████████████▎                 | 15873/31225 [00:36<00:37, 411.44it/s]\u001b[A\n",
      " 52%|██████████████████▌                 | 16129/31225 [00:36<00:35, 427.91it/s]\u001b[A\n",
      " 52%|██████████████████▉                 | 16385/31225 [00:37<00:34, 430.44it/s]\u001b[A\n",
      " 53%|███████████████████▏                | 16641/31225 [00:38<00:32, 446.70it/s]\u001b[A\n",
      " 54%|███████████████████▍                | 16897/31225 [00:38<00:31, 457.97it/s]\u001b[A\n",
      " 55%|███████████████████▊                | 17153/31225 [00:39<00:30, 455.87it/s]\u001b[A\n",
      " 56%|████████████████████                | 17409/31225 [00:39<00:28, 478.05it/s]\u001b[A\n",
      " 57%|████████████████████▎               | 17665/31225 [00:40<00:29, 461.07it/s]\u001b[A\n",
      " 57%|████████████████████▋               | 17921/31225 [00:40<00:28, 462.44it/s]\u001b[A\n",
      " 58%|████████████████████▉               | 18177/31225 [00:41<00:29, 449.54it/s]\u001b[A\n",
      " 59%|█████████████████████▎              | 18433/31225 [00:41<00:27, 473.53it/s]\u001b[A\n",
      " 60%|█████████████████████▌              | 18689/31225 [00:42<00:25, 493.93it/s]\u001b[A\n",
      " 61%|█████████████████████▊              | 18945/31225 [00:42<00:26, 466.75it/s]\u001b[A\n",
      " 61%|██████████████████████▏             | 19201/31225 [00:43<00:27, 444.37it/s]\u001b[A\n",
      " 62%|██████████████████████▍             | 19457/31225 [00:44<00:25, 456.92it/s]\u001b[A\n",
      " 63%|██████████████████████▋             | 19713/31225 [00:44<00:26, 437.81it/s]\u001b[A\n",
      " 64%|███████████████████████             | 19969/31225 [00:45<00:26, 432.67it/s]\u001b[A\n",
      " 65%|███████████████████████▎            | 20225/31225 [00:45<00:23, 464.27it/s]\u001b[A\n",
      " 66%|███████████████████████▌            | 20481/31225 [00:46<00:24, 435.49it/s]\u001b[A\n",
      " 66%|███████████████████████▉            | 20737/31225 [00:47<00:23, 441.62it/s]\u001b[A\n",
      " 67%|████████████████████████▏           | 20993/31225 [00:47<00:22, 447.70it/s]\u001b[A\n",
      " 68%|████████████████████████▍           | 21249/31225 [00:48<00:22, 450.94it/s]\u001b[A\n",
      " 69%|████████████████████████▊           | 21505/31225 [00:48<00:21, 454.59it/s]\u001b[A\n",
      " 70%|█████████████████████████           | 21761/31225 [00:49<00:22, 414.82it/s]\u001b[A\n",
      " 71%|█████████████████████████▍          | 22017/31225 [00:50<00:23, 397.94it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 22273/31225 [00:50<00:22, 398.17it/s]\u001b[A\n",
      " 72%|█████████████████████████▉          | 22529/31225 [00:51<00:20, 422.45it/s]\u001b[A\n",
      " 73%|██████████████████████████▎         | 22785/31225 [00:51<00:19, 438.09it/s]\u001b[A\n",
      " 74%|██████████████████████████▌         | 23041/31225 [00:52<00:18, 452.40it/s]\u001b[A\n",
      " 75%|██████████████████████████▊         | 23297/31225 [00:52<00:17, 452.82it/s]\u001b[A\n",
      " 75%|███████████████████████████▏        | 23553/31225 [00:53<00:17, 442.64it/s]\u001b[A\n",
      " 76%|███████████████████████████▍        | 23809/31225 [00:54<00:16, 451.22it/s]\u001b[A\n",
      " 77%|███████████████████████████▋        | 24065/31225 [00:54<00:17, 403.69it/s]\u001b[A\n",
      " 78%|████████████████████████████        | 24321/31225 [00:55<00:17, 396.50it/s]\u001b[A\n",
      " 79%|████████████████████████████▎       | 24577/31225 [00:56<00:18, 365.12it/s]\u001b[A\n",
      " 80%|████████████████████████████▋       | 24833/31225 [00:56<00:16, 376.19it/s]\u001b[A\n",
      " 80%|████████████████████████████▉       | 25089/31225 [00:57<00:15, 396.60it/s]\u001b[A\n",
      " 81%|█████████████████████████████▏      | 25345/31225 [00:58<00:14, 393.22it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 25601/31225 [00:58<00:13, 408.04it/s]\u001b[A\n",
      " 83%|█████████████████████████████▊      | 25857/31225 [00:59<00:12, 431.42it/s]\u001b[A\n",
      " 84%|██████████████████████████████      | 26113/31225 [00:59<00:12, 418.56it/s]\u001b[A\n",
      " 84%|██████████████████████████████▍     | 26369/31225 [01:00<00:12, 395.94it/s]\u001b[A\n",
      " 85%|██████████████████████████████▋     | 26625/31225 [01:01<00:11, 416.32it/s]\u001b[A\n",
      " 86%|██████████████████████████████▉     | 26881/31225 [01:01<00:09, 456.73it/s]\u001b[A\n",
      " 87%|███████████████████████████████▎    | 27137/31225 [01:02<00:09, 444.54it/s]\u001b[A\n",
      " 88%|███████████████████████████████▌    | 27393/31225 [01:02<00:08, 430.42it/s]\u001b[A\n",
      " 89%|███████████████████████████████▉    | 27649/31225 [01:03<00:08, 440.86it/s]\u001b[A\n",
      " 89%|████████████████████████████████▏   | 27905/31225 [01:03<00:07, 458.98it/s]\u001b[A\n",
      " 90%|████████████████████████████████▍   | 28161/31225 [01:04<00:06, 459.96it/s]\u001b[A\n",
      " 91%|████████████████████████████████▊   | 28417/31225 [01:05<00:06, 450.56it/s]\u001b[A\n",
      " 92%|█████████████████████████████████   | 28673/31225 [01:05<00:05, 472.07it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▎  | 28929/31225 [01:06<00:04, 469.34it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▋  | 29185/31225 [01:06<00:04, 434.29it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▉  | 29441/31225 [01:07<00:04, 437.67it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▏ | 29697/31225 [01:07<00:03, 435.67it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▌ | 29953/31225 [01:08<00:02, 429.78it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▊ | 30209/31225 [01:09<00:02, 437.15it/s]\u001b[A\n",
      " 98%|███████████████████████████████████ | 30465/31225 [01:09<00:01, 451.46it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▍| 30721/31225 [01:10<00:01, 455.87it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 31225/31225 [01:10<00:00, 440.87it/s]\u001b[A\n",
      " 73%|███████████████████████▍        | 1514067/2066193 [03:53<09:13, 998.33it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/history_biography/goodreads_reviews_history_biography_1500000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 96%|███████████████████████████▊ | 1986003/2066193 [03:56<00:00, 153062.74it/s]\n",
      "  0%|                                                 | 0/33664 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                       | 1/33664 [00:00<3:42:44,  2.52it/s]\u001b[A\n",
      "  1%|▎                                     | 257/33664 [00:00<01:49, 306.32it/s]\u001b[A\n",
      "  2%|▌                                     | 513/33664 [00:01<01:35, 348.93it/s]\u001b[A\n",
      "  2%|▊                                     | 769/33664 [00:02<01:20, 406.43it/s]\u001b[A\n",
      "  3%|█▏                                   | 1025/33664 [00:02<01:20, 407.69it/s]\u001b[A\n",
      "  4%|█▍                                   | 1281/33664 [00:03<01:15, 429.30it/s]\u001b[A\n",
      "  5%|█▋                                   | 1537/33664 [00:03<01:18, 411.07it/s]\u001b[A\n",
      "  5%|█▉                                   | 1793/33664 [00:04<01:12, 438.35it/s]\u001b[A\n",
      "  6%|██▎                                  | 2049/33664 [00:04<01:08, 460.81it/s]\u001b[A\n",
      "  7%|██▌                                  | 2305/33664 [00:05<01:06, 470.37it/s]\u001b[A\n",
      "  8%|██▊                                  | 2561/33664 [00:06<01:05, 472.29it/s]\u001b[A\n",
      "  8%|███                                  | 2817/33664 [00:06<01:03, 484.06it/s]\u001b[A\n",
      "  9%|███▍                                 | 3073/33664 [00:07<01:03, 483.47it/s]\u001b[A\n",
      " 10%|███▋                                 | 3329/33664 [00:07<01:03, 481.41it/s]\u001b[A\n",
      " 11%|███▉                                 | 3585/33664 [00:08<01:02, 481.83it/s]\u001b[A\n",
      " 11%|████▏                                | 3841/33664 [00:08<01:04, 458.90it/s]\u001b[A\n",
      " 12%|████▌                                | 4097/33664 [00:09<01:03, 466.33it/s]\u001b[A\n",
      " 13%|████▊                                | 4353/33664 [00:09<01:00, 488.30it/s]\u001b[A\n",
      " 14%|█████                                | 4609/33664 [00:10<00:58, 493.77it/s]\u001b[A\n",
      " 14%|█████▎                               | 4865/33664 [00:10<01:01, 466.03it/s]\u001b[A\n",
      " 15%|█████▋                               | 5121/33664 [00:11<01:02, 455.79it/s]\u001b[A\n",
      " 96%|███████████████████████████▊ | 1986003/2066193 [04:08<00:00, 153062.74it/s]\u001b[A\n",
      " 17%|██████▏                              | 5633/33664 [00:12<01:13, 383.10it/s]\u001b[A\n",
      " 17%|██████▍                              | 5889/33664 [00:13<01:10, 396.17it/s]\u001b[A\n",
      " 18%|██████▊                              | 6145/33664 [00:14<01:09, 397.09it/s]\u001b[A\n",
      " 19%|███████                              | 6401/33664 [00:14<01:04, 422.47it/s]\u001b[A\n",
      " 20%|███████▎                             | 6657/33664 [00:15<01:01, 437.48it/s]\u001b[A\n",
      " 21%|███████▌                             | 6913/33664 [00:15<00:58, 460.96it/s]\u001b[A\n",
      " 21%|███████▉                             | 7169/33664 [00:16<00:56, 469.58it/s]\u001b[A\n",
      " 22%|████████▏                            | 7425/33664 [00:16<00:54, 479.23it/s]\u001b[A\n",
      " 23%|████████▍                            | 7681/33664 [00:17<00:57, 450.43it/s]\u001b[A\n",
      " 24%|████████▋                            | 7937/33664 [00:17<00:56, 458.05it/s]\u001b[A\n",
      " 24%|█████████                            | 8193/33664 [00:18<00:55, 457.61it/s]\u001b[A\n",
      " 25%|█████████▎                           | 8449/33664 [00:19<00:57, 440.82it/s]\u001b[A\n",
      " 26%|█████████▌                           | 8705/33664 [00:19<00:55, 449.31it/s]\u001b[A\n",
      " 27%|█████████▊                           | 8961/33664 [00:20<00:58, 423.71it/s]\u001b[A\n",
      " 27%|██████████▏                          | 9217/33664 [00:20<00:56, 431.96it/s]\u001b[A\n",
      " 28%|██████████▍                          | 9473/33664 [00:21<00:55, 436.93it/s]\u001b[A\n",
      " 29%|██████████▋                          | 9729/33664 [00:22<00:54, 436.58it/s]\u001b[A\n",
      " 30%|██████████▉                          | 9985/33664 [00:22<00:50, 468.74it/s]\u001b[A\n",
      " 30%|██████████▉                         | 10241/33664 [00:23<00:50, 463.25it/s]\u001b[A\n",
      " 31%|███████████▏                        | 10497/33664 [00:23<00:52, 443.85it/s]\u001b[A\n",
      " 32%|███████████▍                        | 10753/33664 [00:24<00:55, 414.46it/s]\u001b[A\n",
      " 33%|███████████▊                        | 11009/33664 [00:24<00:52, 430.32it/s]\u001b[A\n",
      " 33%|████████████                        | 11265/33664 [00:25<00:50, 442.62it/s]\u001b[A\n",
      " 34%|████████████▎                       | 11521/33664 [00:26<00:54, 403.31it/s]\u001b[A\n",
      " 35%|████████████▌                       | 11777/33664 [00:26<00:52, 419.68it/s]\u001b[A\n",
      " 36%|████████████▊                       | 12033/33664 [00:27<00:48, 441.63it/s]\u001b[A\n",
      " 37%|█████████████▏                      | 12289/33664 [00:28<00:53, 401.00it/s]\u001b[A\n",
      " 37%|█████████████▍                      | 12545/33664 [00:28<00:49, 422.66it/s]\u001b[A\n",
      " 38%|█████████████▋                      | 12801/33664 [00:29<00:48, 431.52it/s]\u001b[A\n",
      " 39%|█████████████▉                      | 13057/33664 [00:29<00:46, 444.56it/s]\u001b[A\n",
      " 40%|██████████████▏                     | 13313/33664 [00:30<00:42, 473.59it/s]\u001b[A\n",
      " 40%|██████████████▌                     | 13569/33664 [00:30<00:42, 477.38it/s]\u001b[A\n",
      " 41%|██████████████▊                     | 13825/33664 [00:31<00:42, 463.10it/s]\u001b[A\n",
      " 42%|███████████████                     | 14081/33664 [00:32<00:45, 432.22it/s]\u001b[A\n",
      " 43%|███████████████▎                    | 14337/33664 [00:32<00:43, 447.62it/s]\u001b[A\n",
      " 43%|███████████████▌                    | 14593/33664 [00:33<00:41, 458.54it/s]\u001b[A\n",
      " 44%|███████████████▉                    | 14849/33664 [00:33<00:39, 477.46it/s]\u001b[A\n",
      " 45%|████████████████▏                   | 15105/33664 [00:34<00:38, 477.57it/s]\u001b[A\n",
      " 46%|████████████████▍                   | 15361/33664 [00:34<00:39, 464.26it/s]\u001b[A\n",
      " 46%|████████████████▋                   | 15617/33664 [00:35<00:40, 451.11it/s]\u001b[A\n",
      " 47%|████████████████▉                   | 15873/33664 [00:35<00:39, 445.06it/s]\u001b[A\n",
      " 48%|█████████████████▏                  | 16129/33664 [00:36<00:37, 467.09it/s]\u001b[A\n",
      " 49%|█████████████████▌                  | 16385/33664 [00:36<00:35, 482.23it/s]\u001b[A\n",
      " 49%|█████████████████▊                  | 16641/33664 [00:37<00:34, 497.19it/s]\u001b[A\n",
      " 50%|██████████████████                  | 16897/33664 [00:37<00:33, 496.35it/s]\u001b[A\n",
      " 51%|██████████████████▎                 | 17153/33664 [00:38<00:35, 466.38it/s]\u001b[A\n",
      " 52%|██████████████████▌                 | 17409/33664 [00:38<00:33, 478.67it/s]\u001b[A\n",
      " 52%|██████████████████▉                 | 17665/33664 [00:39<00:34, 468.74it/s]\u001b[A\n",
      " 53%|███████████████████▏                | 17921/33664 [00:40<00:33, 464.95it/s]\u001b[A\n",
      " 54%|███████████████████▍                | 18177/33664 [00:40<00:31, 489.87it/s]\u001b[A\n",
      " 55%|███████████████████▋                | 18433/33664 [00:41<00:31, 488.94it/s]\u001b[A\n",
      " 56%|███████████████████▉                | 18689/33664 [00:41<00:33, 443.56it/s]\u001b[A\n",
      " 56%|████████████████████▎               | 18945/33664 [00:42<00:31, 463.62it/s]\u001b[A\n",
      " 57%|████████████████████▌               | 19201/33664 [00:42<00:29, 494.50it/s]\u001b[A\n",
      " 58%|████████████████████▊               | 19457/33664 [00:43<00:27, 516.02it/s]\u001b[A\n",
      " 59%|█████████████████████               | 19713/33664 [00:43<00:26, 521.37it/s]\u001b[A\n",
      " 59%|█████████████████████▎              | 19969/33664 [00:44<00:27, 497.11it/s]\u001b[A\n",
      " 60%|█████████████████████▋              | 20225/33664 [00:44<00:26, 502.75it/s]\u001b[A\n",
      " 61%|█████████████████████▉              | 20481/33664 [00:45<00:26, 500.95it/s]\u001b[A\n",
      " 62%|██████████████████████▏             | 20737/33664 [00:45<00:26, 488.66it/s]\u001b[A\n",
      " 62%|██████████████████████▍             | 20993/33664 [00:46<00:25, 506.50it/s]\u001b[A\n",
      " 63%|██████████████████████▋             | 21249/33664 [00:46<00:25, 486.88it/s]\u001b[A\n",
      " 64%|██████████████████████▉             | 21505/33664 [00:47<00:24, 487.58it/s]\u001b[A\n",
      " 65%|███████████████████████▎            | 21761/33664 [00:47<00:24, 494.16it/s]\u001b[A\n",
      " 65%|███████████████████████▌            | 22017/33664 [00:48<00:23, 499.02it/s]\u001b[A\n",
      " 66%|███████████████████████▊            | 22273/33664 [00:48<00:22, 507.98it/s]\u001b[A\n",
      " 67%|████████████████████████            | 22529/33664 [00:49<00:22, 497.56it/s]\u001b[A\n",
      " 68%|████████████████████████▎           | 22785/33664 [00:49<00:21, 498.55it/s]\u001b[A\n",
      " 68%|████████████████████████▋           | 23041/33664 [00:50<00:21, 493.65it/s]\u001b[A\n",
      " 69%|████████████████████████▉           | 23297/33664 [00:50<00:20, 504.58it/s]\u001b[A\n",
      " 70%|█████████████████████████▏          | 23553/33664 [00:51<00:20, 494.03it/s]\u001b[A\n",
      " 71%|█████████████████████████▍          | 23809/33664 [00:51<00:19, 511.06it/s]\u001b[A\n",
      " 71%|█████████████████████████▋          | 24065/33664 [00:52<00:20, 465.47it/s]\u001b[A\n",
      " 72%|██████████████████████████          | 24321/33664 [00:53<00:19, 488.98it/s]\u001b[A\n",
      " 73%|██████████████████████████▎         | 24577/33664 [00:53<00:21, 430.63it/s]\u001b[A\n",
      " 74%|██████████████████████████▌         | 24833/33664 [00:54<00:20, 433.77it/s]\u001b[A\n",
      " 75%|██████████████████████████▊         | 25089/33664 [00:54<00:19, 434.26it/s]\u001b[A\n",
      " 75%|███████████████████████████         | 25345/33664 [00:55<00:18, 447.56it/s]\u001b[A\n",
      " 76%|███████████████████████████▍        | 25601/33664 [00:56<00:20, 397.43it/s]\u001b[A\n",
      " 77%|███████████████████████████▋        | 25857/33664 [00:56<00:17, 434.31it/s]\u001b[A\n",
      " 78%|███████████████████████████▉        | 26113/33664 [00:57<00:16, 453.18it/s]\u001b[A\n",
      " 78%|████████████████████████████▏       | 26369/33664 [00:57<00:14, 491.28it/s]\u001b[A\n",
      " 79%|████████████████████████████▍       | 26625/33664 [00:58<00:14, 494.22it/s]\u001b[A\n",
      " 80%|████████████████████████████▋       | 26881/33664 [00:58<00:14, 481.60it/s]\u001b[A\n",
      " 81%|█████████████████████████████       | 27137/33664 [00:59<00:13, 492.06it/s]\u001b[A\n",
      " 81%|█████████████████████████████▎      | 27393/33664 [00:59<00:12, 514.29it/s]\u001b[A\n",
      " 82%|█████████████████████████████▌      | 27649/33664 [01:00<00:12, 495.73it/s]\u001b[A\n",
      " 83%|█████████████████████████████▊      | 27905/33664 [01:00<00:11, 502.91it/s]\u001b[A\n",
      " 84%|██████████████████████████████      | 28161/33664 [01:01<00:10, 516.41it/s]\u001b[A\n",
      " 84%|██████████████████████████████▍     | 28417/33664 [01:01<00:10, 499.87it/s]\u001b[A\n",
      " 85%|██████████████████████████████▋     | 28673/33664 [01:02<00:09, 526.50it/s]\u001b[A\n",
      " 86%|██████████████████████████████▉     | 28929/33664 [01:02<00:08, 566.57it/s]\u001b[A\n",
      " 87%|███████████████████████████████▏    | 29185/33664 [01:02<00:07, 596.99it/s]\u001b[A\n",
      " 87%|███████████████████████████████▍    | 29441/33664 [01:03<00:07, 601.39it/s]\u001b[A\n",
      " 88%|███████████████████████████████▊    | 29697/33664 [01:03<00:06, 615.28it/s]\u001b[A\n",
      " 89%|████████████████████████████████    | 29953/33664 [01:04<00:06, 614.06it/s]\u001b[A\n",
      " 90%|████████████████████████████████▎   | 30209/33664 [01:04<00:05, 618.28it/s]\u001b[A\n",
      " 90%|████████████████████████████████▌   | 30465/33664 [01:05<00:05, 602.72it/s]\u001b[A\n",
      " 91%|████████████████████████████████▊   | 30721/33664 [01:05<00:04, 616.73it/s]\u001b[A\n",
      " 92%|█████████████████████████████████▏  | 30977/33664 [01:05<00:04, 623.54it/s]\u001b[A\n",
      " 93%|█████████████████████████████████▍  | 31233/33664 [01:06<00:03, 631.33it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▋  | 31489/33664 [01:06<00:03, 643.59it/s]\u001b[A\n",
      " 94%|█████████████████████████████████▉  | 31745/33664 [01:06<00:02, 651.84it/s]\u001b[A\n",
      " 95%|██████████████████████████████████▏ | 32001/33664 [01:07<00:02, 644.94it/s]\u001b[A\n",
      " 96%|██████████████████████████████████▍ | 32257/33664 [01:07<00:02, 649.01it/s]\u001b[A\n",
      " 97%|██████████████████████████████████▊ | 32513/33664 [01:08<00:01, 644.71it/s]\u001b[A\n",
      " 97%|███████████████████████████████████ | 32769/33664 [01:08<00:01, 639.36it/s]\u001b[A\n",
      " 98%|███████████████████████████████████▎| 33025/33664 [01:08<00:00, 640.33it/s]\u001b[A\n",
      " 99%|███████████████████████████████████▌| 33281/33664 [01:09<00:00, 612.97it/s]\u001b[A\n",
      "100%|████████████████████████████████████| 33664/33664 [01:09<00:00, 483.02it/s]\u001b[A\n",
      " 97%|███████████████████████████████▏| 2013829/2066193 [05:07<00:53, 978.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/history_biography/goodreads_reviews_history_biography_2000000.bin\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████▉| 2058745/2066193 [05:07<00:02, 2837.54it/s]\n",
      "  0%|                                                  | 0/5029 [00:00<?, ?it/s]\u001b[A\n",
      "  0%|                                          | 1/5029 [00:00<33:54,  2.47it/s]\u001b[A\n",
      "  5%|█▉                                     | 257/5029 [00:00<00:12, 372.23it/s]\u001b[A\n",
      " 10%|███▉                                   | 513/5029 [00:01<00:09, 458.70it/s]\u001b[A\n",
      " 15%|█████▉                                 | 769/5029 [00:01<00:08, 498.71it/s]\u001b[A\n",
      " 20%|███████▋                              | 1025/5029 [00:02<00:07, 526.57it/s]\u001b[A\n",
      " 25%|█████████▋                            | 1281/5029 [00:02<00:06, 565.85it/s]\u001b[A\n",
      " 31%|███████████▌                          | 1537/5029 [00:03<00:06, 564.46it/s]\u001b[A\n",
      " 36%|█████████████▌                        | 1793/5029 [00:03<00:05, 595.41it/s]\u001b[A\n",
      " 41%|███████████████▍                      | 2049/5029 [00:03<00:04, 601.83it/s]\u001b[A\n",
      " 46%|█████████████████▍                    | 2305/5029 [00:04<00:04, 618.74it/s]\u001b[A\n",
      " 51%|███████████████████▎                  | 2561/5029 [00:04<00:04, 606.63it/s]\u001b[A\n",
      " 56%|█████████████████████▎                | 2817/5029 [00:05<00:03, 592.64it/s]\u001b[A\n",
      " 61%|███████████████████████▏              | 3073/5029 [00:05<00:03, 587.21it/s]\u001b[A\n",
      " 66%|█████████████████████████▏            | 3329/5029 [00:06<00:02, 581.02it/s]\u001b[A\n",
      " 71%|███████████████████████████           | 3585/5029 [00:06<00:02, 575.52it/s]\u001b[A\n",
      " 76%|█████████████████████████████         | 3841/5029 [00:06<00:02, 575.93it/s]\u001b[A\n",
      " 81%|██████████████████████████████▉       | 4097/5029 [00:07<00:01, 561.66it/s]\u001b[A\n",
      " 87%|████████████████████████████████▉     | 4353/5029 [00:07<00:01, 561.98it/s]\u001b[A\n",
      " 92%|██████████████████████████████████▊   | 4609/5029 [00:08<00:00, 585.51it/s]\u001b[A\n",
      "100%|██████████████████████████████████████| 5029/5029 [00:08<00:00, 587.53it/s]\u001b[A\n",
      "100%|███████████████████████████████| 2066193/2066193 [05:16<00:00, 6528.75it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/preprocessed_data/history_biography/goodreads_reviews_history_biography_2066192.bin\n",
      "peak memory: 19018.19 MiB, increment: 18606.95 MiB\n"
     ]
    }
   ],
   "source": [
    "preprocess.preprocess_datasets(config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abbb0504",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
