{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "35655d8b-93de-4fe7-98f6-1010c08ebbe8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import json\n",
    "import gzip\n",
    "import os\n",
    "from tqdm.auto import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "da5718f5-ae52-4fd5-a637-d26ebb9929a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ae90a0b5fe9c48ee9ba35cbe65d10dad",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total: 7347630 interactions\n",
      "Rated: 4514094 interactions (61.44%)\n"
     ]
    }
   ],
   "source": [
    "base_dir = os.path.expanduser('~/Documents/lvml/datasets')\n",
    "input_path = f'{base_dir}/goodreads_interactions_comics_graphic.json.gz'\n",
    "\n",
    "# Stream the gzipped JSON-lines file once, counting every interaction and\n",
    "# keeping only those with a positive rating.\n",
    "entries = []\n",
    "n_lines_total = 0\n",
    "n_lines_valid = 0\n",
    "with gzip.open(input_path, 'rb') as f:\n",
    "    for line in tqdm(f, desc='Reading interactions'):\n",
    "        if not line.strip():\n",
    "            continue  # tolerate blank/trailing lines instead of crashing in json.loads\n",
    "        entry = json.loads(line)\n",
    "        n_lines_total += 1\n",
    "        if entry['rating'] > 0:\n",
    "            entries.append(entry)\n",
    "            n_lines_valid += 1\n",
    "\n",
    "# Guard the ratio so an empty input file does not raise ZeroDivisionError.\n",
    "rated_share = n_lines_valid / n_lines_total if n_lines_total else 0.0\n",
    "print(f'Total: {n_lines_total} interactions')\n",
    "print(f'Rated: {n_lines_valid} interactions ({rated_share:.2%})')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d6cfa52a-73d0-4032-af9e-00904e5f2c57",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>book_id</th>\n",
       "      <th>review_id</th>\n",
       "      <th>is_read</th>\n",
       "      <th>rating</th>\n",
       "      <th>review_text_incomplete</th>\n",
       "      <th>date_added</th>\n",
       "      <th>date_updated</th>\n",
       "      <th>read_at</th>\n",
       "      <th>started_at</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8842281e1d1347389f2ab93d60773d4d</td>\n",
       "      <td>24815</td>\n",
       "      <td>281e647d84a8581c9b2b3957156f58fb</td>\n",
       "      <td>True</td>\n",
       "      <td>5</td>\n",
       "      <td></td>\n",
       "      <td>Thu Apr 17 23:42:49 -0700 2008</td>\n",
       "      <td>Thu Apr 17 23:42:49 -0700 2008</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8842281e1d1347389f2ab93d60773d4d</td>\n",
       "      <td>24818</td>\n",
       "      <td>96d10c7701527f0353d5b016fc79200d</td>\n",
       "      <td>True</td>\n",
       "      <td>5</td>\n",
       "      <td></td>\n",
       "      <td>Thu Apr 17 23:42:42 -0700 2008</td>\n",
       "      <td>Thu Apr 17 23:42:42 -0700 2008</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8842281e1d1347389f2ab93d60773d4d</td>\n",
       "      <td>59715</td>\n",
       "      <td>d1b70d94aa49bb0e3ff855805ed5c172</td>\n",
       "      <td>True</td>\n",
       "      <td>5</td>\n",
       "      <td></td>\n",
       "      <td>Thu Apr 17 23:42:40 -0700 2008</td>\n",
       "      <td>Thu Apr 17 23:42:40 -0700 2008</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8842281e1d1347389f2ab93d60773d4d</td>\n",
       "      <td>24816</td>\n",
       "      <td>66034d49dac62288c5e7ed4ebf7c8c3b</td>\n",
       "      <td>True</td>\n",
       "      <td>5</td>\n",
       "      <td></td>\n",
       "      <td>Thu Apr 17 23:42:38 -0700 2008</td>\n",
       "      <td>Thu Apr 17 23:42:38 -0700 2008</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7504b2aee1ecb5b2872d3da381c6c91e</td>\n",
       "      <td>13284343</td>\n",
       "      <td>eec681b896b88ab6858d3999e7c6b825</td>\n",
       "      <td>True</td>\n",
       "      <td>4</td>\n",
       "      <td></td>\n",
       "      <td>Thu Jun 07 11:38:13 -0700 2012</td>\n",
       "      <td>Thu Jun 07 11:38:18 -0700 2012</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            user_id   book_id  \\\n",
       "0  8842281e1d1347389f2ab93d60773d4d     24815   \n",
       "1  8842281e1d1347389f2ab93d60773d4d     24818   \n",
       "2  8842281e1d1347389f2ab93d60773d4d     59715   \n",
       "3  8842281e1d1347389f2ab93d60773d4d     24816   \n",
       "4  7504b2aee1ecb5b2872d3da381c6c91e  13284343   \n",
       "\n",
       "                          review_id  is_read  rating review_text_incomplete  \\\n",
       "0  281e647d84a8581c9b2b3957156f58fb     True       5                          \n",
       "1  96d10c7701527f0353d5b016fc79200d     True       5                          \n",
       "2  d1b70d94aa49bb0e3ff855805ed5c172     True       5                          \n",
       "3  66034d49dac62288c5e7ed4ebf7c8c3b     True       5                          \n",
       "4  eec681b896b88ab6858d3999e7c6b825     True       4                          \n",
       "\n",
       "                       date_added                    date_updated read_at  \\\n",
       "0  Thu Apr 17 23:42:49 -0700 2008  Thu Apr 17 23:42:49 -0700 2008           \n",
       "1  Thu Apr 17 23:42:42 -0700 2008  Thu Apr 17 23:42:42 -0700 2008           \n",
       "2  Thu Apr 17 23:42:40 -0700 2008  Thu Apr 17 23:42:40 -0700 2008           \n",
       "3  Thu Apr 17 23:42:38 -0700 2008  Thu Apr 17 23:42:38 -0700 2008           \n",
       "4  Thu Jun 07 11:38:13 -0700 2012  Thu Jun 07 11:38:18 -0700 2012           \n",
       "\n",
       "  started_at  \n",
       "0             \n",
       "1             \n",
       "2             \n",
       "3             \n",
       "4             "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn the list of rated interaction dicts into a DataFrame and preview it\n",
    "df = pd.DataFrame(data=entries)\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e1370675-19c3-441e-aeb5-be4194cec99e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "41932 distinct users with 20 ratings or more\n"
     ]
    }
   ],
   "source": [
    "n_ratings_threshold = 20\n",
    "# Count rows per user in df, then whitelist the ids that reach the threshold\n",
    "ratings_per_user = df['user_id'].value_counts()\n",
    "users_wl = set(ratings_per_user[ratings_per_user >= n_ratings_threshold].index)\n",
    "print(f'{len(users_wl)} distinct users with {n_ratings_threshold} ratings or more')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "db6947aa-f838-4913-acc7-c3a92c032d3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: every (user_id, book_id) pair appears at most once in df.\n",
    "# Report how many rows repeat an earlier pair so a failure is actionable.\n",
    "n_duplicate_rows = int(df.duplicated(subset=['user_id', 'book_id']).sum())\n",
    "assert n_duplicate_rows == 0, f'{n_duplicate_rows} rows repeat an existing user-book pair'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7b69ea5b-e638-49a3-b64f-9c5d6e86b716",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3679076 rows in user-whitelisted df\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>book_id</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>06316bec7a49286f1f98d5acce24f923</td>\n",
       "      <td>575753</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>06316bec7a49286f1f98d5acce24f923</td>\n",
       "      <td>47694</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>06316bec7a49286f1f98d5acce24f923</td>\n",
       "      <td>47700</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>06316bec7a49286f1f98d5acce24f923</td>\n",
       "      <td>47720</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>06316bec7a49286f1f98d5acce24f923</td>\n",
       "      <td>25104</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             user_id book_id  rating\n",
       "20  06316bec7a49286f1f98d5acce24f923  575753       4\n",
       "21  06316bec7a49286f1f98d5acce24f923   47694       4\n",
       "22  06316bec7a49286f1f98d5acce24f923   47700       3\n",
       "23  06316bec7a49286f1f98d5acce24f923   47720       4\n",
       "24  06316bec7a49286f1f98d5acce24f923   25104       5"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only rows from whitelisted users, projected onto user_id, book_id and rating\n",
    "is_whitelisted_user = df['user_id'].isin(users_wl)\n",
    "whitelisted_df = df.loc[is_whitelisted_user, ['user_id', 'book_id', 'rating']]\n",
    "print(f'{len(whitelisted_df)} rows in user-whitelisted df')\n",
    "whitelisted_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6ede728b-5663-4ec1-a527-895cf8ea4ae6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "user_id    41932\n",
       "book_id    87565\n",
       "rating         5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct values in each column of the filtered frame\n",
    "whitelisted_df.nunique(axis=0, dropna=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d6e8ffe0-d8cd-45e6-b72e-c6c79200fed5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:ylabel='Frequency'>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGdCAYAAADzOWwgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAqGUlEQVR4nO3de3TU9Z3/8ddILgY2GQmQ2yGGrAYEQjkKbhKKcgmEOwV6Ci7bCMoiXbmlgYPgnq6wyxKULdg2R8SW5apiq2LdA0ZihXgoRCASuRSRloBQEoI2TC6FBML394c/vschCSRDkpnJ5/k4Z87J9zvv7+T94dM2r37m851xWJZlCQAAwGD3eLsBAAAAbyMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMF+DtBvzFjRs3dOHCBYWGhsrhcHi7HQAA0AiWZamiokIxMTG6556G14EIRI104cIFxcbGersNAADggXPnzqlr164NPk8gaqTQ0FBJ3/6DhoWFebkbAADQGOXl5YqNjbX/jjeEQNRIN98mCwsLIxABAOBn7rTdhU3VAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYL8HYDAAA0VbfFO+5Yc2blmFboBG0FK0QAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIzHXWYAgDaJO9HQFKwQAQAA4xGIAACA8QhEAADAeAQiAABgPDZVAwB8SmM2QwPNjRUiAABgPAIRAAAwnlcDUVZWlh599FGFhoYqIiJCEyZM0MmTJ91qpk+fLofD4fZITk52q6murtbcuXPVuXNndejQQePHj9f58+fdasrKypSeni6n0ymn06n09HRdvny5pYcIAAD8gFcDUV5enmbPnq38/Hzl5ubq+vXrSktLU1VVlVvdyJEjVVxcbD927tzp9nxGRoa2b9+ubdu2ae/evaqsrNTYsWNVW1tr10ydOlWFhYXKyclRTk6OCgsLlZ6e3irjBAAAvs2rm6pzcnLcjjds2KCIiAgVFBTo8ccft88HBwcrKiqq3tdwuVxav369tmzZomHDhkmStm7dqtjYWH300UcaMWKETpw4oZycHOXn5yspKUmS9Otf/1opKSk6efKkevTo0UIjBAAA/sCn9hC5XC5JUnh4uNv5PXv2KCIiQt27d9fMmTNVWlpqP1dQUKBr164pLS3NPhcTE6PExETt27dPkrR//345nU47DElScnKynE6nXXOr6upqlZeXuz0AAEDb5DO33VuWpczMTA0cOFCJiYn2+VGjRulHP/qR4uLiVFRUpJ/97GcaOnSoCgoKFBwcrJKSEgUFBaljx45urxcZGamSkhJJUklJiSIiIur8zoiICLvmVllZWVq2bFkzjhAA4Gv4vjPc5DOBaM6cOTpy5Ij27t3rdn7KlCn2z4mJierfv7/i4uK0Y8cOTZo0qcHXsyxLDofDPv7uzw3VfNeSJUuUmZlpH5eXlys2NrbR4wEAtA2EJjP4xFtmc+fO1fvvv6/du3era9eut62Njo5WXFycTp06JUmKiopSTU2NysrK3OpKS0sVGRlp11y8eLHOa126dMmuuVVwcLDCwsLcHgAAoG3yaiCyLEtz5szRu+++q48//ljx8fF3vOabb77RuXPnFB0dLUnq16+fAgMDlZuba9cUFxfr2LFjGjBggCQpJSVFLpdLBw4csGs+/fRTuVwuuwYAAJjLq2+ZzZ49W2+88YZ+//vfKzQ01N7P43Q6FRISosrKSi1dulQ//OEPFR0drTNnzuj5559X586dNXHiRLt2xowZWrBggTp16qTw8HAtXLhQffr0se8669mzp0aOHKmZM2dq3bp1kqRnnnlGY8eO5Q4zAADg3UC0du1aSdLgwYPdzm/YsEHTp09Xu3btdPTo
UW3evFmXL19WdHS0hgwZorfeekuhoaF2/Zo1axQQEKDJkyfrypUrSk1N1caNG9WuXTu75vXXX9e8efPsu9HGjx+v7Ozslh8kAADweQ7LsixvN+EPysvL5XQ65XK52E8EAC3IH7/clU3Vvquxf799YlM1AACANxGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMFeLsBAAD8XbfFO+5Yc2blmFboBJ5ihQgAABiPQAQAAIzHW2YAAKBF+cNbiqwQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAe33YPAGg1jfnWc8AbWCECAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDy+7R4AgFbQbfGOO9acWTmmFTpBfVghAgAAxiMQAQAA4/GWGQCgWTTmLSHAV7FCBAAAjEcgAgAAxiMQAQAA4xGIAACA8bwaiLKysvToo48qNDRUERERmjBhgk6ePOlWY1mWli5dqpiYGIWEhGjw4ME6fvy4W011dbXmzp2rzp07q0OHDho/frzOnz/vVlNWVqb09HQ5nU45nU6lp6fr8uXLLT1EAADgB7waiPLy8jR79mzl5+crNzdX169fV1pamqqqquyal156SatXr1Z2drYOHjyoqKgoDR8+XBUVFXZNRkaGtm/frm3btmnv3r2qrKzU2LFjVVtba9dMnTpVhYWFysnJUU5OjgoLC5Went6q4wUAAL7JYVmW5e0mbrp06ZIiIiKUl5enxx9/XJZlKSYmRhkZGXruueckfbsaFBkZqRdffFGzZs2Sy+VSly5dtGXLFk2ZMkWSdOHCBcXGxmrnzp0aMWKETpw4oV69eik/P19JSUmSpPz8fKWkpOiLL75Qjx497thbeXm5nE6nXC6XwsLCWu4fAQD8FLfd3722+knV3vyU7sb+/fapPUQul0uSFB4eLkkqKipSSUmJ0tLS7Jrg4GANGjRI+/btkyQVFBTo2rVrbjUxMTFKTEy0a/bv3y+n02mHIUlKTk6W0+m0a25VXV2t8vJytwcAAGibfOaDGS3LUmZmpgYOHKjExERJUklJiSQpMjLSrTYyMlJnz561a4KCgtSxY8c6NTevLykpUURERJ3fGRERYdfcKisrS8uWLbu7QQEA0AR835n3+MwK0Zw5c3TkyBG9+eabdZ5zOBxux5Zl1Tl3q1tr6qu/3essWbJELpfLfpw7d64xwwAAAH7IJwLR3Llz9f7772v37t3q2rWrfT4qKkqS6qzilJaW2qtGUVFRqqmpUVlZ2W1rLl68WOf3Xrp0qc7q003BwcEKCwtzewAAgLbJq4HIsizNmTNH7777rj7++GPFx8e7PR8fH6+oqCjl5uba52pqapSXl6cBAwZIkvr166fAwEC3muLiYh07dsyuSUlJkcvl0oEDB+yaTz/9VC6Xy64BAADm8uoeotmzZ+uNN97Q73//e4WGhtorQU6nUyEhIXI4HMrIyNCKFSuUkJCghIQErVixQu3bt9fUqVPt2hkzZmjBggXq1KmTwsPDtXDhQvXp00fDhg2TJPXs2VMjR47UzJkztW7dOknSM888o7FjxzbqDjMAANC2eTUQrV27VpI0ePBgt/MbNmzQ9OnTJUmLFi3SlStX9Oyzz6qsrExJSUnatWuXQkND7fo1a9YoICBAkydP1pUrV5SamqqNGzeqXbt2ds3rr7+uefPm2XejjR8/XtnZ2S07QAAA4Bd86nOIfBmfQwQAt8fnELUOf7zLjM8hAgAA8AMEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAA
xiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4Ad5uAAAANF63xTvuWHNm5ZhW6KRtYYUIAAAYj0AEAACMRyACAADGIxABAADjsakaAHBHjdnIC/gzVogAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwnkeBqKioqLn7AAAA8BqPAtGDDz6oIUOGaOvWrbp69Wpz9wQAANCqPApEn3/+uR5++GEtWLBAUVFRmjVrlg4cONDcvQEAALQKjwJRYmKiVq9erb/+9a/asGGDSkpKNHDgQPXu3VurV6/WpUuXmrtPAACAFnNXm6oDAgI0ceJE/fa3v9WLL76ov/zlL1q4cKG6du2qJ598UsXFxc3VJwAAQIsJuJuLDx06pP/93//Vtm3b1KFDBy1cuFAzZszQhQsX9B//8R/6wQ9+wFtpAAC0sm6Ld9yx5szKMa3Qif/wKBCtXr1aGzZs0MmTJzV69Ght3rxZo0eP1j33fLvgFB8fr3Xr1umhhx5q1mYBAABagkeBaO3atXr66af11FNPKSoqqt6a+++/X+vXr7+r5gAAAFqDR4Ho1KlTd6wJCgrStGnTPHl5AACAVuXRpuoNGzbod7/7XZ3zv/vd77Rp06a7bgoAAKA1eRSIVq5cqc6dO9c5HxERoRUrVtx1UwAAAK3Jo0B09uxZxcfH1zkfFxenr7766q6bAgAAaE0eBaKIiAgdOXKkzvnPP/9cnTp1uuumAAAAWpNHgeiJJ57QvHnztHv3btXW1qq2tlYff/yx5s+fryeeeKLRr/PJJ59o3LhxiomJkcPh0Hvvvef2/PTp0+VwONweycnJbjXV1dWaO3euOnfurA4dOmj8+PE6f/68W01ZWZnS09PldDrldDqVnp6uy5cvezJ0AADQBnkUiJYvX66kpCSlpqYqJCREISEhSktL09ChQ5u0h6iqqkp9+/ZVdnZ2gzUjR45UcXGx/di5c6fb8xkZGdq+fbu2bdumvXv3qrKyUmPHjlVtba1dM3XqVBUWFionJ0c5OTkqLCxUenp60wcOAADaJI9uuw8KCtJbb72l//qv/9Lnn3+ukJAQ9enTR3FxcU16nVGjRmnUqFG3rQkODm7ws45cLpfWr1+vLVu2aNiwYZKkrVu3KjY2Vh999JFGjBihEydOKCcnR/n5+UpKSpIk/frXv1ZKSopOnjypHj16NKlnAADQ9tzVV3d0795d3bt3b65e6rVnzx5FRETovvvu06BBg/Tf//3fioiIkCQVFBTo2rVrSktLs+tjYmKUmJioffv2acSIEdq/f7+cTqcdhiQpOTlZTqdT+/btazAQVVdXq7q62j4uLy9voRECAABv8ygQ1dbWauPGjfrDH/6g0tJS3bhxw+35jz/+uFmaGzVqlH70ox8pLi5ORUVF+tnPfqahQ4eqoKBAwcHBKikpUVBQkDp27Oh2XWRkpEpKSiRJJSUldoD6roiICLumPllZWVq2bFmzjAMAAPg2jwLR/PnztXHjRo0ZM0aJiYlyOBzN3ZckacqUKfbPiYmJ6t+/v+Li4rRjxw5NmjSpwessy3Lrqb7+bq251ZIlS5SZmWkfl5eXKzY2tqlDAAAAfsCjQLRt2zb99re/1ejRo5u7n9uKjo5WXFyc/dUhUVFRqqmpUVlZmdsqUWlpqQYMGGDXXLx4sc5rXbp0SZGRkQ3+ruDgYAUHBzfzCAAAgC/y6C6zoKAgPfjgg83dyx198803OnfunKKjoyVJ/fr1U2BgoHJzc+2a4uJiHTt2zA5EKSkpcrlcOnDggF3z6aefyuVy2TUAAMBsHq0QLViwQL/4xS+UnZ19V2+XVVZW6s9//rN9
XFRUpMLCQoWHhys8PFxLly7VD3/4Q0VHR+vMmTN6/vnn1blzZ02cOFGS5HQ6NWPGDC1YsECdOnVSeHi4Fi5cqD59+th3nfXs2VMjR47UzJkztW7dOknSM888o7Fjx3KHGQBI6rZ4h7dbgBc0Zt7PrBzTCp34Bo8C0d69e7V792598MEH6t27twIDA92ef/fddxv1OocOHdKQIUPs45t7dqZNm6a1a9fq6NGj2rx5sy5fvqzo6GgNGTJEb731lkJDQ+1r1qxZo4CAAE2ePFlXrlxRamqqNm7cqHbt2tk1r7/+uubNm2ffjTZ+/PjbfvYRAAAwi0eB6L777rNXae7G4MGDZVlWg89/+OGHd3yNe++9V7/61a/0q1/9qsGa8PBwbd261aMeAQBA2+dRINqwYUNz9wEAAOA1Hm2qlqTr16/ro48+0rp161RRUSFJunDhgiorK5utOQAAgNbg0QrR2bNnNXLkSH311Veqrq7W8OHDFRoaqpdeeklXr17Vq6++2tx9AgAAtBiPVojmz5+v/v37q6ysTCEhIfb5iRMn6g9/+EOzNQcAANAaPL7L7I9//KOCgoLczsfFxemvf/1rszQGAADQWjxaIbpx44Zqa2vrnD9//rzbLfEAAAD+wKNANHz4cL388sv2scPhUGVlpV544YVW/zoPAACAu+XRW2Zr1qzRkCFD1KtXL129elVTp07VqVOn1LlzZ7355pvN3SMAAECL8igQxcTEqLCwUG+++aY+++wz3bhxQzNmzNC//Mu/uG2yBgAA8AceBSJJCgkJ0dNPP62nn366OfsBAABodR4Fos2bN9/2+SeffNKjZgAAALzBo0A0f/58t+Nr167p73//u4KCgtS+fXsCEQAA8Cse3WVWVlbm9qisrNTJkyc1cOBANlUDAAC/4/F3md0qISFBK1eurLN6BAAA4Os83lRdn3bt2unChQvN+ZIAAMBLui3ecceaMyvHtEInLc+jQPT++++7HVuWpeLiYmVnZ+v73/9+szQGAADQWjwKRBMmTHA7djgc6tKli4YOHaqf//znzdEXAABAq/EoEN24caO5+wAAAPCaZttUDQAA4K88WiHKzMxsdO3q1as9+RUAAACtxqNAdPjwYX322We6fv26evToIUn68ssv1a5dOz3yyCN2ncPhaJ4uAQAAWpBHgWjcuHEKDQ3Vpk2b1LFjR0nffljjU089pccee0wLFixo1iYBAABakkd7iH7+858rKyvLDkOS1LFjRy1fvpy7zAAAgN/xKBCVl5fr4sWLdc6XlpaqoqLirpsCAABoTR4FookTJ+qpp57S22+/rfPnz+v8+fN6++23NWPGDE2aNKm5ewQAAGhRHu0hevXVV7Vw4UL9+Mc/1rVr1759oYAAzZgxQ6tWrWrWBgEAAFqaR4Goffv2euWVV7Rq1Sr95S9/kWVZevDBB9WhQ4fm7g8AAKDF3dWXuxYXF6u4uFiPP/64QkJCZFkWt9oDAGCQxnwBrD/waA/RN998o9TUVHXv3l2jR49WcXGxJOlf//VfueUeAAD4HY8C0U9/+lMFBgbqq6++Uvv27e3zU6ZMUU5OTrM1BwAA0Bo8ests165d+vDDD9W1a1e38wkJCTp79myzNAYAANBaPFohqqqqclsZuunrr79WcHDwXTcFAADQmjwKRI8//rg2b95sHzscDt24cUOrVq3SkCFDmq05AACA1uDRW2arVq3S4MGDdejQIdXU1GjRokU6fvy4/va3v+mPf/xjc/cIAADQojxaIerVq5eOHDmif/qnf9Lw4cNVVVWlSZMm6fDhw3rggQeau0cAAIAW1eQVomvXriktLU3r1q3TsmXLWqInAACAVtXkFaLAwEAdO3aMD2AEAABthkdvmT355JNav359c/cCAADgFR5tqq6pqdFvfvMb5ebmqn///nW+w2z16tXN0hwAAEBraFIgOn36tLp166Zjx47pkUcekSR9+eWXbjW8lQYAAPxNkwJRQkKCiouL
tXv3bknfflXHL3/5S0VGRrZIcwCAu9NWvngTaGlN2kNkWZbb8QcffKCqqqpmbQgAAKC1ebSp+qZbAxIAAIA/alIgcjgcdfYIsWcIAAD4uybtIbIsS9OnT7e/wPXq1av6yU9+Uucus3fffbf5OgQAAGhhTQpE06ZNczv+8Y9/3KzNAAAAeEOTAtGGDRtaqg8AAACvuatN1QAAAG0BgQgAABiPQAQAAIxHIAIAAMYjEAEAAON59G33AADv43vKgObDChEAADAegQgAABiPQAQAAIzn1UD0ySefaNy4cYqJiZHD4dB7773n9rxlWVq6dKliYmIUEhKiwYMH6/jx42411dXVmjt3rjp37qwOHTpo/PjxOn/+vFtNWVmZ0tPT5XQ65XQ6lZ6ersuXL7fw6AAAgL/waiCqqqpS3759lZ2dXe/zL730klavXq3s7GwdPHhQUVFRGj58uCoqKuyajIwMbd++Xdu2bdPevXtVWVmpsWPHqra21q6ZOnWqCgsLlZOTo5ycHBUWFio9Pb3FxwcAAPyDw7Isy9tNSJLD4dD27ds1YcIESd+uDsXExCgjI0PPPfecpG9XgyIjI/Xiiy9q1qxZcrlc6tKli7Zs2aIpU6ZIki5cuKDY2Fjt3LlTI0aM0IkTJ9SrVy/l5+crKSlJkpSfn6+UlBR98cUX6tGjR6P6Ky8vl9PplMvlUlhYWPP/AwBAE3GXGdqSMyvHtMjrNvbvt8/uISoqKlJJSYnS0tLsc8HBwRo0aJD27dsnSSooKNC1a9fcamJiYpSYmGjX7N+/X06n0w5DkpScnCyn02nX1Ke6ulrl5eVuDwAA0Db5bCAqKSmRJEVGRrqdj4yMtJ8rKSlRUFCQOnbseNuaiIiIOq8fERFh19QnKyvL3nPkdDoVGxt7V+MBAAC+y2cD0U0Oh8Pt2LKsOududWtNffV3ep0lS5bI5XLZj3PnzjWxcwAA4C98NhBFRUVJUp1VnNLSUnvVKCoqSjU1NSorK7ttzcWLF+u8/qVLl+qsPn1XcHCwwsLC3B4AAKBt8tlAFB8fr6ioKOXm5trnampqlJeXpwEDBkiS+vXrp8DAQLea4uJiHTt2zK5JSUmRy+XSgQMH7JpPP/1ULpfLrgEAAGbz6neZVVZW6s9//rN9XFRUpMLCQoWHh+v+++9XRkaGVqxYoYSEBCUkJGjFihVq3769pk6dKklyOp2aMWOGFixYoE6dOik8PFwLFy5Unz59NGzYMElSz549NXLkSM2cOVPr1q2TJD3zzDMaO3Zso+8wAwAAbZtXA9GhQ4c0ZMgQ+zgzM1OSNG3aNG3cuFGLFi3SlStX9Oyzz6qsrExJSUnatWuXQkND7WvWrFmjgIAATZ48WVeuXFFqaqo2btyodu3a2TWvv/665s2bZ9+NNn78+AY/+wgAAJjHZz6HyNfxOUQAfA2fQ4S2hM8hAgAA8DICEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABgvwNsNAADq6rZ4h7dbAIzCChEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4Ad5uAABM023xDm+3AOAWrBABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHg+HYiWLl0qh8Ph9oiKirKftyxLS5cuVUxMjEJCQjR48GAdP37c7TWqq6s1d+5cde7cWR06dND48eN1/vz51h4KAADwYT4diCSpd+/e
Ki4uth9Hjx61n3vppZe0evVqZWdn6+DBg4qKitLw4cNVUVFh12RkZGj79u3atm2b9u7dq8rKSo0dO1a1tbXeGA4AAPBBPv9t9wEBAW6rQjdZlqWXX35Z//7v/65JkyZJkjZt2qTIyEi98cYbmjVrllwul9avX68tW7Zo2LBhkqStW7cqNjZWH330kUaMGNGqYwEAAL7J51eITp06pZiYGMXHx+uJJ57Q6dOnJUlFRUUqKSlRWlqaXRscHKxBgwZp3759kqSCggJdu3bNrSYmJkaJiYl2DQAAgE+vECUlJWnz5s3q3r27Ll68qOXLl2vAgAE6fvy4SkpKJEmRkZFu10RGRurs2bOSpJKSEgUFBaljx451am5e35Dq6mpVV1fbx+Xl5c0xJAAA4IN8OhCNGjXK/rlPnz5KSUnRAw88oE2bNik5OVmS5HA43K6xLKvOuVs1piYrK0vLli3zsHMAAOBPfP4ts+/q0KGD+vTpo1OnTtn7im5d6SktLbVXjaKiolRTU6OysrIGaxqyZMkSuVwu+3Hu3LlmHAkAAPAlfhWIqqurdeLECUVHRys+Pl5RUVHKzc21n6+pqVFeXp4GDBggSerXr58CAwPdaoqLi3Xs2DG7piHBwcEKCwtzewAAgLbJp98yW7hwocaNG6f7779fpaWlWr58ucrLyzVt2jQ5HA5lZGRoxYoVSkhIUEJCglasWKH27dtr6tSpkiSn06kZM2ZowYIF6tSpk8LDw7Vw4UL16dPHvusMAJpTt8U7vN0CAA/4dCA6f/68/vmf/1lff/21unTpouTkZOXn5ysuLk6StGjRIl25ckXPPvusysrKlJSUpF27dik0NNR+jTVr1iggIECTJ0/WlStXlJqaqo0bN6pdu3beGhYAAPAxDsuyLG834Q/Ky8vldDrlcrl4+wxAg1ghAjxzZuWYFnndxv799qs9RAAAAC2BQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGM+nv7oDAHwJn0INtF2sEAEAAOMRiAAAgPEIRAAAwHjsIQIAsT8IMB0rRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgB3m4AAFpat8U7vN0CAB/HChEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPG47R6AX+OWegDNgRUiAABgPFaIAPgsVn8AtBZWiAAAgPEIRAAAwHgEIgAAYDz2EAHwCvYHAfAlrBABAADjEYgAAIDxeMsMQLPj7TAA/saoQPTKK69o1apVKi4uVu/evfXyyy/rscce83ZbgF8h7ABoi4wJRG+99ZYyMjL0yiuv6Pvf/77WrVunUaNG6U9/+pPuv/9+b7cHtLjGBJkzK8e0QicA4HsclmVZ3m6iNSQlJemRRx7R2rVr7XM9e/bUhAkTlJWVdcfry8vL5XQ65XK5FBYW1pKtAk3Gqg0Af9dS/4essX+/jVghqqmpUUFBgRYvXux2Pi0tTfv27av3murqalVXV9vHLpdL0rf/sEDiCx+22u86tmzEHWtuVP+9FToBgJbTUn9fb77undZ/jAhEX3/9tWpraxUZGel2PjIyUiUlJfVek5WVpWXLltU5Hxsb2yI9Ag1xvuztDgCg5bX0/9ZVVFTI6XQ2+LwRgegmh8PhdmxZVp1zNy1ZskSZmZn28Y0bN/S3v/1NnTp1avAaT5SXlys2Nlbnzp1rs2/FtfUxMj7/19bH2NbHJ7X9MTI+z1mWpYqKCsXExNy2zohA1LlzZ7Vr167OalBpaWmdVaObgoODFRwc7Hbuvvvua6kWFRYW1ib/Q/5dbX2MjM//tfUx
tvXxSW1/jIzPM7dbGbrJiA9mDAoKUr9+/ZSbm+t2Pjc3VwMGDPBSVwAAwFcYsUIkSZmZmUpPT1f//v2VkpKi1157TV999ZV+8pOfeLs1AADgZcYEoilTpuibb77Rf/7nf6q4uFiJiYnauXOn4uLivNpXcHCwXnjhhTpvz7UlbX2MjM//tfUxtvXxSW1/jIyv5RnzOUQAAAANMWIPEQAAwO0QiAAAgPEIRAAAwHgEIgAAYDwCUQv75JNPNG7cOMXExMjhcOi999674zV5eXnq16+f7r33Xv3jP/6jXn311ZZv1ENNHd+ePXvkcDjqPL744ovWabiJsrKy9Oijjyo0NFQRERGaMGGCTp48ecfr/GUOPRmfv83h2rVr9b3vfc/+wLeUlBR98MEHt73GX+ZPavr4/G3+bpWVlSWHw6GMjIzb1vnTHH5XY8bnb3O4dOnSOr1GRUXd9hpvzB+BqIVVVVWpb9++ys7OblR9UVGRRo8erccee0yHDx/W888/r3nz5umdd95p4U4909Tx3XTy5EkVFxfbj4SEhBbq8O7k5eVp9uzZys/PV25urq5fv660tDRVVVU1eI0/zaEn47vJX+awa9euWrlypQ4dOqRDhw5p6NCh+sEPfqDjx4/XW+9P8yc1fXw3+cv8fdfBgwf12muv6Xvf+95t6/xtDm9q7Phu8qc57N27t1uvR48ebbDWa/NnodVIsrZv337bmkWLFlkPPfSQ27lZs2ZZycnJLdhZ82jM+Hbv3m1JssrKylqlp+ZWWlpqSbLy8vIarPHnOWzM+Px9Di3Lsjp27Gj95je/qfc5f56/m243Pn+dv4qKCishIcHKzc21Bg0aZM2fP7/BWn+cw6aMz9/m8IUXXrD69u3b6HpvzR8rRD5m//79SktLczs3YsQIHTp0SNeuXfNSV83v4YcfVnR0tFJTU7V7925vt9NoLpdLkhQeHt5gjT/PYWPGd5M/zmFtba22bdumqqoqpaSk1Fvjz/PXmPHd5G/zN3v2bI0ZM0bDhg27Y60/zmFTxneTP83hqVOnFBMTo/j4eD3xxBM6ffp0g7Xemj9jPqnaX5SUlNT5wtnIyEhdv35dX3/9taKjo73UWfOIjo7Wa6+9pn79+qm6ulpbtmxRamqq9uzZo8cff9zb7d2WZVnKzMzUwIEDlZiY2GCdv85hY8fnj3N49OhRpaSk6OrVq/qHf/gHbd++Xb169aq31h/nrynj88f527Ztmz777DMdPHiwUfX+NodNHZ+/zWFSUpI2b96s7t276+LFi1q+fLkGDBig48ePq1OnTnXqvTV/BCIf5HA43I6t//9h4ree90c9evRQjx497OOUlBSdO3dO//M//+OT/0X+rjlz5ujIkSPau3fvHWv9cQ4bOz5/nMMePXqosLBQly9f1jvvvKNp06YpLy+vwdDgb/PXlPH52/ydO3dO8+fP165du3Tvvfc2+jp/mUNPxudvczhq1Cj75z59+iglJUUPPPCANm3apMzMzHqv8cb88ZaZj4mKilJJSYnbudLSUgUEBNSbpNuC5ORknTp1yttt3NbcuXP1/vvva/fu3eratetta/1xDpsyvvr4+hwGBQXpwQcfVP/+/ZWVlaW+ffvqF7/4Rb21/jh/TRlffXx5/goKClRaWqp+/fopICBAAQEBysvL0y9/+UsFBASotra2zjX+NIeejK8+vjyHt+rQoYP69OnTYL/emj9WiHxMSkqK/u///s/t3K5du9S/f38FBgZ6qauWdfjwYZ9bwr7JsizNnTtX27dv1549exQfH3/Ha/xpDj0ZX318eQ7rY1mWqqur633On+avIbcbX318ef5SU1Pr3JH01FNP6aGHHtJzzz2ndu3a1bnGn+bQk/HVx5fn8FbV1dU6ceKEHnvssXqf99r8teiWbVgVFRXW4cOHrcOHD1uSrNWrV1uHDx+2zp49a1mWZS1evNhKT0+360+fPm21b9/e+ulPf2r96U9/stavX28FBgZab7/9
treGcFtNHd+aNWus7du3W19++aV17Ngxa/HixZYk65133vHWEG7r3/7t3yyn02nt2bPHKi4uth9///vf7Rp/nkNPxudvc7hkyRLrk08+sYqKiqwjR45Yzz//vHXPPfdYu3btsizLv+fPspo+Pn+bv/rceheWv8/hre40Pn+bwwULFlh79uyxTp8+beXn51tjx461QkNDrTNnzliW5TvzRyBqYTdvj7z1MW3aNMuyLGvatGnWoEGD3K7Zs2eP9fDDD1tBQUFWt27drLVr17Z+443U1PG9+OKL1gMPPGDde++9VseOHa2BAwdaO3bs8E7zjVDf2CRZGzZssGv8eQ49GZ+/zeHTTz9txcXFWUFBQVaXLl2s1NRUOyxYln/Pn2U1fXz+Nn/1uTUw+Psc3upO4/O3OZwyZYoVHR1tBQYGWjExMdakSZOs48eP28/7yvw5LOv/71QCAAAwFJuqAQCA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADDe/wONxN18LbDg3gAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of each whitelisted user's mean rating; title and axis labels make the\n",
    "# figure readable on its own, and the trailing ';' hides the return-value repr.\n",
    "ax = whitelisted_df.groupby('user_id')['rating'].mean().plot.hist(bins=50)\n",
    "ax.set(title='Mean rating per user', xlabel='Mean rating', ylabel='Number of users');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "20577c99-3c3a-487c-88c9-8be6122edf83",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done!\n"
     ]
    }
   ],
   "source": [
    "# Write the filtered frame to CSV under base_dir, without the index column\n",
    "output_path = f'{base_dir}/goodreads_interactions_comics_graphic.csv'\n",
    "whitelisted_df.to_csv(path_or_buf=output_path, index=False)\n",
    "print('Done!')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
