{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "customer_test.csv   stories_description.csv\ttransactions.csv\r\n",
      "customer_train.csv  stories_reaction_test.csv\r\n",
      "sample_submit.csv   stories_reaction_train.csv\r\n"
     ]
    }
   ],
   "source": [
    "# List the CSV files matching *.csv in the current working directory.\n",
    "!ls *.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pd_intersection(a, b):\n",
    "    \"\"\"Print the distinct-value counts of `a` and `b` and the size of their overlap.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    a, b : iterable of hashable values\n",
    "        Each is converted to a set, so duplicates are counted once.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The counts are only printed, as `a: <n>, b: <n>, c: <n>`, where `c` is\n",
    "        the number of values present in both inputs.\n",
    "    \"\"\"\n",
    "    left, right = set(a), set(b)\n",
    "    shared = left & right\n",
    "    print(f'a: {len(left)}, b: {len(right)}, c: {len(shared)}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# customer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>product_0</th>\n",
       "      <th>product_1</th>\n",
       "      <th>product_2</th>\n",
       "      <th>product_3</th>\n",
       "      <th>product_4</th>\n",
       "      <th>product_5</th>\n",
       "      <th>product_6</th>\n",
       "      <th>gender_cd</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status_cd</th>\n",
       "      <th>children_cnt</th>\n",
       "      <th>first_session_dttm</th>\n",
       "      <th>job_position_cd</th>\n",
       "      <th>job_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>894436</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>30.0</td>\n",
       "      <td>MAR</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-20 09:10:16</td>\n",
       "      <td>1</td>\n",
       "      <td>Неруководящий сотрудник - обсл. Персонал</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>524526</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>20.0</td>\n",
       "      <td>UNM</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2017-03-29 20:38:45</td>\n",
       "      <td>16</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>498134</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>25.0</td>\n",
       "      <td>UNM</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-12 11:25:06</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>278941</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>CLS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>UTL</td>\n",
       "      <td>M</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-02-21 18:47:51</td>\n",
       "      <td>16</td>\n",
       "      <td>Неруководящий сотрудник - специалист</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>877312</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>40.0</td>\n",
       "      <td>MAR</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-07 11:17:02</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id product_0 product_1 product_2 product_3 product_4 product_5  \\\n",
       "0       894436       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "1       524526       NaN       UTL       NaN       NaN       NaN       UTL   \n",
       "2       498134       NaN       UTL       NaN       NaN       NaN       NaN   \n",
       "3       278941       NaN       NaN       UTL       CLS       NaN       UTL   \n",
       "4       877312       NaN       UTL       NaN       NaN       NaN       NaN   \n",
       "\n",
       "  product_6 gender_cd   age marital_status_cd  children_cnt  \\\n",
       "0       NaN         M  30.0               MAR           0.0   \n",
       "1       NaN         F  20.0               UNM           0.0   \n",
       "2       NaN         F  25.0               UNM           0.0   \n",
       "3       UTL         M  25.0               NaN           NaN   \n",
       "4       NaN         F  40.0               MAR           0.0   \n",
       "\n",
       "    first_session_dttm  job_position_cd  \\\n",
       "0  2018-03-20 09:10:16                1   \n",
       "1  2017-03-29 20:38:45               16   \n",
       "2  2018-03-12 11:25:06               22   \n",
       "3  2016-02-21 18:47:51               16   \n",
       "4  2018-03-07 11:17:02               22   \n",
       "\n",
       "                                  job_title  \n",
       "0  Неруководящий сотрудник - обсл. Персонал  \n",
       "1                                       NaN  \n",
       "2                                       NaN  \n",
       "3      Неруководящий сотрудник - специалист  \n",
       "4                                       NaN  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read customer_train.csv into df_cust_train and preview its first rows.\n",
    "df_cust_train = pd.read_csv('customer_train.csv')\n",
    "df_cust_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>product_0</th>\n",
       "      <th>product_1</th>\n",
       "      <th>product_2</th>\n",
       "      <th>product_3</th>\n",
       "      <th>product_4</th>\n",
       "      <th>product_5</th>\n",
       "      <th>product_6</th>\n",
       "      <th>gender_cd</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status_cd</th>\n",
       "      <th>children_cnt</th>\n",
       "      <th>first_session_dttm</th>\n",
       "      <th>job_position_cd</th>\n",
       "      <th>job_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>234305</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2015-10-29 09:59:10</td>\n",
       "      <td>22</td>\n",
       "      <td>Менеджер проектов</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>914339</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-04-09 08:51:54</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>895631</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-22 10:38:33</td>\n",
       "      <td>16</td>\n",
       "      <td>Менеджер по продажам</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>954837</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-05-09 18:24:32</td>\n",
       "      <td>16</td>\n",
       "      <td>Инженер</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>391590</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>30.0</td>\n",
       "      <td>UNM</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2016-10-08 05:29:20</td>\n",
       "      <td>20</td>\n",
       "      <td>сборщик мебели</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id product_0 product_1 product_2 product_3 product_4 product_5  \\\n",
       "0       234305       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "1       914339       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "2       895631       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "3       954837       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "4       391590       NaN       UTL       NaN       NaN       NaN       NaN   \n",
       "\n",
       "  product_6 gender_cd   age marital_status_cd  children_cnt  \\\n",
       "0       NaN         F  25.0               NaN           0.0   \n",
       "1       NaN         M  15.0               NaN           0.0   \n",
       "2       NaN         M  20.0               NaN           0.0   \n",
       "3       NaN         F  25.0               NaN           0.0   \n",
       "4       NaN         M  30.0               UNM           0.0   \n",
       "\n",
       "    first_session_dttm  job_position_cd             job_title  \n",
       "0  2015-10-29 09:59:10               22     Менеджер проектов  \n",
       "1  2018-04-09 08:51:54               22                   NaN  \n",
       "2  2018-03-22 10:38:33               16  Менеджер по продажам  \n",
       "3  2018-05-09 18:24:32               16               Инженер  \n",
       "4  2016-10-08 05:29:20               20        сборщик мебели  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read customer_test.csv and preview its first rows.\n",
    "# Note: the frame built from the *_test.csv file is named with a `_valid` suffix.\n",
    "df_cust_valid = pd.read_csv('customer_test.csv')\n",
    "df_cust_valid.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a: 50000, b: 54245, c: 50000\n"
     ]
    }
   ],
   "source": [
    "# Print how many distinct customer_id values each customer table holds\n",
    "# and how many of them occur in both tables.\n",
    "pd_intersection(\n",
    "    df_cust_train['customer_id'].unique().tolist(),\n",
    "    df_cust_valid['customer_id'].unique().tolist(),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# stories_reaction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th>event_dttm</th>\n",
       "      <th>event</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15</td>\n",
       "      <td>138</td>\n",
       "      <td>2018-07-24 15:33:22</td>\n",
       "      <td>view</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15</td>\n",
       "      <td>202</td>\n",
       "      <td>2018-06-04 08:08:08</td>\n",
       "      <td>skip</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15</td>\n",
       "      <td>222</td>\n",
       "      <td>2018-06-17 13:44:45</td>\n",
       "      <td>skip</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>379</td>\n",
       "      <td>2018-05-23 05:41:43</td>\n",
       "      <td>skip</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>544</td>\n",
       "      <td>2018-07-25 02:16:29</td>\n",
       "      <td>view</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  story_id           event_dttm event\n",
       "0           15       138  2018-07-24 15:33:22  view\n",
       "1           15       202  2018-06-04 08:08:08  skip\n",
       "2           15       222  2018-06-17 13:44:45  skip\n",
       "3           15       379  2018-05-23 05:41:43  skip\n",
       "4           15       544  2018-07-25 02:16:29  view"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read stories_reaction_train.csv into df_user_item_train and preview its first rows.\n",
    "df_user_item_train = pd.read_csv('stories_reaction_train.csv')\n",
    "df_user_item_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th>event_dttm</th>\n",
       "      <th>answer_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>466906</td>\n",
       "      <td>1152</td>\n",
       "      <td>2018-08-01 00:00:46</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>466906</td>\n",
       "      <td>1355</td>\n",
       "      <td>2018-08-01 00:00:46</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>146395</td>\n",
       "      <td>537</td>\n",
       "      <td>2018-08-01 00:02:08</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>897303</td>\n",
       "      <td>915</td>\n",
       "      <td>2018-08-01 00:02:30</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>50094</td>\n",
       "      <td>1415</td>\n",
       "      <td>2018-08-01 00:03:13</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  story_id           event_dttm  answer_id\n",
       "0       466906      1152  2018-08-01 00:00:46          0\n",
       "1       466906      1355  2018-08-01 00:00:46          1\n",
       "2       146395       537  2018-08-01 00:02:08          2\n",
       "3       897303       915  2018-08-01 00:02:30          3\n",
       "4        50094      1415  2018-08-01 00:03:13          4"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read stories_reaction_test.csv into df_user_item_valid and preview its first rows.\n",
    "df_user_item_valid = pd.read_csv('stories_reaction_test.csv')\n",
    "df_user_item_valid.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse event_dttm in both reaction frames with pd.to_datetime; each column is\n",
    "# overwritten on its own frame, so later cells see the converted values.\n",
    "df_user_item_train['event_dttm'] = pd.to_datetime(df_user_item_train['event_dttm'])\n",
    "df_user_item_valid['event_dttm'] = pd.to_datetime(df_user_item_valid['event_dttm'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a: 41001, b: 22646, c: 14260\n"
     ]
    }
   ],
   "source": [
    "# Print how many distinct customer_id values each reaction frame holds\n",
    "# and how many of them occur in both frames.\n",
    "pd_intersection(\n",
    "    df_user_item_train['customer_id'].unique().tolist(),\n",
    "    df_user_item_valid['customer_id'].unique().tolist(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "min   2018-03-29 11:30:44\n",
       "max   2018-07-31 23:58:14\n",
       "Name: event_dttm, dtype: datetime64[ns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Earliest and latest event_dttm in df_user_item_train.\n",
    "df_user_item_train['event_dttm'].agg(['min', 'max'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "min   2018-08-01 00:00:46\n",
       "max   2018-08-22 20:49:33\n",
       "Name: event_dttm, dtype: datetime64[ns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Earliest and latest event_dttm in df_user_item_valid.\n",
    "df_user_item_valid['event_dttm'].agg(['min', 'max'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_dates(s_dt):\n",
    "    \"\"\"Count the entries of a datetime Series per calendar month.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    s_dt : pandas.Series\n",
    "        Must support the `.dt` accessor (datetime-like values).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.Series\n",
    "        The result of `value_counts()`: the index holds the first day of each\n",
    "        month (at midnight) and the values are the number of entries falling\n",
    "        in that month. Nothing is drawn here despite the function's name.\n",
    "    \"\"\"\n",
    "    midnight = s_dt.dt.floor('D')\n",
    "    # Stepping back (day-of-month - 1) days lands on the 1st of the same month.\n",
    "    days_past_first = pd.to_timedelta(midnight.dt.day - 1, 'D')\n",
    "    return (midnight - days_past_first).value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f38ee23c780>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAECCAYAAADQEYGEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU1f3/8deHsAQSwhb2LWERBDQsEdyLO9pW3Kutda3Lz2q1RSsurVW7qHWp1tYWv1q1VRQEK7hRilqt1oXFsAsICGENBAIkQLbP74+5YEAkIUxyZ3k/H495zOTcO3c+Mwx5zzn3zIm5OyIiIgeqQdgFiIhIfFKAiIhIrShARESkVhQgIiJSKwoQERGplYZhFxBtmZmZnpWVFXYZIiJxZcaMGRvcve2B3CfhAiQrK4vp06eHXYaISFwxsy8P9D4awhIRkVpRgIiISK0oQEREpFYUICIiUisKEBERqRUFiIiI1IoCREREaiXhvgciIiLV215awfKNxSzbELnUhgJERCRBlVdUsmrzdpZuKGZpQTHLNmyLBEZBMauLdhz08RUgIiJxzN0p2LaTZQWRnkTVsFhRWEJZxVd/NDAjtSE92qZzZI82ZGemkd02jezMNLLapJF+/4E/tgJERCQObN1RxvINJSzdsC0IiK8u23aW796vccMGZLdJo3e75pzavwPZmWn0yIwEReu0xphZ1GpSgIiIxIjS8kpWFJZEehIF23b3KJZtKKZg687d+5lBl1ZNyc5MZ0j3VpHeRHDp1LIpKQ2iFxL7owAREalHlZXO2i07qgw3bdvdk1hZWELlVyNOZKY3JjszjRP6tCU7Mz3Sm2ibRrfWzUhtlBLekwgoQERE6sDmktJI76GgmKXByeulBcUs31jMjrLK3fs1a5xCdmYah3VuwcicTsF5iXSy26TRolmjEJ9B9RQgIiK1tKMsmApbUPy1mU6bSsp279ewgdGtdTOyM9M4tlfm7pPXPTLTaZ/RJKrnJepTtQFiZl2B54D2gANj3P1RM2sNvARkAcuBC9x9k0VeiUeBM4AS4DJ3nxkc61LgzuDQv3b3Z4P2IcAzQFPgDeBGd/dveoyDftYiIjVUUems2rR9j17EriGnVZu377Fvh4xUsjPTOP2wjrtPXGdnptG1dTMapSTe97Zr0gMpB0a5+0wzaw7MMLOpwGXANHe/z8xGA6OBW4HTgd7BZRjwBDAsCIO7gFwiQTTDzCYFgfAEcBXwMZEAGQG8GRxzX48hIhI17s6GbaVBMERmOe06eb1iYwmlFV8NOTUPpsIOzW69x8nr7Mw00pok16BOtc/W3dcAa4LbW81sAdAZGAkMD3Z7FniXyC/3kcBz7u7AR2bW0sw6BvtOdfdCgCCERpjZu0CGu38UtD8HnEUkQL7pMUREDti2neUs3zWzqcq5iWUFxWytOhU2pQFZmc3o2TaNkw9tH+lNBMNObaI8FTaeHVBcmlkWMIhIT6F9EC4Aa4kMcUEkXFZWuVt+0La/9vx9tLOfx9i7rquBqwG6det2IE9JRBJMaXklKzeVVPli3Vffm1i/11TYzi2bkp2ZxjmDOwdfrEunRz1PhY1nNQ4QM0sHJgA3ufuWqgkcnK/wb7xzFOzvMdx9DDAGIDc3t07rEJHwuQdTYfdx8nrlpu1UVJkL2yYtMhX2W4e0Jbvtri/VpdO9TWxMhY1nNQoQM2tEJDyed/eJQfM6M+vo7muCIar1QfsqoGuVu3cJ2lbx1XDUrvZ3g/Yu+9h/f48hIknK3fnxCzN5Y87a3W1NG0Wmwvbv3ILv5nTa47xEy2aNQ6w2sdVkFpYBTwEL3P3hKpsmAZcC9wXXr1Zpv97MXiRyEr0oCIApwG/NrFWw36nAbe5eaGZbzOxIIkNjlwB/rOYxRCRJjZu+kjfmrOWKY7I5+dB2ZLdNo0NGqs5LhKAmPZBjgB8Cc8zss6DtdiK/1MeZ
2ZXAl8AFwbY3iEzhXUJkGu/lAEFQ3At8Gux3z64T6sB1fDWN983gwn4eQ0SSUP6mEu59bQFH9WjDnd8+lAY6TxEqi0yWShy5ubk+ffr0sMsQkSirrHQufupjZucX8eaNx9G1dbOwS0ooZjbD3XMP5D6J980WEUlIf//oSz78YiN3fvtQhUeMUICISMxbtqGY+95cyPA+bfneEV2rv4PUCwWIiMS0ikrn5vF5NEox7j/3cJ0sjyHJ9b17EYk7T/13KTO+3MQfvjeQ9hmpYZcjVagHIiIxa/G6rTz4r0Wc1r89Iwd2Crsc2YsCRERiUllFJaPG55HepCG/OfswDV3FIA1hiUhMeuLdL5idX8SffzCYzPQmYZcj+6AeiIjEnHmri3hs2mLOzOnEGYd1DLsc+QYKEBGJKTvLKxg1Lo9WaY25Z2T/sMuR/dAQlojElMemLWbh2q08fVmuFkKMceqBiEjMmLViE0+8+wUX5HbhxL77/PM/EkMUICISE3aUVTBqfB4dMlK58zv9wi5HakBDWCISEx6c8jlLC4r5x5XDyEhtFHY5UgPqgYhI6D5ZVshTHyzjh0d259jemWGXIzWkABGRUBXvLOfm8Xl0a92M0af3DbscOQAawhKRUP3uzQWs3FTCuGuOIq2JfiXFE/VARCQ07y8u4B8freBHx2ZzRFbrsMuRA6QAEZFQbNlRxs9fnk3PtmmMOrVP2OVILai/KCKhuHfyfNZt2cHE644htVFK2OVILagHIiL1btqCdYyfkc91w3sxsGvLsMuRWlKAiEi92lRcyuiJc+jboTk/Oal32OXIQdAQlojUq19OmsfmklKevXwojRvqM2w807+eiNSb12evYXLean5yYm/6dcoIuxw5SAoQEakXBVt3cuc/55DTpQX/b3jPsMuRKFCAiEidc3fueGUOxaUVPHRBDg1T9KsnEehfUUTq3CuzVvGv+eu45dQ+9GrXPOxyJEoUICJSp9YW7eCuSfPI7d6KK47NDrsciSIFiIjUGXfn1gmzKa9wHjw/h5QGFnZJEkUKEBGpMy9+upL/LCrgtjP6kpWZFnY5EmUKEBGpEysLS/j1a/M5plcbLh7WPexypA4oQEQk6iornVtezsPMeOC8HBpo6CohKUBEJOqe+99yPlpayC++cyidWzYNuxypIwoQEYmqpQXbuO+thZzQpy0X5HYNuxypQwoQEYmaikrn5vF5NGmYwn3nHo6Zhq4SmRZTFJGoefL9pcxcsZlHLxxI+4zUsMuROqYeiIhExaJ1W3n4X4sY0b8DZ+Z0CrscqQcKEBE5aGUVlfxs3Gc0T23Ir88eoKGrJKEhLBE5aH9+5wvmrtrCXy4eTGZ6k7DLkXpSbQ/EzJ42s/VmNrdK26/MbJWZfRZczqiy7TYzW2Jmn5vZaVXaRwRtS8xsdJX2bDP7OGh/ycwaB+1Ngp+XBNuzovWkRSR65q4q4o9vL+asgZ0YMaBj2OVIParJENYzwIh9tD/i7gODyxsAZtYPuBDoH9znz2aWYmYpwJ+A04F+wEXBvgD3B8fqBWwCrgzarwQ2Be2PBPuJSAzZWV7BqHF5tE5rzN1nDgi7HKln1QaIu78HFNbweCOBF919p7svA5YAQ4PLEndf6u6lwIvASIsMlJ4IvBzc/1ngrCrHeja4/TJwkmlgVSSm/OHfi/l83VbuP/dwWjRrFHY5Us8O5iT69WY2OxjiahW0dQZWVtknP2j7pvY2wGZ3L9+rfY9jBduLgv2/xsyuNrPpZja9oKDgIJ6SiNTUzBWb+Ot/vuB7uV05oW+7sMuRENQ2QJ4AegIDgTXAQ1GrqBbcfYy757p7btu2bcMsRSQpbC+t4OZxeXRs0ZQ7v3No2OVISGoVIO6+zt0r3L0SeJLIEBXAKqDq2gVdgrZvat8ItDSzhnu173GsYHuLYH8RCdnvp3zO0g3F/P68w2meqqGrZFWrADGzqlMtzgZ2zdCaBFwYzKDKBnoDnwCfAr2DGVeNiZxo
n+TuDrwDnBfc/1Lg1SrHujS4fR7wdrC/iIToo6Ub+duHy7jkqO4c3Ssz7HIkRNV+D8TMxgLDgUwzywfuAoab2UDAgeXANQDuPs/MxgHzgXLgx+5eERznemAKkAI87e7zgoe4FXjRzH4NzAKeCtqfAv5uZkuInMS/8KCfrYgclOKd5dzych7dWjdj9Ol9wy5HQmaJ9qE+NzfXp0+fHnYZIgnpjlfm8MInKxh/zVHkZrUOuxyJIjOb4e65B3IfLWUiIjXy3qICnv94BVcd10PhIYACRERqoGh7GbdOmE2vdun87JRDwi5HYoTWwhKRat0zeT7rt+5k4sVDSG2UEnY5EiPUAxGR/Zo6fx0TZuZz3fCe5HRtGXY5EkMUICLyjTYVl3LbxDkc2jGDG07sHXY5EmM0hCUi3+gXr86laHspf79yKI0b6vOm7EnvCBHZp9dmr+a12Wu46eRDOLRjRtjlSAxSgIjI16zfuoNf/HMuOV1bcs3xPcIuR2KUAkRE9uDu3D5xLiWlFTx0fg4NU/RrQvZN7wwR2cPEmav494J13HJaH3q1Sw+7HIlhChAR2W1N0XZ+NXkeR2S14vJjssMuR2KcAkREgMjQ1c9fnk15hfPg+TmkNNAfAJX9U4CICABjP1nJ+4s3cPsZfeneJi3sciQOKEBEhJWFJfz69fkc2yuTHwzrHnY5EicUICJJrrLSuXl8Hilm3H/e4TTQ0JXUkAJEJMk98+FyPl5WyC++24/OLZuGXY7EEQWISBL7omAb97+1kJP6tuP8IV3CLkfijAJEJElVBENXqY1S+N05h2GmoSs5MFpMUSRJjXlvKbNWbObRCwfSLiM17HIkDqkHIpKEPl+7lUemLuKMwzpwZk6nsMuROKUAEUkyZRWV/GzcZzRPbci9Iwdo6EpqTUNYIknm8beXMG/1Fv76wyG0SW8SdjkSx9QDEUkic/KL+NM7Szh7UGdO698h7HIkzilARJLEzvIKRo3/jDbpjfnVd/uHXY4kAA1hiSSJR6YuZtG6bfzt8iNo0axR2OVIAlAPRCQJzPhyE2Pe+4ILj+jKCX3ahV2OJAgFiEiC215awc3j8+jYoil3fPvQsMuRBKIhLJEE98CUhSzbUMwLVw2jeaqGriR61AMRSWD/+2Ijf/tgOZcdncXRPTPDLkcSjAJEJEFt21nOLS/nkdWmGT8f0SfsciQBaQhLJEH95vUFrN68nfHXHkWzxvqvLtGnHohIAvrPogLGfrKCq47rwZDurcMuRxKUAkQkwRRtL+PWl2fTu106Pz3lkLDLkQSmfq1Igrl78jwKtu3kyUtySW2UEnY5ksDUAxFJIP+at5aJM1fx4+E9OaxLi7DLkQSnABFJEIXFpdz+yhz6dczg+hN7h12OJAENYYkkiF/8cy5F28v4x4+G0bihPhtK3dO7TCQBTM5bzetz1nDTyYfQt0NG2OVIkqg2QMzsaTNbb2Zzq7S1NrOpZrY4uG4VtJuZPWZmS8xstpkNrnKfS4P9F5vZpVXah5jZnOA+j1nw59G+6TFEZE/rt+7gF6/OJadrS645vkfY5UgSqUkP5BlgxF5to4Fp7t4bmBb8DHA60Du4XA08AZEwAO4ChgFDgbuqBMITwFVV7jeimscQkYC7c9uEOWwvreCh83NomKJBBak/1b7b3P09oHCv5pHAs8HtZ4GzqrQ/5xEfAS3NrCNwGjDV3QvdfRMwFRgRbMtw94/c3YHn9jrWvh5DRAIvz8hn2sL13HJaH3q1Sw+7HEkytf240t7d1wS31wLtg9udgZVV9ssP2vbXnr+P9v09xteY2dVmNt3MphcUFNTi6YjEn9Wbt3PP5PkMzW7NFcdkh12OJKGD7u8GPQePQi21fgx3H+Puue6e27Zt27osRSQmuDu3TphNhTsPnpdDgwYWdkmShGobIOuC4SeC6/VB+yqga5X9ugRt+2vvso/2/T2GSNJ7/uMVvL94A7efcSjd2jQLuxxJUrUNkEnArplUlwKv
Vmm/JJiNdSRQFAxDTQFONbNWwcnzU4EpwbYtZnZkMPvqkr2Ota/HEElqKzaW8Ns3FnBc70x+MKxb2OVIEqv2i4RmNhYYDmSaWT6R2VT3AePM7ErgS+CCYPc3gDOAJUAJcDmAuxea2b3Ap8F+97j7rhPz1xGZ6dUUeDO4sJ/HEElalZXOzS/nkWLG/eceTjDrXSQU1QaIu1/0DZtO2se+Dvz4G47zNPD0PtqnAwP20b5xX48hksz+9uFyPllWyO/PO5xOLZuGXY4kOU0aF4kTXxRs44G3FnJS33acN6RL9XcQqWMKEJE4UF5RyahxeTRtnMLvzjlMQ1cSE7SYokgc+Ot7S/ls5WYeu2gQ7TJSwy5HBFAPRCTmLVy7hT/8exHfPqwj3z28Y9jliOymABGJYaXllfzspTxaNG3EvWcN0NCVxBQNYYnEsMffWcL8NVv46w+H0DqtcdjliOxBPRCRGDU7fzN/emcJ5wzqzGn9O4RdjsjXKEBEYtCOsgpGjcujbXoT7vpu/7DLEdknDWGJxKBHpi5i8fptPHP5EbRo1ijsckT2ST0QkRgz48tCxry/lIuGdmN4n3ZhlyPyjRQgIjGkpLScUePy6NyyKXd8+9CwyxHZLw1hicSQB976nOUbSxh71ZGkN9F/T4lt6oGIxIgPv9jAMx8u57KjsziqZ5uwyxGplgJEJAZs3VHGLeNnk52Zxq0j+oZdjkiNqI8sEgN++8YC1hRtZ/y1R9G0cUrY5YjUiHogIiF75/P1jP1kJVcd34Mh3VuHXY5IjSlAREJUVFLG6AmzOaR9Oj89+ZCwyxE5IBrCEgnRrybPY8O2Uv7vkiNIbaShK4kv6oGIhOStuWt5ZdYqrj+hF4d1aRF2OSIHTAEiEoKN23Zyxytz6N8pg+tP7BV2OSK1oiEskXrm7tz5z7ls3VHO81fl0ChFn+MkPumdK1LPJuWt5s25a7nplN707ZARdjkitaYAEalH67fs4JevzmNQt5ZcfVyPsMsROSgKEJF64u6MnjiHHWUVPHh+Dg01dCVxTu9gkXoyfkY+by9cz60j+tKzbXrY5YgcNAWISD1YtqGYeyfPZ1h2ay47OivsckSiQgEiUsden72GM//4Xxo0MH5/Xg4NGljYJYlEhabxitSRHWUV3PPafF74eAWDurXksQsH0bV1s7DLEokaBYhIHVi8bivXvzCLz9dt5dpv9WTUqYfo+x6ScBQgIlHk7oyfns8vJ80lvUlDnrtiKMcf0jbsskTqhAJEJEq27ijjjlfmMilvNcf0asMj3xtIu+apYZclUmcUICJRMDt/MzeMnUX+pu3cclofrv1WT1J0slwSnAJE5CC4O09/sJz73lxA2/QmvHT1keRm6Y9CSXJQgIjUUmFxKbeMz2PawvWc2q89D5x3OC2bNQ67LJF6owARqYWPl27kxhc/o7C4lLvP7M8lR3XHTENWklwUICIHoKLSefztJTw6bRHd26Qx8dKjGdBZfwxKkpMCRKSG1m3ZwY0vzuKjpYWcM6gz95w1gPQm+i8kyUvvfpEaeGfhekaNz2NHWQUPnZ/DuUO6hF2SSOgO6quxZrbczOaY2WdmNj1oa21mU81scXDdKmg3M3vMzJaY2WwzG1zlOJcG+y82s0urtA8Jjr8kuK8GmaVelZZX8pvX53P5M5/SPiOVyTccq/AQCURjbYUT3H2gu+cGP48Gprl7b2Ba8DPA6UDv4HI18AREAge4CxgGDAXu2hU6wT5XVbnfiCjUK1IjKzaWcP5fPuTJ95dxyVHdeeW6o7UMu0gVdTGENRIYHtx+FngXuDVof87dHfjIzFqaWcdg36nuXghgZlOBEWb2LpDh7h8F7c8BZwFv1kHNInuYnLea2yfOwQz+cvFgRgzoGHZJIjHnYAPEgX+ZmQN/dfcxQHt3XxNsXwu0D253BlZWuW9+0La/9vx9tH+NmV1NpFdDt27dDub5SJLbXlrBPa/NY+wnKxncrSWPXTSILq20gq7I
vhxsgBzr7qvMrB0w1cwWVt3o7h6ES50KgmsMQG5ubp0/niSmReu2cv0LM1m8fhvXDe/JT0/RCroi+3NQAeLuq4Lr9Wb2CpFzGOvMrKO7rwmGqNYHu68Cula5e5egbRVfDXntan83aO+yj/1FosrdefHTldw9ed7uFXSP660VdEWqU+uPV2aWZmbNd90GTgXmApOAXTOpLgVeDW5PAi4JZmMdCRQFQ11TgFPNrFVw8vxUYEqwbYuZHRnMvrqkyrFEomLLjjJuGDuL2ybO4Yis1rxx43EKD5EaOpgeSHvglWBmbUPgBXd/y8w+BcaZ2ZXAl8AFwf5vAGcAS4AS4HIAdy80s3uBT4P97tl1Qh24DngGaErk5LlOoEvU5K2MrKC7avN2fj6iD9ce31N/blbkAFhkUlTiyM3N9enTp4ddhsSwykrnqf8u4/63FtI+I5XHLhrIkO5aQVeSm5nNqPJ1jBrRN9ElqWzctpObx+fxzucFnNa/PQ+cm0OLZo3CLkskLilAJGn874uN3PTSLDaVlHHvyP5cfKRW0BU5GAoQSXgVlc6j0xbzx7cXk90mjacvO4L+nbSCrsjBUoBIQltTtJ0bX/yMT5YVcu7gLtwzsj9pWkFXJCr0P0kS1rQF67h5fB47yyt5+IIczhmsRRBFokkBIgmntLyS+95cyNMfLKNfxwwe//4gemgRRJGoU4BIQlm+oZgbxs5izqoiLjs6i9Gn9yW1UUrYZYkkJAWIJIxXP1vFHa/MJaWB8dcfDuG0/h3CLkkkoSlAJO6VlJbzq0nzGDc9n9zurXj0okF0btk07LJEEp4CROLawrVbuP6FWXxRsI0fn9CTn558CA21gq5IvVCASFxyd174ZAX3TJ5P89RG/P2KYRzbOzPsskSSigJE4k7R9jJunziH1+es4bjemTx8wUDaNm8SdlkiSUcBInFl1opN3DB2FmuKdnDriL5cc3wPraArEhIFiMSFykrnyfeX8vspn9M+I5Vx1xzFkO6twi5LJKkpQCTmbdi2k1Hj8vjPogJG9O/A/ecerhV0RWKAAkRi2odLNnDTS5+xeXsZ9541gIuHddMKuiIxQgEiMam8opLHpi3mj+8soUdmGs9eMZRDO2aEXZaIVKEAkZizevN2bnrxMz5ZXsh5QyIr6DZrrLeqSKzR/0qJKVPnr+OWl/MoK6/kke/lcPYgraArEqsUIBITdpZXcN+bC/nbB8vp3ymDx78/mOzMtLDLEpH9UIBI6JZtKOaGsTOZu2oLlx2dxW1n9KVJQ62gKxLrFCASqn/OWsUdr8yhUcMGPHlJLqf0ax92SSJSQwoQCUVJaTl3vTqP8TPyOSKrFY9eOIhOWkFXJK4oQKTeLVizhetfmMnSDcXccGIvbjypt1bQFYlDChCpN+7O8x+v4J7X5tOiaSOev3IYR/fSCroi8UoBIvWiaHsZoyfM5s25azn+kLY8fEEOmelaQVcknilApM7NXLGJG16YxbotO7jt9L5cdZxW0BVJBAoQqTOVlc6Y95fy4JTP6dAilfHXHsWgblpBVyRRKECkThRs3cnPxn3G+4s3cMZhHfjdOYfToqlW0BVJJAoQiboPghV0t2wv4zdnD+D7Q7WCrkgiUoBI1JRXVPKHfy/mT+8uoWfbdP5+5VD6dtAKuiKJSgEiUbFq83ZuHDuL6V9u4oLcLvzqTK2gK5Lo9D9cDtqUeWv5+cuzKa+o5NELBzJyYOewSxKReqAAkVrbURZZQfeZD5czoHMGj180mCytoCuSNBQgUitLC7Zx/QuzmL9mC1cck82tp/fRCroiSUYBIgds4sx87vznXBo3bMD/XZLLyVpBVyQpKUCkxop3lvPLV+cxYWY+Q7Na8+hFA+nYQivoiiQrBYjUyPzVW7h+7EyWbSjmJyf15icn9tIKuiJJLuZ/A5jZCDP73MyWmNnosOtJNu7Oc/9bzll//oBtO8p5/kfD+Nkphyg8RCS2eyBmlgL8CTgFyAc+NbNJ7j4/
3MqSQ1FJGT+fkMeUeesY3qctD52fQxutoCsigZgOEGAosMTdlwKY2YvASOAbA2TRuq2c/PB/9ntQd6/2gavfo6Y71Wy3aNVUg8MEx6p+x80lZWwvreCOMw7lymOztYKuiOwh1gOkM7Cyys/5wLC9dzKzq4GrATI69aBP++bVH7kGvwtr8uuypms81exY0TlO5FjV71ndHg1TjO8P687Ari1r+KgikkxiPUBqxN3HAGMAcnNz/U8/GBxyRSIiiS/Wz4SuArpW+blL0CYiIiGL9QD5FOhtZtlm1hi4EJgUck0iIkKMD2G5e7mZXQ9MAVKAp919XshliYgIMR4gAO7+BvBG2HWIiMieYn0IS0REYpQCREREakUBIiIitaIAERGRWrGaLKERT8ysCFgcdh0JpAVQFHYRCUKvZXTp9Yyu3u7e4kDuEPOzsGrhJXe/OuwiEoWZjdHrGR16LaNLr2d0mdmYA71PIg5hTQ67gASj1zN69FpGl17P6Drg1zPhhrBERKR+JGIPRERE6oECREREaiXuAsTM3Mz+UeXnhmZWYGavhVlXPDOzs4LXtW/YtcQrvS/rjpltC7uGRFPda2pm75pZbnXHibsAAYqBAWbWNPj5FA5wiXczS8TZZwfjIuC/wXWNBX9yWCIO+n0pEm/iMUAgsrjit4PbFwFjd20ws6Fm9j8zm2VmH5pZn6D9MjObZGZvA9Pqv+TYZGbpwLHAlUSWy8fMhpvZe2b2upl9bmZ/MbMGwbZtZvaQmeUBR4VXeUyqzfvyPTMbWGW//5pZTr1WHQeC9+RrVX5+3MwuC24vN7O7zWymmc1RT7pm9vea1lS8BsiLwIVmlgocDnxcZdtC4Dh3HwT8EvhtlW2DgfPc/Vv1VmnsGwm85e6LgI1mNiRoHwrcAPQDegLnBO1pwMfunuPu/633amNbbd6XTwGXAZjZIUCqu+fVW8WJY4O7DwaeAG4Ou5hkEZcB4u6zgSwin/L2Xuq9BTDezOYCjwD9q2yb6u6F9VJk/LiIyC8+gutdw1ifuPtSd68g8kn62KC9AphQvyXGh1q+L8cD3zGzRsAVwDP1UmzimRhczyDybyD1IJ7PBUwCHgSGA22qtN8LvOPuZ5tZFvBulW3F9VRbXDCz1gN7uG4AAALxSURBVMCJwGFm5kT+aJcDrwfXVe36eUcQKrJvB/S+dPcSM5tKpCd4ATAE2Zdy9vzAm7rX9p3BdQXx/XutPlX3mlYrLnsggaeBu919zl7tLfjq5OVl9VpR/DkP+Lu7d3f3LHfvCiwDjgOGBn9KuAHwPSIn2aV6tXlf/h/wGPCpu2+q2/Li1pdAPzNrYmYtgZPCLigBHPRrGrcB4u757v7YPjY9APzOzGahTyLVuQh4Za+2CUH7p8DjwAIiobL3frIPtXlfuvsMYAvwt3ooMa4EMyZ3uvtKYBwwN7ieFWphcSyar6mWMpGvMbPhwM3u/p2wa0kGZtaJyJBWX3evDLmcmBLMSHvS3YeGXUuiiOZrGrc9EJFEYGaXEJmtdYfCY09mdi2RCRx3hl1Looj2a6oeiIiI1Ip6ICIiUisKkCRjZl3N7B0zm29m88zsxqC9tZlNNbPFwXWroL1v8A3qnWZ2817H+mlwjLlmNjb4Ap2IJAkFSPIpB0a5ez/gSODHZtYPGA1Mc/feRJZ6GR3sXwj8hMh3G3Yzs85Be667DyDyHZIL6+cpiEgsUIAkGXdf4+4zg9tbiUzT7Uzki2zPBrs9C5wV7LPe3T8FyvZxuIZA02BaYDNgdR2XLyIxRAGSxIJvRA8iMguovbuvCTatBdrv777uvopIr2QFsAYocvd/1VmxIhJzFCBJKliFdwJwk7tvqbrNI1Pz9js9LzhHMhLIBjoBaWZ2cR2VKyIxSAGShIKF+yYAz7v7rkXo1plZx2B7R2B9NYc5GVjm7gXuXkZkMbuj66pmEYk9CpAkY2ZGZAnxBe7+cJVNk4BLg9uXAq9Wc6gVwJFm1iw45klEzqeI
SJLQFwmTjJkdC7wPzAF2ffP5diLnQcYB3YgssnaBuxeaWQdgOpAR7L8N6OfuW8zsbiILLZYTWUfnR+6+ExFJCgoQERGpFQ1hiYhIrShARESkVhQgIiJSKwoQERGpFQWIiIjUigJERERqRQEiIiK18v8BisJZxPsO/AQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_dates(df_user_item_train['event_dttm']).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2018-08-01    172049\n",
       "Name: event_dttm, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot_dates(df_user_item_valid['event_dttm'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7333359165517135"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_user_item_train) / (len(df_user_item_train) + len(df_user_item_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_user_item_train = df_user_item_train.sort_values('event_dttm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "split_pos = int(len(df_user_item_train) * 0.75)\n",
    "df_user_item_train, df_user_item_test = df_user_item_train.iloc[:split_pos], df_user_item_train.iloc[split_pos:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f38ee2c3c18>"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAD+CAYAAAAd3fMoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU1fnH8c9DQtghBMIOguyLrAFxrdYN1BasilB/AmpF64ZorVRrtWqtS9VC61JUtqpsrmhRirjWlSBh38KiBFkCgbAGSPL8/rgndkyTkMDM3DuT5/16zWtmzl3mO4chZ+69Z84RVcUYY4w5XlX8DmCMMSY+WINijDEmLKxBMcYYExbWoBhjjAkLa1CMMcaERaLfAfzSsGFDbd26td8xjDEmpixcuHCHqqaWtKzSNiitW7cmPT3d7xjGGBNTROTb0pbZKS9jjDFhEbYGRURaisiHIrJCRJaLyGhXniIi80Rkrbuv78pFRMaLSKaILBGR3iH7GuHWXysiI0LK+4jIUrfNeBGRsl7DGGNM9ITzCCUfuENVuwD9gZtEpAswFpivqu2B+e45wECgvbuNAp4Fr3EA7gNOBvoB94U0EM8C14VsN8CVl/YaxhhjoiRsDYqqblHVb9zjvcBKoDkwCJjiVpsCDHaPBwFT1fMlkCwiTYELgHmqmqOqu4B5wAC3rK6qfqneeDFTi+2rpNcwxhgTJRG5hiIirYFewFdAY1Xd4hZtBRq7x82BTSGbZbmyssqzSiinjNcwxhgTJWFvUESkNvAacJuq7gld5o4sIjoaZVmvISKjRCRdRNKzs7MjGcMYYyqdsDYoIlIVrzF5WVVfd8Xb3Okq3P12V74ZaBmyeQtXVlZ5ixLKy3qNH1HVCaqapqppqakldqM2xsSZIwWFvP5NFrkHj/gdJe6Fs5eXAC8CK1X1yZBFs4GinlojgLdCyoe73l79gVx32moucL6I1HcX488H5rple0Skv3ut4cX2VdJrGGMquXHvr+X2mYsZNTWdQ/kFfseJa+E8QjkNuAr4qYhkuNuFwCPAeSKyFjjXPQeYA6wHMoHngRsBVDUHeBBY4G4PuDLcOi+4bdYB77ry0l7DGFOJfbl+J09/lEmPlsl8tSGHu15dgs0BFTlSWSs3LS1N7ZfyxsSv3ANHGDDuE6pXTeCdW05n8ucbeXzuam79aTtuP7+j3/FilogsVNW0kpZV2qFXjDHxS1X53RtLyN57iNdvPJVa1RK58ay2fLfzAOM/yKRF/ZoM6dvy6DsyFWINijEm7sxKz2LO0q3cNaAT3VskAyAiPHRJN77PPcjdbyylaXJ1zmhvnXPCycbyMsbElfXZ+7j/7eWc2rYB15954o+WVU2owtNX9qZdo9rc+NI3rNq6p5S9mGNhDYoxJm4czi9k9PQMkhKr8OSQnlSpIv+zTt3qVZk4si81qyVwzaQFbNuT50PS+GQNijEmbjw5bw1LN+fyyC+606Re9VLXa5Zcg4kj+5J78AjXTF7A/kP5UUwZv6xBMcbEhc8zd/CPT9YxrF8rBnRrctT1uzarx9+v7M2qrXu5+ZVvyC8ojELK+GYNijEm5u3af5gxMzNo07AW917cudzbnd2xEQ8M6sqHq7O5b/Zy+43KcbJeXsaYmKaqjH19CTn7D/PiiL7UTKrYn7UrTz6B73IO8I+P13NCg5qMOrNthJLGP2tQjDExbdrXm5i7fBv3XNiZbs3rHdM+7rqgE1k5B3l4ziqaJ9fkou5Nw5yycrAGxRgTszK37+OBd5ZzRvuGXHt6m2PeT5UqwhNDerB1Tx5jZmbQpF41+pyQEsaklYNdQzHGxKRD+QWMnr6ImkmJPHF5jxK7CFdE9aoJPD88jWb1qnPd1IVs3LE/TEkrD2tQjDEx6S9zV7P8+z08dml3GtUtvYtwRaTUSmLS1f1QVa6evICc/YfDst/KwhoUY0zM+XRtNs9/
uoGr+p/AuV3CO0Frm4a1eH54Gpt3H2TU1HTyjtiQ9+VlDYoxJqbs3HeI22cupn2j2txzUfm7CFdEWusUnhzSg/Rvd/GbWYspLLTuxOURzgm2JorIdhFZFlI2I2RulI0ikuHKW4vIwZBlz4Vs00dElopIpoiMd5NpISIpIjJPRNa6+/quXNx6mSKyRER6h+s9GWOCRVW567Ul5B48wvhhvaheNSFir3Vx92aMHdiJd5Zs4fF/r47Y68STcB6hTAYGhBao6hWq2lNVe+JNDfx6yOJ1RctU9YaQ8meB64D27la0z7HAfFVtD8x3zwEGhqw7ym1vjIlDL331He+v3M7YAZ3o3LRuxF/v+jNP5MqTW/HsR+t45avvIv56sS5sDYqqfgLklLTMHWUMAaaVtQ83H3xdVf1SvZ+sTgUGu8WDgCnu8ZRi5VPV8yWQXDS/vDEmfqzZtpeH3lnBWR1Tufq01lF5TRHhjz/vylkdU7n3rWV8uHp7VF43VkXrGsoZwDZVXRtS1kZEFonIxyJyhitrDmSFrJPlygAau3nlAbYCjUO22VTKNj8iIqNEJF1E0rOzs4/j7RhjoinvSAG3TltEneqJPH5ZD9yZ8KhITKjC33/Zm46N63Dzy9+w/PvcqL12rIlWgzKMHx+dbAFaqWov4HbgFREp9/GrO3qp8FUyVZ2gqmmqmpaaahPrGBMrHn1vFau27uXxy3qQWqda1F+/drVEJo7sS90aVblm8gK25B6MeoZYEPEGRUQSgV8AM4rKVPWQqu50jxcC64AOwGagRcjmLVwZwLaiU1nuvujYczPQspRtjDEx7sPV25n02UZGntqaszs18i1Hk3rVmTiyL/sPFXD1pAXszTviW5agisYRyrnAKlX94VSWiKSKSIJ7fCLeBfX17pTWHhHp7667DAfecpvNBka4xyOKlQ93vb36A7khp8aMMTEse+8h7py1mE5N6jB2YCe/49C5aV2eubI3a7fv46ZXFnHEhrz/kXB2G54GfAF0FJEsEbnWLRrK/16MPxNY4roRvwrcoKpFF/RvBF4AMvGOXN515Y8A54nIWrxG6hFXPgdY79Z/3m1vjIlxqspvX13M3rx8xg2NbBfhijizQyoPX9KNT9Zkc++by2zI+xBhGxxSVYeVUj6yhLLX8LoRl7R+OtCthPKdwDkllCtwUwXjGmMCbsrnG/lwdTYPDOpKxyZ1/I7zI1f0bcWmnIP8/cNMWqbU5Kaz2/kdKRBstGFjTOCs2rqHh99dxTmdGnFV/xP8jlOiO87vwKZdB3h87mpa1K/BoJ4ldi6tVKxBMcYESlEX4Xo1qvLYZd2j2kW4IkSExy7rzpbdedw5awlN69WgX5vKPeS9jeVljAmUh+esZM22fTxxeQ8a1I5+F+GKqJaYwIThfWiRUoNR/0xnXfY+vyP5yhoUY0xgzF+5jalffMuvTm/DmR1i47diyTWTmDyyHwkiXD1pATv3HfI7km+sQTHGBML2PXnc+eoSujSty50DOvodp0JaNajJCyPS2LYnj19V4iHvrUExxviusFC5Y9ZiDhzOZ/ywnlRLDEYX4Yro1ao+44b2JGPTbm6bnlEph7y3BsUY47uJn23g07U7uPfiLrRrFKwuwhUxoFtT7rmwM+8t38qf313pd5yos15exhhfLf8+l8feW815XRrzy36t/I5z3K49vQ2bcg7w/KcbaJlSk+GntPY7UtRYg2KM8c3Bw14X4fq1qvLopcHtIlwRIsIfftaVzbsPcv/s5TRPrsE5ncM7TXFQ2SkvY4xvHvrXCtbv2M+TQ3qSUivJ7zhhk1BFGD+sF12b1ePmVxaxNKtyDHlvDYoxxhdzl2/l5a++Y9QZJ3Jau4Z+xwm7mkmJvDgyjZRaSVwzZQFZuw74HSnirEExxkTd1tw87nptCd2a1+WO82Ori3BFNKpTnUlX9yXvSAHXTF5A7sH4HvLeGhRjTFR5XYQzOHSkkHFDe5GUGN9/hjo0rsM//q8P67P3
8+uXFnI4P36HvI/vf0ljTOA8/+l6Psvcyf0/70Lb1Np+x4mKU9s15JFLu/P5up3c/cbSuB3yPpzzoUwUke0isiyk7H4R2SwiGe52Yciy34lIpoisFpELQsoHuLJMERkbUt5GRL5y5TNEJMmVV3PPM93y1uF6T8aY8Fqalcvjc1czsFsThqS1PPoGceSyPi247dz2vLowi/HzM/2OExHhPEKZDAwoofwpVe3pbnMARKQL3sRbXd02z4hIgpvF8WlgINAFGObWBXjU7asdsAsomsDrWmCXK3/KrWeMCZgDh/MZPX0RqXWq8edfnBQXXYQravQ57flF7+Y89f4aXluYdfQNYkzYGhRV/QTIOeqKnkHAdDe3/Aa82Rb7uVumqq5X1cPAdGCQmw74p3izOwJMAQaH7GuKe/wqcI5Uxk+qMQH3wNsr2LDT6yKcXDN+ughXhIjwyC+6c8qJDRj7+hI+X7fD70hhFY1rKDeLyBJ3Sqy+K2sObApZJ8uVlVbeANitqvnFyn+0L7c8163/P0RklIiki0h6dnb28b8zY0y5vLt0C9MXbOLXP2nLKW1L/O9ZaSQlVuG5q/rQukEtrv/nQtZu2+t3pLCJdIPyLNAW6AlsAZ6I8OuVSVUnqGqaqqalpsbG0NjGxLrvdx9k7OtL6dGiHmPO6+B3nECoV6MqE0f2pVpiAiMnLWD73jy/I4VFRBsUVd2mqgWqWgg8j3dKC2AzEHpFroUrK618J5AsIonFyn+0L7e8nlvfGOOzgkJlzIwM8gu8LsJVE6xjaZGWKTWZODKNnP2H+dWUdA4czj/6RgEX0X9dEWka8vQSoKgH2GxgqOuh1QZoD3wNLADaux5dSXgX7mer18fuQ+Ayt/0I4K2QfY1wjy8DPtB47ZNnTIx57uN1fLUhhz8O6kbrhrX8jhM43Vsk87dhvVi2OZdbp2VQEOND3oez2/A04Augo4hkici1wGMislRElgBnA2MAVHU5MBNYAbwH3OSOZPKBm4G5wEpgplsX4C7gdhHJxLtG8qIrfxFo4MpvB37oamyM8U/Gpt08NW8NF3dvyqW9mx99g0rq3C6Nue9nXXl/5TYefGeF33GOi1TWL/NpaWmanp7udwxj4tK+Q/lcNP5T8guUOaPPoF6Nqn5HCrwH31nBi//ZwB8u7sI1p7fxO06pRGShqqaVtMyGrzfGhN39s5ezKecA00edYo1JOd19YWeydh3gwX+toHn9GlzQtYnfkSrMrpAZY8Lq7cXf8+rCLG4+ux392qT4HSdmJFQR/npFL7q3SGb09EVkbNrtd6QKswbFGBM2WbsOcPcbS+ndKplbz2nvd5yYUyMpgRdHpJFapxq/mrKATTmxNeS9NSjGmLDILyhkzIwMVGHc0F4kWhfhY9KwdjUmjezHkQJlxKSv2X3gsN+Rys3+xY0xYfHMR+tYsHEXDw7uSsuUmn7HiWntGtVmwlV9yMo5yPX/XMih/AK/I5WLNSjGmOO28NtdjJu/lsE9m3FJrxZ+x4kLJ5/YgMcv785XG3K469UlMTHkvfXyMsYcl715R7htxiKa1qvOA4O7+R0nrgzq2ZxNOQf4y7/X0CqlJrcHfHZLa1CMMcflD28t5/vdecy8/hTqVrcuwuF209nt2JRzkPEfZNIipWag55GxBsUYc8zeXLSZNxZtZsy5HehzQv2jb2AqTER46JJufJ97kLtfX0qzejU4vX1Dv2OVyK6hGGOOyaacA/z+zWWknVCfm85u63ecuFY1oQpPX9mbdo1q8+uXFrJq6x6/I5XIGhRjTIXlFxQyevoiROCvQ3taF+EoqFvdG/K+RlIC10xawLY9wRvy3j4FxpgKG/9BJt98t5s/XXISLepbF+FoaZZcg4kj+7L74BGumbyA/YeCNeS9NSjGmApZsDGHv3+wlkt7t+DnPZr5HafS6da8Hk//sjcrt+zhlmmLyC8o9DvSD8I5fP1EEdkuIstCyh4XkVVuCuA3RCTZlbcWkYMikuFu
z4Vs08cNeZ8pIuOL5ocXkRQRmScia919fVcubr1M9zq9w/WejDE/lnvwCLdNz6BlSk3+OKir33EqrbM7NeKBQd34YNV27n97eWB+oxLOI5TJwIBiZfOAbqraHVgD/C5k2TpV7eluN4SUPwtchzfpVvuQfY4F5qtqe2A+/533ZGDIuqPc9saYMFNVfv/mMrbuyeOvV/SkdjXrJOqn/+t/Atf/5ERe+vI7nv90vd9xgDA2KKr6CZBTrOzfbtIsgC/xpu4tlZvhsa6qfulmXZwKDHaLBwFT3OMpxcqnqudLvKmCQ2eKNMaEwevfbObtxd9z+3kd6NXKuggHwV0XdOKik5ry8JxV/GvJFr/jRPUayjXAuyHP24jIIhH5WETOcGXNgayQdbJcGUBjVS2qsa1A45BtNpWyzY+IyCgRSReR9Ozs7ON4K8ZULht37OcPby3j5DYp3PAT6yIcFFWqCE8M6UGfE+ozZmYGC7/NOfpGkcwTjRcRkXuAfOBlV7QFaKWqvfCm7X1FROqWd3/u6KXCJw1VdYKqpqlqWmpqakU3N6ZSOlJQyOgZGSRUEZ66oicJVcTvSCZE9aoJPD88jWb1qnPd1IVs3LHftywRb1BEZCRwMXClawhQ1UOqutM9XgisAzoAm/nxabEWrgxgW9GpLHe/3ZVvBlqWso0x5jj99f01LN60m0cu7U6z5Bp+xzElSKmVxKSr+6GqXD15ATn7/RnyPqINiogMAH4L/FxVD4SUp4pIgnt8It4F9fXulNYeEenvencNB95ym80GRrjHI4qVD3e9vfoDuSGnxowxx+HL9Tt55qN1DElrwYUn2aXJIGvTsBbPD09j8+6DjJqaTt6R6A95H85uw9OAL4COIpIlItcCfwfqAPOKdQ8+E1giIhnAq8ANqlp08u9G4AUgE+/Ipei6yyPAeSKyFjjXPQeYA6x36z/vtjfGHKfdBw4zZkYGrRvU4r6fWRfhWJDWOoUnh/Qg/dtd/GbWYgoLo9udOGz9/lR1WAnFL5ay7mvAa6UsSwf+Zwxsd4rsnBLKFbipQmGNMWVSVe5+YynZew/x+o2nUsu6CMeMi7s3I2vXQR55dxUtU2py14BOUXtt+5QYY/7HrPQs5izdytiBnejeItnvOKaCrj/zRL7LOcCzH62jZf2a/PLkVlF5XWtQjDE/sj57H/fNXs6pbRsw6owT/Y5jjoGI8MDPu7J510HufWsZzZKrc1bHRhF/XRvLyxjzg8P5hYyenkG1qlV4ckhPqlgX4ZiV6Ia879i4Dje9/A3Lv8+N+Gtag2KM+cET81azdHMuj17anSb1qvsdxxyn2tUSmTiyL3VrVOWayQvYknswoq9nDYoxBoDPM3cw4ZP1DOvXigu6NvE7jgmTJvWqM3FkX/YfKuDqSQvYm3ckYq9lDYoxhl37DzNmZgYnNqzFvRd39juOCbPOTevyzJW9Wbt9Hze9sogjERry3hoUYyo5VeWu15aQs/8w44b2omaS9dWJR2d2SOVPg7vxyZps7n1zWUSGvLdPjjGV3LSvN/HvFdv4/UWd6da8nt9xTAQN7deKTbsO8PSH62iZUpObzm4X1v1bg2JMJZa5fS8PvLOcM9o35JrT2vgdx0TBb87vSNaugzw+dzUt6tdgUM8SB2c/JtagGFNJHcov4NZpGdRMSuSJy3tYF+FKQkR47LLubNmdx52zltC0Xg36tUkJy77tGooxldTj761mxZY9PHZpdxrVtS7ClUm1xAQmDO9Di5QajPpnOuuy94Vlv9agGFMJfbImmxf+s4Gr+p/AuV0aH30DE3eSayYxeWQ/EkS4etICdu47dNz7tAbFmEpm575D3DFrMe0b1eaei6yLcGXWqkFNnh+RxrY9efwqDEPeW4NiTCWiqvz21SXkHjzC+GG9qF41we9Ixme9W9Vn3NCeZGzazW3TM45ryPuwNigiMlFEtovIspCyFBGZJyJr3X19Vy4iMl5EMkVkiYj0DtlmhFt/rYiMCCnv
IyJL3Tbj3SRcpb6GMebHXvryW+av2s7YAZ3o3LTcs26bODegW1PuubAz7y3fyp/fXXnM+wn3EcpkYECxsrHAfFVtD8x3zwEG4s3U2B4YBTwLXuMA3AecDPQD7gtpIJ4FrgvZbsBRXsMY46zZtpeH/rWSszqmcvVprf2OYwLm2tPbMOKUE3j+0w1M/WLjMe0jrA2Kqn4C5BQrHgRMcY+nAINDyqeq50sg2c0VfwEwT1VzVHUXMA8Y4JbVVdUv3aRaU4vtq6TXMMYAeUcKuHXaIupUT+Txy3rgDu6N+YGI8IefdeXczo24f/Zy5q/cVuF9ROMaSuOQOd63AkVdSpoDm0LWy3JlZZVnlVBe1mv8iIiMEpF0EUnPzs4+xrdjTOx59L1VrNq6l8cv60FqnWp+xzEBlVBFGD+sF12b1ePmVxaxNKtiQ95H9aK8O7KI6CTHZb2Gqk5Q1TRVTUtNTY1kDGMC48PV25n02UZGntqasztFfpIlE9tqJiXy4og0Umolcc2UBWTtOlDubaPRoGxzp6tw99td+WagZch6LVxZWeUtSigv6zWMqdSy9x7izlmL6dSkDmMHRm9ucRPbGtWtzqSr+5J3pIBrJi8g92D5hryPRoMyGyjqqTUCeCukfLjr7dUfyHWnreYC54tIfXcx/nxgrlu2R0T6u95dw4vtq6TXMKbSUlXufHUxe/PyGTfUugibiunQuA7/+L8+rM/ez69fWsjh/KMPeR/ubsPTgC+AjiKSJSLXAo8A54nIWuBc9xxgDrAeyASeB24EUNUc4EFggbs94Mpw67zgtlkHvOvKS3sNYyqtyZ9v5KPV2dxzUWc6NqnjdxwTg05t15BHLu3O5+t2cvcbS4865H1YB4dU1WGlLDqnhHUVuKmU/UwEJpZQng50K6F8Z0mvYUxltXLLHv787irO6dSIq/qf4HccE8Mu69OCTTkHGDd/La1Sapa5ro02bEycKeoiXK9GVR67rLt1ETbH7bZz27Mp5wBPzltT5nrWoBgTZx6es5K12/cx9Zp+NKhtXYTN8RMRHrm0O1ty8/i2jPVsLC9j4sj7K7Yx9Ytv+dXpbTizg3WNN+GTlFiFSVf3LXMda1CMiRPb9+Tx29eW0KVpXe4c0NHvOCYOHa2noDUoxsSBwkLljlmLOXA4n/HDelIt0boIm+izBsWYODDxsw18unYH917chXaNrIuw8Yc1KMbEuGWbc3n0vVWc36Uxv+zXyu84phKzBsWYGHbwcAGjpy8ipVYSj15qXYSNv6zbsDEx7MF/rWD9jv28dO3J1K+V5HccU8nZEYoxMeq9ZVt55avvGHXmiZzWrqHfcYyxBsWYWLQ1N4+xry/hpOb1uOM86yJsgsEaFGNiTGGhcvvMDA4dKWTc0J4kJdp/YxMMdg3FmBgz4dP1fL5uJ49eehInptb2O44xP4j4VxsR6SgiGSG3PSJym4jcLyKbQ8ovDNnmdyKSKSKrReSCkPIBrixTRMaGlLcRka9c+QwRsauTJi4tydrNX+auZmC3JgxJa3n0DYyJoog3KKq6WlV7qmpPoA9wAHjDLX6qaJmqzgEQkS7AUKArMAB4RkQSRCQBeBoYCHQBhrl1AR51+2oH7AKujfT7Miba9h/KZ/T0DFLrVOPPvzjJugibwIn2yddzgHWqWtaAlYOA6ap6SFU34E2m1c/dMlV1vaoeBqYDg9zsjT8FXnXbTwEGR+wdGOOTB95ewcad+3nqip4k17SDcBM80W5QhgLTQp7fLCJLRGSim+4XoDmwKWSdLFdWWnkDYLeq5hcrNyZuzFm6hRnpm7jxrLb0P7GB33GMKVHUGhR3XePnwCxX9CzQFugJbAGeiEKGUSKSLiLp2dnZkX45Y8Li+90HGfvaEnq0TOa2czv4HceYUkXzCGUg8I2qbgNQ1W2qWqCqhXhzyvdz620GQq82tnBlpZXvBJJFJLFY+f9Q1QmqmqaqaampNleECb6CQmXMjAwKCpVxV/SkaoJ1ETbB
Fc1P5zBCTneJSNOQZZcAy9zj2cBQEakmIm2A9sDXwAKgvevRlYR3+my2m5v+Q+Ayt/0I4K2IvhNjouS5j9fx1YYc/jioG60b1vI7jjFlisrvUESkFnAecH1I8WMi0hNQYGPRMlVdLiIzgRVAPnCTqha4/dwMzAUSgImqutzt6y5guog8BCwCXoz4mzImwhZ9t4sn563h4u5NubS3XRY0wSfeF/zKJy0tTdPT0/2OYUyJ9h3K58Jxn1JQqMwZfQb1alT1O5IxAIjIQlVNK2mZ/VLemAC6763lZO06wIzrT7HGxMQMu8JnTMDMXvw9r32Txc1nt6Nv6xS/4xhTbtagGBMgi77bxT1vLKV3q2RuPae933GMqRA75WVMABQUKs99vI4n562hSd3qjBvai0TrImxijDUoxvhsS+5BxszI4Mv1OfysRzMeGtzNrpuYmGQNijE+em/ZFu56bSlHCgr5y+U9uLR3cxv00cQsa1CM8cGBw/k8+M5Kpn39Hd1b1GPc0F60sR8umhhnDYoxUbb8+1xunbaI9Tv2c8NP2nL7eR1s1kUTF6xBMSZKCguVSZ9v5NF3V5FcsyovXXsyp7Vr6HcsY8LGGhRjoiB77yF+M2sxH6/J5tzOjXnssu6k1LI5TUx8sQbFmAj7aPV2fjNrMXvz8nlwcDf+7+RWduHdxCVrUIyJkEP5BTz67momfraBTk3q8Mp1/enQuI7fsYyJGGtQjImAzO17uWVaBiu37GHkqa0ZO7AT1asm+B3LmIiyBsWYMFJVpn29iQfeWU7NpEReHJHGOZ0b+x3LmKiI5hTAG0VkqYhkiEi6K0sRkXkistbd13flIiLjRSTTzTnfO2Q/I9z6a0VkREh5H7f/TLetnaQ2UbVr/2FueGkhd7+xlL6tU3hv9BnWmJhKJdqd389W1Z4hY+mPBearantgvnsO3nTB7d1tFN7884hICnAfcDLelMH3FTVCbp3rQrYbEPm3Y4zni3U7GTjuUz5YtZ17LuzMlKv70ahudb9jGRNVfv+aahAwxT2eAgwOKZ+qni/x5oxvClwAzFPVHFXdBcwDBrhldVX1Szcl8NSQfRkTMUcKCnl87ip++cKX1ExK4I0bT+O6M0+kShU7QDaVTzSvoSjwb9/fi0MAABXuSURBVBFR4B+qOgForKpb3PKtQNH5gebAppBts1xZWeVZJZT/iIiMwjvioVWrVsf7fkwl9+3O/dw6PYPFm3ZzRVpL/vCzLtSqZpclTeUVzU//6aq6WUQaAfNEZFXoQlVV19hEjGvEJoA3BXAkX8vEtzcWZXHvm8upIvD0L3tzUfemfkcyxndRa1BUdbO73y4ib+BdA9kmIk1VdYs7bbXdrb4ZaBmyeQtXthk4q1j5R668RQnrGxNWe/OOcO+by3gz43v6tU7hqaE9aZ5cw+9YxgRCVK6hiEgtEalT9Bg4H1gGzAaKemqNAN5yj2cDw11vr/5Arjs1Nhc4X0Tqu4vx5wNz3bI9ItLf9e4aHrIvY8Lim+92ceH4T3l7yRZuP68D00b1t8bEmBDROkJpDLzhevImAq+o6nsisgCYKSLXAt8CQ9z6c4ALgUzgAHA1gKrmiMiDwAK33gOqmuMe3whMBmoA77qbMcetoFB59qNMnnp/LU3rVWfm9f3pc4LN9W5MceJ1iqp80tLSND093e8YJuC+332Q22Zk8PUGbzbFP13SjbrVbTZFU3mJyMKQn378iHVJMaYU7y7dwtjXl5JfUMgTl/fgFzabojFlsgbFmGK82RRXMO3rTXRvUY/xQ3vR2mZTNOaorEExJsSyzbncOn0RG3bs59dntWXMuTabojHlZQ2KMXizKU78bAOPvbea+rWq8vK1J3OqzaZoTIVYg2Iqve178/jNrCV8siab87o05tFLbTZFY46FNSimUvtw1XbufNWbTfGhwd240mZTNOaYWYNiKqW8IwU8+t4qJn22kU5N6jDtuv60t9kUjTku1qCYSmfttr3cMm0Rq7bu
tdkUjQkja1BMpaGqvPL1dzz4zgpqJSUycWQaP+1kE2AZEy7WoJhKYdf+w9z12hL+vWIbZ7RvyBNDetCojk2AZUw4WYNi4t7n63YwZkYGOfsP8/uLOnPNaW1sAixjIsAaFBO3jhQU8tS8NTz78TraNKzFiyP60q15Pb9jGRO3rEExcenbnfu5ddoiFmflMrSvN5tizST7uBsTSfY/zMQVVeWNRZu5981lJFQRnrmyNxeeZLMpGhMNER+kSERaisiHIrJCRJaLyGhXfr+IbBaRDHe7MGSb34lIpoisFpELQsoHuLJMERkbUt5GRL5y5TNExH7mXAntyTvCbTMyuH3mYro2r8d7t51pjYkxURSNI5R84A5V/cbN2rhQROa5ZU+p6l9CVxaRLsBQoCvQDHhfRDq4xU8D5wFZwAIRma2qK4BH3b6mi8hzwLXAsxF/ZyYwFn67i9HTF7ElN487zuvAjWe3I8EuvBsTVRE/QlHVLar6jXu8F1gJNC9jk0HAdFU9pKob8GZt7Odumaq6XlUPA9OBQW7K358Cr7rtpwCDI/NuTNAUFCp/m7+WIf/4AoCZ15/CLee0t8bEGB9EdVxuEWkN9AK+ckU3i8gSEZno5ogHr7HZFLJZlisrrbwBsFtV84uVl/T6o0QkXUTSs7Ozw/COjJ827z7IsAlf8sS8NVzcvSlzRp9BnxPqH31DY0xERK1BEZHawGvAbaq6B++UVFugJ7AFeCLSGVR1gqqmqWpaampqpF/ORNCcpVsY+NdPWP59Lk8O6cFfr+hpU/Ma47Oo9PISkap4jcnLqvo6gKpuC1n+PPCOe7oZaBmyeQtXRinlO4FkEUl0Rymh65s4c+BwPg+8vYLpCzbRo2Uy44f25IQGNpuiMUEQjV5eArwIrFTVJ0PKQ7vfXAIsc49nA0NFpJqItAHaA18DC4D2rkdXEt6F+9mqqsCHwGVu+xHAW5F8T8YfyzbncvH4/zAjfRM3ntWWV284xRoTYwIkGkcopwFXAUtFJMOV3Q0ME5GegAIbgesBVHW5iMwEVuD1ELtJVQsARORmYC6QAExU1eVuf3cB00XkIWARXgNm4kRhofLifzbw2NxVNKhVjZd/dTKntrXZFI0JGvG+4Fc+aWlpmp6e7ncMcxTb9+Zxx8zFfLp2B+e72RTr22yKxvhGRBaqalpJy+yX8iawPli1jTtnLWH/4Xz+dEk3ftnPZlM0JsisQTGBk3ekgEfeXcXkz73ZFKcPs9kUjYkF1qCYQFmzbS+3utkUrzmtDb8d0NFmUzQmRliDYgJBVXnpq+946J0V1K6WyKSRfTm7UyO/YxljKsAaFOO7HDeb4rwV2zizQyp/uby7zaZoTAyyBsX46vPMHYyZabMpGhMPrEExvjicX8iT89bwj09sNkVj4oU1KCbqNu7Yz63TF7EkK5dh/Vpx78WdbTZFY+KA/S82UaOqvPbNZu57axmJCVV49sreDLQJsIyJG9agmKjYk3eEe95YxtuLv+fkNik8dUVPmiXX8DuWMSaMrEExEbfw2xxGT89gS24evzm/A78+y2ZTNCYeWYNiIia/oJCnP1zH+A/W0iy5OrNuOIXerWwCLGPilTUoJiI27z7IbdMXsWDjLgb3bMaDg7tRxybAMiauxU2DIiIDgHF4Q9u/oKqP+Byp0vrXki387vUlFCo8dUUPLunVwu9IxpgoiIsGRUQSgKeB8/DmlF8gIrNVdYW/ySqX/Yfy+ePby5mZnmWzKRpTCcVFgwL0AzJVdT2AiEwHBuFN0lWi7H2HmPDJuqPuuCLTxZR31Yrts/wrR2Jqm/LOl6MKbyzazIad+7np7Lbcdm4HqiZEfEJQY0yAxEuD0hzYFPI8Czi5+EoiMgoYBZDUpB0Pz1kVnXSVRPPkGrzyq/6c0raB31GMMT6IlwalXFR1AjABoHefPvrpHy8o13YVmdNJKN/KkZonqrz7LW/OiuwzQcTG4TKmEouXBmUz0DLkeQtXVqoqItSqFi9v
3xhj/BcvJ7kXAO1FpI2IJAFDgdk+ZzLGmEolLr6iq2q+iNwMzMXrNjxRVZf7HMsYYyqVuGhQAFR1DjDH7xzGGFNZxcspL2OMMT6zBsUYY0xYWINijDEmLKS8v4SONyKyF1jtd45SNAR2+B3iGAQ5d5CzlSSoeYOaqzyCmj2ouUrTUVXrlLQgbi7KH4PVqprmd4iSiEh6ULOVJci5g5ytJEHNG9Rc5RHU7EHNVRoRSS9tmZ3yMsYYExbWoBhjjAmLytygTPA7QBmCnK0sQc4d5GwlCWreoOYqj6BmD2qu0pSat9JelDfGGBNelfkIxRhjTBhZg2KMMSYsrEExxhgTFtag+EBEeotIVb9zxBOr0/ARkZ+KSC2/c8STylKncdmgiMgoERntHgdmCkER+aWILAYuAAr9zlMRVqfh4erxQRGp4XeW4kTkShFZCJwNHPE7T3lZnYbPcdelqsbNDagO3IM3W+MuoLXfmUJyPQJsBE4ttkz8zmd1GvGsAlQFfu3yrgLO8DtXSL5E4Dfu37e/33msTmO3LuPiCEVEEgBUNQ9IV9XmwPPAQ74Gc1yu7cAU4CsRqSEi54tIHXX/okFjdRoeIpKkniPAN0Bn4B/A1SLSwN90HlXNB9YCLwHfikiSiFwqIs18jlYiq9PwCXddxvzvUETkfiAV+EBVXxORKqpa6M5XZgCjVPXDovIo5roZ+FhVl7rn7YDrgZ5AE7yBKQWYq6oTop2vLFanYct7H3AS8A4wW1VzXHl1vCmqXwRm+ZFRRO4G5qvqV+55I2A48H94364zgMbAR6r6J7/rsojVaVjzhr8u/T7cOs5DtfvxZmkcDHwEjAFSQpbfAnxCFE+BACcAHwNbgXnFll0B/BVo5J6fi/chq+d3XVqdhj3zGODfwDnAP4FxQNOQ5cOAt4ATo5yrKfAasBtYW2zZKcDDQAv3vBveKZsGfn8urU5joy59/5AcR4VUBd4HOrvnPwEeB24qtt5nwC+AVsBFUchVBxjl7ucCI0OWJQHVQ543A6YCTfyuT6vTsOZNcBlOc887AH8GHi223j+Bq4G+wJVRylYTGOoeLwBuD1mWCFQrtv40vOHK/f5sWp3GQF3GxDWU4r2K3KHiEWAlXksK8DneP2Z3EekQsvpfgFfxvlXXjHAuUdW9wD/d/XPAzSHdWY+od+6/6BrFPXgfuOxw5ioPq9PIcHkLgG3Ar1xxJvA60FlE+oSsPhV4xi2rHo18qnoA+Jd7Oga4R0SS3PNCVT0EICJVReRvQF3g22hkK43VafhEui5jokEpTv97Tu9fQCsR6eT+GC4FcvG+peIq5168i0xdVHVWhHOpuz/oit4C1gB/DF0uIsOBdLxuhNe6f+Boq+2yFF18D0qdFs8V6DoVkeahz4vy4A2g10JE+ri63Qh8jXe9p+j6z4N4F207quqLkc4WknGv+8PyH7xTic+58kK33SDgC6AAuLyowY4WEeknInVD8gapTn+ULSRjIOtURH4uIm1Dcka2Lv067CrnodkAvD8gDwFpoYds7r4l3h+WP4csewe4NGR5yyjmqkKxawtAH2Ah3uma9u6+K9DOh/oUoBHetZEZxZb5VqdHySVBrFO8azULgYeKlVdx90nAb0PfDzAer7EDSCFCp+XKyPZDXQKJ7r4xkIM3a2BXvNOYLfChezjeKdYVwAu4aw4BqtPSsgWyTt1n4Au8I/XTo1WXUf3AlLMiBO/wajLwH+DneBddnwUahP5xAZKB0/B6JNzqlr8HDAhArqohzyfifXP+HDjB5/qt6+poJTDQlYVmjVqdHkMuX+vUfQaS8E4DZACDiy1PCHmcinex9n2803Bt8a7/jAhIttDrTi/g/Sh0Ad5Rpx+fy+p4p1aGBqVOjyGbr3XqPgO1gbfxvpyd4x5f6ZYnRrouo/7BqUDlXMJ/vzWfCTxXrOKecX9UGuJdNJqEd3rmfp9zPY3324jWrmwMsAm4MwB1WgXogveDwEHAFyHLqvpYp0fL
Fag6dVnuD8neo9jyp/GOYJvgfUP9E97puD8EINvfgTddfVcBrsI73eHr59P9QXvePa6B1+kjFfclwn02/arTo2V7Okh1SkjDh9crc2bI80Tgb5GqS98+QCVUwq3uD8qQYuWX4x22fYh3Tu9UoB/ekUL9kPUSCPmGEKBc5xDS7danOr00pCwZ79tWQ3d/g/sPk+ZDnR5rrqjWaUjeK9zztsB8vM4JGXjfAicAZwGt8f6o1y+2j2pBzObqN9nHz+Zl7nkrvIvDZ+B9U34TeBnvel1jvAvE0a7TY8oW7ToNyXt5sfIqwJXAE0V1hXeNZFKk6jKqH6JSKkPwvnF+BlyGd8pjJP/9XcFZeD++ScQbGuAFoHHI9gkBzZUYiVzHmT3Ffdj/4Nb7DbAfeLvY9tGu0/LmimqdlpK36BzzLXjXljriXcMZjddRITlk+4jUY5iy+fL5PEruJ/CG/TjXPe8CLAPaB6BOy5MtCJ/PkUBqyDqnAqtK2T7sdZmIz1RVReRs4Pfq/fp6H3A+oMAUVf2oaF0RWYp3mmm/615a1AUuiLnyI5GrPErJPgA4iPfhO1NE5uD9YPAzYD380GU32nVakVxRrdNS8g4UkSGq+jcRmaSq+1zGRcDJwJFI12OYsvny+Swjd9EPVG/B+5KGqq4Qkf8Atdz7qOJTnZY3WxA+n+fj9ST7p1vncxHJEpFBqvqW64mmkarLqHYbLum3D+5hOt7hJKr6Ht4YOJ2L/fYBvMo6ABxUT1iGLQhqrvKoQPbVQA+gF5AFLFDVrsBQ4CwRae5TnUY1VxjyrgL6iEjHoj/Yznl4n4G8cOcNcrayVDB3GrAH+D1wu4h0FZF78X5Zvsmt61edRjXbceZdC3QVkU5uvbruPRx262gk80b7dyg/GhI55E1lAnVE5CT3/GOgHlBXvEHVrhKRJXjngH8XgZY1qLnKo7zZP8E7/bEduEFV73Pr5+D9YnZzJcl1vHk/xuuRVgdARIaKyDK8o6q7I/QZCHK2slQ0dxtVfQzvtw83Ae3wrgvsrGTZjjdvPdzvuVR1D16X5cbRCBmVBkVE+ovIa8DT4o0Im+DKi065fQ3kA+eLSKKqrgCaA31U9TDet4Bfq+pwVd0e77kilH053h+WXqqaJyIJRd96in2bjctcYcxb9BlIc8u/JUKfgSBni0Dupnjn+1HVqcBoVR2hqlsqS7Yw5g39DIDX62typLNCFBoUETmL//58fzXeyJv1Q885qmom3qFbW2Cs2/QQbngCVf1IVT+rDLnK4zizb3TLC4oOf+M9V4TyFn0GvlDVTytTtgjmXl+0H/VGaag02SKQd2NI3qiNdBCNI5TueOfFX8Y7XKwK7NP/DknwkIi8iPfL3vFAP/FmOMvBGw2zsuUqj6BmD2quSOSdW4mzlSXIn4EgZ4uHvOHvNgz0BzqEPO/p3uB9eAOSfYT347kr8A4jXyFkyAy8c39h78Md1FyxnD2ouWIxb5CzxWruIGeLh7wlvocwVkYy3sCCe/F6Q9QOWdbPVUTReFDX4s3+1yNknSoR+kcKZK5Yzh7UXLGYN8jZYjV3kLPFQ96ybuE85VUL71D7Fvf4jKIFqvo13lAFRUM2f+AqcRf80Ic7Ut3ugpqrPIKaPai5ShPkvEHOVpYg5w5ytpLEWt5SHVeDIiLDReQnIlJXve6dE4CZQB5wsrj5k0WkGt4gfje6Tc/B+3V0HoS/T3RQc8Vy9qDmisW8Qc4Wq7mDnC0e8pZXheeUFxHBG1TsFbzRNNfhtaqjVXWHW+c0YAiQrqr/dGVd8c4FNsEbJfZmVV0ZpvcR2FyxnD2ouWIxb5CzxWruIGeLh7zHpILn+opG2e0AvFRUhjd65evF1h2DN19IMlDDldUgAvM9BzVXLGcPaq5YzBvkbLGaO8jZ4iHvMb/P8lYG8DDwKN5EMz/DG8+qaHkVYCvwk5Cy2njj3yzA66HQPBL/SEHMFcvZg5orFvMGOVus5g5ytnjIe7y3o15DEZGf
4PVzro/3M/8H8Q67zhaRfvDDebz73a3IRXjn/TKAkzTMQ2gENVd5BDV7UHPFYt4gZytLkHMHOVs85A2LcrSwZwBXhTx/Bm+49pHAwpBWtgneRaXWrmwQcGYEW/5A5orl7EHNFYt5g5wtVnMHOVs85A3Ley5HpdQEqvHfc4BX4uYbx2tBb3GP04BpUfzHCmSuWM4e1FyxmDfI2WI1d5CzxUPecNyOespLVQ+o6iH970il5+HNVAhwNd5w7u8A04Bv4H+HWo6EoOYqj6BmD2qu0gQ5b5CzlSXIuYOcrSSxljccyj3BlnijXCreMMizXfFe4G68eQE2qDvXp67ZjYag5iqPoGYPaq7SBDlvkLOVJci5g5ytJLGW93hU5IeNhXiDk+0AuruW9V6gUFX/o/5dOApqrvIIavag5ipNkPMGOVtZgpw7yNlKEmt5j11Fzo/hDV5WCPwHN9dyEG5BzRXL2YOaKxbzBjlbrOYOcrZ4yHustwr9Ul5EWgBXAU+q6qGKNFyRFNRc5RHU7EHNVZog5w1ytrIEOXeQs5Uk1vIeqwoPvWKMMcaUJNpzyhtjjIlT1qAYY4wJC2tQjDHGhIU1KMYYY8LCGhRjjDFhYQ2KMcaYsLAGxRhjTFj8P9U6E+dilPRXAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_dates(df_user_item_train['event_dttm']).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2018-07-01    118286\n",
       "Name: event_dttm, dtype: int64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot_dates(df_user_item_test['event_dttm'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Пересечение train и test по просмотренным items (пары customer_id – story_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "337245"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_user_item_train.groupby(['customer_id', 'story_id']).count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "113391"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_user_item_test.groupby(['customer_id', 'story_id']).count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>event_dttm_x</th>\n",
       "      <th>event_x</th>\n",
       "      <th>event_dttm_y</th>\n",
       "      <th>event_y</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>521</th>\n",
       "      <th>1211</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1939</th>\n",
       "      <th>509</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2679</th>\n",
       "      <th>509</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2846</th>\n",
       "      <th>956</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3431</th>\n",
       "      <th>1312</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>974220</th>\n",
       "      <th>1245</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>975675</th>\n",
       "      <th>1286</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>979046</th>\n",
       "      <th>1224</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980003</th>\n",
       "      <th>449</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987293</th>\n",
       "      <th>509</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>834 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      event_dttm_x  event_x  event_dttm_y  event_y\n",
       "customer_id story_id                                              \n",
       "521         1211                 2        2             2        2\n",
       "1939        509                  1        1             1        1\n",
       "2679        509                  1        1             1        1\n",
       "2846        956                  1        1             1        1\n",
       "3431        1312                 1        1             1        1\n",
       "...                            ...      ...           ...      ...\n",
       "974220      1245                 1        1             1        1\n",
       "975675      1286                 1        1             1        1\n",
       "979046      1224                 1        1             1        1\n",
       "980003      449                  1        1             1        1\n",
       "987293      509                  1        1             3        3\n",
       "\n",
       "[834 rows x 4 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(\n",
    "    df_user_item_train.groupby(['customer_id', 'story_id']).count(),\n",
    "    df_user_item_test.groupby(['customer_id', 'story_id']).count(),\n",
    "    how='inner', left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# transactions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>transaction_month</th>\n",
       "      <th>transaction_day</th>\n",
       "      <th>transaction_amt</th>\n",
       "      <th>merchant_id</th>\n",
       "      <th>merchant_mcc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>855115</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>1500</td>\n",
       "      <td>4554547</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>997036</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1657528</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>398237</td>\n",
       "      <td>5</td>\n",
       "      <td>24</td>\n",
       "      <td>2500</td>\n",
       "      <td>26375569</td>\n",
       "      <td>5813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>997036</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>16304402</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>291636</td>\n",
       "      <td>7</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>1259505</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  transaction_month  transaction_day  transaction_amt  \\\n",
       "0       855115                  7                3             1500   \n",
       "1       997036                  6                6                0   \n",
       "2       398237                  5               24             2500   \n",
       "3       997036                  6                2                0   \n",
       "4       291636                  7               25                0   \n",
       "\n",
       "   merchant_id  merchant_mcc  \n",
       "0      4554547          5411  \n",
       "1      1657528          5411  \n",
       "2     26375569          5813  \n",
       "3     16304402          5411  \n",
       "4      1259505          5411  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_trans = pd.read_csv('transactions.csv')\n",
    "df_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "46948"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_trans['customer_id'].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a: 34379, b: 46948, c: 30795\n"
     ]
    }
   ],
   "source": [
    "pd_intersection(\n",
    "    df_user_item_train['customer_id'].unique().tolist(),\n",
    "    df_trans['customer_id'].unique().tolist(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a: 17218, b: 46948, c: 15757\n"
     ]
    }
   ],
   "source": [
    "pd_intersection(\n",
    "    df_user_item_test['customer_id'].unique().tolist(),\n",
    "    df_trans['customer_id'].unique().tolist(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a: 22646, b: 46948, c: 20053\n"
     ]
    }
   ],
   "source": [
    "pd_intersection(\n",
    "    df_user_item_valid['customer_id'].unique().tolist(),\n",
    "    df_trans['customer_id'].unique().tolist(),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# stories_description"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>story_id</th>\n",
       "      <th>story_json</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>127</td>\n",
       "      <td>{\"guid\":\"770a5bae-0e3f-4a6b-b924-bd87bd51a038\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>254</td>\n",
       "      <td>{\"guid\":\"64f4c9ef-647b-4e04-b4d4-02297e939388\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>865</td>\n",
       "      <td>{\"guid\":\"3482206b-d223-4aec-92ba-0150055cd68a\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1491</td>\n",
       "      <td>{\"guid\":\"5f4a9215-01de-4777-b70f-a18899db8f1c\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>598</td>\n",
       "      <td>{\"guid\":\"ed8754bd-67be-4fa1-9289-5508d96f1fa4\"...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   story_id                                         story_json\n",
       "0       127  {\"guid\":\"770a5bae-0e3f-4a6b-b924-bd87bd51a038\"...\n",
       "1       254  {\"guid\":\"64f4c9ef-647b-4e04-b4d4-02297e939388\"...\n",
       "2       865  {\"guid\":\"3482206b-d223-4aec-92ba-0150055cd68a\"...\n",
       "3      1491  {\"guid\":\"5f4a9215-01de-4777-b70f-a18899db8f1c\"...\n",
       "4       598  {\"guid\":\"ed8754bd-67be-4fa1-9289-5508d96f1fa4\"..."
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_stories = pd.read_csv('stories_description.csv')\n",
    "df_stories.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import roc_auc_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dislike   -10.0\n",
       "skip       -0.1\n",
       "view        0.1\n",
       "like        0.5\n",
       "dtype: float64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_event_weights = pd.Series({\n",
    "    'dislike': -10,\n",
    "    'skip': -0.1,\n",
    "    'view': 0.1,\n",
    "    'like': 0.5,\n",
    "})\n",
    "df_event_weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tinkoff_metric(df):\n",
    "    \"\"\"Score predictions as the mean of event weight times predicted score.\n",
    "\n",
    "    Each row's `event` is mapped to its weight through the notebook-level\n",
    "    `df_event_weights` Series and multiplied by the row's `score`.\n",
    "\n",
    "    Returns a tuple: (mean weighted score rounded to 6 decimals,\n",
    "    length of the weighted series).\n",
    "    \"\"\"\n",
    "    weights = df['event'].map(df_event_weights)\n",
    "    weighted_scores = weights * df['score']\n",
    "    mean_score = weighted_scores.mean().round(6)\n",
    "    return mean_score, len(weighted_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tinkoff_metric_split(df):\n",
    "    \"\"\"Compute `tinkoff_metric` overall and separately for cold/warm customers.\n",
    "\n",
    "    A row is 'warm' when its `customer_id` occurs in the notebook-level\n",
    "    `df_user_item_train`, and 'cold' otherwise.\n",
    "\n",
    "    Returns a dict with keys 'all', 'cold' and 'warm', each holding the\n",
    "    tuple returned by `tinkoff_metric` for that subset of rows.\n",
    "    \"\"\"\n",
    "    train_customers = df_user_item_train['customer_id'].unique()\n",
    "    is_warm = df['customer_id'].isin(train_customers)\n",
    "\n",
    "    return {\n",
    "        'all': tinkoff_metric(df),\n",
    "        'cold': tinkoff_metric(df[~is_warm]),\n",
    "        'warm': tinkoff_metric(df[is_warm]),\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def label_ranking_average_precision_score(df_scores, reduce=True):\n",
    "    \"\"\"Average precision of each user's ranked items, optionally averaged over users.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df_scores : pandas.DataFrame\n",
    "        Scored candidates. Must share `user_id` and `item_id` columns with\n",
    "        `df_true`; rows are ranked by a `relevance` column (presumably\n",
    "        supplied by `df_scores` -- TODO confirm).\n",
    "    reduce : bool, default True\n",
    "        If True, return the mean over users as a float; if False, return\n",
    "        the per-user Series of scores.\n",
    "\n",
    "    NOTE(review): relies on notebook-level `df_true` (ground-truth pairs)\n",
    "    and `logger`, neither of which is defined in the visible cells --\n",
    "    confirm they exist before calling.\n",
    "    \"\"\"\n",
    "    # Mark every scored pair that also appears in the ground truth.\n",
    "    df = pd.merge(df_scores, df_true.assign(hit=1), how='left', on=['user_id', 'item_id'])\n",
    "    df['hit'] = df['hit'].fillna(0)\n",
    "    # Order each user's items from highest to lowest relevance.\n",
    "    df = df.sort_values(['user_id', 'relevance'], ascending=[True, False])\n",
    "\n",
    "    df['rank'] = df.groupby('user_id').cumcount() + 1\n",
    "    df['hit_count'] = df.groupby('user_id')['hit'].cumsum()\n",
    "    # Precision at each row's position; only rows that are hits are kept.\n",
    "    df['score'] = df['hit_count'] / df['rank']\n",
    "    df = df[df['hit'].eq(1)]\n",
    "\n",
    "    # Log the share of hits at the first `show_pos` observed ranks, followed\n",
    "    # by the combined share of all remaining ranks.\n",
    "    rank_hist = df.groupby('rank')['user_id'].count().sort_index()\n",
    "    total_rank_hist = rank_hist.sum()\n",
    "    show_pos = 5\n",
    "    info = ', '.join(\n",
    "        f'{x:.2f}' for x in [v / total_rank_hist for _, v in sorted(rank_hist.iloc[:show_pos].to_dict().items())] +\n",
    "        [rank_hist.iloc[show_pos:].sum() / total_rank_hist]\n",
    "    )\n",
    "    logger.info(f'rank_hist: [{info} ...] from {total_rank_hist}')\n",
    "\n",
    "    per_user_score = df.groupby('user_id')['score'].mean()\n",
    "\n",
    "    if not reduce:\n",
    "        return per_user_score\n",
    "    return float(per_user_score.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Const"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Constant baseline: every test row gets the same score of -1.\n",
    "df_test_predict = df_user_item_test.assign(score=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'all': (0.057478, 118286),\n",
       " 'cold': (0.090072, 22634),\n",
       " 'warm': (0.049765, 95652)}"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tinkoff_metric_split(df_test_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Popular prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>event</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>dislike</th>\n",
       "      <td>0.014606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>like</th>\n",
       "      <td>0.132426</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>skip</th>\n",
       "      <td>0.353203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>view</th>\n",
       "      <td>0.499765</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         customer_id\n",
       "event               \n",
       "dislike     0.014606\n",
       "like        0.132426\n",
       "skip        0.353203\n",
       "view        0.499765"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_user_item_train.groupby('event')[['customer_id']].count() / len(df_user_item_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "event\n",
       "dislike    0.068501\n",
       "like       0.252072\n",
       "skip       0.734780\n",
       "view       0.823032\n",
       "dtype: float64"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_user_item_train.pivot_table(\n",
    "    index='customer_id', columns='event',\n",
    "    values='story_id', aggfunc='count', fill_value=0).gt(0).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "event\n",
       "dislike    0.533825\n",
       "like       0.674047\n",
       "skip       0.932349\n",
       "view       0.971710\n",
       "dtype: float64"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_user_item_train.pivot_table(\n",
    "    index='story_id', columns='event',\n",
    "    values='customer_id', aggfunc='count', fill_value=0).gt(0).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>event</th>\n",
       "      <th>dislike</th>\n",
       "      <th>like</th>\n",
       "      <th>skip</th>\n",
       "      <th>view</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.411765</td>\n",
       "      <td>0.588235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.846154</td>\n",
       "      <td>0.153846</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.562500</td>\n",
       "      <td>0.437500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1095251</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1098683</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1099266</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1099955</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1103495</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>34379 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "event        dislike      like      skip      view\n",
       "customer_id                                       \n",
       "15               0.0  0.000000  0.411765  0.588235\n",
       "73               0.0  0.000000  0.846154  0.153846\n",
       "129              0.0  0.000000  0.562500  0.437500\n",
       "144              0.0  0.000000  0.000000  1.000000\n",
       "150              0.0  0.333333  0.666667  0.000000\n",
       "...              ...       ...       ...       ...\n",
       "1095251          0.0  0.000000  0.000000  1.000000\n",
       "1098683          0.0  0.000000  1.000000  0.000000\n",
       "1099266          0.0  0.000000  0.500000  0.500000\n",
       "1099955          0.0  0.000000  0.000000  1.000000\n",
       "1103495          0.0  0.000000  0.000000  1.000000\n",
       "\n",
       "[34379 rows x 4 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fit users: per-customer share of each event type in the training reactions\n",
    "df_train_user_popularity = (\n",
    "    df_user_item_train\n",
    "    .pivot_table(index='customer_id', columns='event',\n",
    "                 values='story_id', aggfunc='count', fill_value=0)\n",
    "    # Divide each row of counts by its row total.\n",
    "    .pipe(lambda counts: counts.div(counts.sum(axis=1), axis=0))\n",
    ")\n",
    "\n",
    "df_train_user_popularity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6IAAADSCAYAAABQMn3bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZxU5Zn3/8/VO80Ozd7si4CAoqioJKKignEbt9EkJnHyxElM8kt+yWjMZpbJE7NnYjQxxDgmZhT3hIwYY1xwQRRRWWSRZu1ukIZuuht6r6r7+aNOY4nQC1TXXXX6+369eFl16lB93VhXn7rOvZlzDhEREREREZFUyfIdgIiIiIiIiHQvKkRFREREREQkpVSIioiIiIiISEqpEBUREREREZGUUiEqIiIiIiIiKaVCVERERERERFJKhWgImNm9ZvYDM/uQmW3swPnfNbM/B49HmdkBM8sOnj9vZv+nq2MW6e7MbJuZzTOzb5jZ3cGxMWbmzCzHd3wi8p7E6+ZhXjuYwyKSemb2pJl90ncc0nn6shMizrkXgeM6+Xd2AL26JiIRaY9z7oe+YxCRo6ccFvHLObfAdwxydNQjKiIiIiIiIimlQjQDmdlMM3vDzPab2YNAQXB8rpmVJZz3NTMrD87baGbnHua9jjgU0MyGmdlqM7speN7XzP5gZruC9/1B65BeETk67Qz5uyIYwjsteD7bzJaZWbWZrTKzuSkNVqQbaO/aaWa5ZvaAmT1qZnmHTHdpvabeYGY7g+vlf/hpiUh4BHn5yCHHfmVmtx86rczM/s3M1pvZPjN7ysxGB8e/Z2a/Dh7nmlmdmf00eN7DzBrNbEAq29XdqRDNMGaWB/wFuA8YADwMXHGY844DvgCc4pzrDVwAbOvEzxkLLAXucM79NDh8LxABJgAzgfMBzScV6QJmdj3wY2Cec26tmY0AngB+QDz3/wN41MwGeQxTJFTau3aaWQ/i1+Am4GrnXPMR3upsYCLx6+TXzGxeV8Yt0g0sAi40s94AQUfI1cD9iSeZ2aXAN4DLgUHAi8ADwctLgbnB41OAd4EPB89PBzY656q6rglyKBWimWc2kAv8l3OuxTn3CLDiMOdFgXxgqpnlOue2Oec2d/BnTAWeA77jnFsIYGZDgAuBLzvn6pxzFcAvgWuOsT0i8kFfBm4C5jrnSoJjHweWOOeWOOdizrmngdeJ56WIJEdb184+wN+BzcD1zrloG+/zveBauQb4b+DaLo1aJOScc9uBN4B/CQ6dA9Q755Yfcupngducc+udcxHgh8CJQa/oK8BEMxtIvAD9AzDCzHoBZxEvVCWFVIhmnuFAuXPOJRzbfuhJwZfXLwPfBSrMbJGZDe/gz/gYUA4kDoEYTbwA3hUMC6wGfgcM7nwTRKQdNwF3OufKEo6NBq5qzb8gB+cAw7xEKBJC7Vw7ZwMzgB8dcg0+nNKEx9uJX7tF5Njcz3s3dT7KIb2hgdHArxKuk1WAASOccw3Eb+CeRbwQXQosA85EhagXKkQzzy7id28s4diow53onLvfOTeHeFI64sP8OuK7wF7g/oQ5oKXEhyIVOef6BX/6OOeOP5pGiEibzge+ZWaJw+5LgfsS8q+fc66nc+5HnmIUCaU2rp3/AG4DnglGCbVlZMLjUcDOpAcq0v08DMw1s2LiPaOHK0RLgX8/5FrZwzm3LHh9KfHe1JnERxQuJT4E/1TghS5vgbyPCtHM8wrxeZr/XzDR+nLiyfM+ZnacmZ1jZvlAI9AAxDr4M1qAq4CewJ/MLMs5t4v4RfjnZtbHzLLMbLyZnZWMRonI+7wNzAfuNLNLgmN/Bi42swvMLNvMCoIFyor9hSkSLu1dO51zPyH+5fcZMytq462+bWaFZnY8cD3wYFfGLdIdOOf2AM8TH+6+1Tm3/jCn3QV8Pci91oU2r0p4fSnwCWBdMMf7eeLrnWwN3l9SSIVohgmS
5nLgU8SHG/wr8NhhTs0HfkS8Z/Nd4kNov34UP2cIcI+ZZRFP3DxgHbCP+NBdDQsU6QLOuVXARcDvzWyBc64UaF2EYQ/xu743od/jIsnU7rXTOfefxBcs+mcbK2wuBUqAZ4CfOef+0WURi3Qv9wPzOHxvKM65x4mPYlhkZrXAWiBxn9FlQA/e6/1cR/ymk3pDPbD2pzmIiIiISHvMbAywFcgNFkoREZEj0J10ERERERERSSkVoiIiIiIiIpJSGporIiIiIiIiKaUeUREREREREUkpFaIiIiIiIiKSUjm+fnBRUZEbM2aMrx8vkhZWrly51zk3yHcch6McFVGOiqQ75ahIemsrR70VomPGjOH111/39eNF0oKZbfcdw5EoR0WUoyLpTjkqkt7aylENzRUREREREZGUUiEqIiIiIiIiKdVuIWpm95hZhZmtPcLrZma3m1mJma02s5OSH6aIiIiIiIiERUd6RO8F5rfx+gJgYvDnBuC3xx6WiIiIiIiIhFW7ixU5514wszFtnHIp8CfnnAOWm1k/MxvmnNuVpBhFQun5jRW+QxCRw2iJxvjxkxuoaWjxHYpIRnn8zTKWlVT6DkMk1GoaWvjFPzZS3xz1HcoxS8aquSOA0oTnZcGxDxSiZnYD8V5TRo0alYQfLZK57l22zXcIH6AcFYFNuw9w90tb6V+Y6zuUD1COSjq7/ZkSdtU0MKAwz3co3ihHpaut2FrFH1/ZTlGvfPKyzXc4xySl27c45xYCCwFmzZrlUvmzRdJNLA0zQDkqAjEX/+j/5MoTOP87noM5hHJU0llTS5SLZgznZ1edkLKfad9I2Y/qEOWodLWGlnhP6AOfOY2JQ3p7jqZ9beVoMlbNLQdGJjwvDo6JiIhknNZCNCuzbzSLpFxzNEZejjZkEOlKjUEhmp+T7TmSY5eM3xaLgU8Eq+fOBmo0P1Skfc7pRqlIOmodrZBlqkRFOqMpEiMvW4WoSFdqisQAKMjN/Fxrd2iumT0AzAWKzKwM+A6QC+CcuwtYAlwIlAD1wPVdFayIiEhXa+0RVR0q0jnNkRj56hEV6VIHe0RzM79HtCOr5l7bzusO+HzSIhIREfHIHRyaq0pUpKOcc/EeURWiIl2qtUc0DDd9Mr8FIiIiSaShuSKd1xKNJ04kHVfiEwmR7ZV1gApRETkGmiIqkp5iMS1WJNJZDcGehoUhGC4oks4K8+IDWi0EN0tViIp44lAlKpKOoq1Dc1WJinRY65YSA3vle45EJNx2VNVT3L+H7zCSQoWoiIhIAqehuSKdVlnXBEBhnnpERbpSXVOEaEiGwKsQFfFEQ3NF0pP2ERXpvL0HmgEo0NBckS61bmctEwb38h1GUqgQFRERSdB6ozkM829EUuX5jRUAHDe0t+dIRMKrsSXK/qYIxf0LfYeSFCpERTxRj6hIelKPqEjnvVVaDcCYgeH4giySjjbtPgDA8cP7eI4kOVSIioiIJHhv1VxVoiId9eaOaqYM66ORBCJd6K2y+A2fKcNUiIrIMdCquSLpqXURiGx1iYp0yI7KegBOGdPfcyQi4fa3t3YCMGVYOIbAqxAVERFJ8N4cUb9xiGSK17dXAXDG+IGeIxEJt501DfTMyz64l2imUyEq4onmiIqkp9Y5ouoRFemYu5ZuBmDWmAGeIxEJr5KKA5Tta+DTHxrnO5SkUSEq4onqUJH0dHBorrpERTqkrinK8L4FFPXK9x2KSGi9tjU+8uDiGcM8R5I8KkRFREQSHFw1Vz2iIu167I0yyqsbOGNCke9QREIrFnN8Z/FaAEYOCM/K1CpERXxRl6hIWlKPqEjHbQsWKvrSuRM9RyISXlsr62iJOj4+exQFudm+w0kaFaIiIiIJtGquSMdU1TVz78tbyc/JClUvjUg6cc7xvb+tA+CaU0Z5jia5VIiKeKLtW0TSk4bminTMohU7qG2MMGFwL9+hiITWm6XVvPDOHgAmDw3Hti2tVIiKiIgkiMbi/9XQXJG2/XPd
bgD++vkzPUciEl6tixQ9fuMZ5GSHq3QLV2tEMoi2bxFJT+/1iHoORCTNRR30KcgJ3ZdjkXSyYmsVvfNzmFHcz3coSReO3VBFMpDqUJH0dLAQVY+oyGGVVtXzsbtfpby6gQunh2crCZF00tgS5fLfLGPj7v1cOH1YKNct0C0sERGRBFo1V6Rt63fVsqOqnvnHD+VTZ4zxHY5IKK3fVcu6XbWcNWkQ/2fOWN/hdIkOFaJmNt/MNppZiZndcpjXR5nZc2b2ppmtNrMLkx+qSLg4jc0VSUuthagWKxI5vL+t3gXAzfOP4+TR/T1HIxJOj75RBsCtF03lhJHhG5YLHShEzSwbuBNYAEwFrjWzqYec9i3gIefcTOAa4DfJDlRERCQVWofmhnEYlEgylFbF9w4d2rfAcyQi4bVscyV5OVmMHhjerZE60iN6KlDinNvinGsGFgGXHnKOA/oEj/sCO5MXokg4qT9UJD1p1VyRttU1RVgwbSj5Odm+QxEJpdrGFrbsqeOTp4/GQnwt6shiRSOA0oTnZcBph5zzXeAfZvZFoCcwLynRiYiIpJhWzRX5IOccF9/xEpt2H6ApEgvtUEERnyr2N3Lxr19iX10LANNG9PUcUddK1qq51wL3Oud+bmanA/eZ2TTnXCzxJDO7AbgBYNSoUUn60SKZKR2niCpHRdJ7sSLlqPhS1xxlbXktZ4wfyIziflw2c7jvkNKSclSOxROrd7G7tokF04Zy3NDezJsyxHdIXaojhWg5MDLheXFwLNGngfkAzrlXzKwAKAIqEk9yzi0EFgLMmjUrDb+Gi6ROOiaAclQkoRBNwzmiylHxZdveOgD+ZeYIrpo1sp2zuy/lqByLRa/FB6H++tqZ3WJ/3o60cAUw0czGmlke8cWIFh9yzg7gXAAzmwIUAHuSGaiIiEgqtK5oHeZ5OSKdddfSzQAM69vDcyQi4fT6tio27t7PvCmDu0URCh0oRJ1zEeALwFPAeuKr475tZt83s0uC074KfMbMVgEPAJ9y2ptCpG1KEZG0FHUuLXtDRXxa+k68f2HWGG3XItIVbn5kNQA3fHi850hSp0NzRJ1zS4Alhxy7NeHxOuDM5IYmIiKSetFYes4PFfFlxbYq9jdGuGXBZApytVKuSLJV1TWzZW8dk4f25tSxA3yHkzLdo99XJA2pP1QkPcWc04q5Igl2VjcAcOb4Is+RiITTi5viIw6+f+k0z5GkVrJWzRUREQmFrXvrNHJeurUdlfX8+dXtBxfu2vBuLQDD+hX4DEskNEoqDvDgih0EKcaq0mrM4MRuti2SClERT/RFVyQ95WQZTZFY+yeKhNTDK0tZ+MIWeuW/9zVx8tDe9OuR6zEqkfD4/QtbeGhlKT3z3suxOROKyMvpXsNxVIiKeOI0OFckLTVHYkwb0cd3GCLerN+1n/6Fubx56/m+QxEJpSVrdzFnQhH3ffo036F41b3KbhERkXbUN0fpoQVZpBvbV998cMigiCTXup217G+MMHpgoe9QvFMhKuKJhuaKpKf6lig98jRgSLqn6vpmVm7fx/hBPX2HIhJKX3s0vk3Lv8wc4TkS/1SIioiIJGhojtAjV5dH6Z7+ub4CgItmDPcciUj4rNy+jzXlNZw+biAnj+4+27Qcia60Ip6oR1QkPTW0RClUj6h0Q+/s3s9/PLwKgBNHda/VO0W6WmlVPVf8dhkAH5s9ynM06UGFqIiISIKG5igFmiMq3dBbpdUA/PyqE5jZzbaREOlqv3m+BID/+tcTuXDaMM/RpAfd8hXxRB2iIunHOUdtY4TeBbo8Svfzk79vAODcKYMxM8/RiITHzuoGHnitFIDLNDf0IPWIioiIBFqijuZIjL7aL1G6mcaWKHsPNDNzVD/6Feb5DkckVL71l7UA3PHRmZ4jSS8qREU8cZokKpJ2GiNRAPK72abiIss27wW0SJFIsh1oivDshgrGD+qp/DqErrQiIiKBppYYAPmaIyrdTEVtEwBnTRrkORKR
cFn81k4Abp4/2XMk6UeTYERERAL76psB9YhKuNQ2trB9b32b52yqOADA4D75qQhJJDTKqxuoOtB8xNdXbt8HwLwpQ1IVUsZQISriiUbmiqSf//zfdQD00xxRCZEb//wGL5Xsbfe8gtwsemrrIpEOq21s4eyfPk9zNNbmeeMG9SQ7SwuAHUq/bURERAJNLTF65GZzzuTBvkMRSZrSffWcNnYAn/nQuDbPG9G/h74si3TCjsp6mqMxvnD2BE5sY8ujCYN7pTCqzKFCVMQTpw1cRNJOXXOEM8YPJCdbQ3MlHJxzbK+sZ86EIuZN1dBAkWRat7MWgI/MGMaUYX08R5N5dKUVEREJNDRH6ZGnhYokPMr2NQBoSyKRJIvFHLcuXsuIfj04bkhv3+FkJBWiIp5ojqhI+qlrjmiOnITKb5duBuCENoYNikjn3fPyVhpbYsyfNpQsDWk/KipERTxRHSqSfuqbohTmq0dUwmNteQ2gbVlEkikWc9z/2g6yDG6cO953OBlLhaiIiAjQEo2xvylCnrZukZD46VMbWF1Ww6UnDqdAe+OKJM1H717Olj113Dh3AgN7acujo9Whq62ZzTezjWZWYma3HOGcq81snZm9bWb3JzdMkfBxGpsrklZa9xAtyNEXdgmH3y3dAsAXz5ngORKR8Fi2eS/Lt1Rx0qh+7a5ELW1rdyKMmWUDdwLnAWXACjNb7Jxbl3DORODrwJnOuX1mpnXvRUQko+xvjADx/d5EMl11fTORmOPDkwYxYbAWUhFJlh89uQGAn199In0LtQjYsejIigynAiXOuS0AZrYIuBRYl3DOZ4A7nXP7AJxzFckOVCRs1B8qkl7erWkEoHeBFiuSzLWmrIby6gYq65oAuOSE4Z4jEslszjle2VxJbWMEcGzYtZ9LThjO2CLdtDxWHbnajgBKE56XAacdcs4kADN7GcgGvuuc+/uhb2RmNwA3AIwaNepo4hWRLqQcle7suQ3xe6jD+vbwHMmRKUelLY0tUa747TKao7GDx4b3K/AYUfejHA2ft3fW8tG7X33fMe3JmxzJuu2bA0wE5gLFwAtmNt05V514knNuIbAQYNasWeoQku4tDTNAOSrdWWMkSpaR1puSK0elLWX7GmiOxrh5/nHMnTSYHnnZjBlY6DusbkU5Gj5/W70TgIXXnUxx/0Lycozxg3p5jiocOlKIlgMjE54XB8cSlQGvOudagK1m9g7xwnRFUqIUCSFdnUTSS3V9C2MGaqiVZK4/L98OwOxxA5k6PH1vqIhkiljMHVz0a96UIdovNMk6smruCmCimY01szzgGmDxIef8hXhvKGZWRHyo7pYkxikiItKlahpa6NNDC09IZmqOxNi85wAA04b39RyNSOZrjsRYumkPABdOH6oitAu02yPqnIuY2ReAp4jP/7zHOfe2mX0feN05tzh47XwzWwdEgZucc5VdGbhIptP2LSLppaTiAMerF0ky1CV3vMSGd/dz1qRB2gtX5Bg557jw9hcpqYjf3PnKeZM8RxROHZoj6pxbAiw55NitCY8d8JXgj4iISEZpikTZVdPIVScX+w5FpNMaW6JseHc/86YM5pYFk32HI5Lx3thRTUnFAU4Z059PnD5GWyB1Ea1RL+KJ+kNF0sfumvhWF8UDtLCLZJ7WfQ3PnFCkL8wix6i8uoErfrsMgNuvnZnWK6lnOo3dEBGRbm9vsOfiwJ55niMR6bz7X9sBwIJpwzxHIpLZnHN8+y9rAbjy5GIVoV1MhaiIJ5oiKpI+Nu3eD0BfLVYkGWZVaTXNkRjXzR7N0L7aM1TkWCzfUsWzwZ7SP71yhudowk+FqIgnToNzRdJGRW28R3S0tm+RDHP7M5sAuPbUUZ4jEcl8tz25HoCH/v10zLRKbldTISoiIt1eXXMUgP6F6hGVzLKzphFA+4aKHKN3axpZXVbDnAlFnDp2gO9wugUVoiKeaGiuSPqoa4rQt0cuOdm6LErmiMUc63fV8snTR/sORSTj
LVmzC4CvX6iVp1NFV1wREen2qhtaGKCFiiTDbKusA6CoV77nSEQy33MbKxhX1JPjh/f1HUq3oUJUxBP1iIqkj5qGFvoUaEczySz76lsAmDhEW7aIHAvnHK9v28eciUW+Q+lWdNUV8aCmoYXy6gbfYYh0SweaIvzP8u00RWIHj22uOMC4QVqoSLrO4lU72ba3LqnvuaOqHoCiXurNl+7hidW72LznQNLfNxpzNLREtfJ0iqkQFfHgmfW7fYcg0m09u6GC257c8IHjl5w43EM00h0caIrwpUVvdslImF75OYwcUJj8NxZJMw3NUb74wBvEumhEWXaWMWWYFv1KJRWiIh5EohqXK+JL5YH4Vi2vf2se/Qvf60nKztJS/dI1tu6pwzn4zcdO4oLjhyb1vQ3I0mdXuoFNFfuJObjzoycxf1py86iVrgOppUJUxIOoJoiKeFO2Lz4svn9hnr50SEpsr4oPyR1b1FOfOZGj9Pe17wIweVhv5VFIaLEiEQ9iKkRFvHm3ppEs051vSZ2te+KFqIbQihy9lzdXAjB2oObzh4UKUREPump+g4i0b39ThBH9e/gOQ7qRlzfvZWifAnrlayCayNGoqmtmVWk1C6YN1VD0EFEhKuKBU4+oiDdl++oZ0FP7LkpqVNQ2snxLFSeO7Oc7FJGMdcezJQB8bu54z5FIMqkQFfEgpi5RES8i0Rhb9tQxsKe2u5DU+OyfVwKwYHrXLK4iEnalVfXc8/JWPjSxiOkj+voOR5JIhaiIB6pDRfwoDRYqGqM5RpICFbWNvLGjmpws4yPTh/kORyTjRKIxfvDEOgCuP3MMZhqWGyYqREU80GJFIn6sKa8BYM7EgZ4jke7gM396HYAfXj6dnGx95RLprP95dQdPvb2b4X0LOGfyEN/hSJLpt6KIBypERfxoXb10+gjN15OuVVXXzJryGs6cMJDLZ47wHY5IRnpwRSkAD3/uDM+RSFfQ8m0iHmhorogfv10aX/CiqJfmiErX+f8ffIvH3ywH4HNnTVBvqEgnLV61kx8+sZ53axuZOLgXI/pppfMwUiEq4oF6REVSLxKN0dgSY+qwPppnJF1mf2MLj79ZzgnFfTlt3EBOGdvfd0giGee3z2+mrinCtaeO4tpTR/oOR7pIh27Rmdl8M9toZiVmdksb511hZs7MZiUvRJHwUR0qknob3t0PwFWzij1HImHWus3EV88/jm9cOIX8nGzPEYlkluVbKlm/q5YR/Xtw2+XTmVGsqRRh1W4hambZwJ3AAmAqcK2ZTT3Meb2BLwGvJjtIkbCJamyuSMpt3RufHzpxcG/PkUiYPbyyjCF98pkzoch3KCIZ6YHXdgBw2+XTPUciXa0jQ3NPBUqcc1sAzGwRcCmw7pDz/hP4MXBTUiMUCSENzRVJrb0Hmrj5kdUATBray3M0Eja/fPodSioOAFDT0MK8KYPJytLwb5GOaGyJ8r2/raO2oQWAFduq6F2Qw8xRGtYedh0ZmjsCKE14XhYcO8jMTgJGOueeaOuNzOwGM3vdzF7fs2dPp4MVCYt07RBVjkpYPbuhgoaWKNNH9GVgz3zf4Rw15Wj62VfXzK+e2cRr26rYuHs/Ewb14mOnjfYdlniiHO28N3bs44HXdrCqrJqNu/fTp0cuXzp3ou+wJAWOebEiM8sCfgF8qr1znXMLgYUAs2bNStOv4iJdzzlHOt4sV45KWG3ec4DsLOPxG88gOx2Tr4OUo+nnrhc2A/Bf/3oiZ2o4brenHO28P7y4FYC/fP5Minpl7o1C6byO9IiWA4nLVRUHx1r1BqYBz5vZNmA2sFgLFokcWcw5srRqp0jKrN+1n1EDCrWNhiRVNOZY9Fp80NjJozWMUKSzmiMxXt68l175OSpCu6GOXJFXABPNbKyZ5QHXAItbX3TO1TjnipxzY5xzY4DlwCXOude7JGKREIg5VIiKpMj+xhZeeGcP4wf19B2KhEhjS5TzfrGUmoYWvn3RVApytTquSGe8srmS
Sd96ksaWGLde/IF1UKUbaLcQdc5FgC8ATwHrgYecc2+b2ffN7JKuDlAkjGIxR5Y6ZkRSYk1ZDQCzxw30HImEye9f2MKWvXXMmzKYK0/WlkAineGc45uPrwHg388ax0emD/MckfjQoTmizrklwJJDjt16hHPnHntYIuGmobkiqbO6PF6IXjZzRDtninRMY0uUnz/9DgA/v/pE+vbI9RyRSGZ5qWQvW/bW0b8wl68vmOI7HPFEfTIiHmhorkjqvLRpL5OG9NL8I0maRcE+h/952TQVoSJH4cEV8bnVD3/2dM+RiE8qREU8iDmH6lCRrtcSjfHatirmTBjkOxQJkUffKOe4Ib25bra2aRHprFjM8cSaXZxQ3JcJg3v7Dkc8UiEq4oFTj6hISlTXt9AciTG2qNB3KBISNfUtrCmvYcH0ob5DEclIu2obcQ7OnTLEdyji2THvIyoinVNV18y9y7bRu0DpJ5JszZEYn7//DfbsbwKgKRIDoHeBhk9K+/6+9l1+98JmXBu7Pza2RAGYMqxPiqISSX97DzTxpUVvUtcUbffc1hwap5XMuz19ExZJsY3v7gdg5qj+rPUci0jYbKrYz9PrdjNtRB8G9IzPCR3ZfwinjRvgOTLJBH9cto1te+uYXtzviOf06ZHLmIE9OWWMPlMirVZu38fLJZXMGt2fwvy2y4s+PXIZW9STOROKUhSdpCsVoiIpFo3Fb7V/8ZwJ3Oc5FpGw2VFZD8CPLp/BtBF9PUcjmaS2sYVXtlTy6Tlj+fZF2tNQpDO27a0D4HfXncxALQwnHaQ5oiIpFonFhwpqjqhI8m3cvR8zDfmSzvvDi1sBOHfyYM+RiGSeB17bQc+8bAb0zPMdimQQFaIiKdbaI5qTpUJUJNlWbt/HyP6FFOZpwI90XEnFAX71zCamjejDGRouKNIpK7ZVsa2ynuOG9sZ0k106QYWoSIpFgkI0W4WoSFLVN0d4cdNeZhRrSK50XGNLlI/c/iIAP7hsuudoRDLL/sYWrrrrFQB+fMUMz9FIplEhKpJisdYe0WwVoiLJtGLbPgDNDZVO+f0LW2iKxLjy5GJOHHnkRYpE5INeLtkLwHlThzBxiPYElc7R2CWRFItoaK5I0kVjjm88tgaAeVM0x5/iIvIAABOBSURBVE/aF4nGuOWxNTyysozcbONHl6s3VKSjmiJRbnl0DW/s2EdOlnHnR0/yHZJkIPWIiqRY9ODQXKWfSLKsLqumvLqBcYN6MmagFiqS9t23fDuPrCxjUO98/u9l08nJ1u9kkY5avqWKx98sp0duNp84fQx5Ocof6Tz1iIqkmHpERZLv7pfiK54u+sxsFRTSrtKqen64ZD0AL3/tHH2JFumEfXXNfPKe18gy+Mvnz6QgN9t3SJKh9JtXJMWiwfYtWqxIJHn+uW43BblZDOqt/eukfd94fA0tUcddHz9ZRahIJz2wYgcAl5wwXEWoHBP99hVJMfWIiiRXxf5GmiIxPnbaaG0dIO0qr27gpZK9XHzCcOZPG+o7HJGMs3xLFXnZWfzsqhN8hyIZTkNzRVJs6cY9gHpERY5FVV0zT6zeSTTm2FnTCMCcidr/Ud7vrdJq3tqx733HHny9DOfgP86f5CkqkczQHInx+JtlNDRH33f87fIaFkwfqmkQcsxUiIqk2D/W7QagZ77ST+Ro/XHZNn71zKaDz3OzjQmDenmMSNLRlxe9ybbK+g8cHzOwkFEDCj1EJJI5nttYwdceXXPY16YM65PiaCSM9E1YJIVaV8y9ce54zasQOQary6oZ0a8H//vFOQDk52ZRmKdLmrynur6ZbZX1fG7ueG740Lj3vdYzP0fDuEXasWJrFQDLbjmHHgnfWcygX2Ger7AkRHTVFkmhxpb48Ja+PXI9RyKSueqbIzy3cQ8fmlhE/576MiSHd/29KwA4f+oQfU5EOqk5EuPul7YyakAhw/v18B2OhJQGd4ukUGshqt5QkaN3T7BVywXHa6EZObw/L9/Omzuq
GTeoJzNH9fcdjkjGWfjCZgBOHNnPcyQSZh0qRM1svpltNLMSM7vlMK9/xczWmdlqM3vGzEYnP1SRzFfXFC9EWwtSEemclmiMn/3jHQAumznCczSSjpZvqeRbf1kLwAOfme05GpHMs3Vv3cHfs9+4cIrnaCTM2i1EzSwbuBNYAEwFrjWzqYec9iYwyzk3A3gE+EmyAxUJg6ZIvAAdpmEuIkflidW7APjuxVPppQW/5BDOOb726GoAXrz5bIb0KfAckUjmWbIm/nv2vz91CkP7Koek63SkR/RUoMQ5t8U51wwsAi5NPME595xzrnVZuuVAcXLDFAmH+mAJ9J55Gpor0hmxmCMaczyzoYI+BTlcd/oY3yFJGokGn4/n39nD9sp6rj9zDCO1Kq5IhznnDubRss17mTSkF2dPHuw7LAm5jtxOHgGUJjwvA05r4/xPA08eS1AiYXXf8u2Atm4R6Yx9dc3M/dnz1DS0ADBrdH/twysHPfDaDr7+2Pu3mPja/MmeohHJTJfe+TKry2oOPv/IjGEeo5HuIqnfhs3s48As4KwjvH4DcAPAqFGjkvmjRTJC5YEmAE5K08UzlKOSjlZsq6KmoYWrZxVT3L+QOROLfIfkjXL0g/7n1fgNvq+cNwmAiYN7aUE48SYTc7S6vpnVZTWcM3nwwcWJ5k/TYnDS9TpSiJYDIxOeFwfH3sfM5gHfBM5yzjUd7o2ccwuBhQCzZs1ynY5WJMNtr6pnRnFf8nLSc8Fq5aiko18/WwLAl+dN6vbbCChH3+8XT7/D2vJavnreJL547kTf4YhkZI7etXQLANedPpqzj9NwXEmdjnwbXgFMNLOxZpYHXAMsTjzBzGYCvwMucc5VJD9MkXDIzcrSnXqRTthRWc+a8hr6FOR0+yJU3m91WTW3P7MJgH+bM9ZzNCKZKRpzB7dqOX3cQM/RSHfTbiHqnIsAXwCeAtYDDznn3jaz75vZJcFpPwV6AQ+b2VtmtvgIbyfSre050MQoLaAh0iG1jS08uTa+euN9n25raQLpTpoiUarrm/nG42vIy8nima+epXn3IkehvjnC2vIaYg5unn+cbpRLynXoN7dzbgmw5JBjtyY8npfkuERC5+WSvVTVNWvLCZEOeHRlGV99eBUAvQtymFHc13NEkg4aW6Kc8aNnqaprBuCaU0YyflAvz1GJZJ7Xtlbx8T+8SnMkBsBpY9UbKqmnb8QiKfLchvio9Y/PHu05EpH0d8/LW8nPyeKWBZM5bmhvzLRKrsDit3ZSVdfMOZMHM/e4QVw0Y7jvkEQy0iubK2mOxPjKefG59zODRYpEUkmFqEiKNEfjdx0nDNbde5G2lFbV8/bOWiYN6cX1Z2run8SVVtXzzb+sYdygnvzuupPJzU7PRd9EMsFDr8d3ZvziORN0o0+8USEqkgLOORatKGXMQM0PFWnLX98q569v7QTg2xdN9RyNpAPnHHe/uJVf/vMdnIOF181SESpylGrqW7j7pS3UNLQwe9wAFaHilQpRkRTYVllPcyRGnx65vkMRSVuNLVG+tOgtACYN6aUVHAWAV7ZU8n+XrAfgIzOGaVSJyDH4x7p3+fWzJRTmZXPNKZmxz6mElwpRkRTYVd0AwC3zJ3uORCR9tc6jvvOjJ/GRGcM8RyPpwDnHbUs20L8wl2W3nEuPPK3qKXIs3tixD4CV3zpP+STeqRAVSYHbn43vdTe0b4HnSETS0+3PbOIXT78DwNmTB3mORtLB3gNNXPirF6nY38StF03Vl2aRY/Tipj088FopedlZyidJCypERVJgR2U9vfJzGDOwp+9QRNJOJBrj/ld3MK6oJ589azyFebo0Cfz+xS1U7G/iohnDuOKkYt/hiGS8p9ftBuAnV87wHIlInGb7i3SxHZX17Kxp5Pozx5CVpUUBRBLV1Lfw4Z88x7u1jXx27niuPmWk75AkDbzwzh5+t3QLQ/sUcMdHT6JvoebXixyLd2sa+dMr2xk9sJDLZo7wHY4IoEJU
pMstWbsLgA9P0nBDkUTOOW5/dhM7axqZOqwPF0wd6jskSQNNkSj/ft9KAG67YrrnaETC4dWtlQCcfdxgz5GIvEfjn0S62B9e2grACcXaLFok0X+/vO1gfjx24xkU5GrOksBXHlxFQ0uUi08Yri/NIkly77JtANw4d7zfQEQSqEdUpAvtqKxnz/4mZo8bQF6O0k2k1fItlXz/f9cB8L9fnKMiVACoqG3kiTXxUSQ//JdpnqMRCYemSJQ3d1QzZmAhg/to0URJH/pmLNKF7n5pCwDXzR7jNxCRNFJaVc81C5cD8NMrZzBtRF/PEUm6+PZf1wLwo8un07tA80JFkmHpxj0AXDhd22JJelEhKtJFItEYf3plOwAXHD/EczQi6aG2sYWL73gJgJsuOI6rZmlxIonbe6CJp96Or+p5tT4XIknzm+c3A3Dj2RM8RyLyfpojKtJFHnuzHICrZxWTk617PiLRmOOE7/0D5+AjM4bxeX0pkgQ3P7IagIXXnawVxkWSpCUa463Sas6aNIhe+fraL+lF345FusjCF+LDcr910VTPkYikhx88sQ7nYNqIPtxx7Uzf4UgaiURjPL+xgh652Zw3VSNIRJJl5fZ9AMweN9BzJCIfpEJUpAus21lLScUBzpk8mD6a5yRCScUB/vvlbQA8+rkzMFOPl7xn8aqdxBx8bf5x+myIJNGumgYAPjSxyHMkIh+kQlSkC9z25HogPgdOpLuLRGN89eFVADz5pQ+Rn6MVcuX9ntlQAcDlJxd7jkQkXKrqWgAo7t/DcyQiH6RCVCTJ1pTV8OKmvXzi9NFMGdbHdzgi3t30yGpWlVZz2+XTlRPyAc45XttaxYcnDdIIEpEkq22IF6LKLUlHKkRFkigSjXHzo6vpkZvNV89Tb6jIqtJqHn+znDPGD+TaU0f5DkfSUHl1A3v2N2luqEgXiMRiZGeZFgCTtKTls0SS6L7l21m/q5afXjmDvoW6+yjdWyzm+N7f3qZnXja//NcTfYcjaWp7ZT0AEwf38hyJSPhEYo4cFaGSptQjKpIk++qa+fWzJZw6doD2RhQBFq0o5Y0d1XznkuMZ0qfAdziSphqaowD0zNO9cZFki0RViEr66lAhambzzWyjmZWY2S2HeT3fzB4MXn/VzMYkO1CRdPfDJeupaWjhe5cc7zsUEe9iMcfCFzYzfURfrtICNNKGlmgMgNwcfVkWSbZozGkvc0lb7X4yzSwbuBNYAEwFrjWzQzdG/DSwzzk3Afgl8ONkByqSzp5Zv5uHV5Zxw4fHaTEWEeDZDRVsq6znhg+P03Yc0qbm1kJUX5ZFkq4lGiM3W7+DJT11ZBzMqUCJc24LgJktAi4F1iWccynw3eDxI8AdZmbOOXekN62ub+axN8qOKmiRdPOVh1YxrqgnX5430XcoSaMclWOxZM275GVnsWDaUN+hhFZYcnTFtn0A5KkQlZBJhxwtqThAtobmSprqSCE6AihNeF4GnHakc5xzETOrAQYCexNPMrMbgBsA8oZO4CsPrTrKsEXSzzmTB2f8/ojKUUmmyUN7a0hYkoU1R/NysrTAm4RCOubo9BF9fYcgclgpXRnAObcQWAgw48ST3N9umpvKHy/SpYr7F/oO4ZgpRyWZinrl+w4hdMKao30KcrXPoYRCOubooN76XSzpqSOFaDmQuARocXDscOeUmVkO0BeobOtN83KyGD2wZydCFZFUUo6KpDflqEh6U46KtK0jY6ZWABPNbKyZ5QHXAIsPOWcx8Mng8ZXAs23NDxUREREREZHuq90e0WDO5xeAp4Bs4B7n3Ntm9n3gdefcYuAPwH1mVgJUES9WRURERERERD6gQ3NEnXNLgCWHHLs14XEjcFVyQxMREREREZEw0nKGIiIiIiIiklIqREVERERERCSlzNeaQma2H9jo5YenThGH7KUaMmrfsRvtnBvUxT/jqHSDHA375xfC30blqHI004W9jcpR5WimC3sbveZoSvcRPcRG59wsjz+/
y5nZ62Fuo9oXeqHO0e7w/zfsbQx7+zpAOZrhwt7GsLevA5SjGS7sbfTdPg3NFRERERERkZRSISoiIiIiIiIp5bMQXejxZ6dK2Nuo9oVb2Nsf9vZB+NsY9va1J+ztD3v7IPxtDHv72hP29oe9fRD+Nnptn7fFikRERERERKR70tBcERERERERSSkvhaiZzTezjWZWYma3+Iiho8zsHjOrMLO1CccGmNnTZrYp+G//4LiZ2e1Bu1ab2UkJf+eTwfmbzOyTCcdPNrM1wd+53cwsxe0baWbPmdk6M3vbzL4UwjYWmNlrZrYqaOP3guNjzezVIK4HzSwvOJ4fPC8JXh+T8F5fD45vNLMLEo5nzGe6PZnWFuVoZrdR+dl5mdYe5Whmt1E52nmZ1h7laGa3MaNz1DmX0j9ANrAZGAfkAauAqamOoxPxfhg4CVibcOwnwC3B41uAHwePLwSeBAyYDbwaHB8AbAn+2z943D947bXgXAv+7oIUt28YcFLwuDfwDjA1ZG00oFfwOBd4NYjnIeCa4PhdwOeCxzcCdwWPrwEeDB5PDT6v+cDY4HOcnWmf6Xb+rTKuLcrRzG6j8rPT/14Z1x7laGa3UTna6X+vjGuPcjSz25jJOeqjR/RUoMQ5t8U51wwsAi71EEeHOOdeAKoOOXwp8Mfg8R+ByxKO/8nFLQf6mdkw4ALgaedclXNuH/A0MD94rY9zbrmLfwL+lPBeKeGc2+WceyN4vB9YD4wgXG10zrkDwdPc4I8DzgEeCY4f2sbWtj8CnBvc2boUWOSca3LObQVKiH+eM+oz3Y6Ma4tyFMjgNio/Oy3j2qMcBTK4jcrRTsu49ihHgQxuYybnqI9CdARQmvC8LDiWSYY453YFj98FhgSPj9S2to6XHea4F0HX/Ezid1JC1UYzyzazt4AK4r84NgPVzrnIYeI62Jbg9RpgIJ1veyYKS1tC9fltFdYcVX52SljaE5rPbyLlKKAcDUt7QvP5TaQcBdIoR7VY0TEK7nxk/NLDZtYLeBT4snOuNvG1MLTRORd1zp0IFBO/szPZc0iSImH4/EK4c1T52b1l+ue3lXJUwirTP7+tlKPpx0chWg6MTHheHBzLJLuDbniC/1YEx4/UtraOFx/meEqZWS7xxPwf59xjweFQtbGVc64aeA44nfhQi5zDxHWwLcHrfYFKOt/2TBSWtoTq89tdclT52SFhaU+oPr/KUeVogrC0J1SfX+Voeuaoj0J0BTAxWMkpj/gk2cUe4jgWi4HWlbI+Cfw14fgngtW2ZgM1QZf/U8D5ZtY/WJHrfOCp4LVaM5sdjM3+RMJ7pUTwc/8ArHfO/SLhpTC1cZCZ9Qse9wDOIz4/4DngyuC0Q9vY2vYrgWeDO2WLgWuC1cbGAhOJT04Pw2e6VVjaEqbPb6hzVPnZaWFpTyg+v6AcPUL7lKOZ355QfH5BOXqE9qVHjroUrlrV+of4alTvEB+//E0fMXQi1geAXUAL8THRnyY+jvoZYBPwT2CAe2/VqjuDdq0BZiW8z78Rn/RbAlyfcHwWsDb4O3cAluL2zSE+FGE18Fbw58KQtXEG8GbQxrXArcHxccQTrAR4GMgPjhcEz0uC18clvNc3g3ZsJGFFtEz6THfg3yuj2qIczew2Kj+P6t8so9qjHM3sNipHj+rfLKPaoxzN7DZmco5a8OYiIiIiIiIiKaHFikRERERERCSlVIiKiIiIiIhISqkQFRERERERkZRSISoiIiIiIiIppUJUREREREREUkqFqIiIiIiIiKSUClERERERERFJKRWiIiIiIiIiklL/DyKDdOK4EJ88AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1152x216 with 4 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# One panel per event type: that column's values sorted ascending, on a shared y-axis.\n",
    "_, axs = plt.subplots(nrows=1, ncols=4, figsize=(16, 3), sharey=True)\n",
    "for ax, col in zip(axs, ('dislike', 'like', 'skip', 'view')):\n",
    "    sorted_values = df_train_user_popularity[col].sort_values().reset_index(drop=True)\n",
    "    sorted_values.plot(ax=ax, title=col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weight given to each event type when scoring: a dislike counts -10, a like +1,\n",
    "# while skip and view contribute zero.\n",
    "# NOTE: despite the df_ prefix this is a plain dict keyed by event name.\n",
    "df_event_scores = dict(dislike=-10, skip=-0.0, view=0.0, like=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add 0.01 to every cell before normalising (presumably so that a row summing to zero\n",
    "# does not divide by zero -- TODO confirm). The smoothing is applied to a new frame so\n",
    "# that df_train_user_popularity is left untouched and re-running this cell always\n",
    "# produces the same result.\n",
    "df_train_user_popularity_smoothed = df_train_user_popularity + 0.01\n",
    "\n",
    "# user_score = sum over event columns of (smoothed value * event weight) / smoothed row total\n",
    "df_train_user_popularity_score = (\n",
    "    df_train_user_popularity_smoothed\n",
    "    .mul(df_event_scores)\n",
    "    .div(df_train_user_popularity_smoothed.sum(axis=1), axis=0)\n",
    "    .sum(axis=1)\n",
    "    .rename('user_score')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f38eda94e48>"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAVbElEQVR4nO3de5Cd9X3f8fd3r7pLKySQjIQuGEywYwQSF3fsOLbxjaYhTmIP6ZQ4jRNNbJPGaT0Zp8ykSTpuEyduk0ySumpMYjfYGN+CJ8Z1UMd2UmcQFiCu5iIECIFAdySttLezv/5xHplF7K7O6jxH5/cc3q+ZnX3Oc/3+znP2o0e/5znPEyklJEmdqavdBUiSWseQl6QOZshLUgcz5CWpgxnyktTBetpdwERLlixJq1evbncZklQpd999976U0tLJpmUV8qtXr2br1q3tLkOSKiUinp5qmt01ktTBDHlJ6mCGvCR1MENekjqYIS9JHcyQl6QOZshLUgfL6jp5SdKpHRwc4W/vfJrR2vgp5zXkJalivvnAbj59x2MAREw/ryEvSRWz/+gIAI9/8r30dncRfzD1vPbJS1LFvHBkiIE5vfR2nzrCDXlJqpgde4+yZsnchuY15CWpQp5/cYg7dxxg7dJ5Dc1vyEtSRTy1b5DrP7sFgOuvWtXQMp54laQKODg4woc+9wOe2DvIe9+wjEtWLmpoOUNekjK3fc9RfvGzW9h9eIi//qXLedtFZze8rCEvSW329P5BvnL3LkbGxhkbT9RO/KRErZb4xn3PMau3i1t+9SquXHvWjNZtyEtSmwyN1rj9gd381289wt4jw8zq7aI7gu6uEz9ddHfBaxbN4r+878dnHPBgyEvSGXV0eIyv37OL2x94nkeeP8zBY6O87pz53PwrV3LhOfNL354hL0klGhqtcevWZ9h18DhjtURtfJzR8Xq3y2htnO8+tpcDgyOsPmsOl69ezM9edi7vvHgZ3V2nuD/BaTLkJWkGauOJodEaY7XE6Pg4o7VxDgyOsPnhPWx9+gDbdh7iyPAYfd1d9Pd00d0d9HQFPV1ddHcF61Yu4tfeej4bVg3Q1aJgn6jlIR8R7wH+FOgG/iqlNM1dFiSp/VJK7Dp4nOOjNUbGxjk6PMa9Ow/xxN6j3PHwC7x4fHTS5V53znze/mNnc93l5/Gm82fef94KLQ35iOgG/gJ4J7AL+EFEfCOl9HArtytJ00kp8fzhIQaHaxwYHGHLjv3sHxxheGycF4+P8NBzh3l6/7FXLHf2/H42rBpg/eoB+nu66e0Oeru76Ovu4sq1i1kxMKcNrZleq4/krwC2p5R2AETELcC1gCEvqWWOjYzx5L5B9h8dYc+RYXYeOMa+o8Ps3H+MFw4PcWBwhP2DIy9bZl5/D7P7upnd283apXO5/qpVLFs4i/6ebvp6urho2XzOWTCrTS06fa0O+XOBZya83gVc2eJtSupgQ6M1dh44xjMHjvHcoePsfnGIncXw/sERDh0bnbQ7Zcm8Ps5ZMIvzl87j0vN6uHj5AhbP62defzcXL1/IsoXVC/BGtP3Ea0RsBDYCnHfeeaWt9/hIjZu+/ySHp+g7OyE1uL6UTj1nA7M0vM1G1pUarL7RuhpbVwPvQ0PraXB7Daytsfeqwe01NGN57/uZ3s9lvg8N1VXi38Tg8BiPvXCEpw8ce1l93V3BioHZLF84i3UrFzGvv4fXLJrNqrPmsGReP8sXzuKsef3M62973LVFq1v9LLBywusVxbgfSSltAjYBbNiwoZQ4Sinxia/dz23bnqO/p+uUT04JGjvDfar11NfVmGhgZQ2tq8ENNjJbIzXV5zvD22toXWWtqez93Mi6GvgstGE/N7S9RtrXcO3Tz9jTHVy0bAHXrjuXtUvnsnLxHFYsms3iuX30NHBf9VerVof8D4ALImIN9XC/DvjXLd4mf/39p7ht23N8/F0XcsPb
L2j15iQpWy0N+ZTSWETcAHyb+iWUN6WUHmrlNu/csZ9P3v5D3nXxOXzkJ1/byk1JUvZa3kmVUroduL3V24H6zfRv+MI9rDprDp/+wCVn5IsGkpSzjunIGq2Nc/1nt3B8pMam69czf1Zvu0uSpLbrmNPNf7L5MR7fc5Q/fv8lvPbs8m/yI0lV1BFH8g8++yKf+d4O/tUlr+Hn169odzmSlI3Kh/yRoVH+3RfvZfHcPn7/p1/f7nIkKSuV76654Qv3smPfIH/7oSsZmNvX7nIkKSuVPpLf9swhvvfYXn7ydUt58wVL2l2OJGWn0iH/n257kAWzevizX7i03aVIUpYqG/K33LWT+3a9yPVvWsUCL5eUpElVMuSHRmt88vYfMqevm49dfWG7y5GkbFXyxOvv//3DHBka47994BJ6vTGRJE2pcgl5cHCEL2zZyUXL5vO+S89tdzmSlLXKhfzNW54G4D//zBtKvWWqJHWiSoX84PAYf/ndJ3jLBUu4fPXidpcjSdmrVMh/9Z5dHBup8YENK089sySpWiH/9/ftBuCNKxa2uRJJqobKhPz4eOKenQf5uctWsOqsue0uR5IqoTIhf+vWZxgbT1yxZqDdpUhSZVQm5O/csR+A96+3P16SGlWJkB8arfHNB3bzlguW+Eg/SZqBSoT8X35nO6O1xC++aXW7S5GkSqlEyD994BgAV//Y2W2uRJKqJfuQTylx27bnuGrtYr/hKkkzlH3I79g3CMCaJfPaXIkkVU/2IX/vzkMA3oxMkk5D9iG/62C9P37dykVtrkSSqifrkE8p8Tf//BRL5vXR15N1qZKUpayTc++RYQ4dG2VgTl+7S5GkSso75I8OA/Dv3+kj/iTpdGQd8t99dC8AZy/ob3MlklRNLQv5iPijiHgkIu6PiK9HxIzPnO4/OgLAZed5UzJJOh2tPJK/A3hDSumNwGPAb890BcdHx1g6v98vQUnSaWpZyKeU/iGlNFa8vBNYMcPl+eJdzzCrN+seJUnK2plK0F8GvjXZhIjYGBFbI2Lr3r17fzT+yeKbrotme2WNJJ2unmYWjojNwLJJJt2YUrqtmOdGYAy4ebJ1pJQ2AZsANmzYkE6Mf/T5IwB8/N2va6ZESXpVayrkU0pXTzc9In4J+CngHSmlNN28JxsaqwGwcmD26ZYnSa96TYX8dCLiPcBvAW9NKR2b6fIHB0cBWDi7t+TKJOnVo5V98n8OzAfuiIhtEfGZmSz8tXt3ATB/liEvSaerZUfyKaXXNrP8rJ5uzprrPWskqRnZJuixkZp3npSkJmUb8kOjNWb3dbe7DEmqtCxDfs+RIXbsG2SOIS9JTcky5L/7SP1LUecv9ZF/ktSMLEP+xeP1yyd/4crz2lyJJFVbliF/ZLh+y5t5fS27+EeSXhWyDPmh0Rqzervo6vLuk5LUjIxD3pOuktSsfEO+x5CXpGZlGvLj3kdekkqQZZLaXSNJ5cgy5B994Qj93rNGkpqWZZLO7u3m8NDYqWeUJE0ry5A/PlrjjSsWtrsMSaq8LEN+cLjGHL8IJUlNyzLkj4+MeXMySSpBdiE/Pp44NlpjriEvSU3LLuRvvmsnKcGcfrtrJKlZ2YX833z/SQDWrxpocyWSVH3ZhfxoLfG+S8/l8tWL212KJFVehiE/Tm+3d5+UpDJkF/IjY+P0dmdXliRVUnZpOlIbp89bGkhSKbJL03p3TXZlSVIlZZem4+PQ7ROhJKkU2YV8LSXMeEkqR3YhP54SXWHKS1IZsgv5lDDkJakkLQ/5iPgPEZEiYkmjyxjyklSOloZ8RKwE3gXsbGT+VPz24hpJKker4/S/A7/FS/k9vWKu8EhekkrRspCPiGuBZ1NK951ivo0RsTUitu7dtw/wEkpJKktT9/ONiM3Askkm3Qj8R+pdNdNKKW0CNgFctn59OgBeQilJJWkq5FNKV082PiJ+HFgD3Fd0vawA7omIK1JKz0+5vuK3J14lqRwteTJHSukB4OwTryPi
KWBDSmnf9AvWfxnyklSOrK5jeelIvq1lSFLHOCPP2EsprZ7J/J54laRyZHUk7yWUklSurEJ+PNVT3iN5SSpHZiFf/31spNbeQiSpQ2QV8if6a85dNKvNdUhSZ8gq5F+6d01WZUlSZWWVpkWXPD32yUtSKbIK+RPH8p54laRyZBXyHslLUrnyCvnid5chL0mlyCrkT/BIXpLKkVXI7zk8DNgnL0llySrkj42MAbB2ybw2VyJJnSGrkAf4tbeez8I5ve0uQ5I6QlYhn/Ah3pJUpuwi1W+7SlJ5sktUr6yRpPJkF/JeWSNJ5THkJamDZRfydtdIUnmyC/kuH/0nSaXJLuTtrpGk8mQX8t6cTJLKk1/Im/GSVJrsQl6SVB5DXpI6WHYhH9hfI0llyS7kJUnlyS7kvUxeksqTXchLksrT0pCPiF+PiEci4qGI+FQrtyVJeqWeVq04It4GXAtcklIajoizG1quVQVJ0qtQK4/kPwz8QUppGCCltKeF25IkTaKVIX8h8JaI2BIR34uIyyebKSI2RsTWiNhaf93CiiTpVaap7pqI2Awsm2TSjcW6FwNXAZcDt0bE2pRSmjhjSmkTsAmgf/kF6eQVSZJOX1Mhn1K6eqppEfFh4GtFqN8VEePAEmBvM9uUJDWuld01fwe8DSAiLgT6gH2nWshvvEpSeVp2dQ1wE3BTRDwIjAAfPLmrRpLUWi0L+ZTSCPBvZrygB/KSVBq/8SpJHcyQl6QOll3I21sjSeXJLuQlSeXJLuTDr7xKUmmyC3lJUnkMeUnqYNmFvJ01klSe7EJeklSe7ELe866SVJ7sQl6SVB5DXpI6WHYhb3eNJJUnu5CXJJUnu5D3oSGSVJ7sQl6SVJ7sQt4+eUkqT3YhL0kqjyEvSR3MkJekDmbIS1IHyy7kfWiIJJUnu5CXJJXHkJekDpZdyNtZI0nlyS7kJUnlyS7kPe8qSeXJLuQlSeVpWchHxLqIuDMitkXE1oi4olXbkiRNrpVH8p8Cfi+ltA74neL1KXmrYUkqTytDPgELiuGFwHMt3JYkaRI9LVz3x4BvR8QfU//H5F9MNlNEbAQ2AvQte60nXiWpRE2FfERsBpZNMulG4B3Ab6aUvhoRHwA+C1x98owppU3AJoD+5RekZuqRJL1cUyGfUnpFaJ8QEZ8HfqN4+WXgr5rZliRp5lrZJ/8c8NZi+O3A440sZG+NJJWnlX3yvwr8aUT0AEMU/e6SpDOnZSGfUvp/wPqZLueJV0kqj994laQOZshLUgfLMOTtr5GksmQY8pKksmQX8p54laTyZBfykqTyGPKS1MGyC3l7aySpPNmFvCSpPNmFfHjmVZJKk13IS5LKY8hLUgfLLuTtrJGk8mQX8pKk8mQX8p53laTyZBfykqTyGPKS1MGyC3m7aySpPNmFvCSpPNmFfHgRpSSVJruQlySVJ7+Q90BekkqTX8hLkkpjyEtSB8su5O2tkaTyZBfykqTyZBXyA3P6WLdyUbvLkKSOkVXIrxiYzaI5fe0uQ5I6RlMhHxHvj4iHImI8IjacNO23I2J7RDwaEe9urkxJ0unoaXL5B4GfBf7nxJERcTFwHfB64DXA5oi4MKVUa3J7kqQZaOpIPqX0w5TSo5NMuha4JaU0nFJ6EtgOXNHMtiRJM9eqPvlzgWcmvN5VjHuFiNgYEVsjYuvevXtbVI4kvTqdsrsmIjYDyyaZdGNK6bZmC0gpbQI2AWzYsCE1uz5J0ktOGfIppatPY73PAisnvF5RjJMknUGt6q75BnBdRPRHxBrgAuCuFm1LkjSFZi+hfF9E7ALeBHwzIr4NkFJ6CLgVeBj4P8BHvbJGks68SCmfbvCIOAJMdrVOlSwB9rW7iCZVvQ1Vrx9sQw6qVP+qlNLSySY0e5182R5NKW049Wz5ioittqG9ql4/2IYcVL3+E7K6rYEkqVyGvCR1sNxCflO7CyiBbWi/qtcPtiEHVa8fyOzEqySpXLkdyUuSSmTIS1IHyybk
I+I9xb3nt0fEJ9pdz0QR8VREPBAR2yJiazFucUTcERGPF78HivEREX9WtOP+iLhswno+WMz/eER8sMU13xQReyLiwQnjSqs5ItYX78n2YtnSH887RRt+NyKeLfbFtoi4ZsK0SZ9hMNVnKyLWRMSWYvyXIqLUJ9ZExMqI+E5EPFw8d+E3ivGV2Q/TtKES+yEiZkXEXRFxX1H/7023zah/S/9LxfgtEbH6dNuVjZRS23+AbuAJYC3QB9wHXNzuuibU9xSw5KRxnwI+UQx/AvjDYvga4FvUn0l+FbClGL8Y2FH8HiiGB1pY808AlwEPtqJm6repuKpY5lvAe89QG34X+Pgk815cfG76gTXF56l7us8W9W9lX1cMfwb4cMn1LwcuK4bnA48VdVZmP0zThkrsh+J9mVcM9wJbivdr0m0CHwE+UwxfB3zpdNuVy08uR/JXANtTSjtSSiPALdTvSZ+za4HPFcOfA35mwvjPp7o7gUURsRx4N3BHSulASukgcAfwnlYVl1L6R+BAK2oupi1IKd2Z6n8Bn5+wrla3YSpTPcNg0s9WccT7duArxfIT34+y6t+dUrqnGD4C/JD6Lbcrsx+macNUstoPxXt5tHjZW/ykabY5cd98BXhHUeOM2lVW/WXIJeQbvv98myTgHyLi7ojYWIw7J6W0uxh+HjinGJ6qLTm0sayazy2GTx5/ptxQdGfcdKKrg5m34SzgUEpp7KTxLVH8t/9S6keSldwPJ7UBKrIfIqI7IrYBe6j/A/nENNv8UZ3F9BeLGnP+u55WLiGfuzenlC4D3gt8NCJ+YuLE4iiqUteiVrHmwv8AzgfWAbuBT7e3nFOLiHnAV4GPpZQOT5xWlf0wSRsqsx9SSrWU0jrqtzy/AriozSWdUbmEfNb3n08pPVv83gN8nfoH5YXiv8sUv/cUs0/VlhzaWFbNzxbDJ49vuZTSC8Uf7Tjwv3jpsZIzbcN+6t0hPSeNL1VE9FIPx5tTSl8rRldqP0zWhqrth6LmQ8B3qN81d6pt/qjOYvrCosac/66n1+6TAvUDGXqon0xaw0snL17f7rqK2uYC8ycM/zP1vvQ/4uUnzz5VDP9LXn7y7K5i/GLgSeonzgaK4cUtrn01Lz9pWVrNvPKE3zVnqA3LJwz/JvV+Uqg/NH7iibEd1E+KTfnZAr7My0++faTk2oN6P/mfnDS+MvthmjZUYj8AS4FFxfBs4J+An5pqm8BHefmJ11tPt125/LS9gAk74xrqZ+6foP5owbbXVNS1tthx9wEPnaiNej/d/wUeBzZP+KML4C+KdjwAbJiwrl+mfsJmO/BvW1z3F6n/N3qUej/hh8qsGdgAPFgs8+cU354+A23430WN91N/OM3EsLmxqOdRJlxlMtVnq9i3dxVt+zLQX3L9b6beFXM/sK34uaZK+2GaNlRiPwBvBO4t6nwQ+J3ptgnMKl5vL6avPd125fLjbQ0kqYPl0icvSWoBQ16SOpghL0kdzJCXpA5myEtSBzPkJamDGfKS1MH+P4C/EXXchej1AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Curve of user_score values sorted ascending (x-axis is the rank after sorting).\n",
    "sorted_user_scores = df_train_user_popularity_score.sort_values().reset_index(drop=True)\n",
    "sorted_user_scores.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fit items\n",
    "# Only like/dislike rows are used. Each row's event weight is reduced by 0.2 * the\n",
    "# reacting customer's user_score, and the result is averaged per story (sorted ascending).\n",
    "df_train_item_popularity = (\n",
    "    df_user_item_train\n",
    "    .loc[lambda x: x['event'].isin(['dislike', 'like'])]\n",
    "    .assign(score=lambda x: x['event'].map(df_event_scores))\n",
    "    .merge(df_train_user_popularity_score, left_on='customer_id', right_index=True)\n",
    "    .assign(item_score=lambda x: x['score'] - x['user_score'] * 0.2)\n",
    "    .groupby('story_id')['item_score']\n",
    "    .mean()\n",
    "    .sort_values()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f38e9fd04a8>"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAbKElEQVR4nO3deZCcd33n8fe3j+menum5NDOakTTSSEg+hC0LW74dwGC85kiAxAuEhTgJFQFhdwmbLRfEVKpSC6kEUkvIbipBBeyRwHIEAgSDjU1Ys9jYRgIZy9ZtHdYx99UzPX3/9o9+ZjTIY13ztPrpns+ramr6Ofrp3280+uin7/N7nsecc4iISH0KVbsBIiJSOQp5EZE6ppAXEaljCnkRkTqmkBcRqWORajdgoc7OTtff31/tZoiI1JRdu3aNOOe6FtsWqJDv7+9n586d1W6GiEhNMbNjL7dN5RoRkTqmkBcRqWMKeRGROqaQFxGpYwp5EZE6ppAXEaljCnkRkTqmkBcRqWFf+9mL59yukBcRqWF//+PD59yukBcRqVGnJmZ5YXjmnPso5EVEatTjh0bOu49CXkSkRj1+aITO5oZz7qOQFxGpMYViib98aB/f2n2K2zd2nnPfQN2FUkREXipbKHJkZIaDg9McHEzx/T0DHBya5tVXdPGB17yCvznHexXyIiI+cc6RLzqyhSLZQolcoUS2UCJbKJ55nS+RKxbJ5ksL9invn8kXmUjnmckVyRaKZPJFjoykOTiYolByAIQMruxp4XPvvYG7N6/EzM7ZJoW8iNSdYskxmy+SzhXI5Eqk8wVmc2eCdjrrLRdLZPPnCOSzQnj+Kz/33rl9zmxfqsZomKZYhFgkRDwaYk17gjuv7OKq3hauWNlM/4om4tHwBR9PIS8iNeH4aJoXx9McG02z+8VxBqeyzOaK8wFefl0k7YX5pQgZxCJhYtEQDeEQsWiIWCS84HWI1sYosWSMWMTbFimvj0VDxMIhYtEwsUjozPqF+yx27PltZ5b9pJAXkcBxzjE8neX4aJpH9g7y2P5h9g2k5rd3NDXQ195IY0OY7mScxmiYxoYwiYbwIq8j869jkRDRSIjmWHldw1zweqEcCdl5yx+1RiEvIlVXKjl+eXKSIyPT7Dw6zoPPnmYinQcgEjJu7O/g42++mmtXt9LTGmdtR6LuwrhSFPIiUlUnJ2Z57xeemr9yMx4N8YbNPVy/to2+9gRb+lrpTsar3MrapZAXkcsiky8yNZtncCrLVCbP9/ec5qE9g4zNZGmMhvn0vVvY2tdGf2cT0bAu4fGLQl5EfFEoljgyMsP+wRQjqSwnJ2ZJZQrM5IocHEz9Sk0dIBo23nhNL/2dTfz6ll42rUxWqeX1TSEvIhekVHJkvItyjozMMDSVZWAqw2Q6z7GxGX5+fOJXZrXEIiHaElEao2H6OhK88ZpeOpob6GqO0ZaIsqGzie4WlWEqTSEvIvOccwxMZRiayjI2k+PAYIr9AymOjs6w59TUS6YmNkRCtDVG6WmN895b1vHKVS1c2ZOkpyVOR1ODTo4GgEJepI7NXcRTKDryxRKzuSKjMzkmZ3NMZ4tMpnPsHUhxaGiaQ0PTTM3m56+snLOyJUb/iibeua2P1e2NdDQ1sGVNKx1NDTohWgMU8iI1JJMvMjaTY2wmx4nxNIeGpsuXwOdLzGQLHBhKMZ0pMJMtMJ7OM5svnveYyXiEK1cmed1V3XQlY6xqjbOqrZHWxigbu5tpS5z7LocSbAp5kYAolRxj6Rwnx2d5YWSakVSOmVyBfadTDE9n2Xd6ipncS0O7IXzmisuN3c30dDeTaIjQnojSlogSj4aJhkNEvf1WNDfQlmigORYmGY/SnYyprFLHFPIil0EmX2RoKsvegSlmsgWmswUODKY4OT7LqYkMpybLM1EWs25FgpXJOPfesIZur9bd0dRAVzLG5t6Wi7qPiSw/
CnmRi+Rc+eZX09mCVxopMjmbZyiVYSiVZWgqy1Aqw3Aqy/B0luGpLKnsSwM8GY/Q156gryPBLRs6aEs0kIxHWLeiiXUrEvS0xufvZyJyqRTyIgsUS45UJs+QN8/7xPgshwZTpDIFxtM5jo6mOT6WpnjWycmFEg1hupMxupNxru5t4dWbYnQlY3QnY2zoamZFUwPxaJiVLSqTSOVVPOTN7B7gs0AY+Lxz7i8q/ZkiCznnmM4WmMoUmEznGZnOsvf0FBOzefaenmJ0OsdUJs9EOs9UJo87K7+TsQgtjVFaG6Nc3Zvknmt6aG2M0hSL0BwL0xyL0hKP0JUsh3kyHq1OR0UWUdGQN7Mw8LfAG4ATwM/M7DvOuecr+bmyfDjnODA4zQvD00xl8qQyBSZn80zO5stTAjN5jo+mmVqk3h0y2NDVTF97I+s7m2hPRGlNNNCeiLKiOcbqtjidzTHdDEtqWqVH8jcBh5xzLwCY2VeAtwIKeXlZzjkmZ/OcGJ9l1JsqeHwszdRsnuGUd5XlbDnQU5nCS0onIYOmWIQNnU10Nce4ZlUrG7qaaPVG422JBq7qSWpqoCwLlQ751cCLC5ZPADcv3MHMtgPbAdauXVvh5kg1zU0R3Hc6xc+PjzMynWUinZ8/gZnKFpjOlssmZ880iYaN1sYGOpsb6G2Ns6k7STIeIRmPsLotwda+NloTUZLxCM0NEUIhjbxFIAAnXp1zO4AdANu2bXv5s1kSSJl8keNjaVKZPOlckelMYf7pPXMX7Yync4zN5BlP535l1N3aGKU9ESUZj9IUC7O6rZFkPElzLMK6FQl6Wxvpbomxuq2Rnpa4glvkElQ65E8CfQuW13jrpAZNZwvsOz3FYweGOTk+yxOHRxlMZV5yohKgLRGlqzlGe1MDGzqbuWFdlPZEA93JGOs6m7h5fQeJhqqPMUTqXqX/lv0M2GRm6ymH+7uAd1f4M2WJ5h699typKcamcxwZmeH4WJoHnz1NseQIGfS0xLm6N8k7b+xjQ1cTbYkGEt4j19a0JWhNaIaJSBBUNOSdcwUz+/fAw5SnUH7ROfdcJT9TXl4mX2QinWf/YIpnT0wwkc4zMVuugU/O5phI5xmbyTF51k2qQgZtiQbetnU1d13dze2bOmnRNEGRmlDx/y87574HfK/SnyNnDE1lePLIGIOTGY6OzvD0kTEGJjMvueoy0RCen23S1hjlFV3N3Li+/LotEeW6NW20NzXwiq5mwqqHi9QkFUVr2HAqy9HRGX5ycIS9p6d49uQkE2fdeTASMu7Y1Mltr1hBV7JcI1+ZjHPbxhWqiYssA/pbXiOOj6Z57MAQR0fTHBud4dhomoND0/Pb+1ckuHl9x/wVl3de2c3ajgQtjRFdyCOyjCnkA6hQLPHCyAyP7R/mR/uHODw8zeBUFoDGaJh1K8o3tXrr1lX0dzbxa5u6aG1UjVxEXkohX2VTmTwHB1PsPDrOUCrLTw+Pcmh4ev4xa5t7W7h9YyebupO8+dpe+joaNTIXkQumkK+CIyMz7B9I8a1fnOSh5wbm1zeEQ1zX18p9t67j6t4WtqxpZWO3nmAvIpdOIX+ZDKUyPLRngL2nU3xt54sUS45YJMT7X72Bm9Z3sGVNG13JWLWbKSJ1RiFfYb88McGHv7KbIyMzALTEI9y+sZP/fPcVbOhqpjmmPwIRqRwljA8m0jl+fHCE8ZkcmXyRnx0d44XhGSZn84zO5FjVGufjb76am9ev4No1rdVurogsIwr5S+Sc4+u7TvCD5wb4yaERMvnS/LaOpgZu3bCClsYoa9ob+Y3rVtHXkahia0VkuVLIX6Rdx8Z55PlBHjswzN7TU6ztSPD2V63mnTeuZW1HglgkRDwa1hWiIhIICvkLdGgoxT/tOsnfP3aYSMi4fl07n3z7Nbz7prWa0igigaWQP4+dR8d47MAwn/vxC+QKJd62dRWfePu1OmEqIjVBSfUyBqcyfOSru3ni8ChmsGVNG5/6rS1c2aN5
6yJSOxTyizgwmOKD/7iLgckM999zJffd2k+TRu4iUoOUXGfZeXSM+774NJFwiC/87o3csmFFtZskInLJFPIL/NOuE/zJN59lTXsjX/6DW+hpjVe7SSIiS6KQpzzn/fhYmk8/vI+N3c38r9+/SbcYEJG6sOxD/sWxNO/5wlMcG01jBp++9zoFvIjUjWUX8pl8kX955hSHh2d4/vQUO4+OEQ2H+MTbruHm9R1sWqnZMyJSP5ZdyP/jk8f4xIN7CRmsW9HEm6/t5f2v2aBb+opIXVpWIf/t3Sf5xIN7uaonybc+dDvxaLjaTRIRqahQtRtwuZRKjs88coBYJMSf/+a1CngRWRaWzUj+e3tOc3Q0zWfftZXr17ZXuzkiIpdF3Yd8Olfgu8+c5s+/Xy7TvGXLqmo3SUTksqnbkP/R/iH++pEDHByaJp0rclVPks+99wbdAlhElpW6DPnJdJ4P/MMuVrc38o5tfbzp2l5u7G/XLYFFZNmpWMib2aeBXwdywGHg95xzE5X6vIUePzxCtlDiU7+1hW39HZfjI0VEAqmSs2seAa5xzm0BDgAfq+Bn/Yrv7D5FMh5ha1/b5fpIEZFAqthI3jn3gwWLTwL3VuqzALKFIu//h108d2qK4VSW//j6TUTCy2aGqIjIoi5XTf73ga8utsHMtgPbAdauXXvJH/DE4VH+7/5h7t68kts3dvLumy/9WCIi9WJJIW9mjwI9i2x6wDn3bW+fB4AC8KXFjuGc2wHsANi2bZu7lHZk8kV2PPYCTQ1h/tu7X0UsogudRERgiSHvnLvrXNvN7HeBtwCvd85dUoBfiE89tJ+fvjDK/fdcqYAXEVmgkrNr7gHuB17jnEtX4jNOTszy+f/3Av/j8aP8zq3r+MPXbqzEx4iI1KxK1uT/OxADHvHmpz/pnPuAXwffdWycd37upzjgHdvW8PE3b/br0CIidaOSs2sqOqz+l2dOEQkbj/6n17CmPVHJjxIRqVk1O8fwicMj3NjfoYAXETmHmgz5VCbPgcFpbtLVrCIi51STIb9/IAXA5lUtVW6JiEiw1WTI7/VC/upehbyIyLnUZMg/cWiEjqYGelvj1W6KiEig1VzIf/Qbv+T7ewZ4x7Y+3TpYROQ8airkj43O8NWdL3L92jb+8M5XVLs5IiKBV1Mh/1+++zwN4RB/954baIlHq90cEZHAq5mQ3/3iBI/uHeIjb7iClS2qxYuIXIiaCfkvP3WMpoYw77llXbWbIiJSM2oi5AvFEj94fpC7X9lDc6wuH0srIlIRNRHyz5yYYCKd5+7NK6vdFBGRmlITIb9/YBqAa1a3VrklIiK1pSZC/tDQNI3RMKvbGqvdFBGRmlIbIT88zYauJkIhXfwkInIxAh/ypZLj+VOTXLkyWe2miIjUnMCH/L6BFCPTOW7b2FntpoiI1JzAh/wTh0cAuEMhLyJy0QIf8vsGUnQ2x+jRHSdFRC5a4EP+4GCKK1Y2V7sZIiI1KdAhXyo5Dg5Nc4VOuoqIXJJAh/yXnjpGOlfk5vV6lquIyKUIdMg/sneIK1Y2c881PdVuiohITQp0yA+nsqztSOgJUCIilyjwId+VjFW7GSIiNaviIW9mf2xmzswuaqJ7oVhidCZLV1JTJ0VELlVFQ97M+oC7geMX+96xmRzOoZG8iMgSVHok/xngfsBd7BuHUlkAuhXyIiKXrGIhb2ZvBU465545z37bzWynme0cHh6eX390dAaAXl3pKiJyyZb0LD0zexRYbH7jA8CfUC7VnJNzbgewA2Dbtm3zI/4fHxgmGY+wubdlKU0UEVnWlhTyzrm7FltvZtcC64FnvOmPa4Cfm9lNzrmBCzn244dGuWNjJ5FwoCcAiYgEWkUS1Dn3rHOu2znX75zrB04A119owOcKJU5NzrJJtzMQEVmSQA6TByYzOAdr9Lg/EZElWVK55kJ5o/kLdmIiDcDqdoW8iMhSBHIkf3J8FkAP7hYRWaJAhvyh4WlCBr1tmj4pIrIU
gQv5w8PTfPnJ49yxqYtYJFzt5oiI1LTAhfwnH9xLKlvgA6/eUO2miIjUvMCFfDpX4IZ17dymB3eLiCxZ4EK+UHTEIoFrlohITQpcmuaLJaK6ylVExBeBS9N80REN60lQIiJ+CFzIF0oayYuI+CVwaZovOt2UTETEJ4FL03yxRDSkco2IiB8CF/KFolO5RkTEJ4FL03yxREQnXkVEfBHIkNdIXkTEH4FLU02hFBHxT+BCvlAqaXaNiIhPApem+aLT7BoREZ8EKuSd9101eRERfwQqTZ2X8irXiIj4I1Bp6ryxvE68ioj4I1gh743kVa4REfFHoNL0TLlGI3kRET8EK+TnyzWBapaISM0KVJqeKddoJC8i4odAhfzcHMpIKFjNEhGpVYFKU5VrRET8VdE0NbP/YGb7zOw5M/vU+fZXuUZExF+RSh3YzO4E3gpc55zLmln3+d4zN5LXxVAiIv6oZJp+EPgL51wWwDk3dL43aCQvIuKvSob8FcCvmdlTZvaYmd242E5mtt3MdprZzomJSUA1eRERvyypXGNmjwI9i2x6wDt2B3ALcCPwNTPb4NzceL3MObcD2AFw5TVbXRaFvIiIX5YU8s65u15um5l9EPimF+pPm1kJ6ASGz3G8cqN0q2EREV9Ucsj8LeBOADO7AmgARs71Bt1qWETEXxWbXQN8Efiime0BcsB9Z5dqzja3URkvIuKPioW8cy4HvOfi3lT+FjKVa0RE/BCwMXM55RXyIiL+CFTInynXKORFRPwQqJCfo4G8iIg/AhXyTjV5ERFfBSrk56hcIyLij0CF/FxNXgN5ERF/BCrk54SV8iIivghWyDtNoRQR8VOgQn6uXKOQFxHxR6BCfo4e8Soi4o9AxammUIqI+CtQIT9HUyhFRPwRqJDXFEoREX8FKuTnqFwjIuKPYIW8N5TXPHkREX8EKuSdl/LKeBERfwQq5AFCBqaUFxHxRaBC3qF6vIiInwIV8jgIafqkiIhvghXylMs1IiLij0CFvMo1IiL+ClTIg6ZPioj4KVAh75zT9EkRER8FKuRB960REfFToEJeNXkREX8FKuRBF0KJiPipYiFvZlvN7Ekz221mO83spvO+yUE4cP/siIjUrkpG6qeAP3PObQX+1Fs+J5VrRET8VcmQd0CL97oVOHUhb1LIi4j4J1LBY/8R8LCZ/RXlf0xuW2wnM9sObAdoXbVBz3cVEfHRkkLezB4FehbZ9ADweuAjzrlvmNk7gC8Ad529o3NuB7ADYOWGzU4XQ4mI+GdJIe+ce0lozzGz/w182Fv8OvD58x4PlWtERPxUyeLIKeA13uvXAQcv5E3KeBER/1SyJv8HwGfNLAJk8Oru56MrXkVE/FOxkHfO/QS44eLeo3KNiIifAjeXRSEvIuKfQIW8w2kKpYiIj4IVqSrXiIj4KlAhrymUIiL+ClTIg57xKiLip8CFvKZQioj4J1Ah75zuJy8i4qdAhTyoXCMi4qdAhbzDqVwjIuKjQIU8aHaNiIifAhXyuq2BiIi/AhXyoJq8iIifAhXyDk2hFBHxU6BCHjSFUkTET8EKeadyjYiInwIV8ppCKSLir0CFPKhcIyLip0CFvAPCCnkREd8EKuRVkxcR8VegQl73kxcR8VegQh4gpKG8iIhvghfyyngREd8EKuSd0xRKERE/BSrkQVMoRUT8FKiQ1xRKERF/BSrkQTV5ERE/LSnkzezfmtlzZlYys21nbfuYmR0ys/1m9m8u5Hh6xquIiL8iS3z/HuA3gc8tXGlmm4F3Aa8EVgGPmtkVzrniuQ5Wco7VbY1LbJKIiMxZ0kjeObfXObd/kU1vBb7inMs6544Ah4Cbzne8tkSU37u9fylNEhGRBSpVk18NvLhg+YS37iXMbLuZ7TSznfHCDJFw4E4TiIjUrPOWa8zsUaBnkU0POOe+vdQGOOd2ADsAtm3b5pZ6PBEROeO8Ie+cu+sSjnsS6FuwvMZbJyIil1GlaiPfAd5lZjEzWw9s
Ap6u0GeJiMjLWOoUyreb2QngVuBBM3sYwDn3HPA14HngIeBD55tZIyIi/lvSFErn3D8D//wy2z4JfHIpxxcRkaXRVBYRkTqmkBcRqWMKeRGROmbOBWdqupmlgMWuoK1nncBItRtxmanPy4P6fPmsc851LbZhqfeu8dt+59y28+9WP8xsp/pc/9Tn5SGIfVa5RkSkjinkRUTqWNBCfke1G1AF6vPyoD4vD4Hrc6BOvIqIiL+CNpIXEREfKeRFROpYYELezO7xngd7yMw+Wu32+MXMvmhmQ2a2Z8G6DjN7xMwOet/bvfVmZn/j/Qx+aWbXV6/ll8bM+szsR2b2vPf83w976+u2zwBmFjezp83sGa/ff+atX29mT3n9+6qZNXjrY97yIW97fzXbf6nMLGxmvzCz73rL9d7fo2b2rJntNrOd3rpA/24HIuTNLAz8LfBGYDPw295zYuvB/wTuOWvdR4EfOuc2AT/0lqHc/03e13bg7y5TG/1UAP7YObcZuAX4kPdnWc99BsgCr3POXQdsBe4xs1uAvwQ+45zbCIwD7/P2fx8w7q3/jLdfLfowsHfBcr33F+BO59zWBfPhg/277Zyr+hflWxU/vGD5Y8DHqt0uH/vXD+xZsLwf6PVe91K+CAzKD0T/7cX2q9Uv4NvAG5ZZnxPAz4GbKV/9GPHWz/+eAw8Dt3qvI95+Vu22X2Q/11AOtdcB3wWsnvvrtf0o0HnWukD/bgdiJM9FPBO2Tqx0zp32Xg8AK73XdfVz8P5L/irgKZZBn73SxW5gCHgEOAxMOOcK3i4L+zbfb2/7JLDi8rZ4yf4auB8oecsrqO/+AjjgB2a2y8y2e+sC/bsdtNsaLDvOOWdmdTeP1cyagW8Af+ScmzKz+W312mdXfjDOVjNro/ychauq3KSKMbO3AEPOuV1m9tpqt+cyusM5d9LMuoFHzGzfwo1B/N0Oykh+uT0TdtDMegG870Pe+rr4OZhZlHLAf8k5901vdV33eSHn3ATwI8rlijYzmxtMLezbfL+97a3A6GVu6lLcDvyGmR0FvkK5ZPNZ6re/ADjnTnrfhyj/Q34TAf/dDkrI/wzY5J2ZbwDeRfk5sfXqO8B93uv7KNet59b/jndW/hZgcsF/A2uClYfsXwD2Ouf+64JNddtnADPr8kbwmFkj5fMQeymH/b3ebmf3e+7ncS/wr84r3NYC59zHnHNrnHP9lP++/qtz7t9Rp/0FMLMmM0vOvQbuBvYQ9N/tap/IWHBS4k3AAcp1zAeq3R4f+/V/gNNAnnJN7n2Ua5E/BA4CjwId3r5GeZbRYeBZYFu1238J/b2Dct3yl8Bu7+tN9dxnrx9bgF94/d4D/Km3fgPlh9gfAr4OxLz1cW/5kLd9Q7X7sIS+vxb4br331+vbM97Xc3M5FfTfbd3WQESkjgWlXCMiIhWgkBcRqWMKeRGROqaQFxGpYwp5EZE6ppAXEaljCnkRkTr2/wFiLT07r/1VgQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_train_item_popularity.reset_index(drop=True).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# predict\n",
    "def predict_popularity_score(df_user_item, item_popularity, user_popularity_score, user_weight=0.2):\n",
    "    \"\"\"Score user-item interactions from the fitted item and user popularity tables.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df_user_item : DataFrame with `story_id` and `customer_id` columns.\n",
    "    item_popularity : Series named `item_score`, indexed by story id.\n",
    "    user_popularity_score : Series named `user_score`, indexed by customer id.\n",
    "    user_weight : multiplier applied to `user_score` before it is added to `item_score`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    A new DataFrame: the input rows plus `item_score`, `user_score` and\n",
    "    `score = item_score + user_score * user_weight`.\n",
    "\n",
    "    Both merges are left joins, so stories/customers missing from the fitted tables\n",
    "    keep their rows. Note that each `fillna(0.0)` replaces every NaN in the frame,\n",
    "    not only the newly merged score column.\n",
    "    \"\"\"\n",
    "    with_item_score = pd.merge(\n",
    "        df_user_item, item_popularity,\n",
    "        how='left', left_on='story_id', right_index=True,\n",
    "    ).fillna(0.0)\n",
    "    with_user_score = pd.merge(\n",
    "        with_item_score, user_popularity_score,\n",
    "        how='left', left_on='customer_id', right_index=True,\n",
    "    ).fillna(0.0)\n",
    "    return with_user_score.assign(score=lambda x: x.item_score + x.user_score * user_weight)\n",
    "\n",
    "\n",
    "# Train and test are scored with the same tables, both fitted on the training data.\n",
    "df_train_predict = predict_popularity_score(\n",
    "    df_user_item_train, df_train_item_popularity, df_train_user_popularity_score)\n",
    "df_test_predict = predict_popularity_score(\n",
    "    df_user_item_test, df_train_item_popularity, df_train_user_popularity_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th>event_dttm</th>\n",
       "      <th>event</th>\n",
       "      <th>item_score</th>\n",
       "      <th>user_score</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>415192</th>\n",
       "      <td>843538</td>\n",
       "      <td>522</td>\n",
       "      <td>2018-03-29 11:30:44</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.673315</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>0.656007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>415188</th>\n",
       "      <td>843538</td>\n",
       "      <td>247</td>\n",
       "      <td>2018-03-29 11:30:44</td>\n",
       "      <td>view</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>-0.017308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>376367</th>\n",
       "      <td>749436</td>\n",
       "      <td>498</td>\n",
       "      <td>2018-03-31 04:47:12</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.421199</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>0.403891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>415190</th>\n",
       "      <td>843538</td>\n",
       "      <td>428</td>\n",
       "      <td>2018-04-03 05:13:38</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.231920</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>0.214612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>415189</th>\n",
       "      <td>843538</td>\n",
       "      <td>419</td>\n",
       "      <td>2018-04-03 05:13:38</td>\n",
       "      <td>view</td>\n",
       "      <td>0.065511</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>0.048203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64049</th>\n",
       "      <td>132061</td>\n",
       "      <td>314</td>\n",
       "      <td>2018-07-15 11:49:52</td>\n",
       "      <td>dislike</td>\n",
       "      <td>-3.246814</td>\n",
       "      <td>-2.009615</td>\n",
       "      <td>-3.648737</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>205344</th>\n",
       "      <td>387489</td>\n",
       "      <td>745</td>\n",
       "      <td>2018-07-15 11:52:38</td>\n",
       "      <td>view</td>\n",
       "      <td>-0.014721</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>-0.032028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>205345</th>\n",
       "      <td>387489</td>\n",
       "      <td>997</td>\n",
       "      <td>2018-07-15 11:52:38</td>\n",
       "      <td>view</td>\n",
       "      <td>-0.940392</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>-0.957700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>219993</th>\n",
       "      <td>414840</td>\n",
       "      <td>1000083</td>\n",
       "      <td>2018-07-15 11:53:04</td>\n",
       "      <td>view</td>\n",
       "      <td>1.060707</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>1.043399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>219955</th>\n",
       "      <td>414840</td>\n",
       "      <td>1295</td>\n",
       "      <td>2018-07-15 11:53:04</td>\n",
       "      <td>skip</td>\n",
       "      <td>-0.828836</td>\n",
       "      <td>-0.086538</td>\n",
       "      <td>-0.846144</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>354855 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        customer_id  story_id          event_dttm    event  item_score  \\\n",
       "415192       843538       522 2018-03-29 11:30:44     skip    0.673315   \n",
       "415188       843538       247 2018-03-29 11:30:44     view    0.000000   \n",
       "376367       749436       498 2018-03-31 04:47:12     skip    0.421199   \n",
       "415190       843538       428 2018-04-03 05:13:38     skip    0.231920   \n",
       "415189       843538       419 2018-04-03 05:13:38     view    0.065511   \n",
       "...             ...       ...                 ...      ...         ...   \n",
       "64049        132061       314 2018-07-15 11:49:52  dislike   -3.246814   \n",
       "205344       387489       745 2018-07-15 11:52:38     view   -0.014721   \n",
       "205345       387489       997 2018-07-15 11:52:38     view   -0.940392   \n",
       "219993       414840   1000083 2018-07-15 11:53:04     view    1.060707   \n",
       "219955       414840      1295 2018-07-15 11:53:04     skip   -0.828836   \n",
       "\n",
       "        user_score     score  \n",
       "415192   -0.086538  0.656007  \n",
       "415188   -0.086538 -0.017308  \n",
       "376367   -0.086538  0.403891  \n",
       "415190   -0.086538  0.214612  \n",
       "415189   -0.086538  0.048203  \n",
       "...            ...       ...  \n",
       "64049    -2.009615 -3.648737  \n",
       "205344   -0.086538 -0.032028  \n",
       "205345   -0.086538 -0.957700  \n",
       "219993   -0.086538  1.043399  \n",
       "219955   -0.086538 -0.846144  \n",
       "\n",
       "[354855 rows x 7 columns]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the scored training interactions (item_score, user_score and combined score).\n",
    "df_train_predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rocauc_by_cust(df):\n",
    "    \"\"\"Compute ROC AUC of `score` against like/dislike reactions, per customer.\n",
    "\n",
    "    Only rows whose `event` is 'dislike' or 'like' are used ('dislike' -> 0, 'like' -> 1);\n",
    "    all other events are dropped. Customers whose remaining rows contain a single\n",
    "    class get NaN, because ROC AUC is undefined for them.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : DataFrame with `customer_id`, `event` and `score` columns. It is not modified.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Series indexed by customer_id holding the per-customer AUC (or NaN).\n",
    "    \"\"\"\n",
    "    labels = {'dislike': 0, 'like': 1}\n",
    "    rated = df[df['event'].isin(list(labels))]\n",
    "    # assign() builds a new frame: the caller's data stays untouched and no\n",
    "    # chained-assignment warning is triggered on the filtered slice.\n",
    "    rated = rated.assign(event=rated['event'].map(labels))\n",
    "\n",
    "    def _auc(group):\n",
    "        if group['event'].nunique() > 1:\n",
    "            return roc_auc_score(group['event'], group['score'])\n",
    "        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.\n",
    "        return np.nan\n",
    "\n",
    "    return rated.groupby('customer_id').apply(_auc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7140948563228902"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean per-customer ROC AUC on the training interactions (mean() skips the NaN customers).\n",
    "train_auc_by_customer = rocauc_by_cust(df_train_predict)\n",
    "train_auc_by_customer.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7217124146908913"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean per-customer ROC AUC on the test interactions (mean() skips the NaN customers).\n",
    "test_auc_by_customer = rocauc_by_cust(df_test_predict)\n",
    "test_auc_by_customer.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "0.7204284251205677"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7206346775503243"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Test AUC restricted to customers that also appear in the training interactions.\n",
    "seen_in_train = df_test_predict['customer_id'].isin(df_train_predict['customer_id'])\n",
    "rocauc_by_cust(df_test_predict[seen_in_train]).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "0.7192049210481039"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7312056737588652"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Test AUC restricted to customers that do not appear in the training interactions.\n",
    "unseen_in_train = ~df_test_predict['customer_id'].isin(df_train_predict['customer_id'])\n",
    "rocauc_by_cust(df_test_predict[unseen_in_train]).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "0.7312056737588652"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# df_embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>v000</th>\n",
       "      <th>v001</th>\n",
       "      <th>v002</th>\n",
       "      <th>v003</th>\n",
       "      <th>v004</th>\n",
       "      <th>v005</th>\n",
       "      <th>v006</th>\n",
       "      <th>v007</th>\n",
       "      <th>v008</th>\n",
       "      <th>v009</th>\n",
       "      <th>...</th>\n",
       "      <th>v054</th>\n",
       "      <th>v055</th>\n",
       "      <th>v056</th>\n",
       "      <th>v057</th>\n",
       "      <th>v058</th>\n",
       "      <th>v059</th>\n",
       "      <th>v060</th>\n",
       "      <th>v061</th>\n",
       "      <th>v062</th>\n",
       "      <th>v063</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.045986</td>\n",
       "      <td>0.073066</td>\n",
       "      <td>-0.102657</td>\n",
       "      <td>-0.070000</td>\n",
       "      <td>-0.226388</td>\n",
       "      <td>0.021740</td>\n",
       "      <td>0.095810</td>\n",
       "      <td>0.140051</td>\n",
       "      <td>0.131922</td>\n",
       "      <td>-0.026116</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.005688</td>\n",
       "      <td>0.162406</td>\n",
       "      <td>-0.067965</td>\n",
       "      <td>-0.040175</td>\n",
       "      <td>0.164212</td>\n",
       "      <td>-0.099997</td>\n",
       "      <td>0.077077</td>\n",
       "      <td>0.011274</td>\n",
       "      <td>-0.008279</td>\n",
       "      <td>-0.003820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>0.154809</td>\n",
       "      <td>0.037429</td>\n",
       "      <td>-0.177134</td>\n",
       "      <td>-0.045597</td>\n",
       "      <td>-0.121371</td>\n",
       "      <td>-0.060306</td>\n",
       "      <td>0.174855</td>\n",
       "      <td>0.182637</td>\n",
       "      <td>0.093187</td>\n",
       "      <td>-0.010200</td>\n",
       "      <td>...</td>\n",
       "      <td>0.097886</td>\n",
       "      <td>0.177703</td>\n",
       "      <td>0.123618</td>\n",
       "      <td>-0.145646</td>\n",
       "      <td>0.152623</td>\n",
       "      <td>-0.057651</td>\n",
       "      <td>0.164589</td>\n",
       "      <td>-0.121165</td>\n",
       "      <td>-0.115062</td>\n",
       "      <td>-0.051746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>0.014153</td>\n",
       "      <td>-0.048872</td>\n",
       "      <td>-0.187590</td>\n",
       "      <td>0.035219</td>\n",
       "      <td>-0.189449</td>\n",
       "      <td>0.017896</td>\n",
       "      <td>0.180154</td>\n",
       "      <td>-0.005428</td>\n",
       "      <td>0.079310</td>\n",
       "      <td>0.065733</td>\n",
       "      <td>...</td>\n",
       "      <td>0.105551</td>\n",
       "      <td>0.161654</td>\n",
       "      <td>-0.068084</td>\n",
       "      <td>-0.094257</td>\n",
       "      <td>0.150516</td>\n",
       "      <td>-0.059367</td>\n",
       "      <td>-0.019642</td>\n",
       "      <td>-0.059673</td>\n",
       "      <td>-0.019760</td>\n",
       "      <td>0.078213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>0.003958</td>\n",
       "      <td>0.175994</td>\n",
       "      <td>-0.069661</td>\n",
       "      <td>0.079811</td>\n",
       "      <td>-0.202884</td>\n",
       "      <td>-0.170080</td>\n",
       "      <td>0.166775</td>\n",
       "      <td>0.042914</td>\n",
       "      <td>0.159432</td>\n",
       "      <td>-0.060284</td>\n",
       "      <td>...</td>\n",
       "      <td>0.018247</td>\n",
       "      <td>0.256953</td>\n",
       "      <td>-0.035306</td>\n",
       "      <td>-0.055328</td>\n",
       "      <td>0.184141</td>\n",
       "      <td>-0.125160</td>\n",
       "      <td>-0.011663</td>\n",
       "      <td>-0.024973</td>\n",
       "      <td>-0.033970</td>\n",
       "      <td>0.025868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>-0.117107</td>\n",
       "      <td>0.131219</td>\n",
       "      <td>-0.116869</td>\n",
       "      <td>0.079403</td>\n",
       "      <td>-0.202366</td>\n",
       "      <td>0.062064</td>\n",
       "      <td>0.164296</td>\n",
       "      <td>0.051221</td>\n",
       "      <td>0.113916</td>\n",
       "      <td>0.027527</td>\n",
       "      <td>...</td>\n",
       "      <td>0.004127</td>\n",
       "      <td>0.177279</td>\n",
       "      <td>0.014191</td>\n",
       "      <td>-0.062098</td>\n",
       "      <td>0.028283</td>\n",
       "      <td>-0.136526</td>\n",
       "      <td>-0.015416</td>\n",
       "      <td>0.072359</td>\n",
       "      <td>-0.042742</td>\n",
       "      <td>0.024230</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 64 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 v000      v001      v002      v003      v004      v005  \\\n",
       "customer_id                                                               \n",
       "15           0.045986  0.073066 -0.102657 -0.070000 -0.226388  0.021740   \n",
       "73           0.154809  0.037429 -0.177134 -0.045597 -0.121371 -0.060306   \n",
       "91           0.014153 -0.048872 -0.187590  0.035219 -0.189449  0.017896   \n",
       "144          0.003958  0.175994 -0.069661  0.079811 -0.202884 -0.170080   \n",
       "150         -0.117107  0.131219 -0.116869  0.079403 -0.202366  0.062064   \n",
       "\n",
       "                 v006      v007      v008      v009  ...      v054      v055  \\\n",
       "customer_id                                          ...                       \n",
       "15           0.095810  0.140051  0.131922 -0.026116  ... -0.005688  0.162406   \n",
       "73           0.174855  0.182637  0.093187 -0.010200  ...  0.097886  0.177703   \n",
       "91           0.180154 -0.005428  0.079310  0.065733  ...  0.105551  0.161654   \n",
       "144          0.166775  0.042914  0.159432 -0.060284  ...  0.018247  0.256953   \n",
       "150          0.164296  0.051221  0.113916  0.027527  ...  0.004127  0.177279   \n",
       "\n",
       "                 v056      v057      v058      v059      v060      v061  \\\n",
       "customer_id                                                               \n",
       "15          -0.067965 -0.040175  0.164212 -0.099997  0.077077  0.011274   \n",
       "73           0.123618 -0.145646  0.152623 -0.057651  0.164589 -0.121165   \n",
       "91          -0.068084 -0.094257  0.150516 -0.059367 -0.019642 -0.059673   \n",
       "144         -0.035306 -0.055328  0.184141 -0.125160 -0.011663 -0.024973   \n",
       "150          0.014191 -0.062098  0.028283 -0.136526 -0.015416  0.072359   \n",
       "\n",
       "                 v062      v063  \n",
       "customer_id                      \n",
       "15          -0.008279 -0.003820  \n",
       "73          -0.115062 -0.051746  \n",
       "91          -0.019760  0.078213  \n",
       "144         -0.033970  0.025868  \n",
       "150         -0.042742  0.024230  \n",
       "\n",
       "[5 rows x 64 columns]"
      ]
     },
     "execution_count": 298,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_embeddings = pd.read_pickle('/data/kireev-ia/data_open_ds/tinkoff_all_vectors.pickle').set_index('customer_id')\n",
    "df_embeddings.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature approach"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th>event_dttm</th>\n",
       "      <th>event</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>298085</th>\n",
       "      <td>588284</td>\n",
       "      <td>950</td>\n",
       "      <td>2018-05-22 18:52:24</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>343896</th>\n",
       "      <td>679459</td>\n",
       "      <td>592</td>\n",
       "      <td>2018-05-22 20:58:07</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>175247</th>\n",
       "      <td>325281</td>\n",
       "      <td>317</td>\n",
       "      <td>2018-05-22 21:13:42</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>175249</th>\n",
       "      <td>325281</td>\n",
       "      <td>352</td>\n",
       "      <td>2018-05-22 21:13:42</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266386</th>\n",
       "      <td>519549</td>\n",
       "      <td>956</td>\n",
       "      <td>2018-05-22 21:16:29</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67985</th>\n",
       "      <td>137202</td>\n",
       "      <td>1314</td>\n",
       "      <td>2018-07-15 11:45:57</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67974</th>\n",
       "      <td>137202</td>\n",
       "      <td>956</td>\n",
       "      <td>2018-07-15 11:45:57</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67970</th>\n",
       "      <td>137202</td>\n",
       "      <td>713</td>\n",
       "      <td>2018-07-15 11:45:57</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67984</th>\n",
       "      <td>137202</td>\n",
       "      <td>1295</td>\n",
       "      <td>2018-07-15 11:45:57</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64049</th>\n",
       "      <td>132061</td>\n",
       "      <td>314</td>\n",
       "      <td>2018-07-15 11:49:52</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>52175 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        customer_id  story_id          event_dttm  event\n",
       "298085       588284       950 2018-05-22 18:52:24    1.0\n",
       "343896       679459       592 2018-05-22 20:58:07    0.0\n",
       "175247       325281       317 2018-05-22 21:13:42    1.0\n",
       "175249       325281       352 2018-05-22 21:13:42    1.0\n",
       "266386       519549       956 2018-05-22 21:16:29    1.0\n",
       "...             ...       ...                 ...    ...\n",
       "67985        137202      1314 2018-07-15 11:45:57    1.0\n",
       "67974        137202       956 2018-07-15 11:45:57    1.0\n",
       "67970        137202       713 2018-07-15 11:45:57    0.0\n",
       "67984        137202      1295 2018-07-15 11:45:57    1.0\n",
       "64049        132061       314 2018-07-15 11:49:52    0.0\n",
       "\n",
       "[52175 rows x 4 columns]"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_f_train = df_user_item_train \\\n",
    "    .assign(event=lambda x: x.event.map({'dislike': 0, 'like': 1})) \\\n",
    "    [lambda x: x.event.isin([0, 1])]\n",
    "df_f_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[379,\n",
       " 644,\n",
       " 956,\n",
       " 1057,\n",
       " 1008,\n",
       " 419,\n",
       " 967,\n",
       " 449,\n",
       " 968,\n",
       " 950,\n",
       " 592,\n",
       " 1279,\n",
       " 1005,\n",
       " 980,\n",
       " 708,\n",
       " 1018,\n",
       " 885,\n",
       " 997,\n",
       " 1031,\n",
       " 1079,\n",
       " 959,\n",
       " 1276,\n",
       " 745,\n",
       " 1269,\n",
       " 420,\n",
       " 354,\n",
       " 1294,\n",
       " 1120,\n",
       " 1056,\n",
       " 1100,\n",
       " 409,\n",
       " 1023,\n",
       " 1261,\n",
       " 1145,\n",
       " 912,\n",
       " 448,\n",
       " 627,\n",
       " 1182,\n",
       " 1264,\n",
       " 1093,\n",
       " 978,\n",
       " 1020,\n",
       " 1039,\n",
       " 1046,\n",
       " 975,\n",
       " 1160,\n",
       " 1152,\n",
       " 544,\n",
       " 954,\n",
       " 1000,\n",
       " 1092,\n",
       " 1214,\n",
       " 1101,\n",
       " 1040,\n",
       " 877,\n",
       " 1243,\n",
       " 1272,\n",
       " 875,\n",
       " 999,\n",
       " 992,\n",
       " 537,\n",
       " 1141,\n",
       " 1095,\n",
       " 949,\n",
       " 1058,\n",
       " 1074,\n",
       " 794,\n",
       " 1029,\n",
       " 1184,\n",
       " 1087,\n",
       " 538,\n",
       " 823,\n",
       " 314,\n",
       " 543,\n",
       " 977,\n",
       " 1180,\n",
       " 1013,\n",
       " 784,\n",
       " 586,\n",
       " 893,\n",
       " 1223,\n",
       " 610,\n",
       " 1037,\n",
       " 1310,\n",
       " 293,\n",
       " 819,\n",
       " 1311,\n",
       " 382,\n",
       " 870,\n",
       " 1102,\n",
       " 789,\n",
       " 1157,\n",
       " 734,\n",
       " 480,\n",
       " 582,\n",
       " 1247,\n",
       " 1289,\n",
       " 1003,\n",
       " 1147,\n",
       " 938,\n",
       " 1025,\n",
       " 958,\n",
       " 607,\n",
       " 1181,\n",
       " 820,\n",
       " 1173,\n",
       " 1170,\n",
       " 1002,\n",
       " 1217,\n",
       " 516,\n",
       " 1009,\n",
       " 800,\n",
       " 1049,\n",
       " 988,\n",
       " 990,\n",
       " 534,\n",
       " 924,\n",
       " 1146,\n",
       " 312,\n",
       " 580,\n",
       " 1004,\n",
       " 961,\n",
       " 693,\n",
       " 692,\n",
       " 985,\n",
       " 1237,\n",
       " 1071,\n",
       " 570,\n",
       " 1015,\n",
       " 1042,\n",
       " 1293,\n",
       " 1045,\n",
       " 1043,\n",
       " 770,\n",
       " 1314,\n",
       " 454,\n",
       " 1026,\n",
       " 897,\n",
       " 919,\n",
       " 933,\n",
       " 650,\n",
       " 499,\n",
       " 498,\n",
       " 1024,\n",
       " 995,\n",
       " 973,\n",
       " 874,\n",
       " 779,\n",
       " 470,\n",
       " 1240,\n",
       " 1221,\n",
       " 517,\n",
       " 620,\n",
       " 812,\n",
       " 771,\n",
       " 916,\n",
       " 1194,\n",
       " 1033,\n",
       " 1198,\n",
       " 1070,\n",
       " 1315,\n",
       " 781,\n",
       " 1255,\n",
       " 1126,\n",
       " 628,\n",
       " 845,\n",
       " 898,\n",
       " 1215,\n",
       " 981,\n",
       " 846,\n",
       " 1254,\n",
       " 748,\n",
       " 317,\n",
       " 496,\n",
       " 1253,\n",
       " 936,\n",
       " 1064,\n",
       " 838,\n",
       " 1226,\n",
       " 955,\n",
       " 921,\n",
       " 348,\n",
       " 663,\n",
       " 848,\n",
       " 1169,\n",
       " 613,\n",
       " 1048,\n",
       " 910,\n",
       " 839,\n",
       " 964,\n",
       " 1016,\n",
       " 807,\n",
       " 732,\n",
       " 1076,\n",
       " 1060,\n",
       " 1286,\n",
       " 1209,\n",
       " 798,\n",
       " 906,\n",
       " 892,\n",
       " 1012,\n",
       " 1001,\n",
       " 1112,\n",
       " 1122,\n",
       " 1065,\n",
       " 835,\n",
       " 984,\n",
       " 349,\n",
       " 1178,\n",
       " 744,\n",
       " 730,\n",
       " 923,\n",
       " 918,\n",
       " 458,\n",
       " 991,\n",
       " 951,\n",
       " 715,\n",
       " 1156,\n",
       " 1298,\n",
       " 900,\n",
       " 1260,\n",
       " 539,\n",
       " 1227,\n",
       " 811,\n",
       " 731,\n",
       " 1275,\n",
       " 347,\n",
       " 1000074,\n",
       " 1151,\n",
       " 1206,\n",
       " 1190,\n",
       " 814,\n",
       " 1291,\n",
       " 1210,\n",
       " 979,\n",
       " 821,\n",
       " 1038,\n",
       " 1017,\n",
       " 1007,\n",
       " 681,\n",
       " 492,\n",
       " 405,\n",
       " 1242,\n",
       " 428,\n",
       " 1307,\n",
       " 785,\n",
       " 917,\n",
       " 804,\n",
       " 522,\n",
       " 1284,\n",
       " 626,\n",
       " 1143,\n",
       " 600,\n",
       " 589,\n",
       " 808,\n",
       " 1306,\n",
       " 905,\n",
       " 532,\n",
       " 706,\n",
       " 868,\n",
       " 1085,\n",
       " 926,\n",
       " 1176,\n",
       " 540,\n",
       " 486,\n",
       " 1078,\n",
       " 774,\n",
       " 1248,\n",
       " 987,\n",
       " 695,\n",
       " 773,\n",
       " 1295,\n",
       " 1125,\n",
       " 962,\n",
       " 833,\n",
       " 647,\n",
       " 793,\n",
       " 1123,\n",
       " 986,\n",
       " 595,\n",
       " 1277,\n",
       " 982,\n",
       " 616,\n",
       " 521,\n",
       " 1127,\n",
       " 426,\n",
       " 1189,\n",
       " 381,\n",
       " 1075,\n",
       " 482,\n",
       " 1135,\n",
       " 1245,\n",
       " 1211,\n",
       " 792,\n",
       " 483,\n",
       " 830,\n",
       " 939,\n",
       " 323,\n",
       " 443,\n",
       " 1080,\n",
       " 952,\n",
       " 488,\n",
       " 749,\n",
       " 596,\n",
       " 311,\n",
       " 809,\n",
       " 1220,\n",
       " 1188,\n",
       " 953,\n",
       " 994,\n",
       " 993,\n",
       " 442,\n",
       " 1014,\n",
       " 303,\n",
       " 500,\n",
       " 1312,\n",
       " 1105,\n",
       " 796,\n",
       " 837,\n",
       " 857,\n",
       " 1225,\n",
       " 768,\n",
       " 602,\n",
       " 834,\n",
       " 1000012,\n",
       " 623,\n",
       " 1313,\n",
       " 1109,\n",
       " 475,\n",
       " 858,\n",
       " 594,\n",
       " 944,\n",
       " 862,\n",
       " 844,\n",
       " 1287,\n",
       " 904,\n",
       " 1028,\n",
       " 876,\n",
       " 622,\n",
       " 459,\n",
       " 423,\n",
       " 1059,\n",
       " 1251,\n",
       " 352,\n",
       " 847,\n",
       " 652,\n",
       " 856,\n",
       " 1230,\n",
       " 599,\n",
       " 1154,\n",
       " 806,\n",
       " 422,\n",
       " 1136,\n",
       " 866,\n",
       " 491,\n",
       " 1140,\n",
       " 859,\n",
       " 799,\n",
       " 394,\n",
       " 1000010,\n",
       " 960,\n",
       " 1052,\n",
       " 746,\n",
       " 1010,\n",
       " 653,\n",
       " 1158,\n",
       " 920,\n",
       " 1252,\n",
       " 1000040,\n",
       " 1006,\n",
       " 872,\n",
       " 871,\n",
       " 1246,\n",
       " 932,\n",
       " 676,\n",
       " 1000054,\n",
       " 1019,\n",
       " 372,\n",
       " 855,\n",
       " 696,\n",
       " 1280,\n",
       " 380,\n",
       " 816,\n",
       " 945,\n",
       " 849,\n",
       " 1032,\n",
       " 657,\n",
       " 1000073,\n",
       " 957,\n",
       " 894,\n",
       " 308,\n",
       " 983,\n",
       " 551,\n",
       " 460,\n",
       " 566,\n",
       " 901,\n",
       " 1103,\n",
       " 1155,\n",
       " 1000095,\n",
       " 1268]"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ix_items = df_f_train['story_id'].value_counts(normalize=True).cumsum().iloc[:400].index.tolist()\n",
    "ix_items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 394,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_features(df):\n",
    "    df = df_cust_valid.set_index('customer_id')\n",
    "    \n",
    "    X_features = []\n",
    "    \n",
    "    col_product = [col for col in df.columns if col.startswith('product_')]\n",
    "    for val in ['CLS', 'OPN', 'UTL']:\n",
    "        X_features.append(df[col_product].eq(val).astype(int))\n",
    "        \n",
    "    for val in ['M', 'F']:\n",
    "        X_features.append(df[['gender_cd']].eq(val).astype(int))\n",
    "\n",
    "    X_features.append(df[['age']].fillna(30.0).astype(float).values)\n",
    "    \n",
    "    for val in ['MAR', 'UNM', 'DIV', 'CIV', 'WID', 'DLW']:\n",
    "        X_features.append(df[['marital_status_cd']].eq(val).astype(int))\n",
    "        \n",
    "    X_features.append(\n",
    "        (pd.Timestamp('2019-10-01') - pd.to_datetime(df['first_session_dttm'])).dt.days\n",
    "        .fillna(0).values.reshape(-1, 1) / 365 * 5\n",
    "    )\n",
    "    \n",
    "    for val in ['0.0', 'nan', '1.0', '2.0', '3.0']:\n",
    "        X_features.append(df[['children_cnt']].clip(0, 3).astype(str).eq(val).astype(int))\n",
    "        \n",
    "    for val in [22, 16, 17, 2, 11, 20, 4, 1, 8, 3, 19, 12, 18, 7, 14, 21, 13, 15, 9, 5]:\n",
    "        X_features.append(df[['job_position_cd']].eq(val).astype(int))\n",
    "    \n",
    "    _t = df_user_item_train.pivot_table(\n",
    "        index='customer_id', columns='event',\n",
    "        values='event_dttm', aggfunc='count',\n",
    "    ).reindex(index=df.index, columns=['dislike', 'like', 'skip', 'view'])\n",
    "    X_features.append(_t.fillna(-1).values)\n",
    "    \n",
    "    _t = _t.fillna(0) + (df_user_item_train.assign(cnt=1).groupby('event')['cnt'].sum() / len(df_user_item_train))\n",
    "    _t = _t.div(_t.sum(axis=1), axis=0)\n",
    "    X_features.append(_t.values)\n",
    "    \n",
    "    # X_features.append(df_embeddings.reindex(index=df.index).fillna(0.0).values)\n",
    "    \n",
    "    _t = df_trans \\\n",
    "        [lambda x: x.merchant_mcc.isin(df_trans['merchant_mcc'].value_counts().iloc[:300].index)] \\\n",
    "        .pivot_table(index='customer_id', columns='merchant_mcc', values='transaction_amt', aggfunc='count') \\\n",
    "        .reindex(index=df.index) \\\n",
    "        .fillna(0)\n",
    "    X_features.append(_t.values)\n",
    "    _t = _t.div(_t.sum(axis=1) + 0.1, axis=0)\n",
    "    X_features.append(_t.values)\n",
    "    \n",
    "    df = pd.DataFrame(index=df.index, data=np.concatenate(X_features, axis=1))\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 395,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>654</th>\n",
       "      <th>655</th>\n",
       "      <th>656</th>\n",
       "      <th>657</th>\n",
       "      <th>658</th>\n",
       "      <th>659</th>\n",
       "      <th>660</th>\n",
       "      <th>661</th>\n",
       "      <th>662</th>\n",
       "      <th>663</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>234305</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.003998</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>914339</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>895631</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.025940</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.038911</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>954837</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.020394</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.006798</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>391590</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>750486</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>883189</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>760673</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49313</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.015848</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>187694</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>54245 rows × 664 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             0    1    2    3    4    5    6    7    8    9    ...  654  655  \\\n",
       "customer_id                                                    ...             \n",
       "234305       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "914339       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "895631       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "954837       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "391590       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "...          ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   \n",
       "750486       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "883189       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "760673       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "49313        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "187694       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   \n",
       "\n",
       "             656       657  658  659  660       661  662  663  \n",
       "customer_id                                                    \n",
       "234305       0.0  0.003998  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "914339       0.0  0.000000  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "895631       0.0  0.025940  0.0  0.0  0.0  0.038911  0.0  0.0  \n",
       "954837       0.0  0.020394  0.0  0.0  0.0  0.006798  0.0  0.0  \n",
       "391590       0.0  0.000000  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "...          ...       ...  ...  ...  ...       ...  ...  ...  \n",
       "750486       0.0  0.000000  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "883189       0.0  0.000000  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "760673       0.0  0.000000  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "49313        0.0  0.015848  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "187694       0.0  0.000000  0.0  0.0  0.0  0.000000  0.0  0.0  \n",
       "\n",
       "[54245 rows x 664 columns]"
      ]
     },
     "execution_count": 395,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_cust_features = make_features(df_cust_valid)\n",
    "df_cust_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 396,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm.autonotebook import tqdm\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from xgboost import XGBClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 398,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5a77e4ef54034c08b507367790dac6cf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=400), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "models = {}\n",
    "for story_id in tqdm(ix_items):\n",
    "    _i_train = df_f_train[lambda x: x.story_id.eq(story_id)].set_index('customer_id')\n",
    "    \n",
    "    if _i_train['event'].nunique() == 1:\n",
    "        continue\n",
    "        \n",
    "    # model = LogisticRegression(solver='liblinear')\n",
    "    model = RandomForestClassifier(n_estimators=300, max_depth=3)\n",
    "    # model = XGBClassifier(booster='gblinear', n_estimators=100)\n",
    "    # model = XGBClassifier(booster='gbtree', n_estimators=100, max_depth=2)\n",
    "    model.fit(df_cust_features.reindex(index=_i_train.index).values, _i_train['event'])\n",
    "    models[story_id] = model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 399,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "_i_train = df_f_train.set_index('customer_id')\n",
    "\n",
    "model = RandomForestClassifier(n_estimators=300, max_depth=3)\n",
    "model.fit(df_cust_features.reindex(index=_i_train.index).values, _i_train['event'])\n",
    "models['common_model'] = model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 400,
   "metadata": {},
   "outputs": [],
   "source": [
    "# predict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "df_train_predict = []\n",
    "\n",
    "for story_id in tqdm(df_user_item_train['story_id'].unique()):\n",
    "    _i_train = df_user_item_train[lambda x: x.story_id.eq(story_id)].set_index('customer_id')\n",
    "    \n",
    "    if story_id in models:\n",
    "        model = models[story_id]\n",
    "    else:\n",
    "        model = models['common_model']\n",
    "    \n",
    "    if len(model.classes_) == 1:\n",
    "        df_train_predict.append(\n",
    "            _i_train.assign(score=model.classes_[0])\n",
    "        )\n",
    "    else:\n",
    "        df_train_predict.append(\n",
    "            _i_train.assign(score=model.predict_proba(\n",
    "                df_cust_features.reindex(index=_i_train.index).values)[:, 1])\n",
    "        )\n",
    "        \n",
    "df_train_predict = pd.concat(df_train_predict, axis=0).reset_index()\n",
    "df_train_predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 401,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConstModel:\n",
    "    @property\n",
    "    def classes_(self):\n",
    "        return np.array([0, 1])\n",
    "    \n",
    "    def predict_proba(self, X):\n",
    "        return np.full((len(X), 1), 0.90) * np.array([[1, 1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 402,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "85fd80193d1344eb88cc8cbe472acd38",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=707), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th>event_dttm</th>\n",
       "      <th>event</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>678851</td>\n",
       "      <td>1314</td>\n",
       "      <td>2018-07-15 11:53:19</td>\n",
       "      <td>view</td>\n",
       "      <td>0.854216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>650973</td>\n",
       "      <td>1314</td>\n",
       "      <td>2018-07-15 12:02:10</td>\n",
       "      <td>like</td>\n",
       "      <td>0.928626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>488457</td>\n",
       "      <td>1314</td>\n",
       "      <td>2018-07-15 12:02:37</td>\n",
       "      <td>view</td>\n",
       "      <td>0.865151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>69969</td>\n",
       "      <td>1314</td>\n",
       "      <td>2018-07-15 12:16:23</td>\n",
       "      <td>view</td>\n",
       "      <td>0.863857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>159256</td>\n",
       "      <td>1314</td>\n",
       "      <td>2018-07-15 12:38:56</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.862513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118281</th>\n",
       "      <td>695789</td>\n",
       "      <td>1403</td>\n",
       "      <td>2018-07-31 19:04:15</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118282</th>\n",
       "      <td>372812</td>\n",
       "      <td>1403</td>\n",
       "      <td>2018-07-31 19:28:31</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118283</th>\n",
       "      <td>177503</td>\n",
       "      <td>1403</td>\n",
       "      <td>2018-07-31 19:40:05</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118284</th>\n",
       "      <td>1007199</td>\n",
       "      <td>1405</td>\n",
       "      <td>2018-07-31 18:23:35</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118285</th>\n",
       "      <td>714105</td>\n",
       "      <td>1405</td>\n",
       "      <td>2018-07-31 18:40:21</td>\n",
       "      <td>skip</td>\n",
       "      <td>0.900000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>118286 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        customer_id  story_id          event_dttm event     score\n",
       "0            678851      1314 2018-07-15 11:53:19  view  0.854216\n",
       "1            650973      1314 2018-07-15 12:02:10  like  0.928626\n",
       "2            488457      1314 2018-07-15 12:02:37  view  0.865151\n",
       "3             69969      1314 2018-07-15 12:16:23  view  0.863857\n",
       "4            159256      1314 2018-07-15 12:38:56  skip  0.862513\n",
       "...             ...       ...                 ...   ...       ...\n",
       "118281       695789      1403 2018-07-31 19:04:15  skip  0.900000\n",
       "118282       372812      1403 2018-07-31 19:28:31  skip  0.900000\n",
       "118283       177503      1403 2018-07-31 19:40:05  skip  0.900000\n",
       "118284      1007199      1405 2018-07-31 18:23:35  skip  0.900000\n",
       "118285       714105      1405 2018-07-31 18:40:21  skip  0.900000\n",
       "\n",
       "[118286 rows x 5 columns]"
      ]
     },
     "execution_count": 402,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test_predict = []\n",
    "\n",
    "for story_id in tqdm(df_user_item_test['story_id'].unique()):\n",
    "    _i_test = df_user_item_test[lambda x: x.story_id.eq(story_id)].set_index('customer_id')\n",
    "    \n",
    "    if story_id in models:\n",
    "        model = models[story_id]\n",
    "    else:\n",
    "        model = models['common_model']\n",
    "        model = ConstModel()\n",
    "        \n",
    "    if len(model.classes_) == 1:\n",
    "        df_test_predict.append(\n",
    "            _i_test.assign(score=model.classes_[0])\n",
    "        )\n",
    "    else:\n",
    "        df_test_predict.append(\n",
    "            _i_test.assign(score=model.predict_proba(\n",
    "                df_cust_features.reindex(index=_i_test.index).values)[:, 1])\n",
    "        )\n",
    "        \n",
    "df_test_predict = pd.concat(df_test_predict, axis=0).reset_index()\n",
    "df_test_predict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "rocauc_by_cust(df_train_predict).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 403,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "- all:  0.7101059140036896\n",
      "- warm: 0.7057781635000825\n",
      "- cold: 0.7482269503546098\n",
      "\n"
     ]
    }
   ],
   "source": [
    "_a = rocauc_by_cust(df_test_predict).mean()\n",
    "_w = rocauc_by_cust(df_test_predict[lambda x: x.customer_id.isin(df_train_predict.customer_id)]).mean()\n",
    "_c = rocauc_by_cust(df_test_predict[lambda x: ~x.customer_id.isin(df_train_predict.customer_id)]).mean()\n",
    "\n",
    "print(f\"\"\"\n",
    "- all:  {_a}\n",
    "- warm: {_w}\n",
    "- cold: {_c}\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```\n",
    "Cust Feature Model, Popularity Feature, Trx Agg, (n_estimators=300, max_depth=3):\n",
    "- all:  0.7101059140036896\n",
    "- warm: 0.7057781635000825\n",
    "- cold: 0.7482269503546098\n",
    "\n",
    "Cust Feature Model, Popularity Feature, Embeddings, (n_estimators=300, max_depth=3):\n",
    "- all:  0.6894582850396903\n",
    "- warm: 0.6861681225522476\n",
    "- cold: 0.7184397163120567\n",
    "\n",
    "Cust Feature Model, Popularity Feature (n_estimators=300, max_depth=3):\n",
    "- all:  0.7061087017011921\n",
    "- warm: 0.6967780470634047\n",
    "- cold: 0.7882978723404255\n",
    "\n",
    "Cust, Popularity Feature (n_estimators=300, max_depth=3):\n",
    "- all:  0.5941552290203047\n",
    "- warm: 0.5992887936675373\n",
    "- cold: 0.5489361702127659\n",
    "\n",
    "Cust Feature Model (n_estimators=300, max_depth=3, max_features=10):\n",
    "- all:  0.7109305341996397\n",
    "- warm: 0.705287382285106\n",
    "- cold: 0.7606382978723404\n",
    "\n",
    "Cust Feature Model, Popularity Feature, Linear:\n",
    "- all:  0.7206963192876485\n",
    "- warm: 0.7180539529587907\n",
    "- cold: 0.7439716312056738\n",
    "\n",
    "Popular model:\n",
    "- all:  0.7204284251205677\n",
    "- warm: 0.7192049210481039\n",
    "- cold: 0.7312056737588652\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "automl_env",
   "language": "python",
   "name": "automl_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
