{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data directory. Override it with the TINKOFF_DATA_PATH environment variable;\n",
    "# the original location is kept as the default so existing runs are unchanged.\n",
    "# os.path.join(..., '') guarantees a trailing separator, so file names can be\n",
    "# appended to PROJECT_PATH directly.\n",
    "PROJECT_PATH = os.path.join(\n",
    "    os.environ.get('TINKOFF_DATA_PATH', '/data/kireev-ia/data_open_ds/data-like-tinkoff-2019/'),\n",
    "    ''\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "customer_test.csv\t    tinkoff_all_vectors.pickle\r\n",
      "customer_train.csv\t    TinkoffDataView.ipynb\r\n",
      "Hack_desc.xlsx\t\t    Tinkoff_Metric_Learning-Copy1.ipynb\r\n",
      "sample_submit.csv\t    Tinkoff_Metric_Learning.ipynb\r\n",
      "stories_description.csv     tinkoff_trx.p\r\n",
      "stories_reaction_test.csv   transactions.csv\r\n",
      "stories_reaction_train.csv\r\n"
     ]
    }
   ],
   "source": [
    "!ls {PROJECT_PATH}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# customer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>product_0</th>\n",
       "      <th>product_1</th>\n",
       "      <th>product_2</th>\n",
       "      <th>product_3</th>\n",
       "      <th>product_4</th>\n",
       "      <th>product_5</th>\n",
       "      <th>product_6</th>\n",
       "      <th>gender_cd</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status_cd</th>\n",
       "      <th>children_cnt</th>\n",
       "      <th>first_session_dttm</th>\n",
       "      <th>job_position_cd</th>\n",
       "      <th>job_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>894436</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>30.0</td>\n",
       "      <td>MAR</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-20 09:10:16</td>\n",
       "      <td>1</td>\n",
       "      <td>Неруководящий сотрудник - обсл. Персонал</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>524526</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>20.0</td>\n",
       "      <td>UNM</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2017-03-29 20:38:45</td>\n",
       "      <td>16</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>498134</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>25.0</td>\n",
       "      <td>UNM</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-12 11:25:06</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>278941</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>CLS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>UTL</td>\n",
       "      <td>M</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-02-21 18:47:51</td>\n",
       "      <td>16</td>\n",
       "      <td>Неруководящий сотрудник - специалист</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>877312</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>40.0</td>\n",
       "      <td>MAR</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-07 11:17:02</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id product_0 product_1 product_2 product_3 product_4 product_5  \\\n",
       "0       894436       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "1       524526       NaN       UTL       NaN       NaN       NaN       UTL   \n",
       "2       498134       NaN       UTL       NaN       NaN       NaN       NaN   \n",
       "3       278941       NaN       NaN       UTL       CLS       NaN       UTL   \n",
       "4       877312       NaN       UTL       NaN       NaN       NaN       NaN   \n",
       "\n",
       "  product_6 gender_cd   age marital_status_cd  children_cnt  \\\n",
       "0       NaN         M  30.0               MAR           0.0   \n",
       "1       NaN         F  20.0               UNM           0.0   \n",
       "2       NaN         F  25.0               UNM           0.0   \n",
       "3       UTL         M  25.0               NaN           NaN   \n",
       "4       NaN         F  40.0               MAR           0.0   \n",
       "\n",
       "    first_session_dttm  job_position_cd  \\\n",
       "0  2018-03-20 09:10:16                1   \n",
       "1  2017-03-29 20:38:45               16   \n",
       "2  2018-03-12 11:25:06               22   \n",
       "3  2016-02-21 18:47:51               16   \n",
       "4  2018-03-07 11:17:02               22   \n",
       "\n",
       "                                  job_title  \n",
       "0  Неруководящий сотрудник - обсл. Персонал  \n",
       "1                                       NaN  \n",
       "2                                       NaN  \n",
       "3      Неруководящий сотрудник - специалист  \n",
       "4                                       NaN  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load customer_train.csv from the project directory and preview the first five rows.\n",
    "df_cust_train = pd.read_csv(os.path.join(PROJECT_PATH, 'customer_train.csv'))\n",
    "df_cust_train.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>product_0</th>\n",
       "      <th>product_1</th>\n",
       "      <th>product_2</th>\n",
       "      <th>product_3</th>\n",
       "      <th>product_4</th>\n",
       "      <th>product_5</th>\n",
       "      <th>product_6</th>\n",
       "      <th>gender_cd</th>\n",
       "      <th>age</th>\n",
       "      <th>marital_status_cd</th>\n",
       "      <th>children_cnt</th>\n",
       "      <th>first_session_dttm</th>\n",
       "      <th>job_position_cd</th>\n",
       "      <th>job_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>234305</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2015-10-29 09:59:10</td>\n",
       "      <td>22</td>\n",
       "      <td>Менеджер проектов</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>914339</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-04-09 08:51:54</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>895631</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-03-22 10:38:33</td>\n",
       "      <td>16</td>\n",
       "      <td>Менеджер по продажам</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>954837</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2018-05-09 18:24:32</td>\n",
       "      <td>16</td>\n",
       "      <td>Инженер</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>391590</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>30.0</td>\n",
       "      <td>UNM</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2016-10-08 05:29:20</td>\n",
       "      <td>20</td>\n",
       "      <td>сборщик мебели</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id product_0 product_1 product_2 product_3 product_4 product_5  \\\n",
       "0       234305       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "1       914339       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "2       895631       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "3       954837       NaN       NaN       NaN       NaN       NaN       UTL   \n",
       "4       391590       NaN       UTL       NaN       NaN       NaN       NaN   \n",
       "\n",
       "  product_6 gender_cd   age marital_status_cd  children_cnt  \\\n",
       "0       NaN         F  25.0               NaN           0.0   \n",
       "1       NaN         M  15.0               NaN           0.0   \n",
       "2       NaN         M  20.0               NaN           0.0   \n",
       "3       NaN         F  25.0               NaN           0.0   \n",
       "4       NaN         M  30.0               UNM           0.0   \n",
       "\n",
       "    first_session_dttm  job_position_cd             job_title  \n",
       "0  2015-10-29 09:59:10               22     Менеджер проектов  \n",
       "1  2018-04-09 08:51:54               22                   NaN  \n",
       "2  2018-03-22 10:38:33               16  Менеджер по продажам  \n",
       "3  2018-05-09 18:24:32               16               Инженер  \n",
       "4  2016-10-08 05:29:20               20        сборщик мебели  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load customer_test.csv from the project directory and preview the first five rows.\n",
    "df_cust_valid = pd.read_csv(os.path.join(PROJECT_PATH, 'customer_test.csv'))\n",
    "df_cust_valid.head(n=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# stories_reaction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th>event_dttm</th>\n",
       "      <th>event</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15</td>\n",
       "      <td>138</td>\n",
       "      <td>2018-07-24 15:33:22</td>\n",
       "      <td>view</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15</td>\n",
       "      <td>202</td>\n",
       "      <td>2018-06-04 08:08:08</td>\n",
       "      <td>skip</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15</td>\n",
       "      <td>222</td>\n",
       "      <td>2018-06-17 13:44:45</td>\n",
       "      <td>skip</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>379</td>\n",
       "      <td>2018-05-23 05:41:43</td>\n",
       "      <td>skip</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>544</td>\n",
       "      <td>2018-07-25 02:16:29</td>\n",
       "      <td>view</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  story_id           event_dttm event\n",
       "0           15       138  2018-07-24 15:33:22  view\n",
       "1           15       202  2018-06-04 08:08:08  skip\n",
       "2           15       222  2018-06-17 13:44:45  skip\n",
       "3           15       379  2018-05-23 05:41:43  skip\n",
       "4           15       544  2018-07-25 02:16:29  view"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load stories_reaction_train.csv from the project directory and preview the first five rows.\n",
    "df_user_item_train = pd.read_csv(os.path.join(PROJECT_PATH, 'stories_reaction_train.csv'))\n",
    "df_user_item_train.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>story_id</th>\n",
       "      <th>event_dttm</th>\n",
       "      <th>answer_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>466906</td>\n",
       "      <td>1152</td>\n",
       "      <td>2018-08-01 00:00:46</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>466906</td>\n",
       "      <td>1355</td>\n",
       "      <td>2018-08-01 00:00:46</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>146395</td>\n",
       "      <td>537</td>\n",
       "      <td>2018-08-01 00:02:08</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>897303</td>\n",
       "      <td>915</td>\n",
       "      <td>2018-08-01 00:02:30</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>50094</td>\n",
       "      <td>1415</td>\n",
       "      <td>2018-08-01 00:03:13</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  story_id           event_dttm  answer_id\n",
       "0       466906      1152  2018-08-01 00:00:46          0\n",
       "1       466906      1355  2018-08-01 00:00:46          1\n",
       "2       146395       537  2018-08-01 00:02:08          2\n",
       "3       897303       915  2018-08-01 00:02:30          3\n",
       "4        50094      1415  2018-08-01 00:03:13          4"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load stories_reaction_test.csv from the project directory and preview the first five rows.\n",
    "df_user_item_valid = pd.read_csv(os.path.join(PROJECT_PATH, 'stories_reaction_test.csv'))\n",
    "df_user_item_valid.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the event timestamps (read as strings from CSV) into datetimes, in place, for both frames.\n",
    "for _events in (df_user_item_train, df_user_item_valid):\n",
    "    _events['event_dttm'] = pd.to_datetime(_events['event_dttm'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "min   2018-03-29 11:30:44\n",
       "max   2018-07-31 23:58:14\n",
       "Name: event_dttm, dtype: datetime64[ns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_user_item_train.event_dttm.aggregate(['min', 'max'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "min   2018-08-01 00:00:46\n",
       "max   2018-08-22 20:49:33\n",
       "Name: event_dttm, dtype: datetime64[ns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_user_item_valid.event_dttm.aggregate(['min', 'max'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_dates(s_dt):\n",
    "    \"\"\"Count how many timestamps fall into each calendar month.\n",
    "\n",
    "    Despite the name, nothing is drawn here; the caller plots or displays\n",
    "    the returned counts.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    s_dt : pandas.Series\n",
    "        Datetime-like series (must support the ``.dt`` accessor).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.Series\n",
    "        Number of events per month, indexed by the first day of the month\n",
    "        (at midnight) and sorted chronologically.\n",
    "    \"\"\"\n",
    "    day_start = s_dt.dt.floor('D')\n",
    "    # Step back (day-of-month - 1) days so every date lands on the 1st of its month.\n",
    "    month_start = day_start - pd.to_timedelta(day_start.dt.day - 1, 'D')\n",
    "    # value_counts() orders by frequency; sort by date so the result reads as a timeline.\n",
    "    return month_start.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7fed95926a20>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAECCAYAAADQEYGEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU1f3/8deHsAQSwhb2LWERBDQsEdyLO9pW3Kutda3Lz2q1RSsurVW7qHWp1tYWv1q1VRQEK7hRilqt1oXFsAsICGENBAIkQLbP74+5YEAkIUxyZ3k/H495zOTcO3c+Mwx5zzn3zIm5OyIiIgeqQdgFiIhIfFKAiIhIrShARESkVhQgIiJSKwoQERGplYZhFxBtmZmZnpWVFXYZIiJxZcaMGRvcve2B3CfhAiQrK4vp06eHXYaISFwxsy8P9D4awhIRkVpRgIiISK0oQEREpFYUICIiUisKEBERqRUFiIiI1IoCREREaiXhvgciIiLV215awfKNxSzbELnUhgJERCRBlVdUsmrzdpZuKGZpQTHLNmyLBEZBMauLdhz08RUgIiJxzN0p2LaTZQWRnkTVsFhRWEJZxVd/NDAjtSE92qZzZI82ZGemkd02jezMNLLapJF+/4E/tgJERCQObN1RxvINJSzdsC0IiK8u23aW796vccMGZLdJo3e75pzavwPZmWn0yIwEReu0xphZ1GpSgIiIxIjS8kpWFJZEehIF23b3KJZtKKZg687d+5lBl1ZNyc5MZ0j3VpHeRHDp1LIpKQ2iFxL7owAREalHlZXO2i07qgw3bdvdk1hZWELlVyNOZKY3JjszjRP6tCU7Mz3Sm2ibRrfWzUhtlBLekwgoQERE6sDmktJI76GgmKXByeulBcUs31jMjrLK3fs1a5xCdmYah3VuwcicTsF5iXSy26TRolmjEJ9B9RQgIiK1tKMsmApbUPy1mU6bSsp279ewgdGtdTOyM9M4tlfm7pPXPTLTaZ/RJKrnJepTtQFiZl2B54D2gANj3P1RM2sNvARkAcuBC9x9k0VeiUeBM4AS4DJ3nxkc61LgzuDQv3b3Z4P2IcAzQFPgDeBGd/dveoyDftYiIjVUUems2rR9j17EriGnVZu377Fvh4xUsjPTOP2wjrtPXGdnptG1dTMapSTe97Zr0gMpB0a5+0wzaw7MMLOpwGXANHe/z8xGA6OBW4HTgd7BZRjwBDAsCIO7gFwiQTTDzCYFgfAEcBXwMZEAGQG8GRxzX48hIhI17s6GbaVBMERmOe06eb1iYwmlFV8NOTUPpsIOzW69x8nr7Mw00pok16BOtc/W3dcAa4LbW81sAdAZGAkMD3Z7FniXyC/3kcBz7u7AR2bW0sw6BvtOdfdCgCCERpjZu0CGu38UtD8HnEUkQL7pMUREDti2neUs3zWzqcq5iWUFxWytOhU2pQFZmc3o2TaNkw9tH+lNBMNObaI8FTaeHVBcmlkWMIhIT6F9EC4Aa4kMcUEkXFZWuVt+0La/9vx9tLOfx9i7rquBqwG6det2IE9JRBJMaXklKzeVVPli3Vffm1i/11TYzi2bkp2ZxjmDOwdfrEunRz1PhY1nNQ4QM0sHJgA3ufuWqgkcnK/wb7xzFOzvMdx9DDAGIDc3t07rEJHwuQdTYfdx8nrlpu1UVJkL2yYtMhX2W4e0Jbvtri/VpdO9TWxMhY1nNQoQM2tEJDyed/eJQfM6M+vo7muCIar1QfsqoGuVu3cJ2lbx1XDUrvZ3g/Yu+9h/f48hIknK3fnxCzN5Y87a3W1NG0Wmwvbv3ILv5nTa47xEy2aNQ6w2sdVkFpYBTwEL3P3hKpsmAZcC9wXXr1Zpv97MXiRyEr0oCIApwG/NrFWw36nAbe5eaGZbzOxIIkNjlwB/rOYxRCRJjZu+kjfmrOWKY7I5+dB2ZLdNo0NGqs5LhKAmPZBjgB8Cc8zss6DtdiK/1MeZ
2ZXAl8AFwbY3iEzhXUJkGu/lAEFQ3At8Gux3z64T6sB1fDWN983gwn4eQ0SSUP6mEu59bQFH9WjDnd8+lAY6TxEqi0yWShy5ubk+ffr0sMsQkSirrHQufupjZucX8eaNx9G1dbOwS0ooZjbD3XMP5D6J980WEUlIf//oSz78YiN3fvtQhUeMUICISMxbtqGY+95cyPA+bfneEV2rv4PUCwWIiMS0ikrn5vF5NEox7j/3cJ0sjyHJ9b17EYk7T/13KTO+3MQfvjeQ9hmpYZcjVagHIiIxa/G6rTz4r0Wc1r89Iwd2Crsc2YsCRERiUllFJaPG55HepCG/OfswDV3FIA1hiUhMeuLdL5idX8SffzCYzPQmYZcj+6AeiIjEnHmri3hs2mLOzOnEGYd1DLsc+QYKEBGJKTvLKxg1Lo9WaY25Z2T/sMuR/dAQlojElMemLWbh2q08fVmuFkKMceqBiEjMmLViE0+8+wUX5HbhxL77/PM/EkMUICISE3aUVTBqfB4dMlK58zv9wi5HakBDWCISEx6c8jlLC4r5x5XDyEhtFHY5UgPqgYhI6D5ZVshTHyzjh0d259jemWGXIzWkABGRUBXvLOfm8Xl0a92M0af3DbscOQAawhKRUP3uzQWs3FTCuGuOIq2JfiXFE/VARCQ07y8u4B8freBHx2ZzRFbrsMuRA6QAEZFQbNlRxs9fnk3PtmmMOrVP2OVILai/KCKhuHfyfNZt2cHE644htVFK2OVILagHIiL1btqCdYyfkc91w3sxsGvLsMuRWlKAiEi92lRcyuiJc+jboTk/Oal32OXIQdAQlojUq19OmsfmklKevXwojRvqM2w807+eiNSb12evYXLean5yYm/6dcoIuxw5SAoQEakXBVt3cuc/55DTpQX/b3jPsMuRKFCAiEidc3fueGUOxaUVPHRBDg1T9KsnEehfUUTq3CuzVvGv+eu45dQ+9GrXPOxyJEoUICJSp9YW7eCuSfPI7d6KK47NDrsciSIFiIjUGXfn1gmzKa9wHjw/h5QGFnZJEkUKEBGpMy9+upL/LCrgtjP6kpWZFnY5EmUKEBGpEysLS/j1a/M5plcbLh7WPexypA4oQEQk6iornVtezsPMeOC8HBpo6CohKUBEJOqe+99yPlpayC++cyidWzYNuxypIwoQEYmqpQXbuO+thZzQpy0X5HYNuxypQwoQEYmaikrn5vF5NGmYwn3nHo6Zhq4SmRZTFJGoefL9pcxcsZlHLxxI+4zUsMuROqYeiIhExaJ1W3n4X4sY0b8DZ+Z0CrscqQcKEBE5aGUVlfxs3Gc0T23Ir88eoKGrJKEhLBE5aH9+5wvmrtrCXy4eTGZ6k7DLkXpSbQ/EzJ42s/VmNrdK26/MbJWZfRZczqiy7TYzW2Jmn5vZaVXaRwRtS8xsdJX2bDP7OGh/ycwaB+1Ngp+XBNuzovWkRSR65q4q4o9vL+asgZ0YMaBj2OVIParJENYzwIh9tD/i7gODyxsAZtYPuBDoH9znz2aWYmYpwJ+A04F+wEXBvgD3B8fqBWwCrgzarwQ2Be2PBPuJSAzZWV7BqHF5tE5rzN1nDgi7HKln1QaIu78HFNbweCOBF919p7svA5YAQ4PLEndf6u6lwIvASIsMlJ4IvBzc/1ngrCrHeja4/TJwkmlgVSSm/OHfi/l83VbuP/dwWjRrFHY5Us8O5iT69WY2OxjiahW0dQZWVtknP2j7pvY2wGZ3L9+rfY9jBduLgv2/xsyuNrPpZja9oKDgIJ6SiNTUzBWb+Ot/vuB7uV05oW+7sMuRENQ2QJ4AegIDgTXAQ1GrqBbcfYy757p7btu2bcMsRSQpbC+t4OZxeXRs0ZQ7v3No2OVISGoVIO6+zt0r3L0SeJLIEBXAKqDq2gVdgrZvat8ItDSzhnu173GsYHuLYH8RCdnvp3zO0g3F/P68w2meqqGrZFWrADGzqlMtzgZ2zdCaBFwYzKDKBnoDnwCfAr2DGVeNiZxo
n+TuDrwDnBfc/1Lg1SrHujS4fR7wdrC/iIToo6Ub+duHy7jkqO4c3Ssz7HIkRNV+D8TMxgLDgUwzywfuAoab2UDAgeXANQDuPs/MxgHzgXLgx+5eERznemAKkAI87e7zgoe4FXjRzH4NzAKeCtqfAv5uZkuInMS/8KCfrYgclOKd5dzych7dWjdj9Ol9wy5HQmaJ9qE+NzfXp0+fHnYZIgnpjlfm8MInKxh/zVHkZrUOuxyJIjOb4e65B3IfLWUiIjXy3qICnv94BVcd10PhIYACRERqoGh7GbdOmE2vdun87JRDwi5HYoTWwhKRat0zeT7rt+5k4sVDSG2UEnY5EiPUAxGR/Zo6fx0TZuZz3fCe5HRtGXY5EkMUICLyjTYVl3LbxDkc2jGDG07sHXY5EmM0hCUi3+gXr86laHspf79yKI0b6vOm7EnvCBHZp9dmr+a12Wu46eRDOLRjRtjlSAxSgIjI16zfuoNf/HMuOV1bcs3xPcIuR2KUAkRE9uDu3D5xLiWlFTx0fg4NU/RrQvZN7wwR2cPEmav494J13HJaH3q1Sw+7HIlhChAR2W1N0XZ+NXkeR2S14vJjssMuR2KcAkREgMjQ1c9fnk15hfPg+TmkNNAfAJX9U4CICABjP1nJ+4s3cPsZfeneJi3sciQOKEBEhJWFJfz69fkc2yuTHwzrHnY5EicUICJJrrLSuXl8Hilm3H/e4TTQ0JXUkAJEJMk98+FyPl5WyC++24/OLZuGXY7EEQWISBL7omAb97+1kJP6tuP8IV3CLkfijAJEJElVBENXqY1S+N05h2GmoSs5MFpMUSRJjXlvKbNWbObRCwfSLiM17HIkDqkHIpKEPl+7lUemLuKMwzpwZk6nsMuROKUAEUkyZRWV/GzcZzRPbci9Iwdo6EpqTUNYIknm8beXMG/1Fv76wyG0SW8SdjkSx9QDEUkic/KL+NM7Szh7UGdO698h7HIkzilARJLEzvIKRo3/jDbpjfnVd/uHXY4kAA1hiSSJR6YuZtG6bfzt8iNo0axR2OVIAlAPRCQJzPhyE2Pe+4ILj+jKCX3ahV2OJAgFiEiC215awc3j8+jYoil3fPvQsMuRBKIhLJEE98CUhSzbUMwLVw2jeaqGriR61AMRSWD/+2Ijf/tgOZcdncXRPTPDLkcSjAJEJEFt21nOLS/nkdWmGT8f0SfsciQBaQhLJEH95vUFrN68nfHXHkWzxvqvLtGnHohIAvrPogLGfrKCq47rwZDurcMuRxKUAkQkwRRtL+PWl2fTu106Pz3lkLDLkQSmfq1Igrl78jwKtu3kyUtySW2UEnY5ksDUAxFJIP+at5aJM1fx4+E9OaxLi7DLkQSnABFJEIXFpdz+yhz6dczg+hN7h12OJAENYYkkiF/8cy5F28v4x4+G0bihPhtK3dO7TCQBTM5bzetz1nDTyYfQt0NG2OVIkqg2QMzsaTNbb2Zzq7S1NrOpZrY4uG4VtJuZPWZmS8xstpkNrnKfS4P9F5vZpVXah5jZnOA+j1nw59G+6TFEZE/rt+7gF6/OJadrS645vkfY5UgSqUkP5BlgxF5to4Fp7t4bmBb8DHA60Du4XA08AZEwAO4ChgFDgbuqBMITwFVV7jeimscQkYC7c9uEOWwvreCh83NomKJBBak/1b7b3P09oHCv5pHAs8HtZ4GzqrQ/5xEfAS3NrCNwGjDV3QvdfRMwFRgRbMtw94/c3YHn9jrWvh5DRAIvz8hn2sL13HJaH3q1Sw+7HEkytf240t7d1wS31wLtg9udgZVV9ssP2vbXnr+P9v09xteY2dVmNt3MphcUFNTi6YjEn9Wbt3PP5PkMzW7NFcdkh12OJKGD7u8GPQePQi21fgx3H+Puue6e27Zt27osRSQmuDu3TphNhTsPnpdDgwYWdkmShGobIOuC4SeC6/VB+yqga5X9ugRt+2vvso/2/T2GSNJ7/uMVvL94A7efcSjd2jQLuxxJUrUNkEnArplUlwKv
Vmm/JJiNdSRQFAxDTQFONbNWwcnzU4EpwbYtZnZkMPvqkr2Ota/HEElqKzaW8Ns3FnBc70x+MKxb2OVIEqv2i4RmNhYYDmSaWT6R2VT3AePM7ErgS+CCYPc3gDOAJUAJcDmAuxea2b3Ap8F+97j7rhPz1xGZ6dUUeDO4sJ/HEElalZXOzS/nkWLG/eceTjDrXSQU1QaIu1/0DZtO2se+Dvz4G47zNPD0PtqnAwP20b5xX48hksz+9uFyPllWyO/PO5xOLZuGXY4kOU0aF4kTXxRs44G3FnJS33acN6RL9XcQqWMKEJE4UF5RyahxeTRtnMLvzjlMQ1cSE7SYokgc+Ot7S/ls5WYeu2gQ7TJSwy5HBFAPRCTmLVy7hT/8exHfPqwj3z28Y9jliOymABGJYaXllfzspTxaNG3EvWcN0NCVxBQNYYnEsMffWcL8NVv46w+H0DqtcdjliOxBPRCRGDU7fzN/emcJ5wzqzGn9O4RdjsjXKEBEYtCOsgpGjcujbXoT7vpu/7DLEdknDWGJxKBHpi5i8fptPHP5EbRo1ijsckT2ST0QkRgz48tCxry/lIuGdmN4n3ZhlyPyjRQgIjGkpLScUePy6NyyKXd8+9CwyxHZLw1hicSQB976nOUbSxh71ZGkN9F/T4lt6oGIxIgPv9jAMx8u57KjsziqZ5uwyxGplgJEJAZs3VHGLeNnk52Zxq0j+oZdjkiNqI8sEgN++8YC1hRtZ/y1R9G0cUrY5YjUiHogIiF75/P1jP1kJVcd34Mh3VuHXY5IjSlAREJUVFLG6AmzOaR9Oj89+ZCwyxE5IBrCEgnRrybPY8O2Uv7vkiNIbaShK4kv6oGIhOStuWt5ZdYqrj+hF4d1aRF2OSIHTAEiEoKN23Zyxytz6N8pg+tP7BV2OSK1oiEskXrm7tz5z7ls3VHO81fl0ChFn+MkPumdK1LPJuWt5s25a7nplN707ZARdjkitaYAEalH67fs4JevzmNQt5ZcfVyPsMsROSgKEJF64u6MnjiHHWUVPHh+Dg01dCVxTu9gkXoyfkY+by9cz60j+tKzbXrY5YgcNAWISD1YtqGYeyfPZ1h2ay47OivsckSiQgEiUsden72GM//4Xxo0MH5/Xg4NGljYJYlEhabxitSRHWUV3PPafF74eAWDurXksQsH0bV1s7DLEokaBYhIHVi8bivXvzCLz9dt5dpv9WTUqYfo+x6ScBQgIlHk7oyfns8vJ80lvUlDnrtiKMcf0jbsskTqhAJEJEq27ijjjlfmMilvNcf0asMj3xtIu+apYZclUmcUICJRMDt/MzeMnUX+pu3cclofrv1WT1J0slwSnAJE5CC4O09/sJz73lxA2/QmvHT1keRm6Y9CSXJQgIjUUmFxKbeMz2PawvWc2q89D5x3OC2bNQ67LJF6owARqYWPl27kxhc/o7C4lLvP7M8lR3XHTENWklwUICIHoKLSefztJTw6bRHd26Qx8dKjGdBZfwxKkpMCRKSG1m3ZwY0vzuKjpYWcM6gz95w1gPQm+i8kyUvvfpEaeGfhekaNz2NHWQUPnZ/DuUO6hF2SSOgO6quxZrbczOaY2WdmNj1oa21mU81scXDdKmg3M3vMzJaY2WwzG1zlOJcG+y82s0urtA8Jjr8kuK8GmaVelZZX8pvX53P5M5/SPiOVyTccq/AQCURjbYUT3H2gu+cGP48Gprl7b2Ba8DPA6UDv4HI18AREAge4CxgGDAXu2hU6wT5XVbnfiCjUK1IjKzaWcP5fPuTJ95dxyVHdeeW6o7UMu0gVdTGENRIYHtx+FngXuDVof87dHfjIzFqaWcdg36nuXghgZlOBEWb2LpDh7h8F7c8BZwFv1kHNInuYnLea2yfOwQz+cvFgRgzoGHZJIjHnYAPEgX+ZmQN/dfcxQHt3XxNsXwu0D253BlZWuW9+0La/9vx9tH+NmV1NpFdDt27dDub5SJLbXlrBPa/NY+wnKxncrSWPXTSILq20gq7I
vhxsgBzr7qvMrB0w1cwWVt3o7h6ES50KgmsMQG5ubp0/niSmReu2cv0LM1m8fhvXDe/JT0/RCroi+3NQAeLuq4Lr9Wb2CpFzGOvMrKO7rwmGqNYHu68Cula5e5egbRVfDXntan83aO+yj/1FosrdefHTldw9ed7uFXSP660VdEWqU+uPV2aWZmbNd90GTgXmApOAXTOpLgVeDW5PAi4JZmMdCRQFQ11TgFPNrFVw8vxUYEqwbYuZHRnMvrqkyrFEomLLjjJuGDuL2ybO4Yis1rxx43EKD5EaOpgeSHvglWBmbUPgBXd/y8w+BcaZ2ZXAl8AFwf5vAGcAS4AS4HIAdy80s3uBT4P97tl1Qh24DngGaErk5LlOoEvU5K2MrKC7avN2fj6iD9ce31N/blbkAFhkUlTiyM3N9enTp4ddhsSwykrnqf8u4/63FtI+I5XHLhrIkO5aQVeSm5nNqPJ1jBrRN9ElqWzctpObx+fxzucFnNa/PQ+cm0OLZo3CLkskLilAJGn874uN3PTSLDaVlHHvyP5cfKRW0BU5GAoQSXgVlc6j0xbzx7cXk90mjacvO4L+nbSCrsjBUoBIQltTtJ0bX/yMT5YVcu7gLtwzsj9pWkFXJCr0P0kS1rQF67h5fB47yyt5+IIczhmsRRBFokkBIgmntLyS+95cyNMfLKNfxwwe//4gemgRRJGoU4BIQlm+oZgbxs5izqoiLjs6i9Gn9yW1UUrYZYkkJAWIJIxXP1vFHa/MJaWB8dcfDuG0/h3CLkkkoSlAJO6VlJbzq0nzGDc9n9zurXj0okF0btk07LJEEp4CROLawrVbuP6FWXxRsI0fn9CTn558CA21gq5IvVCASFxyd174ZAX3TJ5P89RG/P2KYRzbOzPsskSSigJE4k7R9jJunziH1+es4bjemTx8wUDaNm8SdlkiSUcBInFl1opN3DB2FmuKdnDriL5cc3wPraArEhIFiMSFykrnyfeX8vspn9M+I5Vx1xzFkO6twi5LJKkpQCTmbdi2k1Hj8vjPogJG9O/A/ecerhV0RWKAAkRi2odLNnDTS5+xeXsZ9541gIuHddMKuiIxQgEiMam8opLHpi3mj+8soUdmGs9eMZRDO2aEXZaIVKEAkZizevN2bnrxMz5ZXsh5QyIr6DZrrLeqSKzR/0qJKVPnr+OWl/MoK6/kke/lcPYgraArEqsUIBITdpZXcN+bC/nbB8vp3ymDx78/mOzMtLDLEpH9UIBI6JZtKOaGsTOZu2oLlx2dxW1n9KVJQ62gKxLrFCASqn/OWsUdr8yhUcMGPHlJLqf0ax92SSJSQwoQCUVJaTl3vTqP8TPyOSKrFY9eOIhOWkFXJK4oQKTeLVizhetfmMnSDcXccGIvbjypt1bQFYlDChCpN+7O8x+v4J7X5tOiaSOev3IYR/fSCroi8UoBIvWiaHsZoyfM5s25azn+kLY8fEEOmelaQVcknilApM7NXLGJG16YxbotO7jt9L5cdZxW0BVJBAoQqTOVlc6Y95fy4JTP6dAilfHXHsWgblpBVyRRKECkThRs3cnPxn3G+4s3cMZhHfjdOYfToqlW0BVJJAoQiboPghV0t2wv4zdnD+D7Q7WCrkgiUoBI1JRXVPKHfy/mT+8uoWfbdP5+5VD6dtAKuiKJSgEiUbFq83ZuHDuL6V9u4oLcLvzqTK2gK5Lo9D9cDtqUeWv5+cuzKa+o5NELBzJyYOewSxKReqAAkVrbURZZQfeZD5czoHMGj180mCytoCuSNBQgUitLC7Zx/QuzmL9mC1cck82tp/fRCroiSUYBIgds4sx87vznXBo3bMD/XZLLyVpBVyQpKUCkxop3lvPLV+cxYWY+Q7Na8+hFA+nYQivoiiQrBYjUyPzVW7h+7EyWbSjmJyf15icn9tIKuiJJLuZ/A5jZCDP73MyWmNnosOtJNu7Oc/9bzll//oBtO8p5/kfD+Nkphyg8RCS2eyBmlgL8CTgFyAc+NbNJ7j4/
3MqSQ1FJGT+fkMeUeesY3qctD52fQxutoCsigZgOEGAosMTdlwKY2YvASOAbA2TRuq2c/PB/9ntQd6/2gavfo6Y71Wy3aNVUg8MEx6p+x80lZWwvreCOMw7lymOztYKuiOwh1gOkM7Cyys/5wLC9dzKzq4GrATI69aBP++bVH7kGvwtr8uuypms81exY0TlO5FjV71ndHg1TjO8P687Ari1r+KgikkxiPUBqxN3HAGMAcnNz/U8/GBxyRSIiiS/Wz4SuArpW+blL0CYiIiGL9QD5FOhtZtlm1hi4EJgUck0iIkKMD2G5e7mZXQ9MAVKAp919XshliYgIMR4gAO7+BvBG2HWIiMieYn0IS0REYpQCREREakUBIiIitaIAERGRWrGaLKERT8ysCFgcdh0JpAVQFHYRCUKvZXTp9Yyu3u7e4kDuEPOzsGrhJXe/OuwiEoWZjdHrGR16LaNLr2d0mdmYA71PIg5hTQ67gASj1zN69FpGl17P6Drg1zPhhrBERKR+JGIPRERE6oECREREaiXuAsTM3Mz+UeXnhmZWYGavhVlXPDOzs4LXtW/YtcQrvS/rjpltC7uGRFPda2pm75pZbnXHibsAAYqBAWbWNPj5FA5wiXczS8TZZwfjIuC/wXWNBX9yWCIO+n0pEm/iMUAgsrjit4PbFwFjd20ws6Fm9j8zm2VmH5pZn6D9MjObZGZvA9Pqv+TYZGbpwLHAlUSWy8fMhpvZe2b2upl9bmZ/MbMGwbZtZvaQmeUBR4VXeUyqzfvyPTMbWGW//5pZTr1WHQeC9+RrVX5+3MwuC24vN7O7zWymmc1RT7pm9vea1lS8BsiLwIVmlgocDnxcZdtC4Dh3HwT8EvhtlW2DgfPc/Vv1VmnsGwm85e6LgI1mNiRoHwrcAPQDegLnBO1pwMfunuPu/633amNbbd6XTwGXAZjZIUCqu+fVW8WJY4O7DwaeAG4Ou5hkEZcB4u6zgSwin/L2Xuq9BTDezOYCjwD9q2yb6u6F9VJk/LiIyC8+gutdw1ifuPtSd68g8kn62KC9AphQvyXGh1q+L8cD3zGzRsAVwDP1UmzimRhczyDybyD1IJ7PBUwCHgSGA22qtN8LvOPuZ5tZFvBulW3F9VRbXDCz1gN7uG4AAALxSURBVMCJwGFm5kT+aJcDrwfXVe36eUcQKrJvB/S+dPcSM5tKpCd4ATAE2Zdy9vzAm7rX9p3BdQXx/XutPlX3mlYrLnsggaeBu919zl7tLfjq5OVl9VpR/DkP+Lu7d3f3LHfvCiwDjgOGBn9KuAHwPSIn2aV6tXlf/h/wGPCpu2+q2/Li1pdAPzNrYmYtgZPCLigBHPRrGrcB4u757v7YPjY9APzOzGahTyLVuQh4Za+2CUH7p8DjwAIiobL3frIPtXlfuvsMYAvwt3ooMa4EMyZ3uvtKYBwwN7ieFWphcSyar6mWMpGvMbPhwM3u/p2wa0kGZtaJyJBWX3evDLmcmBLMSHvS3YeGXUuiiOZrGrc9EJFEYGaXEJmtdYfCY09mdi2RCRx3hl1Looj2a6oeiIiI1Ip6ICIiUisKkCRjZl3N7B0zm29m88zsxqC9tZlNNbPFwXWroL1v8A3qnWZ2817H+mlwjLlmNjb4Ap2IJAkFSPIpB0a5ez/gSODHZtYPGA1Mc/feRJZ6GR3sXwj8hMh3G3Yzs85Be667DyDyHZIL6+cpiEgsUIAkGXdf4+4zg9tbiUzT7Uzki2zPBrs9C5wV7LPe3T8FyvZxuIZA02BaYDNgdR2XLyIxRAGSxIJvRA8iMguovbuvCTatBdrv777uvopIr2QFsAYocvd/1VmxIhJzFCBJKliFdwJwk7tvqbrNI1Pz9js9LzhHMhLIBjoBaWZ2cR2VKyIxSAGShIKF+yYAz7v7rkXo1plZx2B7R2B9NYc5GVjm7gXuXkZkMbuj66pmEYk9CpAkY2ZGZAnxBe7+cJVNk4BLg9uXAq9Wc6gVwJFm1iw45klEzqeI
SJLQFwmTjJkdC7wPzAF2ffP5diLnQcYB3YgssnaBuxeaWQdgOpAR7L8N6OfuW8zsbiILLZYTWUfnR+6+ExFJCgoQERGpFQ1hiYhIrShARESkVhQgIiJSKwoQERGpFQWIiIjUigJERERqRQEiIiK18v8BisJZxPsO/AQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_dates(df_user_item_train['event_dttm']).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2018-08-01    172049\n",
       "Name: event_dttm, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot_dates(df_user_item_valid['event_dttm'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7333359165517135"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_user_item_train) / (len(df_user_item_train) + len(df_user_item_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_user_item_train = df_user_item_train.sort_values('event_dttm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "split_pos = int(len(df_user_item_train) * 0.75)\n",
    "df_user_item_train, df_user_item_test = df_user_item_train.iloc[:split_pos], df_user_item_train.iloc[split_pos:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7fed958853c8>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAD+CAYAAAAd3fMoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU1fnH8c9DQtghBMIOguyLrAFxrdYN1BasilB/AmpF64ZorVRrtWqtS9VC61JUtqpsrmhRirjWlSBh38KiBFkCgbAGSPL8/rgndkyTkMDM3DuT5/16zWtmzl3mO4chZ+69Z84RVcUYY4w5XlX8DmCMMSY+WINijDEmLKxBMcYYExbWoBhjjAkLa1CMMcaERaLfAfzSsGFDbd26td8xjDEmpixcuHCHqqaWtKzSNiitW7cmPT3d7xjGGBNTROTb0pbZKS9jjDFhEbYGRURaisiHIrJCRJaLyGhXniIi80Rkrbuv78pFRMaLSKaILBGR3iH7GuHWXysiI0LK+4jIUrfNeBGRsl7DGGNM9ITzCCUfuENVuwD9gZtEpAswFpivqu2B+e45wECgvbuNAp4Fr3EA7gNOBvoB94U0EM8C14VsN8CVl/YaxhhjoiRsDYqqblHVb9zjvcBKoDkwCJjiVpsCDHaPBwFT1fMlkCwiTYELgHmqmqOqu4B5wAC3rK6qfqneeDFTi+2rpNcwxhgTJRG5hiIirYFewFdAY1Xd4hZtBRq7x82BTSGbZbmyssqzSiinjNcwxhgTJWFvUESkNvAacJuq7gld5o4sIjoaZVmvISKjRCRdRNKzs7MjGcMYYyqdsDYoIlIVrzF5WVVfd8Xb3Okq3P12V74ZaBmyeQtXVlZ5ixLKy3qNH1HVCaqapqppqakldqM2xsSZIwWFvP5NFrkHj/gdJe6Fs5eXAC8CK1X1yZBFs4GinlojgLdCyoe73l79gVx32moucL6I1HcX488H5rple0Skv3ut4cX2VdJrGGMquXHvr+X2mYsZNTWdQ/kFfseJa+E8QjkNuAr4qYhkuNuFwCPAeSKyFjjXPQeYA6wHMoHngRsBVDUHeBBY4G4PuDLcOi+4bdYB77ry0l7DGFOJfbl+J09/lEmPlsl8tSGHu15dgs0BFTlSWSs3LS1N7ZfyxsSv3ANHGDDuE6pXTeCdW05n8ucbeXzuam79aTtuP7+j3/FilogsVNW0kpZV2qFXjDHxS1X53RtLyN57iNdvPJVa1RK58ay2fLfzAOM/yKRF/ZoM6dvy6DsyFWINijEm7sxKz2LO0q3cNaAT3VskAyAiPHRJN77PPcjdbyylaXJ1zmhvnXPCycbyMsbElfXZ+7j/7eWc2rYB15954o+WVU2owtNX9qZdo9rc+NI3rNq6p5S9mGNhDYoxJm4czi9k9PQMkhKr8OSQnlSpIv+zTt3qVZk4si81qyVwzaQFbNuT50PS+GQNijEmbjw5bw1LN+fyyC+606Re9VLXa5Zcg4kj+5J78AjXTF7A/kP5UUwZv6xBMcbEhc8zd/CPT9YxrF8rBnRrctT1uzarx9+v7M2qrXu5+ZVvyC8ojELK+GYNijEm5u3af5gxMzNo07AW917cudzbnd2xEQ8M6sqHq7O5b/Zy+43KcbJeXsaYmKaqjH19CTn7D/PiiL7UTKrYn7UrTz6B73IO8I+P13NCg5qMOrNthJLGP2tQjDExbdrXm5i7fBv3XNiZbs3rHdM+7rqgE1k5B3l4ziqaJ9fkou5Nw5yycrAGxRgTszK37+OBd5ZzRvuGXHt6m2PeT5UqwhNDerB1Tx5jZmbQpF41+pyQEsaklYNdQzHGxKRD+QWMnr6ImkmJPHF5jxK7CFdE9aoJPD88jWb1qnPd1IVs3LE/TEkrD2tQjDEx6S9zV7P8+z08dml3GtUtvYtwRaTUSmLS1f1QVa6evICc/YfDst/KwhoUY0zM+XRtNs9/
uoGr+p/AuV3CO0Frm4a1eH54Gpt3H2TU1HTyjtiQ9+VlDYoxJqbs3HeI22cupn2j2txzUfm7CFdEWusUnhzSg/Rvd/GbWYspLLTuxOURzgm2JorIdhFZFlI2I2RulI0ikuHKW4vIwZBlz4Vs00dElopIpoiMd5NpISIpIjJPRNa6+/quXNx6mSKyRER6h+s9GWOCRVW567Ul5B48wvhhvaheNSFir3Vx92aMHdiJd5Zs4fF/r47Y68STcB6hTAYGhBao6hWq2lNVe+JNDfx6yOJ1RctU9YaQ8meB64D27la0z7HAfFVtD8x3zwEGhqw7ym1vjIlDL331He+v3M7YAZ3o3LRuxF/v+jNP5MqTW/HsR+t45avvIv56sS5sDYqqfgLklLTMHWUMAaaVtQ83H3xdVf1SvZ+sTgUGu8WDgCnu8ZRi5VPV8yWQXDS/vDEmfqzZtpeH3lnBWR1Tufq01lF5TRHhjz/vylkdU7n3rWV8uHp7VF43VkXrGsoZwDZVXRtS1kZEFonIxyJyhitrDmSFrJPlygAau3nlAbYCjUO22VTKNj8iIqNEJF1E0rOzs4/j7RhjoinvSAG3TltEneqJPH5ZD9yZ8KhITKjC33/Zm46N63Dzy9+w/PvcqL12rIlWgzKMHx+dbAFaqWov4HbgFREp9/GrO3qp8FUyVZ2gqmmqmpaaahPrGBMrHn1vFau27uXxy3qQWqda1F+/drVEJo7sS90aVblm8gK25B6MeoZYEPEGRUQSgV8AM4rKVPWQqu50jxcC64AOwGagRcjmLVwZwLaiU1nuvujYczPQspRtjDEx7sPV25n02UZGntqaszs18i1Hk3rVmTiyL/sPFXD1pAXszTviW5agisYRyrnAKlX94VSWiKSKSIJ7fCLeBfX17pTWHhHp7667DAfecpvNBka4xyOKlQ93vb36A7khp8aMMTEse+8h7py1mE5N6jB2YCe/49C5aV2eubI3a7fv46ZXFnHEhrz/kXB2G54GfAF0FJEsEbnWLRrK/16MPxNY4roRvwrcoKpFF/RvBF4AMvGOXN515Y8A54nIWrxG6hFXPgdY79Z/3m1vjIlxqspvX13M3rx8xg2NbBfhijizQyoPX9KNT9Zkc++by2zI+xBhGxxSVYeVUj6yhLLX8LoRl7R+OtCthPKdwDkllCtwUwXjGmMCbsrnG/lwdTYPDOpKxyZ1/I7zI1f0bcWmnIP8/cNMWqbU5Kaz2/kdKRBstGFjTOCs2rqHh99dxTmdGnFV/xP8jlOiO87vwKZdB3h87mpa1K/BoJ4ldi6tVKxBMcYESlEX4Xo1qvLYZd2j2kW4IkSExy7rzpbdedw5awlN69WgX5vKPeS9jeVljAmUh+esZM22fTxxeQ8a1I5+F+GKqJaYwIThfWiRUoNR/0xnXfY+vyP5yhoUY0xgzF+5jalffMuvTm/DmR1i47diyTWTmDyyHwkiXD1pATv3HfI7km+sQTHGBML2PXnc+eoSujSty50DOvodp0JaNajJCyPS2LYnj19V4iHvrUExxviusFC5Y9ZiDhzOZ/ywnlRLDEYX4Yro1ao+44b2JGPTbm6bnlEph7y3BsUY47uJn23g07U7uPfiLrRrFKwuwhUxoFtT7rmwM+8t38qf313pd5yos15exhhfLf8+l8feW815XRrzy36t/I5z3K49vQ2bcg7w/KcbaJlSk+GntPY7UtRYg2KM8c3Bw14X4fq1qvLopcHtIlwRIsIfftaVzbsPcv/s5TRPrsE5ncM7TXFQ2SkvY4xvHvrXCtbv2M+TQ3qSUivJ7zhhk1BFGD+sF12b1ePmVxaxNKtyDHlvDYoxxhdzl2/l5a++Y9QZJ3Jau4Z+xwm7mkmJvDgyjZRaSVwzZQFZuw74HSnirEExxkTd1tw87nptCd2a1+WO82Ori3BFNKpTnUlX9yXvSAHXTF5A7sH4HvLeGhRjTFR5XYQzOHSkkHFDe5GUGN9/hjo0rsM//q8P67P3
8+uXFnI4P36HvI/vf0ljTOA8/+l6Psvcyf0/70Lb1Np+x4mKU9s15JFLu/P5up3c/cbSuB3yPpzzoUwUke0isiyk7H4R2SwiGe52Yciy34lIpoisFpELQsoHuLJMERkbUt5GRL5y5TNEJMmVV3PPM93y1uF6T8aY8Fqalcvjc1czsFsThqS1PPoGceSyPi247dz2vLowi/HzM/2OExHhPEKZDAwoofwpVe3pbnMARKQL3sRbXd02z4hIgpvF8WlgINAFGObWBXjU7asdsAsomsDrWmCXK3/KrWeMCZgDh/MZPX0RqXWq8edfnBQXXYQravQ57flF7+Y89f4aXluYdfQNYkzYGhRV/QTIOeqKnkHAdDe3/Aa82Rb7uVumqq5X1cPAdGCQmw74p3izOwJMAQaH7GuKe/wqcI5Uxk+qMQH3wNsr2LDT6yKcXDN+ughXhIjwyC+6c8qJDRj7+hI+X7fD70hhFY1rKDeLyBJ3Sqy+K2sObApZJ8uVlVbeANitqvnFyn+0L7c8163/P0RklIiki0h6dnb28b8zY0y5vLt0C9MXbOLXP2nLKW1L/O9ZaSQlVuG5q/rQukEtrv/nQtZu2+t3pLCJdIPyLNAW6AlsAZ6I8OuVSVUnqGqaqqalpsbG0NjGxLrvdx9k7OtL6dGiHmPO6+B3nECoV6MqE0f2pVpiAiMnLWD73jy/I4VFRBsUVd2mqgWqWgg8j3dKC2AzEHpFroUrK618J5AsIonFyn+0L7e8nlvfGOOzgkJlzIwM8gu8LsJVE6xjaZGWKTWZODKNnP2H+dWUdA4czj/6RgEX0X9dEWka8vQSoKgH2GxgqOuh1QZoD3wNLADaux5dSXgX7mer18fuQ+Ayt/0I4K2QfY1wjy8DPtB47ZNnTIx57uN1fLUhhz8O6kbrhrX8jhM43Vsk87dhvVi2OZdbp2VQEOND3oez2/A04Augo4hkici1wGMislRElgBnA2MAVHU5MBNYAbwH3OSOZPKBm4G5wEpgplsX4C7gdhHJxLtG8qIrfxFo4MpvB37oamyM8U/Gpt08NW8NF3dvyqW9mx99g0rq3C6Nue9nXXl/5TYefGeF33GOi1TWL/NpaWmanp7udwxj4tK+Q/lcNP5T8guUOaPPoF6Nqn5HCrwH31nBi//ZwB8u7sI1p7fxO06pRGShqqaVtMyGrzfGhN39s5ezKecA00edYo1JOd19YWeydh3gwX+toHn9GlzQtYnfkSrMrpAZY8Lq7cXf8+rCLG4+ux392qT4HSdmJFQR/npFL7q3SGb09EVkbNrtd6QKswbFGBM2WbsOcPcbS+ndKplbz2nvd5yYUyMpgRdHpJFapxq/mrKATTmxNeS9NSjGmLDILyhkzIwMVGHc0F4kWhfhY9KwdjUmjezHkQJlxKSv2X3gsN+Rys3+xY0xYfHMR+tYsHEXDw7uSsuUmn7HiWntGtVmwlV9yMo5yPX/XMih/AK/I5WLNSjGmOO28NtdjJu/lsE9m3FJrxZ+x4kLJ5/YgMcv785XG3K469UlMTHkvfXyMsYcl715R7htxiKa1qvOA4O7+R0nrgzq2ZxNOQf4y7/X0CqlJrcHfHZLa1CMMcflD28t5/vdecy8/hTqVrcuwuF209nt2JRzkPEfZNIipWag55GxBsUYc8zeXLSZNxZtZsy5HehzQv2jb2AqTER46JJufJ97kLtfX0qzejU4vX1Dv2OVyK6hGGOOyaacA/z+zWWknVCfm85u63ecuFY1oQpPX9mbdo1q8+uXFrJq6x6/I5XIGhRjTIXlFxQyevoiROCvQ3taF+EoqFvdG/K+RlIC10xawLY9wRvy3j4FxpgKG/9BJt98t5s/XXISLepbF+FoaZZcg4kj+7L74BGumbyA/YeCNeS9NSjGmApZsDGHv3+wlkt7t+DnPZr5HafS6da8Hk//sjcrt+zhlmmLyC8o9DvSD8I5fP1EEdkuIstCyh4XkVVuCuA3RCTZlbcWkYMikuFu
z4Vs08cNeZ8pIuOL5ocXkRQRmScia919fVcubr1M9zq9w/WejDE/lnvwCLdNz6BlSk3+OKir33EqrbM7NeKBQd34YNV27n97eWB+oxLOI5TJwIBiZfOAbqraHVgD/C5k2TpV7eluN4SUPwtchzfpVvuQfY4F5qtqe2A+/533ZGDIuqPc9saYMFNVfv/mMrbuyeOvV/SkdjXrJOqn/+t/Atf/5ERe+vI7nv90vd9xgDA2KKr6CZBTrOzfbtIsgC/xpu4tlZvhsa6qfulmXZwKDHaLBwFT3OMpxcqnqudLvKmCQ2eKNMaEwevfbObtxd9z+3kd6NXKuggHwV0XdOKik5ry8JxV/GvJFr/jRPUayjXAuyHP24jIIhH5WETOcGXNgayQdbJcGUBjVS2qsa1A45BtNpWyzY+IyCgRSReR9Ozs7ON4K8ZULht37OcPby3j5DYp3PAT6yIcFFWqCE8M6UGfE+ozZmYGC7/NOfpGkcwTjRcRkXuAfOBlV7QFaKWqvfCm7X1FROqWd3/u6KXCJw1VdYKqpqlqWmpqakU3N6ZSOlJQyOgZGSRUEZ66oicJVcTvSCZE9aoJPD88jWb1qnPd1IVs3LHftywRb1BEZCRwMXClawhQ1UOqutM9XgisAzoAm/nxabEWrgxgW9GpLHe/3ZVvBlqWso0x5jj99f01LN60m0cu7U6z5Bp+xzElSKmVxKSr+6GqXD15ATn7/RnyPqINiogMAH4L/FxVD4SUp4pIgnt8It4F9fXulNYeEenvencNB95ym80GRrjHI4qVD3e9vfoDuSGnxowxx+HL9Tt55qN1DElrwYUn2aXJIGvTsBbPD09j8+6DjJqaTt6R6A95H85uw9OAL4COIpIlItcCfwfqAPOKdQ8+E1giIhnAq8ANqlp08u9G4AUgE+/Ipei6yyPAeSKyFjjXPQeYA6x36z/vtjfGHKfdBw4zZkYGrRvU4r6fWRfhWJDWOoUnh/Qg/dtd/GbWYgoLo9udOGz9/lR1WAnFL5ay7mvAa6UsSwf+Zwxsd4rsnBLKFbipQmGNMWVSVe5+YynZew/x+o2nUsu6CMeMi7s3I2vXQR55dxUtU2py14BOUXtt+5QYY/7HrPQs5izdytiBnejeItnvOKaCrj/zRL7LOcCzH62jZf2a/PLkVlF5XWtQjDE/sj57H/fNXs6pbRsw6owT/Y5jjoGI8MDPu7J510HufWsZzZKrc1bHRhF/XRvLyxjzg8P5hYyenkG1qlV4ckhPqlgX4ZiV6Ia879i4Dje9/A3Lv8+N+Gtag2KM+cET81azdHMuj17anSb1qvsdxxyn2tUSmTiyL3VrVOWayQvYknswoq9nDYoxBoDPM3cw4ZP1DOvXigu6NvE7jgmTJvWqM3FkX/YfKuDqSQvYm3ckYq9lDYoxhl37DzNmZgYnNqzFvRd39juOCbPOTevyzJW9Wbt9Hze9sogjERry3hoUYyo5VeWu15aQs/8w44b2omaS9dWJR2d2SOVPg7vxyZps7n1zWUSGvLdPjjGV3LSvN/HvFdv4/UWd6da8nt9xTAQN7deKTbsO8PSH62iZUpObzm4X1v1bg2JMJZa5fS8PvLOcM9o35JrT2vgdx0TBb87vSNaugzw+dzUt6tdgUM8SB2c/JtagGFNJHcov4NZpGdRMSuSJy3tYF+FKQkR47LLubNmdx52zltC0Xg36tUkJy77tGooxldTj761mxZY9PHZpdxrVtS7ClUm1xAQmDO9Di5QajPpnOuuy94Vlv9agGFMJfbImmxf+s4Gr+p/AuV0aH30DE3eSayYxeWQ/EkS4etICdu47dNz7tAbFmEpm575D3DFrMe0b1eaei6yLcGXWqkFNnh+RxrY9efwqDEPeW4NiTCWiqvz21SXkHjzC+GG9qF41we9Ixme9W9Vn3NCeZGzazW3TM45ryPuwNigiMlFEtovIspCyFBGZJyJr3X19Vy4iMl5EMkVkiYj0DtlmhFt/rYiMCCnv
IyJL3Tbj3SRcpb6GMebHXvryW+av2s7YAZ3o3LTcs26bODegW1PuubAz7y3fyp/fXXnM+wn3EcpkYECxsrHAfFVtD8x3zwEG4s3U2B4YBTwLXuMA3AecDPQD7gtpIJ4FrgvZbsBRXsMY46zZtpeH/rWSszqmcvVprf2OYwLm2tPbMOKUE3j+0w1M/WLjMe0jrA2Kqn4C5BQrHgRMcY+nAINDyqeq50sg2c0VfwEwT1VzVHUXMA8Y4JbVVdUv3aRaU4vtq6TXMMYAeUcKuHXaIupUT+Txy3rgDu6N+YGI8IefdeXczo24f/Zy5q/cVuF9ROMaSuOQOd63AkVdSpoDm0LWy3JlZZVnlVBe1mv8iIiMEpF0EUnPzs4+xrdjTOx59L1VrNq6l8cv60FqnWp+xzEBlVBFGD+sF12b1ePmVxaxNKtiQ95H9aK8O7KI6CTHZb2Gqk5Q1TRVTUtNTY1kDGMC48PV25n02UZGntqasztFfpIlE9tqJiXy4og0Umolcc2UBWTtOlDubaPRoGxzp6tw99td+WagZch6LVxZWeUtSigv6zWMqdSy9x7izlmL6dSkDmMHRm9ucRPbGtWtzqSr+5J3pIBrJi8g92D5hryPRoMyGyjqqTUCeCukfLjr7dUfyHWnreYC54tIfXcx/nxgrlu2R0T6u95dw4vtq6TXMKbSUlXufHUxe/PyGTfUugibiunQuA7/+L8+rM/ez69fWsjh/KMPeR/ubsPTgC+AjiKSJSLXAo8A54nIWuBc9xxgDrAeyASeB24EUNUc4EFggbs94Mpw67zgtlkHvOvKS3sNYyqtyZ9v5KPV2dxzUWc6NqnjdxwTg05t15BHLu3O5+t2cvcbS4865H1YB4dU1WGlLDqnhHUVuKmU/UwEJpZQng50K6F8Z0mvYUxltXLLHv787irO6dSIq/qf4HccE8Mu69OCTTkHGDd/La1Sapa5ro02bEycKeoiXK9GVR67rLt1ETbH7bZz27Mp5wBPzltT5nrWoBgTZx6es5K12/cx9Zp+NKhtXYTN8RMRHrm0O1ty8/i2jPVsLC9j4sj7K7Yx9Ytv+dXpbTizg3WNN+GTlFiFSVf3LXMda1CMiRPb9+Tx29eW0KVpXe4c0NHvOCYOHa2noDUoxsSBwkLljlmLOXA4n/HDelIt0boIm+izBsWYODDxsw18unYH917chXaNrIuw8Yc1KMbEuGWbc3n0vVWc36Uxv+zXyu84phKzBsWYGHbwcAGjpy8ipVYSj15qXYSNv6zbsDEx7MF/rWD9jv28dO3J1K+V5HccU8nZEYoxMeq9ZVt55avvGHXmiZzWrqHfcYyxBsWYWLQ1N4+xry/hpOb1uOM86yJsgsEaFGNiTGGhcvvMDA4dKWTc0J4kJdp/YxMMdg3FmBgz4dP1fL5uJ49eehInptb2O44xP4j4VxsR6SgiGSG3PSJym4jcLyKbQ8ovDNnmdyKSKSKrReSCkPIBrixTRMaGlLcRka9c+QwRsauTJi4tydrNX+auZmC3JgxJa3n0DYyJoog3KKq6WlV7qmpPoA9wAHjDLX6qaJmqzgEQkS7AUKArMAB4RkQSRCQBeBoYCHQBhrl1AR51+2oH7AKujfT7Miba9h/KZ/T0DFLrVOPPvzjJugibwIn2yddzgHWqWtaAlYOA6ap6SFU34E2m1c/dMlV1vaoeBqYDg9zsjT8FXnXbTwEGR+wdGOOTB95ewcad+3nqip4k17SDcBM80W5QhgLTQp7fLCJLRGSim+4XoDmwKWSdLFdWWnkDYLeq5hcrNyZuzFm6hRnpm7jxrLb0P7GB33GMKVHUGhR3XePnwCxX9CzQFugJbAGeiEKGUSKSLiLp2dnZkX45Y8Li+90HGfvaEnq0TOa2czv4HceYUkXzCGUg8I2qbgNQ1W2qWqCqhXhzyvdz620GQq82tnBlpZXvBJJFJLFY+f9Q1QmqmqaqaampNleECb6CQmXMjAwKCpVxV/SkaoJ1ETbB
Fc1P5zBCTneJSNOQZZcAy9zj2cBQEakmIm2A9sDXwAKgvevRlYR3+my2m5v+Q+Ayt/0I4K2IvhNjouS5j9fx1YYc/jioG60b1vI7jjFlisrvUESkFnAecH1I8WMi0hNQYGPRMlVdLiIzgRVAPnCTqha4/dwMzAUSgImqutzt6y5guog8BCwCXoz4mzImwhZ9t4sn563h4u5NubS3XRY0wSfeF/zKJy0tTdPT0/2OYUyJ9h3K58Jxn1JQqMwZfQb1alT1O5IxAIjIQlVNK2mZ/VLemAC6763lZO06wIzrT7HGxMQMu8JnTMDMXvw9r32Txc1nt6Nv6xS/4xhTbtagGBMgi77bxT1vLKV3q2RuPae933GMqRA75WVMABQUKs99vI4n562hSd3qjBvai0TrImxijDUoxvhsS+5BxszI4Mv1OfysRzMeGtzNrpuYmGQNijE+em/ZFu56bSlHCgr5y+U9uLR3cxv00cQsa1CM8cGBw/k8+M5Kpn39Hd1b1GPc0F60sR8umhhnDYoxUbb8+1xunbaI9Tv2c8NP2nL7eR1s1kUTF6xBMSZKCguVSZ9v5NF3V5FcsyovXXsyp7Vr6HcsY8LGGhRjoiB77yF+M2sxH6/J5tzOjXnssu6k1LI5TUx8sQbFmAj7aPV2fjNrMXvz8nlwcDf+7+RWduHdxCVrUIyJkEP5BTz67momfraBTk3q8Mp1/enQuI7fsYyJGGtQjImAzO17uWVaBiu37GHkqa0ZO7AT1asm+B3LmIiyBsWYMFJVpn29iQfeWU7NpEReHJHGOZ0b+x3LmKiI5hTAG0VkqYhkiEi6K0sRkXkistbd13flIiLjRSTTzTnfO2Q/I9z6a0VkREh5H7f/TLetnaQ2UbVr/2FueGkhd7+xlL6tU3hv9BnWmJhKJdqd389W1Z4hY+mPBearantgvnsO3nTB7d1tFN7884hICnAfcDLelMH3FTVCbp3rQrYbEPm3Y4zni3U7GTjuUz5YtZ17LuzMlKv70ahudb9jGRNVfv+aahAwxT2eAgwOKZ+qni/x5oxvClwAzFPVHFXdBcwDBrhldVX1Szcl8NSQfRkTMUcKCnl87ip++cKX1ExK4I0bT+O6M0+kShU7QDaVTzSvoSjwb9/fi0MAABXuSURBVBFR4B+qOgForKpb3PKtQNH5gebAppBts1xZWeVZJZT/iIiMwjvioVWrVsf7fkwl9+3O/dw6PYPFm3ZzRVpL/vCzLtSqZpclTeUVzU//6aq6WUQaAfNEZFXoQlVV19hEjGvEJoA3BXAkX8vEtzcWZXHvm8upIvD0L3tzUfemfkcyxndRa1BUdbO73y4ib+BdA9kmIk1VdYs7bbXdrb4ZaBmyeQtXthk4q1j5R668RQnrGxNWe/OOcO+by3gz43v6tU7hqaE9aZ5cw+9YxgRCVK6hiEgtEalT9Bg4H1gGzAaKemqNAN5yj2cDw11vr/5Arjs1Nhc4X0Tqu4vx5wNz3bI9ItLf9e4aHrIvY8Lim+92ceH4T3l7yRZuP68D00b1t8bEmBDROkJpDLzhevImAq+o6nsisgCYKSLXAt8CQ9z6c4ALgUzgAHA1gKrmiMiDwAK33gOqmuMe3whMBmoA77qbMcetoFB59qNMnnp/LU3rVWfm9f3pc4LN9W5MceJ1iqp80tLSND093e8YJuC+332Q22Zk8PUGbzbFP13SjbrVbTZFU3mJyMKQn378iHVJMaYU7y7dwtjXl5JfUMgTl/fgFzabojFlsgbFmGK82RRXMO3rTXRvUY/xQ3vR2mZTNOaorEExJsSyzbncOn0RG3bs59dntWXMuTabojHlZQ2KMXizKU78bAOPvbea+rWq8vK1J3OqzaZoTIVYg2Iqve178/jNrCV8siab87o05tFLbTZFY46FNSimUvtw1XbufNWbTfGhwd240mZTNOaYWYNiKqW8IwU8+t4qJn22kU5N6jDtuv60t9kUjTku1qCYSmfttr3cMm0Rq7bu
tdkUjQkja1BMpaGqvPL1dzz4zgpqJSUycWQaP+1kE2AZEy7WoJhKYdf+w9z12hL+vWIbZ7RvyBNDetCojk2AZUw4WYNi4t7n63YwZkYGOfsP8/uLOnPNaW1sAixjIsAaFBO3jhQU8tS8NTz78TraNKzFiyP60q15Pb9jGRO3rEExcenbnfu5ddoiFmflMrSvN5tizST7uBsTSfY/zMQVVeWNRZu5981lJFQRnrmyNxeeZLMpGhMNER+kSERaisiHIrJCRJaLyGhXfr+IbBaRDHe7MGSb34lIpoisFpELQsoHuLJMERkbUt5GRL5y5TNExH7mXAntyTvCbTMyuH3mYro2r8d7t51pjYkxURSNI5R84A5V/cbN2rhQROa5ZU+p6l9CVxaRLsBQoCvQDHhfRDq4xU8D5wFZwAIRma2qK4BH3b6mi8hzwLXAsxF/ZyYwFn67i9HTF7ElN487zuvAjWe3I8EuvBsTVRE/QlHVLar6jXu8F1gJNC9jk0HAdFU9pKob8GZt7Odumaq6XlUPA9OBQW7K358Cr7rtpwCDI/NuTNAUFCp/m7+WIf/4AoCZ15/CLee0t8bEGB9EdVxuEWkN9AK+ckU3i8gSEZno5ogHr7HZFLJZlisrrbwBsFtV84uVl/T6o0QkXUTSs7Ozw/COjJ827z7IsAlf8sS8NVzcvSlzRp9BnxPqH31DY0xERK1BEZHawGvAbaq6B++UVFugJ7AFeCLSGVR1gqqmqWpaampqpF/ORNCcpVsY+NdPWP59Lk8O6cFfr+hpU/Ma47Oo9PISkap4jcnLqvo6gKpuC1n+PPCOe7oZaBmyeQtXRinlO4FkEUl0Rymh65s4c+BwPg+8vYLpCzbRo2Uy44f25IQGNpuiMUEQjV5eArwIrFTVJ0PKQ7vfXAIsc49nA0NFpJqItAHaA18DC4D2rkdXEt6F+9mqqsCHwGVu+xHAW5F8T8YfyzbncvH4/zAjfRM3ntWWV284xRoTYwIkGkcopwFXAUtFJMOV3Q0ME5GegAIbgesBVHW5iMwEVuD1ELtJVQsARORmYC6QAExU1eVuf3cB00XkIWARXgNm4kRhofLifzbw2NxVNKhVjZd/dTKntrXZFI0JGvG+4Fc+aWlpmp6e7ncMcxTb9+Zxx8zFfLp2B+e72RTr22yKxvhGRBaqalpJy+yX8iawPli1jTtnLWH/4Xz+dEk3ftnPZlM0JsisQTGBk3ekgEfeXcXkz73ZFKcPs9kUjYkF1qCYQFmzbS+3utkUrzmtDb8d0NFmUzQmRliDYgJBVXnpq+946J0V1K6WyKSRfTm7UyO/YxljKsAaFOO7HDeb4rwV2zizQyp/uby7zaZoTAyyBsX46vPMHYyZabMpGhMPrEExvjicX8iT89bwj09sNkVj4oU1KCbqNu7Yz63TF7EkK5dh/Vpx78WdbTZFY+KA/S82UaOqvPbNZu57axmJCVV49sreDLQJsIyJG9agmKjYk3eEe95YxtuLv+fkNik8dUVPmiXX8DuWMSaMrEExEbfw2xxGT89gS24evzm/A78+y2ZTNCYeWYNiIia/oJCnP1zH+A/W0iy5OrNuOIXerWwCLGPilTUoJiI27z7IbdMXsWDjLgb3bMaDg7tRxybAMiauxU2DIiIDgHF4Q9u/oKqP+Byp0vrXki387vUlFCo8dUUPLunVwu9IxpgoiIsGRUQSgKeB8/DmlF8gIrNVdYW/ySqX/Yfy+ePby5mZnmWzKRpTCcVFgwL0AzJVdT2AiEwHBuFN0lWi7H2HmPDJuqPuuCLTxZR31Yrts/wrR2Jqm/LOl6MKbyzazIad+7np7Lbcdm4HqiZEfEJQY0yAxEuD0hzYFPI8Czi5+EoiMgoYBZDUpB0Pz1kVnXSVRPPkGrzyq/6c0raB31GMMT6IlwalXFR1AjABoHefPvrpHy8o13YVmdNJKN/KkZonqrz7LW/OiuwzQcTG4TKmEouXBmUz0DLkeQtXVqoqItSqFi9v
3xhj/BcvJ7kXAO1FpI2IJAFDgdk+ZzLGmEolLr6iq2q+iNwMzMXrNjxRVZf7HMsYYyqVuGhQAFR1DjDH7xzGGFNZxcspL2OMMT6zBsUYY0xYWINijDEmLKS8v4SONyKyF1jtd45SNAR2+B3iGAQ5d5CzlSSoeYOaqzyCmj2ouUrTUVXrlLQgbi7KH4PVqprmd4iSiEh6ULOVJci5g5ytJEHNG9Rc5RHU7EHNVRoRSS9tmZ3yMsYYExbWoBhjjAmLytygTPA7QBmCnK0sQc4d5GwlCWreoOYqj6BmD2qu0pSat9JelDfGGBNelfkIxRhjTBhZg2KMMSYsrEExxhgTFtag+EBEeotIVb9zxBOr0/ARkZ+KSC2/c8STylKncdmgiMgoERntHgdmCkER+aWILAYuAAr9zlMRVqfh4erxQRGp4XeW4kTkShFZCJwNHPE7T3lZnYbPcdelqsbNDagO3IM3W+MuoLXfmUJyPQJsBE4ttkz8zmd1GvGsAlQFfu3yrgLO8DtXSL5E4Dfu37e/33msTmO3LuPiCEVEEgBUNQ9IV9XmwPPAQ74Gc1yu7cAU4CsRqSEi54tIHXX/okFjdRoeIpKkniPAN0Bn4B/A1SLSwN90HlXNB9YCLwHfikiSiFwqIs18jlYiq9PwCXddxvzvUETkfiAV+EBVXxORKqpa6M5XZgCjVPXDovIo5roZ+FhVl7rn7YDrgZ5AE7yBKQWYq6oTop2vLFanYct7H3AS8A4wW1VzXHl1vCmqXwRm+ZFRRO4G5qvqV+55I2A48H94364zgMbAR6r6J7/rsojVaVjzhr8u/T7cOs5DtfvxZmkcDHwEjAFSQpbfAnxCFE+BACcAHwNbgXnFll0B/BVo5J6fi/chq+d3XVqdhj3zGODfwDnAP4FxQNOQ5cOAt4ATo5yrKfAasBtYW2zZKcDDQAv3vBveKZsGfn8urU5joy59/5AcR4VUBd4HOrvnPwEeB24qtt5nwC+AVsBFUchVBxjl7ucCI0OWJQHVQ543A6YCTfyuT6vTsOZNcBlOc887AH8GHi223j+Bq4G+wJVRylYTGOoeLwBuD1mWCFQrtv40vOHK/f5sWp3GQF3GxDWU4r2K3KHiEWAlXksK8DneP2Z3EekQsvpfgFfxvlXXjHAuUdW9wD/d/XPAzSHdWY+od+6/6BrFPXgfuOxw5ioPq9PIcHkLgG3Ar1xxJvA60FlE+oSsPhV4xi2rHo18qnoA+Jd7Oga4R0SS3PNCVT0EICJVReRvQF3g22hkK43VafhEui5jokEpTv97Tu9fQCsR6eT+GC4FcvG+peIq5168i0xdVHVWhHOpuz/oit4C1gB/DF0uIsOBdLxuhNe6f+Boq+2yFF18D0qdFs8V6DoVkeahz4vy4A2g10JE+ri63Qh8jXe9p+j6z4N4F207quqLkc4WknGv+8PyH7xTic+58kK33SDgC6AAuLyowY4WEeknInVD8gapTn+ULSRjIOtURH4uIm1Dcka2Lv067CrnodkAvD8gDwFpoYds7r4l3h+WP4csewe4NGR5yyjmqkKxawtAH2Ah3uma9u6+K9DOh/oUoBHetZEZxZb5VqdHySVBrFO8azULgYeKlVdx90nAb0PfDzAer7EDSCFCp+XKyPZDXQKJ7r4xkIM3a2BXvNOYLfChezjeKdYVwAu4aw4BqtPSsgWyTt1n4Au8I/XTo1WXUf3AlLMiBO/wajLwH+DneBddnwUahP5xAZKB0/B6JNzqlr8HDAhArqohzyfifXP+HDjB5/qt6+poJTDQlYVmjVqdHkMuX+vUfQaS8E4DZACDiy1PCHmcinex9n2803Bt8a7/jAhIttDrTi/g/Sh0Ad5Rpx+fy+p4p1aGBqVOjyGbr3XqPgO1gbfxvpyd4x5f6ZYnRrouo/7BqUDlXMJ/vzWfCTxXrOKecX9UGuJdNJqEd3rmfp9zPY3324jWrmwMsAm4MwB1WgXogveDwEHAFyHLqvpYp0fL
Fag6dVnuD8neo9jyp/GOYJvgfUP9E97puD8EINvfgTddfVcBrsI73eHr59P9QXvePa6B1+kjFfclwn02/arTo2V7Okh1SkjDh9crc2bI80Tgb5GqS98+QCVUwq3uD8qQYuWX4x22fYh3Tu9UoB/ekUL9kPUSCPmGEKBc5xDS7danOr00pCwZ79tWQ3d/g/sPk+ZDnR5rrqjWaUjeK9zztsB8vM4JGXjfAicAZwGt8f6o1y+2j2pBzObqN9nHz+Zl7nkrvIvDZ+B9U34TeBnvel1jvAvE0a7TY8oW7ToNyXt5sfIqwJXAE0V1hXeNZFKk6jKqH6JSKkPwvnF+BlyGd8pjJP/9XcFZeD++ScQbGuAFoHHI9gkBzZUYiVzHmT3Ffdj/4Nb7DbAfeLvY9tGu0/LmimqdlpK36BzzLXjXljriXcMZjddRITlk+4jUY5iy+fL5PEruJ/CG/TjXPe8CLAPaB6BOy5MtCJ/PkUBqyDqnAqtK2T7sdZmIz1RVReRs4Pfq/fp6H3A+oMAUVf2oaF0RWYp3mmm/615a1AUuiLnyI5GrPErJPgA4iPfhO1NE5uD9YPAzYD380GU32nVakVxRrdNS8g4UkSGq+jcRmaSq+1zGRcDJwJFI12OYsvny+Swjd9EPVG/B+5KGqq4Qkf8Atdz7qOJTnZY3WxA+n+fj9ST7p1vncxHJEpFBqvqW64mmkarLqHYbLum3D+5hOt7hJKr6Ht4YOJ2L/fYBvMo6ABxUT1iGLQhqrvKoQPbVQA+gF5AFLFDVrsBQ4CwRae5TnUY1VxjyrgL6iEjHoj/Yznl4n4G8cOcNcrayVDB3GrAH+D1wu4h0FZF78X5Zvsmt61edRjXbceZdC3QVkU5uvbruPRx262gk80b7dyg/GhI55E1lAnVE5CT3/GOgHlBXvEHVrhKRJXjngH8XgZY1qLnKo7zZP8E7/bEduEFV73Pr5+D9YnZzJcl1vHk/xuuRVgdARIaKyDK8o6q7I/QZCHK2slQ0dxtVfQzvtw83Ae3wrgvsrGTZjjdvPdzvuVR1D16X5cbRCBmVBkVE+ovIa8DT4o0Im+DKi065fQ3kA+eLSKKqrgCaA31U9TDet4Bfq+pwVd0e77kilH053h+WXqqaJyIJRd96in2bjctcYcxb9BlIc8u/JUKfgSBni0Dupnjn+1HVqcBoVR2hqlsqS7Yw5g39DIDX62typLNCFBoUETmL//58fzXeyJv1Q885qmom3qFbW2Cs2/QQbngCVf1IVT+rDLnK4zizb3TLC4oOf+M9V4TyFn0GvlDVTytTtgjmXl+0H/VGaag02SKQd2NI3qiNdBCNI5TueOfFX8Y7XKwK7NP/DknwkIi8iPfL3vFAP/FmOMvBGw2zsuUqj6BmD2quSOSdW4mzlSXIn4EgZ4uHvOHvNgz0BzqEPO/p3uB9eAOSfYT347kr8A4jXyFkyAy8c39h78Md1FyxnD2ouWIxb5CzxWruIGeLh7wlvocwVkYy3sCCe/F6Q9QOWdbPVUTReFDX4s3+1yNknSoR+kcKZK5Yzh7UXLGYN8jZYjV3kLPFQ96ybuE85VUL71D7Fvf4jKIFqvo13lAFRUM2f+AqcRf80Ic7Ut3ugpqrPIKaPai5ShPkvEHOVpYg5w5ytpLEWt5SHVeDIiLDReQnIlJXve6dE4CZQB5wsrj5k0WkGt4gfje6Tc/B+3V0HoS/T3RQc8Vy9qDmisW8Qc4Wq7mDnC0e8pZXheeUFxHBG1TsFbzRNNfhtaqjVXWHW+c0YAiQrqr/dGVd8c4FNsEbJfZmVV0ZpvcR2FyxnD2ouWIxb5CzxWruIGeLh7zHpILn+opG2e0AvFRUhjd65evF1h2DN19IMlDDldUgAvM9BzVXLGcPaq5YzBvkbLGaO8jZ4iHvMb/P8lYG8DDwKN5EMz/DG8+qaHkVYCvwk5Cy2njj3yzA66HQPBL/SEHMFcvZg5orFvMGOVus5g5ytnjIe7y3o15DEZGf
4PVzro/3M/8H8Q67zhaRfvDDebz73a3IRXjn/TKAkzTMQ2gENVd5BDV7UHPFYt4gZytLkHMHOVs85A2LcrSwZwBXhTx/Bm+49pHAwpBWtgneRaXWrmwQcGYEW/5A5orl7EHNFYt5g5wtVnMHOVs85A3Ley5HpdQEqvHfc4BX4uYbx2tBb3GP04BpUfzHCmSuWM4e1FyxmDfI2WI1d5CzxUPecNyOespLVQ+o6iH970il5+HNVAhwNd5w7u8A04Bv4H+HWo6EoOYqj6BmD2qu0gQ5b5CzlSXIuYOcrSSxljccyj3BlnijXCreMMizXfFe4G68eQE2qDvXp67ZjYag5iqPoGYPaq7SBDlvkLOVJci5g5ytJLGW93hU5IeNhXiDk+0AuruW9V6gUFX/o/5dOApqrvIIavag5ipNkPMGOVtZgpw7yNlKEmt5j11Fzo/hDV5WCPwHN9dyEG5BzRXL2YOaKxbzBjlbrOYOcrZ4yHustwr9Ul5EWgBXAU+q6qGKNFyRFNRc5RHU7EHNVZog5w1ytrIEOXeQs5Uk1vIeqwoPvWKMMcaUJNpzyhtjjIlT1qAYY4wJC2tQjDHGhIU1KMYYY8LCGhRjjDFhYQ2KMcaYsLAGxRhjTFj8P9U6E+dilPRXAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_dates(df_user_item_train['event_dttm']).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2018-07-01    118286\n",
       "Name: event_dttm, dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot_dates(df_user_item_test['event_dttm'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# transactions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>transaction_month</th>\n",
       "      <th>transaction_day</th>\n",
       "      <th>transaction_amt</th>\n",
       "      <th>merchant_id</th>\n",
       "      <th>merchant_mcc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>855115</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>1500</td>\n",
       "      <td>4554547</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>997036</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1657528</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>398237</td>\n",
       "      <td>5</td>\n",
       "      <td>24</td>\n",
       "      <td>2500</td>\n",
       "      <td>26375569</td>\n",
       "      <td>5813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>997036</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>16304402</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>291636</td>\n",
       "      <td>7</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>1259505</td>\n",
       "      <td>5411</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  transaction_month  transaction_day  transaction_amt  \\\n",
       "0       855115                  7                3             1500   \n",
       "1       997036                  6                6                0   \n",
       "2       398237                  5               24             2500   \n",
       "3       997036                  6                2                0   \n",
       "4       291636                  7               25                0   \n",
       "\n",
       "   merchant_id  merchant_mcc  \n",
       "0      4554547          5411  \n",
       "1      1657528          5411  \n",
       "2     26375569          5813  \n",
       "3     16304402          5411  \n",
       "4      1259505          5411  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_trans = pd.read_csv(PROJECT_PATH + 'transactions.csv')\n",
    "df_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "46948"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_trans['customer_id'].nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# stories_description"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>story_id</th>\n",
       "      <th>story_json</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>127</td>\n",
       "      <td>{\"guid\":\"770a5bae-0e3f-4a6b-b924-bd87bd51a038\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>254</td>\n",
       "      <td>{\"guid\":\"64f4c9ef-647b-4e04-b4d4-02297e939388\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>865</td>\n",
       "      <td>{\"guid\":\"3482206b-d223-4aec-92ba-0150055cd68a\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1491</td>\n",
       "      <td>{\"guid\":\"5f4a9215-01de-4777-b70f-a18899db8f1c\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>598</td>\n",
       "      <td>{\"guid\":\"ed8754bd-67be-4fa1-9289-5508d96f1fa4\"...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   story_id                                         story_json\n",
       "0       127  {\"guid\":\"770a5bae-0e3f-4a6b-b924-bd87bd51a038\"...\n",
       "1       254  {\"guid\":\"64f4c9ef-647b-4e04-b4d4-02297e939388\"...\n",
       "2       865  {\"guid\":\"3482206b-d223-4aec-92ba-0150055cd68a\"...\n",
       "3      1491  {\"guid\":\"5f4a9215-01de-4777-b70f-a18899db8f1c\"...\n",
       "4       598  {\"guid\":\"ed8754bd-67be-4fa1-9289-5508d96f1fa4\"..."
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_stories = pd.read_csv(PROJECT_PATH + 'stories_description.csv')\n",
    "df_stories.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dislike   -10.0\n",
       "skip       -0.1\n",
       "view        0.1\n",
       "like        0.5\n",
       "dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_event_weights = pd.Series({\n",
    "    'dislike': -10,\n",
    "    'skip': -0.1,\n",
    "    'view': 0.1,\n",
    "    'like': 0.5,\n",
    "})\n",
    "df_event_weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tinkoff_metric(df):\n",
    "    s_event_weights = df['event'].map(df_event_weights)\n",
    "    res = s_event_weights * df['score']\n",
    "    return res.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Const"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_test_predict = df_user_item_test.copy()\n",
    "df_test_predict['score'] = -1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.057477638942901095"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tinkoff_metric(df_test_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Polular features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "event\n",
       "dislike    0.014606\n",
       "like       0.132426\n",
       "skip       0.353203\n",
       "view       0.499765\n",
       "Name: customer_id, dtype: float64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fit default_value\n",
    "s_default_reaction_proba = df_user_item_train.groupby('event')['customer_id'].count() / len(df_user_item_train)\n",
    "s_default_reaction_proba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>event</th>\n",
       "      <th>dislike</th>\n",
       "      <th>like</th>\n",
       "      <th>skip</th>\n",
       "      <th>view</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.000811</td>\n",
       "      <td>0.007357</td>\n",
       "      <td>0.408511</td>\n",
       "      <td>0.583320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>0.001043</td>\n",
       "      <td>0.009459</td>\n",
       "      <td>0.810943</td>\n",
       "      <td>0.178555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>0.000859</td>\n",
       "      <td>0.007790</td>\n",
       "      <td>0.550188</td>\n",
       "      <td>0.441163</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>0.002434</td>\n",
       "      <td>0.022071</td>\n",
       "      <td>0.892201</td>\n",
       "      <td>0.083294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>362</th>\n",
       "      <td>0.167478</td>\n",
       "      <td>0.007357</td>\n",
       "      <td>0.019622</td>\n",
       "      <td>0.805542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1034355</th>\n",
       "      <td>0.001217</td>\n",
       "      <td>0.011035</td>\n",
       "      <td>0.862767</td>\n",
       "      <td>0.124980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1040157</th>\n",
       "      <td>0.002434</td>\n",
       "      <td>0.022071</td>\n",
       "      <td>0.558867</td>\n",
       "      <td>0.416627</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1042646</th>\n",
       "      <td>0.002434</td>\n",
       "      <td>0.022071</td>\n",
       "      <td>0.558867</td>\n",
       "      <td>0.416627</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1085762</th>\n",
       "      <td>0.000974</td>\n",
       "      <td>0.008828</td>\n",
       "      <td>0.290214</td>\n",
       "      <td>0.699984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1103495</th>\n",
       "      <td>0.001217</td>\n",
       "      <td>0.011035</td>\n",
       "      <td>0.029434</td>\n",
       "      <td>0.958314</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>17421 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "event         dislike      like      skip      view\n",
       "customer_id                                        \n",
       "15           0.000811  0.007357  0.408511  0.583320\n",
       "73           0.001043  0.009459  0.810943  0.178555\n",
       "129          0.000859  0.007790  0.550188  0.441163\n",
       "178          0.002434  0.022071  0.892201  0.083294\n",
       "362          0.167478  0.007357  0.019622  0.805542\n",
       "...               ...       ...       ...       ...\n",
       "1034355      0.001217  0.011035  0.862767  0.124980\n",
       "1040157      0.002434  0.022071  0.558867  0.416627\n",
       "1042646      0.002434  0.022071  0.558867  0.416627\n",
       "1085762      0.000974  0.008828  0.290214  0.699984\n",
       "1103495      0.001217  0.011035  0.029434  0.958314\n",
       "\n",
       "[17421 rows x 4 columns]"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fit users\n",
    "df_user_reaction_proba = df_user_item_train \\\n",
    "    [lambda x: x['customer_id'].isin(\n",
    "        df_user_item_train.groupby('customer_id')['event_dttm'].count()[lambda x: x > 3].index)] \\\n",
    "    .pivot_table(\n",
    "        index='customer_id', columns='event',\n",
    "        values='story_id', aggfunc='count', fill_value=0)\n",
    "\n",
    "df_user_reaction_proba = df_user_reaction_proba + s_default_reaction_proba\n",
    "df_user_reaction_proba = df_user_reaction_proba.div(df_user_reaction_proba.sum(axis=1), axis=0)\n",
    "\n",
    "df_user_reaction_proba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>event</th>\n",
       "      <th>dislike</th>\n",
       "      <th>like</th>\n",
       "      <th>skip</th>\n",
       "      <th>view</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>story_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>0.000016</td>\n",
       "      <td>0.000146</td>\n",
       "      <td>0.404152</td>\n",
       "      <td>0.595686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>0.000022</td>\n",
       "      <td>0.000202</td>\n",
       "      <td>0.776112</td>\n",
       "      <td>0.223664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>0.000221</td>\n",
       "      <td>0.002006</td>\n",
       "      <td>0.778079</td>\n",
       "      <td>0.219693</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000170</td>\n",
       "      <td>0.424073</td>\n",
       "      <td>0.575738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>0.000044</td>\n",
       "      <td>0.000395</td>\n",
       "      <td>0.514487</td>\n",
       "      <td>0.485074</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1100023</th>\n",
       "      <td>0.001043</td>\n",
       "      <td>0.009459</td>\n",
       "      <td>0.168086</td>\n",
       "      <td>0.821412</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1100025</th>\n",
       "      <td>0.000974</td>\n",
       "      <td>0.008828</td>\n",
       "      <td>0.090214</td>\n",
       "      <td>0.899984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1100026</th>\n",
       "      <td>0.002921</td>\n",
       "      <td>0.026485</td>\n",
       "      <td>0.070641</td>\n",
       "      <td>0.899953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1100027</th>\n",
       "      <td>0.001826</td>\n",
       "      <td>0.016553</td>\n",
       "      <td>0.044150</td>\n",
       "      <td>0.937471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1100028</th>\n",
       "      <td>0.001328</td>\n",
       "      <td>0.012039</td>\n",
       "      <td>0.123018</td>\n",
       "      <td>0.863615</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>749 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "event      dislike      like      skip      view\n",
       "story_id                                        \n",
       "123       0.000016  0.000146  0.404152  0.595686\n",
       "126       0.000022  0.000202  0.776112  0.223664\n",
       "127       0.000221  0.002006  0.778079  0.219693\n",
       "128       0.000019  0.000170  0.424073  0.575738\n",
       "129       0.000044  0.000395  0.514487  0.485074\n",
       "...            ...       ...       ...       ...\n",
       "1100023   0.001043  0.009459  0.168086  0.821412\n",
       "1100025   0.000974  0.008828  0.090214  0.899984\n",
       "1100026   0.002921  0.026485  0.070641  0.899953\n",
       "1100027   0.001826  0.016553  0.044150  0.937471\n",
       "1100028   0.001328  0.012039  0.123018  0.863615\n",
       "\n",
       "[749 rows x 4 columns]"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fit items\n",
    "df_item_reaction_proba = df_user_item_train \\\n",
    "    [lambda x: x['story_id'].isin(\n",
    "        df_user_item_train.groupby('story_id')['event_dttm'].count()[lambda x: x > 3].index)] \\\n",
    "    .pivot_table(\n",
    "        index='story_id', columns='event',\n",
    "        values='customer_id', aggfunc='count', fill_value=0)\n",
    "\n",
    "df_item_reaction_proba = df_item_reaction_proba + s_default_reaction_proba\n",
    "df_item_reaction_proba = df_item_reaction_proba.div(df_item_reaction_proba.sum(axis=1), axis=0)\n",
    "\n",
    "df_item_reaction_proba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "COLS_EVENT = ['dislike', 'like', 'skip', 'view']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>sum</th>\n",
       "      <th>mean</th>\n",
       "      <th>0.1</th>\n",
       "      <th>0.25</th>\n",
       "      <th>0.5</th>\n",
       "      <th>0.75</th>\n",
       "      <th>0.9</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.461512</td>\n",
       "      <td>1.461347</td>\n",
       "      <td>1.000834</td>\n",
       "      <td>0.621661</td>\n",
       "      <td>0.690875</td>\n",
       "      <td>0.782445</td>\n",
       "      <td>0.874050</td>\n",
       "      <td>1.026817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>0.572359</td>\n",
       "      <td>1.395484</td>\n",
       "      <td>0.823479</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.621661</td>\n",
       "      <td>0.731389</td>\n",
       "      <td>0.866579</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>0.477912</td>\n",
       "      <td>1.295631</td>\n",
       "      <td>0.818590</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.621661</td>\n",
       "      <td>0.690875</td>\n",
       "      <td>0.782445</td>\n",
       "      <td>0.908262</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>0.179176</td>\n",
       "      <td>0.851739</td>\n",
       "      <td>0.690875</td>\n",
       "      <td>0.530330</td>\n",
       "      <td>0.621661</td>\n",
       "      <td>0.621661</td>\n",
       "      <td>0.690875</td>\n",
       "      <td>0.769667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>0.160944</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                count       sum      mean       0.1      0.25       0.5  \\\n",
       "customer_id                                                               \n",
       "15           0.461512  1.461347  1.000834  0.621661  0.690875  0.782445   \n",
       "73           0.572359  1.395484  0.823479  0.000000  0.000000  0.621661   \n",
       "91           0.477912  1.295631  0.818590  0.000000  0.621661  0.690875   \n",
       "144          0.179176  0.851739  0.690875  0.530330  0.621661  0.621661   \n",
       "150          0.160944  0.000000  0.000000  0.000000  0.000000  0.000000   \n",
       "\n",
       "                 0.75       0.9  \n",
       "customer_id                      \n",
       "15           0.874050  1.026817  \n",
       "73           0.731389  0.866579  \n",
       "91           0.782445  0.908262  \n",
       "144          0.690875  0.769667  \n",
       "150          0.000000  0.000000  "
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_amnt_agg = pd.concat([\n",
    "    df_trans.groupby('customer_id')['transaction_amt'].agg(['count', 'sum', 'mean']),\n",
    "    df_trans.groupby('customer_id')['transaction_amt'].quantile([0.1, 0.25, 0.5, 0.75, 0.9]).unstack(),\n",
    "], axis=1)\n",
    "\n",
    "df_amnt_agg = np.log1p(df_amnt_agg) / 10\n",
    "\n",
    "df_amnt_agg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "ix_mcc = df_trans['merchant_mcc'].value_counts().iloc[:127].index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "def norm_row(df):\n",
    "    return df.div(df.sum(axis=1) + 1e-5, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"10\" halign=\"left\">count</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>merchant_mcc</th>\n",
       "      <th>0.0</th>\n",
       "      <th>1.0</th>\n",
       "      <th>2.0</th>\n",
       "      <th>3.0</th>\n",
       "      <th>4.0</th>\n",
       "      <th>5.0</th>\n",
       "      <th>6.0</th>\n",
       "      <th>7.0</th>\n",
       "      <th>8.0</th>\n",
       "      <th>9.0</th>\n",
       "      <th>...</th>\n",
       "      <th>118.0</th>\n",
       "      <th>119.0</th>\n",
       "      <th>120.0</th>\n",
       "      <th>121.0</th>\n",
       "      <th>122.0</th>\n",
       "      <th>123.0</th>\n",
       "      <th>124.0</th>\n",
       "      <th>125.0</th>\n",
       "      <th>126.0</th>\n",
       "      <th>127.0</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.080000</td>\n",
       "      <td>0.260000</td>\n",
       "      <td>0.020000</td>\n",
       "      <td>0.080000</td>\n",
       "      <td>0.080000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.04000</td>\n",
       "      <td>0.030000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>0.019672</td>\n",
       "      <td>0.150820</td>\n",
       "      <td>0.026230</td>\n",
       "      <td>0.013115</td>\n",
       "      <td>0.036066</td>\n",
       "      <td>0.295082</td>\n",
       "      <td>0.016393</td>\n",
       "      <td>0.02623</td>\n",
       "      <td>0.009836</td>\n",
       "      <td>0.009836</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>0.025424</td>\n",
       "      <td>0.152542</td>\n",
       "      <td>0.101695</td>\n",
       "      <td>0.016949</td>\n",
       "      <td>0.076271</td>\n",
       "      <td>0.025424</td>\n",
       "      <td>0.144068</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.067797</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.20000</td>\n",
       "      <td>0.399999</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.499999</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.249999</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.249999</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 384 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 count                                                    \\\n",
       "merchant_mcc     0.0       1.0       2.0       3.0       4.0       5.0     \n",
       "customer_id                                                                \n",
       "15            0.080000  0.260000  0.020000  0.080000  0.080000  0.000000   \n",
       "73            0.019672  0.150820  0.026230  0.013115  0.036066  0.295082   \n",
       "91            0.025424  0.152542  0.101695  0.016949  0.076271  0.025424   \n",
       "144           0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   \n",
       "150           0.000000  0.499999  0.000000  0.249999  0.000000  0.000000   \n",
       "\n",
       "                                                     ...   std              \\\n",
       "merchant_mcc     6.0      7.0       8.0       9.0    ... 118.0 119.0 120.0   \n",
       "customer_id                                          ...                     \n",
       "15            0.000000  0.04000  0.030000  0.000000  ...   0.0   0.0   0.0   \n",
       "73            0.016393  0.02623  0.009836  0.009836  ...   0.0   0.0   0.0   \n",
       "91            0.144068  0.00000  0.000000  0.067797  ...   0.0   0.0   0.0   \n",
       "144           0.000000  0.20000  0.399999  0.000000  ...   0.0   0.0   0.0   \n",
       "150           0.000000  0.00000  0.249999  0.000000  ...   0.0   0.0   0.0   \n",
       "\n",
       "                                                        \n",
       "merchant_mcc 121.0 122.0 123.0 124.0 125.0 126.0 127.0  \n",
       "customer_id                                             \n",
       "15             0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "73             0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "91             0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "144            0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "150            0.0   0.0   0.0   0.0   0.0   0.0   0.0  \n",
       "\n",
       "[5 rows x 384 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_mcc_agg = df_trans \\\n",
    "    .assign(merchant_mcc=df_trans['merchant_mcc'].map({v: i + 1 for i, v in enumerate(ix_mcc)}).fillna(0))\n",
    "\n",
    "df_mcc_agg = pd.concat([\n",
    "    norm_row(df_mcc_agg.pivot_table(index='customer_id', columns='merchant_mcc',\n",
    "                                    values='transaction_amt',aggfunc=['count']).fillna(0)),\n",
    "    norm_row(df_mcc_agg.pivot_table(index='customer_id', columns='merchant_mcc',\n",
    "                                    values='transaction_amt',aggfunc=['sum']).fillna(0)),\n",
    "    norm_row(df_mcc_agg.pivot_table(index='customer_id', columns='merchant_mcc',\n",
    "                                    values='transaction_amt',aggfunc=['std']).fillna(0)),\n",
    "], axis=1)\n",
    "\n",
    "df_mcc_agg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>v000</th>\n",
       "      <th>v001</th>\n",
       "      <th>v002</th>\n",
       "      <th>v003</th>\n",
       "      <th>v004</th>\n",
       "      <th>v005</th>\n",
       "      <th>v006</th>\n",
       "      <th>v007</th>\n",
       "      <th>v008</th>\n",
       "      <th>v009</th>\n",
       "      <th>...</th>\n",
       "      <th>v054</th>\n",
       "      <th>v055</th>\n",
       "      <th>v056</th>\n",
       "      <th>v057</th>\n",
       "      <th>v058</th>\n",
       "      <th>v059</th>\n",
       "      <th>v060</th>\n",
       "      <th>v061</th>\n",
       "      <th>v062</th>\n",
       "      <th>v063</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>customer_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.045986</td>\n",
       "      <td>0.073066</td>\n",
       "      <td>-0.102657</td>\n",
       "      <td>-0.070000</td>\n",
       "      <td>-0.226388</td>\n",
       "      <td>0.021740</td>\n",
       "      <td>0.095810</td>\n",
       "      <td>0.140051</td>\n",
       "      <td>0.131922</td>\n",
       "      <td>-0.026116</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.005688</td>\n",
       "      <td>0.162406</td>\n",
       "      <td>-0.067965</td>\n",
       "      <td>-0.040175</td>\n",
       "      <td>0.164212</td>\n",
       "      <td>-0.099997</td>\n",
       "      <td>0.077077</td>\n",
       "      <td>0.011274</td>\n",
       "      <td>-0.008279</td>\n",
       "      <td>-0.003820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>0.154809</td>\n",
       "      <td>0.037429</td>\n",
       "      <td>-0.177134</td>\n",
       "      <td>-0.045597</td>\n",
       "      <td>-0.121371</td>\n",
       "      <td>-0.060306</td>\n",
       "      <td>0.174855</td>\n",
       "      <td>0.182637</td>\n",
       "      <td>0.093187</td>\n",
       "      <td>-0.010200</td>\n",
       "      <td>...</td>\n",
       "      <td>0.097886</td>\n",
       "      <td>0.177703</td>\n",
       "      <td>0.123618</td>\n",
       "      <td>-0.145646</td>\n",
       "      <td>0.152623</td>\n",
       "      <td>-0.057651</td>\n",
       "      <td>0.164589</td>\n",
       "      <td>-0.121165</td>\n",
       "      <td>-0.115062</td>\n",
       "      <td>-0.051746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>0.014153</td>\n",
       "      <td>-0.048872</td>\n",
       "      <td>-0.187590</td>\n",
       "      <td>0.035219</td>\n",
       "      <td>-0.189449</td>\n",
       "      <td>0.017896</td>\n",
       "      <td>0.180154</td>\n",
       "      <td>-0.005428</td>\n",
       "      <td>0.079310</td>\n",
       "      <td>0.065733</td>\n",
       "      <td>...</td>\n",
       "      <td>0.105551</td>\n",
       "      <td>0.161654</td>\n",
       "      <td>-0.068084</td>\n",
       "      <td>-0.094257</td>\n",
       "      <td>0.150516</td>\n",
       "      <td>-0.059367</td>\n",
       "      <td>-0.019642</td>\n",
       "      <td>-0.059673</td>\n",
       "      <td>-0.019760</td>\n",
       "      <td>0.078213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>0.003958</td>\n",
       "      <td>0.175994</td>\n",
       "      <td>-0.069661</td>\n",
       "      <td>0.079811</td>\n",
       "      <td>-0.202884</td>\n",
       "      <td>-0.170080</td>\n",
       "      <td>0.166775</td>\n",
       "      <td>0.042914</td>\n",
       "      <td>0.159432</td>\n",
       "      <td>-0.060284</td>\n",
       "      <td>...</td>\n",
       "      <td>0.018247</td>\n",
       "      <td>0.256953</td>\n",
       "      <td>-0.035306</td>\n",
       "      <td>-0.055328</td>\n",
       "      <td>0.184141</td>\n",
       "      <td>-0.125160</td>\n",
       "      <td>-0.011663</td>\n",
       "      <td>-0.024973</td>\n",
       "      <td>-0.033970</td>\n",
       "      <td>0.025868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>-0.117107</td>\n",
       "      <td>0.131219</td>\n",
       "      <td>-0.116869</td>\n",
       "      <td>0.079403</td>\n",
       "      <td>-0.202366</td>\n",
       "      <td>0.062064</td>\n",
       "      <td>0.164296</td>\n",
       "      <td>0.051221</td>\n",
       "      <td>0.113916</td>\n",
       "      <td>0.027527</td>\n",
       "      <td>...</td>\n",
       "      <td>0.004127</td>\n",
       "      <td>0.177279</td>\n",
       "      <td>0.014191</td>\n",
       "      <td>-0.062098</td>\n",
       "      <td>0.028283</td>\n",
       "      <td>-0.136526</td>\n",
       "      <td>-0.015416</td>\n",
       "      <td>0.072359</td>\n",
       "      <td>-0.042742</td>\n",
       "      <td>0.024230</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 64 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 v000      v001      v002      v003      v004      v005  \\\n",
       "customer_id                                                               \n",
       "15           0.045986  0.073066 -0.102657 -0.070000 -0.226388  0.021740   \n",
       "73           0.154809  0.037429 -0.177134 -0.045597 -0.121371 -0.060306   \n",
       "91           0.014153 -0.048872 -0.187590  0.035219 -0.189449  0.017896   \n",
       "144          0.003958  0.175994 -0.069661  0.079811 -0.202884 -0.170080   \n",
       "150         -0.117107  0.131219 -0.116869  0.079403 -0.202366  0.062064   \n",
       "\n",
       "                 v006      v007      v008      v009  ...      v054      v055  \\\n",
       "customer_id                                          ...                       \n",
       "15           0.095810  0.140051  0.131922 -0.026116  ... -0.005688  0.162406   \n",
       "73           0.174855  0.182637  0.093187 -0.010200  ...  0.097886  0.177703   \n",
       "91           0.180154 -0.005428  0.079310  0.065733  ...  0.105551  0.161654   \n",
       "144          0.166775  0.042914  0.159432 -0.060284  ...  0.018247  0.256953   \n",
       "150          0.164296  0.051221  0.113916  0.027527  ...  0.004127  0.177279   \n",
       "\n",
       "                 v056      v057      v058      v059      v060      v061  \\\n",
       "customer_id                                                               \n",
       "15          -0.067965 -0.040175  0.164212 -0.099997  0.077077  0.011274   \n",
       "73           0.123618 -0.145646  0.152623 -0.057651  0.164589 -0.121165   \n",
       "91          -0.068084 -0.094257  0.150516 -0.059367 -0.019642 -0.059673   \n",
       "144         -0.035306 -0.055328  0.184141 -0.125160 -0.011663 -0.024973   \n",
       "150          0.014191 -0.062098  0.028283 -0.136526 -0.015416  0.072359   \n",
       "\n",
       "                 v062      v063  \n",
       "customer_id                      \n",
       "15          -0.008279 -0.003820  \n",
       "73          -0.115062 -0.051746  \n",
       "91          -0.019760  0.078213  \n",
       "144         -0.033970  0.025868  \n",
       "150         -0.042742  0.024230  \n",
       "\n",
       "[5 rows x 64 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_embeddings = pd.read_pickle(PROJECT_PATH + \"tinkoff_all_vectors.pickle\").set_index('customer_id')\n",
    "df_embeddings.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>story_id</th>\n",
       "      <th>story_json</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>127</td>\n",
       "      <td>{\"guid\":\"770a5bae-0e3f-4a6b-b924-bd87bd51a038\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>254</td>\n",
       "      <td>{\"guid\":\"64f4c9ef-647b-4e04-b4d4-02297e939388\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>865</td>\n",
       "      <td>{\"guid\":\"3482206b-d223-4aec-92ba-0150055cd68a\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1491</td>\n",
       "      <td>{\"guid\":\"5f4a9215-01de-4777-b70f-a18899db8f1c\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>598</td>\n",
       "      <td>{\"guid\":\"ed8754bd-67be-4fa1-9289-5508d96f1fa4\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>954</th>\n",
       "      <td>882</td>\n",
       "      <td>{\"guid\":\"15eebb21-5e66-4e04-bd89-7003b8a90e31\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>955</th>\n",
       "      <td>1473</td>\n",
       "      <td>{\"guid\":\"4f9261e5-b049-41bd-9817-cb678c536c88\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>956</th>\n",
       "      <td>266</td>\n",
       "      <td>{\"guid\":\"e98bd2c6-274c-4ce4-b137-2df085621388\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>957</th>\n",
       "      <td>764</td>\n",
       "      <td>{\"guid\":\"0bce8dea-18f4-4b52-a8c3-a049be1c9c4d\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>958</th>\n",
       "      <td>224</td>\n",
       "      <td>{\"guid\":\"d24210e8-5247-4fc0-85e8-2c18a8e1363d\"...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>959 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     story_id                                         story_json\n",
       "0         127  {\"guid\":\"770a5bae-0e3f-4a6b-b924-bd87bd51a038\"...\n",
       "1         254  {\"guid\":\"64f4c9ef-647b-4e04-b4d4-02297e939388\"...\n",
       "2         865  {\"guid\":\"3482206b-d223-4aec-92ba-0150055cd68a\"...\n",
       "3        1491  {\"guid\":\"5f4a9215-01de-4777-b70f-a18899db8f1c\"...\n",
       "4         598  {\"guid\":\"ed8754bd-67be-4fa1-9289-5508d96f1fa4\"...\n",
       "..        ...                                                ...\n",
       "954       882  {\"guid\":\"15eebb21-5e66-4e04-bd89-7003b8a90e31\"...\n",
       "955      1473  {\"guid\":\"4f9261e5-b049-41bd-9817-cb678c536c88\"...\n",
       "956       266  {\"guid\":\"e98bd2c6-274c-4ce4-b137-2df085621388\"...\n",
       "957       764  {\"guid\":\"0bce8dea-18f4-4b52-a8c3-a049be1c9c4d\"...\n",
       "958       224  {\"guid\":\"d24210e8-5247-4fc0-85e8-2c18a8e1363d\"...\n",
       "\n",
       "[959 rows x 2 columns]"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_stories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>json_len</th>\n",
       "      <th>word_count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>story_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>0.946553</td>\n",
       "      <td>0.1125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>254</th>\n",
       "      <td>0.944707</td>\n",
       "      <td>0.1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>865</th>\n",
       "      <td>1.035609</td>\n",
       "      <td>0.4850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1491</th>\n",
       "      <td>1.032430</td>\n",
       "      <td>0.5700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>598</th>\n",
       "      <td>1.031012</td>\n",
       "      <td>0.4650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>882</th>\n",
       "      <td>0.968689</td>\n",
       "      <td>0.1250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1473</th>\n",
       "      <td>0.966890</td>\n",
       "      <td>0.1550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>0.974432</td>\n",
       "      <td>0.2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>764</th>\n",
       "      <td>0.936837</td>\n",
       "      <td>0.1425</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>0.966224</td>\n",
       "      <td>0.1575</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>959 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          json_len  word_count\n",
       "story_id                      \n",
       "127       0.946553      0.1125\n",
       "254       0.944707      0.1000\n",
       "865       1.035609      0.4850\n",
       "1491      1.032430      0.5700\n",
       "598       1.031012      0.4650\n",
       "...            ...         ...\n",
       "882       0.968689      0.1250\n",
       "1473      0.966890      0.1550\n",
       "266       0.974432      0.2000\n",
       "764       0.936837      0.1425\n",
       "224       0.966224      0.1575\n",
       "\n",
       "[959 rows x 2 columns]"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_stori_features = (np.log1p(df_stories.set_index('story_id')['story_json'].str.len()) / 10) \\\n",
    "    .rename('json_len').to_frame()\n",
    "df_stori_features['word_count'] = df_stories.set_index('story_id')['story_json'].apply(\n",
    "    lambda x: len(re.findall(r'[а-яА-Я]+', x))) / 400\n",
    "    \n",
    "df_stori_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7feca8b725c0>"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAQD0lEQVR4nO3dfYxl9V3H8fe3bFuRQRa67UgW2sFka6SMRXZErI2dCVqX3aRLY0MgtN1FdE2ljQ8bw6qJEJsmawzVEFvqVgiLWgZsrWx4sCErk02tS9m1lQUqdoWh3RV307Jsu4DVga9/3DNyd5g7987Mffzl/Upu5jzNPZ+5985nzj3n3DORmUiSyvK6XgeQJLWf5S5JBbLcJalAlrskFchyl6QCreh1AIBVq1blyMhIw/kvvPACp512WvcCLdEg5DRj+wxCTjO2Tz/m3L9//3cy883zzszMnt/Wrl2bC3nooYcWnN8vBiGnGdtnEHKasX36MSewLxv0qrtlJKlAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQH1x+YFBNbLtvpPGt47OsHnOtE6Y3r6h4+uQNNjccpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkF8lTIATT3FMzFWO7pmp6GKQ0Gt9wlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIK1LTcI+LciHgoIp6IiMcj4jer6WdFxIMR8c3q65nV9IiImyPiYEQ8GhEXdfqHkCSdrJUt9xlga2aeD1wCXBcR5wPbgN2ZuQbYXY0DXAasqW5bgFvanlqStKCm5Z6Zz2bmv1TD3we+AawGNgI7q8V2ApdXwxuBO7JmL7AyIs5ue3JJUkORma0vHDEC7AEuAL6VmSur6QEcy8yVEXEvsD0zv1zN2w1cn5n75tzXFmpb9gwPD6+dnJxsuN4TJ04wNDS0iB+rOw4cPn7S+PCpcOSlHoVp0XIzjq4+o31hGujX53uuQchpxvbpx5wTExP7M3Nsvnkt/yemiBgCvgD8VmZ+r9bnNZmZEdH6X4na9+wAdgCMjY3l+Ph4w2WnpqZYaH6vzP2PRltHZ7jpQH//c6vlZpy+erx9YRro1+d7rkHIacb2GZScs1o6WyYiXk+t2P8mM/+umnxkdndL9fVoNf0wcG7dt59TTZMkdUkrZ8sEcCvwjcz8ZN2sXcCmangTcE/d9A9XZ81cAhzPzGfbmFmS1EQr789/DvgQcCAivl5N+31gO3B3RFwLPANcUc27H1gPHAReBK5pa2JJUlNNy706MBoNZl86z/IJXLfMXJKkZfATqpJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFai/L4SivjMy53o6nbB1dOY11+2Z3r6h4+uVSuKWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCDfzlB7rxcXhJGjRuuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKlDTco+I2yLiaEQ8Vjftxog4HBFfr27r6+b9XkQcjIgnI+KXOhVcktRYK1vutwPr5pn+p5l5YXW7HyAizgeuBN5Rfc+nI+KUdoWVJLWmabln5h7guRbvbyMwmZk/yMyngYPAxcvIJ0lagsjM5gtFjAD3ZuYF1fiNwGbge8A+YGtmHouIPwf2ZuZfV8vdCjyQmZ+f5z63AFsAhoeH105OTjZc/4kTJxgaGpp33oHDx5vm75bhU+HIS71OsbBBzTi6+ozehFnAQq/LfmHG9unHnBMTE/szc2y+eSuWeJ+3AB8Hsvp6E/Ari7mDzNwB7AAYGxvL8fHxhstOTU3RaP7m
bfctZrUdtXV0hpsOLPUh7Y5BzTh99XhvwixgoddlvzBj+wxKzllLOlsmM49k5suZ+QrwWV7d9XIYOLdu0XOqaZKkLlpSuUfE2XWj7wdmz6TZBVwZEW+MiPOANcBXlxdRkrRYTd+fR8SdwDiwKiIOATcA4xFxIbXdMtPArwNk5uMRcTfwBDADXJeZL3cmuiSpkablnplXzTP51gWW/wTwieWEkiQtj59QlaQCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBmpZ7RNwWEUcj4rG6aWdFxIMR8c3q65nV9IiImyPiYEQ8GhEXdTK8JGl+rWy53w6smzNtG7A7M9cAu6txgMuANdVtC3BLe2JKkhajabln5h7guTmTNwI7q+GdwOV10+/Imr3Ayog4u11hJUmtWeo+9+HMfLYa/i9guBpeDXy7brlD1TRJUhdFZjZfKGIEuDczL6jGn8/MlXXzj2XmmRFxL7A9M79cTd8NXJ+Z++a5zy3Udt0wPDy8dnJysuH6T5w4wdDQ0LzzDhw+3jR/twyfCkde6nWKhQ1qxtHVZ/QmzAIWel32CzO2Tz/mnJiY2J+ZY/PNW7HE+zwSEWdn5rPVbpej1fTDwLl1y51TTXuNzNwB7AAYGxvL8fHxhiubmpqi0fzN2+5bbPaO2To6w00HlvqQdsegZpy+erw3YRaw0OuyX5ixfQYl56yl7pbZBWyqhjcB99RN/3B11swlwPG63TeSpC5pugkXEXcC48CqiDgE3ABsB+6OiGuBZ4ArqsXvB9YDB4EXgWs6kFmS1ETTcs/MqxrMunSeZRO4brmhpLlGerj7bXr7hp6tW1oqP6EqSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUoBXL+eaImAa+D7wMzGTmWEScBdwFjADTwBWZeWx5MSVJi9GOLfeJzLwwM8eq8W3A7sxcA+yuxiVJXdSJ3TIbgZ3V8E7g8g6sQ5K0gMjMpX9zxNPAMSCBv8jMHRHxfGaurOYHcGx2fM73bgG2AAwPD6+dnJxsuJ4TJ04wNDQ077wDh48vOX+7DZ8KR17qdYqFmXHxRlefMe/0hV6X/cKM7dOPOScmJvbX7TU5ybL2uQPvzszDEfEW4MGI+Lf6mZmZETHvX4/M3AHsABgbG8vx8fGGK5mamqLR/M3b7lta8g7YOjrDTQeW+5B2lhkXb/rq8XmnL/S67BdmbJ9ByTlrWbtlMvNw9fUo8EXgYuBIRJwNUH09utyQkqTFWXK5R8RpEXH67DDwXuAxYBewqVpsE3DPckNKkhZnOe99h4Ev1narswL4XGb+Q0Q8AtwdEdcCzwBXLD+mJGkxllzumfkU8M55pn8XuHQ5oaR+MtLguM7W0ZmOHvOZ3r6hY/et8vkJVUkqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCrSi1wEkzW9k233Lvo+tozNsXuT9TG/fsOz1qvfccpekAlnuklQgy12S
CmS5S1KBLHdJKlDHyj0i1kXEkxFxMCK2dWo9kqTX6sipkBFxCvAp4BeBQ8AjEbErM5/oxPoktU87TsFcjPrTNT0Ns306dZ77xcDBzHwKICImgY2A5S6p77TyB20pnxloRaf+oEVmtv9OIz4ArMvMX63GPwT8TGZ+tG6ZLcCWavTHgScXuMtVwHfaHrT9BiGnGdtnEHKasX36MefbMvPN883o2SdUM3MHsKOVZSNiX2aOdTjSsg1CTjO2zyDkNGP7DErOWZ06oHoYOLdu/JxqmiSpCzpV7o8AayLivIh4A3AlsKtD65IkzdGR3TKZORMRHwW+BJwC3JaZjy/jLlvafdMHBiGnGdtnEHKasX0GJSfQoQOqkqTe8hOqklQgy12SCtRX5d7skgUR8caIuKua/3BEjPRhxt+JiCci4tGI2B0Rb+t2xlZy1i33yxGREdH1U7xayRgRV1SP5+MR8bl+yxgRb42IhyLia9Vzvr4HGW+LiKMR8ViD+RERN1c/w6MRcVEfZry6ynYgIr4SEe/sdsYqx4I565b76YiYqT7T058ysy9u1A68/gfwY8AbgH8Fzp+zzG8An6mGrwTu6sOME8APV8Mf6XbGVnNWy50O7AH2AmP9lhFYA3wNOLMaf0sfZtwBfKQaPh+Y7sHz/fPARcBjDeavBx4AArgEeLgPM76r7nm+rBcZW8lZ97r4R+B+4AO9yNnKrZ+23P//kgWZ+T/A7CUL6m0EdlbDnwcujYjop4yZ+VBmvliN7qV2jn+3tfJYAnwc+GPgv7sZrtJKxl8DPpWZxwAy82gfZkzgR6rhM4D/7GK+WoDMPcBzCyyyEbgja/YCKyPi7O6kq2mWMTO/Mvs807vfm1YeS4CPAV8Auv16XJR+KvfVwLfrxg9V0+ZdJjNngOPAm7qSbs76K/NlrHcttS2mbmuas3prfm5mdvcqUa9q5bF8O/D2iPiniNgbEeu6lq6mlYw3Ah+MiEPUtuQ+1p1oi7LY122v9er3pqmIWA28H7il11ma8R9kd0hEfBAYA97T6yxzRcTrgE8Cm3scpZkV1HbNjFPbktsTEaOZ+XxPU53sKuD2zLwpIn4W+KuIuCAzX+l1sEEUERPUyv3dvc7SwJ8B12fmK93dabB4/VTurVyyYHaZQxGxgtrb4O92J95J658172UVIuIXgD8A3pOZP+hStnrNcp4OXABMVS/QHwV2RcT7MnNfn2SE2hbmw5n5v8DTEfHv1Mr+ke5EbCnjtcA6gMz854j4IWoXmOqnt+wDcTmQiPhJ4C+ByzKzm7/XizEGTFa/N6uA9RExk5l/39tY8+j1Tv+6gxQrgKeA83j14NU75ixzHScfUL27DzP+FLWDcGv6+bGcs/wU3T+g2spjuQ7YWQ2vorZr4U19lvEBYHM1/BPU9rlHD57zERofrNzAyQdUv9rtfC1kfCtwEHhXL7K1mnPOcrfTxwdU+2bLPRtcsiAi/gjYl5m7gFupve09SO2gx5V9mPFPgCHgb6u/7t/KzPf1Yc6eajHjl4D3RsQTwMvA72YXt+hazLgV+GxE/Da1g6ubs/rN75aIuJParqtV1b7/G4DXVz/DZ6gdC1hPrTxfBK7pZr4WM/4hteNnn65+b2ayB1dgbCHnwPDyA5JUoH46W0aS1CaWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSrQ/wFjHsfaHUGicQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_stori_features['word_count'].hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_features(df, use_amnt_agg=False, use_mcc_agg=False, use_embeddings=False):\n",
    "    feature_list = [\n",
    "        df_user_reaction_proba.reindex(index=df['customer_id'], columns=COLS_EVENT)\n",
    "            .fillna(s_default_reaction_proba).values,\n",
    "        np.log1p(df.groupby('customer_id')[['event']].count().reindex(index=df['customer_id'])\n",
    "                 .fillna(0.0).values),\n",
    "        df_item_reaction_proba.reindex(index=df['story_id'], columns=COLS_EVENT)\n",
    "            .fillna(s_default_reaction_proba).values,\n",
    "        np.log1p(df.groupby('story_id')[['event']].count().reindex(index=df['story_id'])\n",
    "                 .fillna(0.0).values),\n",
    "        df_stori_features.reindex(index=df['story_id'])\n",
    "            .fillna({'json_len': 1.014882, 'word_count': 0.4}).values\n",
    "    ]\n",
    "    \n",
    "    if use_amnt_agg:\n",
    "        feature_list.append(\n",
    "            df_amnt_agg.reindex(index=df['customer_id']).fillna(0.0).values)        \n",
    "    \n",
    "    if use_mcc_agg:\n",
    "        feature_list.append(\n",
    "            df_mcc_agg.reindex(index=df['customer_id']).fillna(0.0).values)\n",
    "        \n",
    "    if use_embeddings:\n",
    "        feature_list.append(\n",
    "            df_embeddings.reindex(index=df['customer_id']).fillna(0.0).values)\n",
    "    \n",
    "    X = np.concatenate(feature_list, axis=1)\n",
    "    return X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train = df_user_item_train['event'].map({v: i for i, v in enumerate(COLS_EVENT)})\n",
    "y_test = df_user_item_test['event'].map({v: i for i, v in enumerate(COLS_EVENT)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "import lightgbm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "def roc_auc_mc_score(df):\n",
    "    def pair_ranking_rate(df):\n",
    "        events = df['event'].map({'dislike': 0, 'skip': 1, 'view': 2, 'like': 3}).values\n",
    "        scores = df['score'].values\n",
    "        mask = (np.sign(events.reshape(-1, 1) - events.reshape(1, -1)) > 0).astype(int)\n",
    "        right_pairs = ((mask * np.sign(scores.reshape(-1, 1) - scores.reshape(1, -1))) > 0).astype(float)\n",
    "        return right_pairs.sum() / mask.sum() if mask.sum() > 0 else np.NaN\n",
    "    \n",
    "    return df.groupby('customer_id').apply(pair_ranking_rate).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimate_model_and_features(model):\n",
    "    model.fit(X_train, y_train)\n",
    "    predict = model.predict_proba(X_test)\n",
    "\n",
    "    score1 = tinkoff_metric(df_user_item_test.assign(\n",
    "        score=np.sign((predict * np.array([[-10, 0.5, -0.1, 0.1]])).sum(axis=1))))\n",
    "    \n",
    "    score2 = roc_auc_mc_score(df_user_item_test.assign(\n",
    "        score=(predict * np.array([[-10, 0.5, -0.1, 0.1]])).sum(axis=1)))\n",
    "    print(f'{model.__class__.__name__:25s}: {score1:.4f}, {score2:.4f}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimate_model_and_features_ens(model):\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 1, 'view': 1, 'like': 1}))\n",
    "    predict1 = model.predict_proba(X_test)[:, 1]\n",
    "\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 0, 'view': 1, 'like': 1}))\n",
    "    predict2 = model.predict_proba(X_test)[:, 1]\n",
    "\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 0, 'view': 0, 'like': 1}))\n",
    "    predict3 = model.predict_proba(X_test)[:, 1]\n",
    "    \n",
    "    predict = (predict1 + 0.5 * predict2 + predict3)\n",
    "\n",
    "    score2 = roc_auc_mc_score(df_user_item_test.assign(score=predict))\n",
    "    print(f'{model.__class__.__name__:25s}: {score2:.4f}') "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Base features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((354855, 12), (118286, 12))"
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train = make_features(df_user_item_train)\n",
    "X_test = make_features(df_user_item_test)\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : 0.5979\n",
      "RandomForestClassifier   : 0.5956\n",
      "LGBMClassifier           : 0.6127\n",
      "CPU times: user 5min 40s, sys: 18.7 s, total: 5min 59s\n",
      "Wall time: 2min 14s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features_ens(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features_ens(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features_ens(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features_ens(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimate_model_and_features_ens(model):\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 1, 'view': 1, 'like': 1}))\n",
    "    predict1 = model.predict_proba(X_test)[:, 1]\n",
    "\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 0, 'view': 1, 'like': 1}))\n",
    "    predict2 = model.predict_proba(X_test)[:, 1]\n",
    "\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 0, 'view': 0, 'like': 1}))\n",
    "    predict3 = model.predict_proba(X_test)[:, 1]\n",
    "    \n",
    "    predict = (predict1 + 2.0 * predict2 + predict3)\n",
    "\n",
    "    score2 = roc_auc_mc_score(df_user_item_test.assign(score=predict))\n",
    "    print(f'{model.__class__.__name__:25s}: {score2:.4f}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : 0.6071\n",
      "RandomForestClassifier   : 0.6013\n",
      "LGBMClassifier           : 0.6195\n",
      "CPU times: user 5min 41s, sys: 18.6 s, total: 6min\n",
      "Wall time: 2min 14s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features_ens(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features_ens(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features_ens(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features_ens(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimate_model_and_features_ens(model):\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 1, 'view': 1, 'like': 1}))\n",
    "    predict1_tr = model.predict_proba(X_train)[:, 1]\n",
    "    predict1_ts = model.predict_proba(X_test)[:, 1]\n",
    "\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 0, 'view': 1, 'like': 1}))\n",
    "    predict2_tr = model.predict_proba(X_train)[:, 1]\n",
    "    predict2_ts = model.predict_proba(X_test)[:, 1]\n",
    "\n",
    "    model.fit(X_train, df_user_item_train['event'].map({'dislike': 0, 'skip': 0, 'view': 0, 'like': 1}))\n",
    "    predict3_tr = model.predict_proba(X_train)[:, 1]\n",
    "    predict3_ts = model.predict_proba(X_test)[:, 1]\n",
    "    \n",
    "    predict_tr = (predict1_tr + 10.0 * predict2_tr + 2 * predict3_tr)\n",
    "    predict_ts = (predict1_ts + 10.0 * predict2_ts + 2 * predict3_ts)\n",
    "\n",
    "    score_tr = roc_auc_mc_score(df_user_item_train.assign(score=predict_tr))\n",
    "    score_ts = roc_auc_mc_score(df_user_item_test.assign(score=predict_ts))\n",
    "    print(f'{model.__class__.__name__:25s}: train {score_tr:.4f}, test {score_ts:.4f}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : train 0.6549, test 0.6080\n",
      "RandomForestClassifier   : train 0.8752, test 0.6032\n",
      "LGBMClassifier           : train 0.7021, test 0.6206\n",
      "CPU times: user 8min 34s, sys: 23.9 s, total: 8min 58s\n",
      "Wall time: 4min 23s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features_ens(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features_ens(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features_ens(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features_ens(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : 0.6061\n",
      "RandomForestClassifier   : 0.6005\n",
      "LGBMClassifier           : 0.6163\n",
      "CPU times: user 6min 2s, sys: 29 s, total: 6min 31s\n",
      "Wall time: 2min 25s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features_ens(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features_ens(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features_ens(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features_ens(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : 0.1014, 0.5891\n",
      "RandomForestClassifier   : 0.0853, 0.5354\n",
      "LGBMClassifier           : 0.0794, 0.5674\n",
      "CPU times: user 4min, sys: 14.5 s, total: 4min 15s\n",
      "Wall time: 1min 54s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# use_amnt_agg features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((354855, 20), (118286, 20))"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train = make_features(df_user_item_train, use_amnt_agg=True)\n",
    "X_test = make_features(df_user_item_test, use_amnt_agg=True)\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : 0.6069\n",
      "RandomForestClassifier   : 0.5976\n",
      "LGBMClassifier           : 0.6172\n",
      "CPU times: user 7min 34s, sys: 51.8 s, total: 8min 26s\n",
      "Wall time: 3min 10s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features_ens(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features_ens(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features_ens(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features_ens(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# use_mcc_agg features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((354855, 396), (118286, 396))"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train = make_features(df_user_item_train, use_mcc_agg=True)\n",
    "X_test = make_features(df_user_item_test, use_mcc_agg=True)\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : 0.6078\n",
      "RandomForestClassifier   : 0.5953\n",
      "LGBMClassifier           : 0.6140\n",
      "CPU times: user 25min 4s, sys: 2min 2s, total: 27min 7s\n",
      "Wall time: 9min 1s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features_ens(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features_ens(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features_ens(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features_ens(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# use_embeddings features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((354855, 76), (118286, 76))"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train = make_features(df_user_item_train, use_embeddings=True)\n",
    "X_test = make_features(df_user_item_test, use_embeddings=True)\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression       : 0.6062\n",
      "RandomForestClassifier   : 0.5989\n",
      "LGBMClassifier           : 0.6165\n",
      "CPU times: user 20min 57s, sys: 56.9 s, total: 21min 54s\n",
      "Wall time: 6min 55s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "estimate_model_and_features_ens(LogisticRegression(solver='liblinear', multi_class='ovr'))\n",
    "estimate_model_and_features_ens(RandomForestClassifier(n_estimators=100, n_jobs=4))\n",
    "# estimate_model_and_features_ens(xgb.XGBClassifier(n_estimators=300, n_jobs=4))\n",
    "estimate_model_and_features_ens(lightgbm.LGBMClassifier(n_estimators=300, n_jobs=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "automl_env",
   "language": "python",
   "name": "automl_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
