{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.special import expit\n",
    "from scipy.stats import gamma, poisson, bernoulli\n",
    "import scipy.sparse as sparse\n",
    "import os\n",
    "import dill\n",
    "import argparse\n",
    "import sys\n",
    "from sklearn.metrics import roc_auc_score\n",
    "import cavi_regression as cr\n",
    "from importlib import reload\n",
    "\n",
    "def load_mats(read=os.path.join('..', 'dat', 'lastfm-influence-sim', 'alpha=1.0')):\n",
    "    ##load sparse mats\n",
    "    Y_past = sparse.load_npz(os.path.join(read, 'past_obs.gz.npz'))\n",
    "    Z = np.loadtxt(os.path.join(read, 'user_embed.gz'))\n",
    "    return Y_past, Z\n",
    "\n",
    "def evaluate(holdout_pairs, Z, Gamma):\n",
    "    truth = []\n",
    "    pred = []\n",
    "    for (i,j,val) in holdout_pairs:\n",
    "        truth.append(val)\n",
    "        z_i = Z[i,:]\n",
    "        g_j = Gamma[j,:]\n",
    "        rate = (z_i*g_j).sum()\n",
    "        pred.append(rate)\n",
    "    return roc_auc_score(truth, pred)\n",
    "\n",
    "def get_holdout_pairs(mat):\n",
    "\n",
    "    holdout_pairs = []\n",
    "    N = np.arange(mat.shape[0])\n",
    "    M = np.arange(mat.shape[1])\n",
    "    for i in N:\n",
    "        j = np.random.choice(M)\n",
    "        count_ij = mat[i][j]\n",
    "        count_j = mat[:,j].sum()\n",
    "\n",
    "        if (count_j - count_ij) == 0:\n",
    "            continue\n",
    "        \n",
    "        holdout_pairs.append((i,j, count_ij))\n",
    "        mat[i][j] = 0\n",
    "\n",
    "    return holdout_pairs, mat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y_past, Z = load_mats()\n",
    "Y_past = Y_past.toarray()\n",
    "Y_past.shape\n",
    "\n",
    "holdout_pairs, Y_past = get_holdout_pairs(Y_past)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r",
      "\tAfter ITERATION: 0\tObjective: -7223329.55\tOld objective: -inf\tImprovement: nan"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/dhanyasridhar/Documents/social-fake-news/src/cavi_regression.py:108: RuntimeWarning: invalid value encountered in double_scalars\n",
      "  improvement = (bound - old_bd) / abs(old_bd)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\tAfter ITERATION: 15\tObjective: -5959395.73\tOld objective: -5962334.26\tImprovement: 0.00049\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "PoissonMF(max_iter=100, n_components=50, random_state=None, smoothness=100,\n",
       "     tol=0.0005, verbose=True)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reload(cr)\n",
    "\n",
    "K = Z.shape[1]\n",
    "model = cr.PoissonMF(n_components=K, verbose=True)\n",
    "model.fit(Y_past, Z)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.015763567082472278, 0.012139682711821378)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Gamma_p = model.Eb.T\n",
    "Gamma_p.mean(), Z.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.791162383089313"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "evaluate(holdout_pairs, Z, Gamma_p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.savetxt('../dat/lastfm-influence-sim/cavi_item_embeddings.gz', Gamma_p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
