{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "485d811e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "import numpy as np\n",
    "import torch\n",
    "import pdb\n",
    "from sklearn.metrics import roc_auc_score\n",
    "np.random.seed(2020)\n",
    "torch.manual_seed(2020)\n",
    "import pandas as pd\n",
    "from dataset import load_data\n",
    "from matrix_factorization import MF, MF_N_IPS, MF_N_DR_JL, MF_N_MRDR_JL\n",
    "\n",
    "from utils import gini_index, ndcg_func, get_user_wise_ctr, rating_mat_to_sample, binarize, shuffle, minU,recall_func, precision_func\n",
    "def mse_func(x, y):\n",
    "    \"\"\"Return the mean of the squared element-wise differences of ``x`` and ``y``.\"\"\"\n",
    "    return np.mean((x - y) ** 2)\n",
    "\n",
    "\n",
    "def acc_func(x, y):\n",
    "    \"\"\"Return the share of positions at which ``x`` equals ``y``.\"\"\"\n",
    "    return np.sum(x == y) / len(x)\n",
    "\n",
    "\n",
    "# Selects which dataset branch the loading cell executes.\n",
    "dataset_name = \"kuai\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63177282",
   "metadata": {},
   "outputs": [],
   "source": [
    "if dataset_name == \"kuai\":\n",
    "    # Both files are headerless comma-separated tables. The code below uses\n",
    "    # column 0 as the user id, column 1 as the item id and column 2 as the rating.\n",
    "    rdf_train = np.array(pd.read_table(\"./data/kuai/user.txt\", header=None, sep=','))\n",
    "    rdf_test = np.array(pd.read_table(\"./data/kuai/random.txt\", header=None, sep=','))\n",
    "\n",
    "    # Append an origin flag as column 3 (1 = train, 0 = test) so both sets can be\n",
    "    # re-indexed together and separated again afterwards.\n",
    "    rdf_train_new = np.c_[rdf_train, np.ones(rdf_train.shape[0])]\n",
    "    rdf_test_new = np.c_[rdf_test, np.zeros(rdf_test.shape[0])]\n",
    "    rdf = np.r_[rdf_train_new, rdf_test_new]\n",
    "\n",
    "    # Map raw user ids to dense ranks 0..U-1 on rows sorted by user id.\n",
    "    # np.unique(..., return_inverse=True)[1] is the rank of every value among the\n",
    "    # sorted distinct values, which replaces a per-row Python loop.\n",
    "    rdf = rdf[np.argsort(rdf[:, 0])]\n",
    "    c = rdf.copy()\n",
    "    c[:, 0] = np.unique(rdf[:, 0], return_inverse=True)[1]\n",
    "\n",
    "    # Same dense re-indexing for item ids, on rows re-sorted by item id.\n",
    "    c = c[np.argsort(c[:, 1])]\n",
    "    d = c.copy()\n",
    "    d[:, 1] = np.unique(c[:, 1], return_inverse=True)[1]\n",
    "\n",
    "    # Split back into train / test using the origin flag.\n",
    "    is_train = d[:, 3] == 1\n",
    "    is_test = d[:, 3] == 0\n",
    "    y_train = d[:, 2][is_train]\n",
    "    y_test = d[:, 2][is_test]\n",
    "    x_train = d[:, :2][is_train]\n",
    "    x_test = d[:, :2][is_test]\n",
    "\n",
    "    # Size the id spaces from train AND test rows: ids are ranked jointly, so a\n",
    "    # user or item that occurs only in the test file would otherwise receive an\n",
    "    # index >= num_user / num_item.\n",
    "    num_user = d[:, 0].max() + 1\n",
    "    num_item = d[:, 1].max() + 1\n",
    "else:\n",
    "    # Fail with a clear message instead of a NameError on y_train further down.\n",
    "    raise ValueError(\"Unsupported dataset_name: {!r}\".format(dataset_name))\n",
    "\n",
    "# binarize comes from utils; presumably it thresholds the ratings at 2 - TODO confirm.\n",
    "y_train = binarize(y_train, 2)\n",
    "y_test = binarize(y_test, 2)\n",
    "num_user = int(num_user)\n",
    "num_item = int(num_item)\n",
    "\n",
    "print(\"# user: {}, # item: {}\".format(num_user, num_item))\n",
    "# Mean of the binarized training labels.\n",
    "print(sum(y_train)/len(y_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b005339c-f453-4265-b344-949e2f14b078",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the training index pairs and labels to 64-bit integer arrays.\n",
    "x_train = np.array(x_train, dtype=np.int64)\n",
    "y_train = np.array(y_train, dtype=np.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "304cd8bb",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 9196,
     "status": "ok",
     "timestamp": 1681654325865,
     "user": {
      "displayName": "Jaqueline Noonan",
      "userId": "14082822236352942107"
     },
     "user_tz": -480
    },
    "id": "304cd8bb",
    "outputId": "dd3be69c-feb6-4bad-9ef3-4ad15f573f83"
   },
   "outputs": [],
   "source": [
    "# MF naive\n",
    "mf = MF(num_user, num_item, batch_size=2048)\n",
    "mf.cuda()\n",
    "mf.fit(x_train, y_train, lr=0.01, lamb=5e-5, tol=1e-5)\n",
    "\n",
    "# Score the test pairs, then compute the pointwise and top-k metrics.\n",
    "test_pred = mf.predict(x_test)\n",
    "mse_mf = mse_func(y_test, test_pred)\n",
    "auc_mf = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(mf, x_test, y_test, top_k_list=[20, 50])\n",
    "recall_res = recall_func(mf, x_test, y_test, top_k_list=[20, 50])\n",
    "\n",
    "banner = \"***\"*5 + \"[MF]\" + \"***\"*5\n",
    "ndcg_20, ndcg_50 = (np.mean(ndcg_res[key]) for key in (\"ndcg_20\", \"ndcg_50\"))\n",
    "recall_20, recall_50 = (np.mean(recall_res[key]) for key in (\"recall_20\", \"recall_50\"))\n",
    "\n",
    "print(banner)\n",
    "print(\"[MF] test mse:\", mse_mf)\n",
    "print(\"[MF] test auc:\", auc_mf)\n",
    "print(\"[MF] ndcg@20:{:.6f}, ndcg@50:{:.6f}\".format(ndcg_20, ndcg_50))\n",
    "print(\"[MF] recall@20:{:.6f}, recall@50:{:.6f}\".format(recall_20, recall_50))\n",
    "# Kept between the two banners, in the same position as before.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c47b9a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# MF N IPS\n",
    "mf_interference_ips = MF_N_IPS(num_user, num_item, low = 0.05, up = 0.95, c = 10)\n",
    "mf_interference_ips.cuda()\n",
    "\n",
    "# y_ips: the labels of a random 5% of the test rows; it is handed to fit() below.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]\n",
    "\n",
    "# The propensity sub-model is fitted before the main model.\n",
    "mf_interference_ips.propensity_model.fit(x_train, lr = 0.01, thr = 0.9, batch_size = 2048, lamb = 1e-5)\n",
    "\n",
    "mf_interference_ips.fit(x_train, y_train, y_ips, thr = 0.8, g_value = [0],\n",
    "    lr=0.01,\n",
    "    g = 200,\n",
    "    h = 100,\n",
    "    batch_size=2048,\n",
    "    lamb1 = 5e-5,\n",
    "    lamb2 = 5e-5,\n",
    "    tol=1e-5,\n",
    "    verbose=False)\n",
    "test_pred = mf_interference_ips.predict(x_test)\n",
    "mse_mfips = mse_func(y_test, test_pred)\n",
    "auc_mfips = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(mf_interference_ips, x_test, y_test, top_k_list = [20, 50])\n",
    "recall_res = recall_func(mf_interference_ips, x_test, y_test, top_k_list = [20, 50])\n",
    "precision_res = precision_func(mf_interference_ips, x_test, y_test, top_k_list = [20, 50])\n",
    "\n",
    "tag = \"[MF-Interference-IPS]\"\n",
    "recall_50 = np.mean(recall_res[\"recall_50\"])\n",
    "precision_50 = np.mean(precision_res[\"precision_50\"])\n",
    "# F1 is the harmonic mean 2PR/(P+R), computed here from the mean recall@50 and\n",
    "# mean precision@50. The factor 2 was previously missing, so half of F1 was\n",
    "# printed. Report 0 when both inputs are 0 to avoid a 0/0 division.\n",
    "f1_denom = recall_50 + precision_50\n",
    "f1_50 = 2 * recall_50 * precision_50 / f1_denom if f1_denom > 0 else 0.0\n",
    "\n",
    "print(\"***\"*5 + tag + \"***\"*5)\n",
    "print(tag + \" test mse:\", mse_mfips)\n",
    "print(tag + \" test auc:\", auc_mfips)\n",
    "print((tag + \" ndcg@20:{:.6f}, ndcg@50:{:.6f}\").format(\n",
    "        np.mean(ndcg_res[\"ndcg_20\"]), np.mean(ndcg_res[\"ndcg_50\"])))\n",
    "print((tag + \" recall@20:{:.6f}, recall@50:{:.6f}\").format(\n",
    "        np.mean(recall_res[\"recall_20\"]), recall_50))\n",
    "print((tag + \" precision@20:{:.6f}, precision@50:{:.6f}\").format(\n",
    "        np.mean(precision_res[\"precision_20\"]), precision_50))\n",
    "print('f1@50', f1_50)\n",
    "user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)\n",
    "gi,gu = gini_index(user_wise_ctr)\n",
    "print(\"***\"*5 + tag + \"***\"*5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "d2407d00",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[MF-Interference-IPS] epoch:18, xent:125.20612573623657\n",
      "[MF-N-DR-JL] epoch:105, xent:10009488.09375\n",
      "***************[MF-Interference-IPS]***************\n",
      "[MF-Interference-IPS] test mse: 0.06218944475425952\n",
      "[MF-Interference-IPS] test auc: 0.7791225770306165\n",
      "[MF-Interference-IPS] ndcg@20:0.465605, ndcg@50:0.543906\n",
      "[MF-Interference-IPS] recall@20:0.442665, recall@50:0.679644\n",
      "[MF-Interference-IPS] precision@20:0.089157, precision@50:0.063019\n",
      "f1@50 0.05767160548177845\n",
      "Num User: 1411\n",
      "Gini index: 0.8282210368823074\n",
      "Global utility: 0.04847625797306875\n",
      "***************[MF-Interference-IPS]***************\n"
     ]
    }
   ],
   "source": [
    "# MF N DR JL\n",
    "mf_interference_dr_jl = MF_N_DR_JL(num_user, num_item, low = 0.05, up = 0.95, c = 10)\n",
    "mf_interference_dr_jl.cuda()\n",
    "\n",
    "# y_ips: the labels of a random 5% of the test rows; it is handed to fit() below.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]\n",
    "\n",
    "# The propensity sub-model is fitted before the main model.\n",
    "mf_interference_dr_jl.propensity_model.fit(x_train, lr = 0.01, thr = 1, lamb = 1e-5, batch_size = 2048)\n",
    "\n",
    "mf_interference_dr_jl.fit(x_train, y_train, y_ips, g_value = [0],\n",
    "    lr=0.01,\n",
    "    g = 200,\n",
    "    h = 200,\n",
    "    G = 2,\n",
    "    batch_size=2048,\n",
    "    lamb1 = 1e-5,\n",
    "    lamb2 = 1e-5,\n",
    "    tol=1e-5,\n",
    "    verbose=False)\n",
    "test_pred = mf_interference_dr_jl.predict(x_test)\n",
    "mse_mfdrjl = mse_func(y_test, test_pred)\n",
    "auc_mfdrjl = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(mf_interference_dr_jl, x_test, y_test, top_k_list = [20, 50])\n",
    "recall_res = recall_func(mf_interference_dr_jl, x_test, y_test, top_k_list = [20, 50])\n",
    "precision_res = precision_func(mf_interference_dr_jl, x_test, y_test, top_k_list = [20, 50])\n",
    "\n",
    "# The printed label now names this model; it previously repeated the IPS label\n",
    "# copied from the IPS cell.\n",
    "tag = \"[MF-Interference-DR-JL]\"\n",
    "recall_50 = np.mean(recall_res[\"recall_50\"])\n",
    "precision_50 = np.mean(precision_res[\"precision_50\"])\n",
    "# F1 is the harmonic mean 2PR/(P+R), computed here from the mean recall@50 and\n",
    "# mean precision@50. The factor 2 was previously missing, so half of F1 was\n",
    "# printed. Report 0 when both inputs are 0 to avoid a 0/0 division.\n",
    "f1_denom = recall_50 + precision_50\n",
    "f1_50 = 2 * recall_50 * precision_50 / f1_denom if f1_denom > 0 else 0.0\n",
    "\n",
    "print(\"***\"*5 + tag + \"***\"*5)\n",
    "print(tag + \" test mse:\", mse_mfdrjl)\n",
    "print(tag + \" test auc:\", auc_mfdrjl)\n",
    "print((tag + \" ndcg@20:{:.6f}, ndcg@50:{:.6f}\").format(\n",
    "        np.mean(ndcg_res[\"ndcg_20\"]), np.mean(ndcg_res[\"ndcg_50\"])))\n",
    "print((tag + \" recall@20:{:.6f}, recall@50:{:.6f}\").format(\n",
    "        np.mean(recall_res[\"recall_20\"]), recall_50))\n",
    "print((tag + \" precision@20:{:.6f}, precision@50:{:.6f}\").format(\n",
    "        np.mean(precision_res[\"precision_20\"]), precision_50))\n",
    "print('f1@50', f1_50)\n",
    "user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)\n",
    "gi,gu = gini_index(user_wise_ctr)\n",
    "print(\"***\"*5 + tag + \"***\"*5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5255bfad",
   "metadata": {},
   "outputs": [],
   "source": [
    "# MF N MRDR JL\n",
    "mf_interference_mrdr_jl = MF_N_MRDR_JL(num_user, num_item, low = 0.05, up = 0.95, c = 10)\n",
    "mf_interference_mrdr_jl.cuda()\n",
    "\n",
    "# y_ips: the labels of a random 5% of the test rows; it is handed to fit() below.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]\n",
    "\n",
    "# The propensity sub-model is fitted before the main model.\n",
    "mf_interference_mrdr_jl.propensity_model.fit(x_train, lr = 0.01, thr = 1, lamb = 1e-5, batch_size = 2048)\n",
    "\n",
    "mf_interference_mrdr_jl.fit(x_train, y_train, y_ips, g_value = [0],\n",
    "    lr=0.01,\n",
    "    g = 200,\n",
    "    h = 200,\n",
    "    G = 1,\n",
    "    batch_size=2048,\n",
    "    lamb1 = 1e-5,\n",
    "    lamb2 = 1e-5,\n",
    "    tol=1e-5,\n",
    "    verbose=False)\n",
    "test_pred = mf_interference_mrdr_jl.predict(x_test)\n",
    "mse_mfmrdrjl = mse_func(y_test, test_pred)\n",
    "auc_mfmrdrjl = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(mf_interference_mrdr_jl, x_test, y_test, top_k_list = [20, 50])\n",
    "recall_res = recall_func(mf_interference_mrdr_jl, x_test, y_test, top_k_list = [20, 50])\n",
    "precision_res = precision_func(mf_interference_mrdr_jl, x_test, y_test, top_k_list = [20, 50])\n",
    "\n",
    "# The printed label now names this model; it previously repeated the IPS label\n",
    "# copied from the IPS cell.\n",
    "tag = \"[MF-Interference-MRDR-JL]\"\n",
    "recall_50 = np.mean(recall_res[\"recall_50\"])\n",
    "precision_50 = np.mean(precision_res[\"precision_50\"])\n",
    "# F1 is the harmonic mean 2PR/(P+R), computed here from the mean recall@50 and\n",
    "# mean precision@50. The factor 2 was previously missing, so half of F1 was\n",
    "# printed. Report 0 when both inputs are 0 to avoid a 0/0 division.\n",
    "f1_denom = recall_50 + precision_50\n",
    "f1_50 = 2 * recall_50 * precision_50 / f1_denom if f1_denom > 0 else 0.0\n",
    "\n",
    "print(\"***\"*5 + tag + \"***\"*5)\n",
    "print(tag + \" test mse:\", mse_mfmrdrjl)\n",
    "print(tag + \" test auc:\", auc_mfmrdrjl)\n",
    "print((tag + \" ndcg@20:{:.6f}, ndcg@50:{:.6f}\").format(\n",
    "        np.mean(ndcg_res[\"ndcg_20\"]), np.mean(ndcg_res[\"ndcg_50\"])))\n",
    "print((tag + \" recall@20:{:.6f}, recall@50:{:.6f}\").format(\n",
    "        np.mean(recall_res[\"recall_20\"]), recall_50))\n",
    "print((tag + \" precision@20:{:.6f}, precision@50:{:.6f}\").format(\n",
    "        np.mean(precision_res[\"precision_20\"]), precision_50))\n",
    "print('f1@50', f1_50)\n",
    "user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)\n",
    "gi,gu = gini_index(user_wise_ctr)\n",
    "print(\"***\"*5 + tag + \"***\"*5)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python [conda env:pytorch-gpu]",
   "language": "python",
   "name": "conda-env-pytorch-gpu-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
