{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "import numpy as np\n",
    "import torch\n",
    "import pdb\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "# Fix the NumPy and PyTorch global seeds to one shared value.\n",
    "SEED = 2020\n",
    "np.random.seed(SEED)\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "from dataset import load_data\n",
    "from NCF_NB_Single_Model__NCF_IPS_IPSAT_SNIPS_DR_CVIB import NCF, NCF_IPS, NCF_SNIPS, NCF_DR, NCF_CVIB, NCF_IPS_AT\n",
    "from NCF_NB_Joint_Learning__DRJL_MRDRJL import NCF_DR_JL, NCF_MRDR_JL\n",
    "from NCF_NB_Cycle_Learning__SDR_SMRDR import NCF_Stable_DR, NCF_Stable_MRDR\n",
    "from utils import gini_index, ndcg_func, get_user_wise_ctr, rating_mat_to_sample, binarize, shuffle, minU\n",
    "\n",
    "\n",
    "def mse_func(x, y):\n",
    "    \"\"\"Return the mean of the element-wise squared difference between x and y.\"\"\"\n",
    "    squared_err = (x - y) ** 2\n",
    "    return np.mean(squared_err)\n",
    "\n",
    "\n",
    "def acc_func(x, y):\n",
    "    \"\"\"Return the number of positions where x equals y, divided by len(x).\"\"\"\n",
    "    n_match = np.sum(x == y)\n",
    "    return n_match / len(x)\n",
    "\n",
    "\n",
    "# Benchmark to run; the loading cell branches on \"coat\" and \"yahoo\".\n",
    "dataset_name = \"yahoo\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the benchmark chosen by dataset_name and derive the user/item counts\n",
    "# that every model constructor below receives.\n",
    "if dataset_name == \"coat\":\n",
    "    # For coat, load_data yields train/test matrices; each is passed through\n",
    "    # rating_mat_to_sample, and the counts come from the training matrix shape.\n",
    "    train_mat, test_mat = load_data(\"coat\")\n",
    "    x_train, y_train = rating_mat_to_sample(train_mat)\n",
    "    x_test, y_test = rating_mat_to_sample(test_mat)\n",
    "    num_user = train_mat.shape[0]\n",
    "    num_item = train_mat.shape[1]\n",
    "\n",
    "elif dataset_name == \"yahoo\":\n",
    "    x_train, y_train, x_test, y_test = load_data(\"yahoo\")\n",
    "    #x_train, y_train = shuffle(x_train, y_train)\n",
    "    # Counts are the largest id seen in x_train plus one (column 0 -> users,\n",
    "    # column 1 -> items); presumably ids are 0-based -- TODO confirm in load_data.\n",
    "    num_user = x_train[:,0].max() + 1\n",
    "    num_item = x_train[:,1].max() + 1\n",
    "\n",
    "else:\n",
    "    # Fail fast with a clear message; previously an unknown name only surfaced\n",
    "    # as a NameError on num_user in the print below.\n",
    "    raise ValueError(\n",
    "        \"Unsupported dataset_name {!r}; expected 'coat' or 'yahoo'.\".format(dataset_name))\n",
    "\n",
    "print(\"# user: {}, # item: {}\".format(num_user, num_item))\n",
    "# binarize\n",
    "y_train = binarize(y_train)\n",
    "y_test = binarize(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF naive: NCF fitted on (x_train, y_train) only, with no y_ips argument.\n",
    "ncf = NCF(num_user, num_item)\n",
    "ncf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    lr=0.01,\n",
    "    lamb=1e-6,\n",
    "    tol=1e-5,\n",
    "    batch_size=2048,\n",
    "    verbose=0,\n",
    ")\n",
    "\n",
    "# Test-set metrics: squared error and ROC AUC of the predictions, plus NDCG@5/@10.\n",
    "test_pred = ncf.predict(x_test)\n",
    "mse_ncf = mse_func(y_test, test_pred)\n",
    "auc_ncf = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(ncf, x_test, y_test)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF] test mse:\", mse_ncf)\n",
    "print(\"[NCF] test auc:\", auc_ncf)\n",
    "print(\n",
    "    \"[NCF] ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF IPS: NCF_IPS is fitted with an extra y_ips argument drawn from the test labels.\n",
    "ncf_ips = NCF_IPS(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips\n",
    "# (presumably used for propensity estimation -- confirm in NCF_IPS.fit).\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "num_ips = int(0.05 * len(ips_idxs))\n",
    "y_ips = y_test[ips_idxs[:num_ips]]\n",
    "\n",
    "ncf_ips.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    y_ips=y_ips,\n",
    "    lr=0.01,\n",
    "    batch_size=8192,\n",
    "    lamb=1e-4,\n",
    "    tol=1e-6,\n",
    "    verbose=0,\n",
    ")\n",
    "\n",
    "# Test-set metrics: squared error and ROC AUC of the predictions, plus NDCG@5/@10.\n",
    "test_pred = ncf_ips.predict(x_test)\n",
    "mse_ncfips = mse_func(y_test, test_pred)\n",
    "auc_ncfips = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(ncf_ips, x_test, y_test)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF-IPS]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF-IPS] test mse:\", mse_ncfips)\n",
    "print(\"[NCF-IPS] test auc:\", auc_ncfips)\n",
    "print(\n",
    "    \"[NCF-IPS] ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF IPS_AT: NCF_IPS_AT fitted with y_ips plus the tao and G arguments.\n",
    "# The model and its metrics get their own names (ncf_ips_at, mse_ncfipsat, auc_ncfipsat)\n",
    "# so running this cell no longer overwrites the NCF_IPS model held in ncf_ips.\n",
    "ncf_ips_at = NCF_IPS_AT(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]\n",
    "\n",
    "ncf_ips_at.fit(x_train, y_train,  y_ips=y_ips, batch_size = 4096, tao = 0.1,\n",
    "    lr = 0.01,\n",
    "    G = 4,\n",
    "    lamb = 5*1e-6,\n",
    "    tol = 1e-5)\n",
    "\n",
    "# Test-set metrics: squared error and ROC AUC of the predictions, plus NDCG@5/@10.\n",
    "test_pred = ncf_ips_at.predict(x_test)\n",
    "mse_ncfipsat = mse_func(y_test, test_pred)\n",
    "auc_ncfipsat = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(ncf_ips_at, x_test, y_test)\n",
    "\n",
    "print(\"***\"*5 + \"[NCF-IPS_AT]\" + \"***\"*5)\n",
    "print(\"[NCF-IPS_AT] test mse:\", mse_ncfipsat)\n",
    "print(\"[NCF-IPS_AT] test auc:\", auc_ncfipsat)\n",
    "print(\"[NCF-IPS_AT] ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]), np.mean(ndcg_res[\"ndcg_10\"])))\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)\n",
    "gi,gu = gini_index(user_wise_ctr)\n",
    "print(\"***\"*5 + \"[NCF-IPS_AT]\" + \"***\"*5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF SNIPS: NCF_SNIPS is fitted with an extra y_ips argument drawn from the test labels.\n",
    "ncf_snips = NCF_SNIPS(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "num_ips = int(0.05 * len(ips_idxs))\n",
    "y_ips = y_test[ips_idxs[:num_ips]]\n",
    "\n",
    "ncf_snips.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    y_ips=y_ips,\n",
    "    lr=0.01,\n",
    "    batch_size=2048,\n",
    "    lamb=1e-4,\n",
    "    tol=1e-6,\n",
    "    verbose=0,\n",
    ")\n",
    "\n",
    "# NDCG is computed first, then the pointwise metrics on the test predictions.\n",
    "ndcg_res = ndcg_func(ncf_snips, x_test, y_test)\n",
    "test_pred = ncf_snips.predict(x_test)\n",
    "mse_ncfsnips = mse_func(y_test, test_pred)\n",
    "auc_ncfsnips = roc_auc_score(y_test, test_pred)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF-SNIPS]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF-SNIPS] test mse:\", mse_ncfsnips)\n",
    "print(\"[NCF-SNIPS] test auc:\", auc_ncfsnips)\n",
    "print(\n",
    "    \"ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF CVIB: NCF_CVIB fitted on (x_train, y_train) with the alpha and gamma arguments.\n",
    "# Metrics are stored as mse_ncfcvib / auc_ncfcvib so this cell no longer overwrites\n",
    "# the naive NCF results held in mse_ncf / auc_ncf.\n",
    "ncf_cvib = NCF_CVIB(num_user, num_item)\n",
    "ncf_cvib.fit(x_train, y_train, lr=0.05, \n",
    "    alpha=0.5, gamma=1e-3, lamb=1e-4, tol=1e-6, \n",
    "    batch_size = 2048, verbose=1)\n",
    "\n",
    "# Test-set metrics: squared error and ROC AUC of the predictions, plus NDCG@5/@10.\n",
    "test_pred = ncf_cvib.predict(x_test)\n",
    "mse_ncfcvib = mse_func(y_test, test_pred)\n",
    "auc_ncfcvib = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(ncf_cvib, x_test, y_test)\n",
    "\n",
    "print(\"***\"*5 + \"[NCF-CVIB]\" + \"***\"*5)\n",
    "print(\"[NCF-CVIB] test mse:\", mse_ncfcvib)\n",
    "print(\"[NCF-CVIB] test auc:\", auc_ncfcvib)\n",
    "print(\"ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "    np.mean(ndcg_res[\"ndcg_5\"]), np.mean(ndcg_res[\"ndcg_10\"])))\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)\n",
    "gi,gu = gini_index(user_wise_ctr)\n",
    "print(\"***\"*5 + \"[NCF-CVIB]\" + \"***\"*5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF DR: NCF_DR is fitted with an extra y_ips argument drawn from the test labels.\n",
    "ncf_dr = NCF_DR(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "num_ips = int(0.05 * len(ips_idxs))\n",
    "y_ips = y_test[ips_idxs[:num_ips]]\n",
    "\n",
    "ncf_dr.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    y_ips=y_ips,\n",
    "    batch_size=2048,\n",
    "    lr=0.01,\n",
    "    lamb=1e-3,\n",
    "    verbose=0,\n",
    ")\n",
    "\n",
    "# NDCG is computed first, then the pointwise metrics on the test predictions.\n",
    "ndcg_res = ndcg_func(ncf_dr, x_test, y_test)\n",
    "test_pred = ncf_dr.predict(x_test)\n",
    "mse_ncfdr = mse_func(y_test, test_pred)\n",
    "auc_ncfdr = roc_auc_score(y_test, test_pred)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF-DR]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF-DR] test mse:\", mse_ncfdr)\n",
    "print(\"[NCF-DR] test auc:\", auc_ncfdr)\n",
    "print(\n",
    "    \"ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF DR JL: NCF_DR_JL is fitted with an extra y_ips argument drawn from the test labels.\n",
    "ncf_dr_jl = NCF_DR_JL(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "num_ips = int(0.05 * len(ips_idxs))\n",
    "y_ips = y_test[ips_idxs[:num_ips]]\n",
    "\n",
    "ncf_dr_jl.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    y_ips=y_ips,\n",
    "    batch_size=8192,\n",
    "    lr=0.01,\n",
    "    lamb=1e-4,\n",
    "    verbose=0,\n",
    ")\n",
    "\n",
    "# NDCG is computed first, then the pointwise metrics on the test predictions.\n",
    "ndcg_res = ndcg_func(ncf_dr_jl, x_test, y_test)\n",
    "test_pred = ncf_dr_jl.predict(x_test)\n",
    "mse_ncfdrjl = mse_func(y_test, test_pred)\n",
    "auc_ncfdrjl = roc_auc_score(y_test, test_pred)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF-DR-JL]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF-DR-JL] test mse:\", mse_ncfdrjl)\n",
    "print(\"[NCF-DR-JL] test auc:\", auc_ncfdrjl)\n",
    "print(\n",
    "    \"ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF Stable DR: NCF_Stable_DR receives y_ips as its third positional fit argument.\n",
    "ncf_stable_dr = NCF_Stable_DR(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "num_ips = int(0.05 * len(ips_idxs))\n",
    "y_ips = y_test[ips_idxs[:num_ips]]\n",
    "\n",
    "ncf_stable_dr.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    y_ips,\n",
    "    stop=1,\n",
    "    eta=1000,\n",
    "    lr=0.01,\n",
    "    G=4,\n",
    "    batch_size=8192,\n",
    "    lr1=150,\n",
    "    lamb=1e-3,\n",
    "    tol=1e-5,\n",
    "    verbose=False,\n",
    ")\n",
    "\n",
    "# Test-set metrics: squared error and ROC AUC of the predictions, plus NDCG@5/@10.\n",
    "test_pred = ncf_stable_dr.predict(x_test)\n",
    "mse_ncfsdr = mse_func(y_test, test_pred)\n",
    "auc_ncfsdr = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(ncf_stable_dr, x_test, y_test)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF-Stable-DR]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF-Stable-DR] test mse:\", mse_ncfsdr)\n",
    "print(\"[NCF-Stable-DR] test auc:\", auc_ncfsdr)\n",
    "print(\n",
    "    \"[NCF-Stable-DR] ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF MRDR JL: NCF_MRDR_JL is fitted with an extra y_ips argument drawn from the test labels.\n",
    "ncf_mrdr_jl = NCF_MRDR_JL(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "num_ips = int(0.05 * len(ips_idxs))\n",
    "y_ips = y_test[ips_idxs[:num_ips]]\n",
    "\n",
    "ncf_mrdr_jl.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    y_ips=y_ips,\n",
    "    batch_size=8192,\n",
    "    lr=0.01,\n",
    "    lamb=1e-4,\n",
    "    verbose=0,\n",
    ")\n",
    "\n",
    "# NDCG is computed first, then the pointwise metrics on the test predictions.\n",
    "ndcg_res = ndcg_func(ncf_mrdr_jl, x_test, y_test)\n",
    "test_pred = ncf_mrdr_jl.predict(x_test)\n",
    "mse_ncfmrdrjl = mse_func(y_test, test_pred)\n",
    "auc_ncfmrdrjl = roc_auc_score(y_test, test_pred)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF-MRDR-JL]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF-MRDR-JL] test mse:\", mse_ncfmrdrjl)\n",
    "print(\"[NCF-MRDR-JL] test auc:\", auc_ncfmrdrjl)\n",
    "print(\n",
    "    \"ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NCF Stable MRDR: NCF_Stable_MRDR receives y_ips as its third positional fit argument.\n",
    "ncf_stable_mrdr = NCF_Stable_MRDR(num_user, num_item)\n",
    "\n",
    "# Shuffle all test indices and keep the first 5% to select the labels passed as y_ips.\n",
    "ips_idxs = np.arange(len(y_test))\n",
    "np.random.shuffle(ips_idxs)\n",
    "num_ips = int(0.05 * len(ips_idxs))\n",
    "y_ips = y_test[ips_idxs[:num_ips]]\n",
    "\n",
    "ncf_stable_mrdr.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    y_ips,\n",
    "    stop=1,\n",
    "    eta=1000,\n",
    "    lr=0.01,\n",
    "    G=4,\n",
    "    batch_size=8192,\n",
    "    lr1=150,\n",
    "    lamb=1e-3,\n",
    "    tol=1e-5,\n",
    "    verbose=False,\n",
    ")\n",
    "\n",
    "# Test-set metrics: squared error and ROC AUC of the predictions, plus NDCG@5/@10.\n",
    "test_pred = ncf_stable_mrdr.predict(x_test)\n",
    "mse_ncfsmrdr = mse_func(y_test, test_pred)\n",
    "auc_ncfsmrdr = roc_auc_score(y_test, test_pred)\n",
    "ndcg_res = ndcg_func(ncf_stable_mrdr, x_test, y_test)\n",
    "\n",
    "banner = \"***\" * 5 + \"[NCF-Stable-MRDR]\" + \"***\" * 5\n",
    "print(banner)\n",
    "print(\"[NCF-Stable-MRDR] test mse:\", mse_ncfsmrdr)\n",
    "print(\"[NCF-Stable-MRDR] test auc:\", auc_ncfsmrdr)\n",
    "print(\n",
    "    \"[NCF-Stable-MRDR] ndcg@5:{:.6f}, ndcg@10:{:.6f}\".format(\n",
    "        np.mean(ndcg_res[\"ndcg_5\"]),\n",
    "        np.mean(ndcg_res[\"ndcg_10\"]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# The get_user_wise_ctr output feeds gini_index; gi/gu are kept but not printed in this cell.\n",
    "user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)\n",
    "gi, gu = gini_index(user_wise_ctr)\n",
    "print(banner)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:pytorch-gpu]",
   "language": "python",
   "name": "conda-env-pytorch-gpu-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
