{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "User-bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Experiments on semi-synthetic dataset\n",
    "\"\"\"\n",
    "import torch\n",
    "import pickle\n",
    "import numpy as np\n",
    "from MF import MF_UDR, MF_IDR, MF_UIDR\n",
    "\n",
    "\n",
    "def getAccuracy(real, predList):\n",
    "    \"\"\"Return the relative absolute error |real - est| / real of every estimate.\n",
    "\n",
    "    real is the reference value and predList an iterable of estimates; the\n",
    "    result is a NumPy array with one entry per estimate, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.abs(real - estimate) / real for estimate in predList])\n",
    "\n",
    "def getmse(real, predList):\n",
    "    \"\"\"Return the mean squared error between real and every entry of predList.\n",
    "\n",
    "    The result is a NumPy array with one MSE per entry, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.mean((real - estimate) ** 2) for estimate in predList])\n",
    "\n",
    "# Interaction file: the last column is dropped and only the first two columns\n",
    "# (user id, item id) are used, to size the user x item matrix.\n",
    "# The ids are shifted down by one (assumes 1-based ids in the file - TODO confirm).\n",
    "matrix = np.loadtxt(\"data/u.data\", dtype=int)[:, :-1]\n",
    "user = matrix[:, 0] - 1\n",
    "item = matrix[:, 1] - 1\n",
    "user_num = np.max(user)+1\n",
    "item_num = np.max(item)+1\n",
    "\n",
    "# Seven objects are stored back to back in one pickle stream; the load order\n",
    "# below must match the order in which they were dumped.\n",
    "# The context manager closes the file even when one of the loads raises.\n",
    "# NOTE(review): pickle.load can execute arbitrary code - only open a trusted file.\n",
    "with open(\"data/synthetic_data\", \"rb\") as file:\n",
    "    ground_truth = pickle.load(file)\n",
    "    one = pickle.load(file)\n",
    "    three = pickle.load(file)\n",
    "    five = pickle.load(file)\n",
    "    rotate = pickle.load(file)\n",
    "    skew = pickle.load(file)\n",
    "    crs = pickle.load(file)\n",
    "\n",
    "# Observation propensity of every entry, derived from its ground-truth level:\n",
    "# 0.9 -> p**1, 0.7 -> p**2, 0.5 -> p**3, 0.3 -> p**4, 0.1 -> p**4.\n",
    "# The masks are taken on the untouched ground truth. With p = 0.5 the value\n",
    "# p**1 is itself one of the levels, so matching against the array that is being\n",
    "# rewritten would map the 0.9 entries to p**1 and then again to p**3.\n",
    "p = 0.5\n",
    "levels = np.asarray(ground_truth)\n",
    "propensity = np.copy(ground_truth)\n",
    "for level, exponent in ((0.9, 1), (0.7, 2), (0.5, 3), (0.3, 4), (0.1, 4)):\n",
    "    propensity[levels == level] = p ** exponent\n",
    "\n",
    "# Experiment configuration.\n",
    "SEED = 0          # fixed seed so that Restart and Run All reproduces the tables\n",
    "N_REPEATS = 30    # independent draws of observations / labels per setting\n",
    "N_ESTIMATORS = 3  # width of `acc` and `mse_matrix` computed below\n",
    "batch_size_i = 50\n",
    "batch_size_u = 50\n",
    "l2_reg_lambda = 1e-4    # Validated by grid-search\n",
    "gamma = 0.02            # model outputs are clipped to [1, 1/gamma]\n",
    "\n",
    "np.random.seed(SEED)\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "# Dense (user, item) index grid. It does not depend on any loop variable, so it\n",
    "# is built once, vectorised, instead of once per run with Python lists.\n",
    "user_idx, item_idx = np.indices((user_num, item_num))\n",
    "user_flat = user_idx.reshape(-1)\n",
    "item_flat = item_idx.reshape(-1)\n",
    "propensity_udr = propensity.reshape([user_num, item_num])\n",
    "predList = [one, three, five, rotate, skew, crs]\n",
    "\n",
    "for tub in [-0.5, 0, 0.5, 1, 1.5]:\n",
    "    for emb_size in [16]:\n",
    "        print(emb_size, tub)\n",
    "        # Running sums and sums of squares: one row per prediction matrix, one\n",
    "        # column per reported quantity. The width has to equal the length of\n",
    "        # `acc` / `mse_matrix`, otherwise the in-place `+=` cannot broadcast.\n",
    "        res = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        res_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        for rep in range(N_REPEATS):\n",
    "            # Draw which entries are observed and the binary labels.\n",
    "            observation = np.random.binomial(1, propensity)\n",
    "            p_o = np.count_nonzero(observation) / observation.shape[0]\n",
    "            ground_truth_copy = np.random.binomial(1, ground_truth)\n",
    "            o = np.where(observation == 1)\n",
    "            # Baseline weights: a random mix of 1/propensity and 1/p_o per entry.\n",
    "            a = np.random.random((user_num*item_num))\n",
    "            p_hat = a/propensity + (1-a)/p_o\n",
    "            for j, prediction in enumerate(predList):\n",
    "                ce = -ground_truth_copy * np.log(prediction) - (1 - ground_truth_copy) * np.log(1 - prediction)\n",
    "\n",
    "                # Imputed labels: ground truth plus one random offset per user,\n",
    "                # scaled by 0.2 * (tub + 1) and broadcast over that user's items.\n",
    "                prediction_hat = (ground_truth.reshape([user_num, item_num]) + 0.2 * (tub + 1) *\n",
    "                      np.random.random(user_num).reshape([user_num, -1])).reshape(-1)\n",
    "                ce_hat = -prediction_hat * np.log(prediction) - (1 - prediction_hat) * np.log(1 - prediction)\n",
    "\n",
    "                real_ce = np.mean(ce)\n",
    "                # Baseline estimators, kept for reference; they are not part of\n",
    "                # the tables accumulated below.\n",
    "                naive_ce = np.mean(ce[o])\n",
    "                eib_ce = np.mean(ce_hat*(1-observation)+ce*observation)\n",
    "                ips_ce = np.mean(ce * observation * p_hat)\n",
    "                snips_ce = np.sum(ce * observation * p_hat) / np.sum(p_hat * observation)\n",
    "                dr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * p_hat)\n",
    "\n",
    "                # Proposed methods: train one MF model each and use its clipped\n",
    "                # output as the weight of the correction term.\n",
    "                observation_udr = torch.Tensor(observation.reshape([user_num, item_num]))\n",
    "                r_hat = torch.Tensor(prediction.reshape([user_num, item_num]))\n",
    "\n",
    "                mf_udr = MF_UDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_udr.cuda()\n",
    "                mf_udr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_u, gamma = gamma, L = 10)\n",
    "                propensity_udr_inv = np.clip(mf_udr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                udr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_udr_inv)\n",
    "\n",
    "                mf_uidr = MF_UIDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_uidr.cuda()\n",
    "                mf_uidr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size_u = batch_size_u, gamma = gamma, L = 10)\n",
    "                propensity_uidr_inv = np.clip(mf_uidr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                uidr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_uidr_inv)\n",
    "\n",
    "                mf_idr = MF_IDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_idr.cuda()\n",
    "                mf_idr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_i, gamma = gamma, L = 10)\n",
    "                propensity_idr_inv = np.clip(mf_idr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                idr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_idr_inv)\n",
    "\n",
    "                # Column order of `acc`: UDR, IDR, UIDR.\n",
    "                acc = getAccuracy(real_ce, [udr_ce, idr_ce, uidr_ce])\n",
    "                # Column order of `mse_matrix`: UDR, UIDR, IDR (differs from `acc`).\n",
    "                mse_matrix = getmse(propensity, [1/propensity_udr_inv, 1/propensity_uidr_inv, 1/propensity_idr_inv])\n",
    "\n",
    "                MSE_matrix[j] += mse_matrix\n",
    "                MSE_matrix_var[j] += mse_matrix ** 2\n",
    "\n",
    "                res[j] += acc\n",
    "                res_var[j] += acc ** 2\n",
    "\n",
    "                print(acc)\n",
    "            print()\n",
    "        # Mean and population standard deviation over the repeats. The variance\n",
    "        # is clamped at 0 because rounding can make it slightly negative.\n",
    "        res_mean = res / N_REPEATS\n",
    "        print(res_mean)\n",
    "        print(np.sqrt(np.maximum(res_var / N_REPEATS - res_mean ** 2, 0)))\n",
    "\n",
    "        mse_mean = MSE_matrix / N_REPEATS\n",
    "        print(mse_mean)\n",
    "        print(np.sqrt(np.maximum(MSE_matrix_var / N_REPEATS - mse_mean ** 2, 0)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Item-bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Experiments on semi-synthetic dataset\n",
    "\"\"\"\n",
    "import torch\n",
    "import pickle\n",
    "import numpy as np\n",
    "from MF import MF_UDR, MF_IDR, MF_UIDR\n",
    "\n",
    "\n",
    "def getAccuracy(real, predList):\n",
    "    \"\"\"Return the relative absolute error |real - est| / real of every estimate.\n",
    "\n",
    "    real is the reference value and predList an iterable of estimates; the\n",
    "    result is a NumPy array with one entry per estimate, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.abs(real - estimate) / real for estimate in predList])\n",
    "\n",
    "def getmse(real, predList):\n",
    "    \"\"\"Return the mean squared error between real and every entry of predList.\n",
    "\n",
    "    The result is a NumPy array with one MSE per entry, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.mean((real - estimate) ** 2) for estimate in predList])\n",
    "\n",
    "# Interaction file: the last column is dropped and only the first two columns\n",
    "# (user id, item id) are used, to size the user x item matrix.\n",
    "# The ids are shifted down by one (assumes 1-based ids in the file - TODO confirm).\n",
    "matrix = np.loadtxt(\"data/u.data\", dtype=int)[:, :-1]\n",
    "user = matrix[:, 0] - 1\n",
    "item = matrix[:, 1] - 1\n",
    "user_num = np.max(user)+1\n",
    "item_num = np.max(item)+1\n",
    "\n",
    "# Seven objects are stored back to back in one pickle stream; the load order\n",
    "# below must match the order in which they were dumped.\n",
    "# The context manager closes the file even when one of the loads raises.\n",
    "# NOTE(review): pickle.load can execute arbitrary code - only open a trusted file.\n",
    "with open(\"data/synthetic_data\", \"rb\") as file:\n",
    "    ground_truth = pickle.load(file)\n",
    "    one = pickle.load(file)\n",
    "    three = pickle.load(file)\n",
    "    five = pickle.load(file)\n",
    "    rotate = pickle.load(file)\n",
    "    skew = pickle.load(file)\n",
    "    crs = pickle.load(file)\n",
    "\n",
    "# Observation propensity of every entry, derived from its ground-truth level:\n",
    "# 0.9 -> p**1, 0.7 -> p**2, 0.5 -> p**3, 0.3 -> p**4, 0.1 -> p**4.\n",
    "# The masks are taken on the untouched ground truth. With p = 0.5 the value\n",
    "# p**1 is itself one of the levels, so matching against the array that is being\n",
    "# rewritten would map the 0.9 entries to p**1 and then again to p**3.\n",
    "p = 0.5\n",
    "levels = np.asarray(ground_truth)\n",
    "propensity = np.copy(ground_truth)\n",
    "for level, exponent in ((0.9, 1), (0.7, 2), (0.5, 3), (0.3, 4), (0.1, 4)):\n",
    "    propensity[levels == level] = p ** exponent\n",
    "\n",
    "# Experiment configuration.\n",
    "SEED = 0          # fixed seed so that Restart and Run All reproduces the tables\n",
    "N_REPEATS = 30    # independent draws of observations / labels per setting\n",
    "N_ESTIMATORS = 3  # width of `acc` and `mse_matrix` computed below\n",
    "batch_size_i = 50\n",
    "batch_size_u = 50\n",
    "l2_reg_lambda = 1e-4    # Validated by grid-search\n",
    "gamma = 0.02            # model outputs are clipped to [1, 1/gamma]\n",
    "\n",
    "np.random.seed(SEED)\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "# Dense (user, item) index grid. It does not depend on any loop variable, so it\n",
    "# is built once, vectorised, instead of once per run with Python lists.\n",
    "user_idx, item_idx = np.indices((user_num, item_num))\n",
    "user_flat = user_idx.reshape(-1)\n",
    "item_flat = item_idx.reshape(-1)\n",
    "propensity_udr = propensity.reshape([user_num, item_num])\n",
    "predList = [one, three, five, rotate, skew, crs]\n",
    "\n",
    "for tub in [-0.5, 0, 0.5, 1, 1.5]:\n",
    "    for emb_size in [16]:\n",
    "        print(emb_size, tub)\n",
    "        # Running sums and sums of squares: one row per prediction matrix, one\n",
    "        # column per reported quantity. The width has to equal the length of\n",
    "        # `acc` / `mse_matrix`, otherwise the in-place `+=` cannot broadcast.\n",
    "        res = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        res_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        for rep in range(N_REPEATS):\n",
    "            # Draw which entries are observed and the binary labels.\n",
    "            observation = np.random.binomial(1, propensity)\n",
    "            p_o = np.count_nonzero(observation) / observation.shape[0]\n",
    "            ground_truth_copy = np.random.binomial(1, ground_truth)\n",
    "            o = np.where(observation == 1)\n",
    "            # Baseline weights: a random mix of 1/propensity and 1/p_o per entry.\n",
    "            a = np.random.random((user_num*item_num))\n",
    "            p_hat = a/propensity + (1-a)/p_o\n",
    "            for j, prediction in enumerate(predList):\n",
    "                ce = -ground_truth_copy * np.log(prediction) - (1 - ground_truth_copy) * np.log(1 - prediction)\n",
    "\n",
    "                # Imputed labels: ground truth plus one random offset per item,\n",
    "                # scaled by 0.2 * (tub + 1) and broadcast over all users.\n",
    "                prediction_hat = (ground_truth.reshape([user_num, item_num]) + 0.2 * (tub + 1) *\n",
    "                      np.random.random(item_num).reshape([-1, item_num])).reshape(-1)\n",
    "                ce_hat = -prediction_hat * np.log(prediction) - (1 - prediction_hat) * np.log(1 - prediction)\n",
    "\n",
    "                real_ce = np.mean(ce)\n",
    "                # Baseline estimators, kept for reference; they are not part of\n",
    "                # the tables accumulated below.\n",
    "                naive_ce = np.mean(ce[o])\n",
    "                eib_ce = np.mean(ce_hat*(1-observation)+ce*observation)\n",
    "                ips_ce = np.mean(ce * observation * p_hat)\n",
    "                snips_ce = np.sum(ce * observation * p_hat) / np.sum(p_hat * observation)\n",
    "                dr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * p_hat)\n",
    "\n",
    "                # Proposed methods: train one MF model each and use its clipped\n",
    "                # output as the weight of the correction term.\n",
    "                observation_udr = torch.Tensor(observation.reshape([user_num, item_num]))\n",
    "                r_hat = torch.Tensor(prediction.reshape([user_num, item_num]))\n",
    "\n",
    "                mf_udr = MF_UDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_udr.cuda()\n",
    "                mf_udr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_u, gamma = gamma, L = 2)\n",
    "                propensity_udr_inv = np.clip(mf_udr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                udr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_udr_inv)\n",
    "\n",
    "                mf_uidr = MF_UIDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_uidr.cuda()\n",
    "                mf_uidr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size_u = batch_size_u, gamma = gamma, L = 5)\n",
    "                propensity_uidr_inv = np.clip(mf_uidr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                uidr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_uidr_inv)\n",
    "\n",
    "                mf_idr = MF_IDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_idr.cuda()\n",
    "                mf_idr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_i, gamma = gamma, L = 2)\n",
    "                propensity_idr_inv = np.clip(mf_idr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                idr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_idr_inv)\n",
    "\n",
    "                # Column order of `acc`: UDR, IDR, UIDR.\n",
    "                acc = getAccuracy(real_ce, [udr_ce, idr_ce, uidr_ce])\n",
    "                # Column order of `mse_matrix`: UDR, UIDR, IDR (differs from `acc`).\n",
    "                mse_matrix = getmse(propensity, [1/propensity_udr_inv, 1/propensity_uidr_inv, 1/propensity_idr_inv])\n",
    "\n",
    "                MSE_matrix[j] += mse_matrix\n",
    "                MSE_matrix_var[j] += mse_matrix ** 2\n",
    "\n",
    "                res[j] += acc\n",
    "                res_var[j] += acc ** 2\n",
    "\n",
    "                print(acc)\n",
    "            print()\n",
    "        # Mean and population standard deviation over the repeats. Every summary\n",
    "        # is normalised by the number of repeats that were actually run. The\n",
    "        # variance is clamped at 0 because rounding can make it slightly negative.\n",
    "        res_mean = res / N_REPEATS\n",
    "        print(res_mean)\n",
    "        print(np.sqrt(np.maximum(res_var / N_REPEATS - res_mean ** 2, 0)))\n",
    "\n",
    "        mse_mean = MSE_matrix / N_REPEATS\n",
    "        print(mse_mean)\n",
    "        print(np.sqrt(np.maximum(MSE_matrix_var / N_REPEATS - mse_mean ** 2, 0)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "User-bias varying, Item-bias = 0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Experiments on semi-synthetic dataset\n",
    "\"\"\"\n",
    "import torch\n",
    "import pickle\n",
    "import numpy as np\n",
    "from MF import MF_UDR, MF_IDR, MF_UIDR\n",
    "\n",
    "\n",
    "def getAccuracy(real, predList):\n",
    "    \"\"\"Return the relative absolute error |real - est| / real of every estimate.\n",
    "\n",
    "    real is the reference value and predList an iterable of estimates; the\n",
    "    result is a NumPy array with one entry per estimate, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.abs(real - estimate) / real for estimate in predList])\n",
    "\n",
    "def getmse(real, predList):\n",
    "    \"\"\"Return the mean squared error between real and every entry of predList.\n",
    "\n",
    "    The result is a NumPy array with one MSE per entry, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.mean((real - estimate) ** 2) for estimate in predList])\n",
    "\n",
    "# Interaction file: the last column is dropped and only the first two columns\n",
    "# (user id, item id) are used, to size the user x item matrix.\n",
    "# The ids are shifted down by one (assumes 1-based ids in the file - TODO confirm).\n",
    "matrix = np.loadtxt(\"data/u.data\", dtype=int)[:, :-1]\n",
    "user = matrix[:, 0] - 1\n",
    "item = matrix[:, 1] - 1\n",
    "user_num = np.max(user)+1\n",
    "item_num = np.max(item)+1\n",
    "\n",
    "# Seven objects are stored back to back in one pickle stream; the load order\n",
    "# below must match the order in which they were dumped.\n",
    "# The context manager closes the file even when one of the loads raises.\n",
    "# NOTE(review): pickle.load can execute arbitrary code - only open a trusted file.\n",
    "with open(\"data/synthetic_data\", \"rb\") as file:\n",
    "    ground_truth = pickle.load(file)\n",
    "    one = pickle.load(file)\n",
    "    three = pickle.load(file)\n",
    "    five = pickle.load(file)\n",
    "    rotate = pickle.load(file)\n",
    "    skew = pickle.load(file)\n",
    "    crs = pickle.load(file)\n",
    "\n",
    "# Observation propensity of every entry, derived from its ground-truth level:\n",
    "# 0.9 -> p**1, 0.7 -> p**2, 0.5 -> p**3, 0.3 -> p**4, 0.1 -> p**4.\n",
    "# The masks are taken on the untouched ground truth. With p = 0.5 the value\n",
    "# p**1 is itself one of the levels, so matching against the array that is being\n",
    "# rewritten would map the 0.9 entries to p**1 and then again to p**3.\n",
    "p = 0.5\n",
    "levels = np.asarray(ground_truth)\n",
    "propensity = np.copy(ground_truth)\n",
    "for level, exponent in ((0.9, 1), (0.7, 2), (0.5, 3), (0.3, 4), (0.1, 4)):\n",
    "    propensity[levels == level] = p ** exponent\n",
    "\n",
    "# Experiment configuration.\n",
    "SEED = 0          # fixed seed so that Restart and Run All reproduces the tables\n",
    "N_REPEATS = 30    # independent draws of observations / labels per setting\n",
    "N_ESTIMATORS = 3  # width of `acc` and `mse_matrix` computed below\n",
    "batch_size_i = 50\n",
    "batch_size_u = 50\n",
    "l2_reg_lambda = 1e-4    # Validated by grid-search\n",
    "gamma = 0.02            # model outputs are clipped to [1, 1/gamma]\n",
    "\n",
    "np.random.seed(SEED)\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "# Dense (user, item) index grid. It does not depend on any loop variable, so it\n",
    "# is built once, vectorised, instead of once per run with Python lists.\n",
    "user_idx, item_idx = np.indices((user_num, item_num))\n",
    "user_flat = user_idx.reshape(-1)\n",
    "item_flat = item_idx.reshape(-1)\n",
    "propensity_udr = propensity.reshape([user_num, item_num])\n",
    "predList = [one, three, five, rotate, skew, crs]\n",
    "\n",
    "for tub in [-0.5, 0, 0.5, 1, 1.5]:\n",
    "    for emb_size in [16]:\n",
    "        print(emb_size, tub)\n",
    "        # Running sums and sums of squares: one row per prediction matrix, one\n",
    "        # column per reported quantity. The width has to equal the length of\n",
    "        # `acc` / `mse_matrix`, otherwise the in-place `+=` cannot broadcast.\n",
    "        res = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        res_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        for rep in range(N_REPEATS):\n",
    "            # Draw which entries are observed and the binary labels.\n",
    "            observation = np.random.binomial(1, propensity)\n",
    "            p_o = np.count_nonzero(observation) / observation.shape[0]\n",
    "            ground_truth_copy = np.random.binomial(1, ground_truth)\n",
    "            o = np.where(observation == 1)\n",
    "            # Baseline weights: a random mix of 1/propensity and 1/p_o per entry.\n",
    "            a = np.random.random((user_num*item_num))\n",
    "            p_hat = a/propensity + (1-a)/p_o\n",
    "            for j, prediction in enumerate(predList):\n",
    "                ce = -ground_truth_copy * np.log(prediction) - (1 - ground_truth_copy) * np.log(1 - prediction)\n",
    "\n",
    "                # Imputed labels: ground truth plus a per-user offset whose scale\n",
    "                # 0.2 * (tub + 1) varies, plus a per-item offset with the fixed\n",
    "                # scale 0.2 * (-0.5 + 1) = 0.1. The item draw is sized by\n",
    "                # item_num (a count); `item` is an index array, not a valid size.\n",
    "                user_offset = 0.2 * (tub + 1) * np.random.random(user_num).reshape([user_num, -1])\n",
    "                item_offset = 0.2 * (-0.5 + 1) * np.random.random(item_num).reshape([-1, item_num])\n",
    "                prediction_hat = (ground_truth.reshape([user_num, item_num]) + user_offset + item_offset).reshape(-1)\n",
    "                ce_hat = -prediction_hat * np.log(prediction) - (1 - prediction_hat) * np.log(1 - prediction)\n",
    "\n",
    "                real_ce = np.mean(ce)\n",
    "                # Baseline estimators, kept for reference; they are not part of\n",
    "                # the tables accumulated below.\n",
    "                naive_ce = np.mean(ce[o])\n",
    "                eib_ce = np.mean(ce_hat*(1-observation)+ce*observation)\n",
    "                ips_ce = np.mean(ce * observation * p_hat)\n",
    "                snips_ce = np.sum(ce * observation * p_hat) / np.sum(p_hat * observation)\n",
    "                dr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * p_hat)\n",
    "\n",
    "                # Proposed methods: train one MF model each and use its clipped\n",
    "                # output as the weight of the correction term.\n",
    "                observation_udr = torch.Tensor(observation.reshape([user_num, item_num]))\n",
    "                r_hat = torch.Tensor(prediction.reshape([user_num, item_num]))\n",
    "\n",
    "                mf_udr = MF_UDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_udr.cuda()\n",
    "                mf_udr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_u, gamma = gamma, L = 10)\n",
    "                propensity_udr_inv = np.clip(mf_udr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                udr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_udr_inv)\n",
    "\n",
    "                mf_uidr = MF_UIDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_uidr.cuda()\n",
    "                mf_uidr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size_u = batch_size_u, gamma = gamma, L = 10)\n",
    "                propensity_uidr_inv = np.clip(mf_uidr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                uidr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_uidr_inv)\n",
    "\n",
    "                mf_idr = MF_IDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_idr.cuda()\n",
    "                mf_idr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_i, gamma = gamma, L = 10)\n",
    "                propensity_idr_inv = np.clip(mf_idr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                idr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_idr_inv)\n",
    "\n",
    "                # Column order of `acc`: UDR, IDR, UIDR.\n",
    "                acc = getAccuracy(real_ce, [udr_ce, idr_ce, uidr_ce])\n",
    "                # Column order of `mse_matrix`: UDR, UIDR, IDR (differs from `acc`).\n",
    "                mse_matrix = getmse(propensity, [1/propensity_udr_inv, 1/propensity_uidr_inv, 1/propensity_idr_inv])\n",
    "\n",
    "                MSE_matrix[j] += mse_matrix\n",
    "                MSE_matrix_var[j] += mse_matrix ** 2\n",
    "\n",
    "                res[j] += acc\n",
    "                res_var[j] += acc ** 2\n",
    "\n",
    "                print(acc)\n",
    "            print()\n",
    "        # Mean and population standard deviation over the repeats. The variance\n",
    "        # is clamped at 0 because rounding can make it slightly negative.\n",
    "        res_mean = res / N_REPEATS\n",
    "        print(res_mean)\n",
    "        print(np.sqrt(np.maximum(res_var / N_REPEATS - res_mean ** 2, 0)))\n",
    "\n",
    "        mse_mean = MSE_matrix / N_REPEATS\n",
    "        print(mse_mean)\n",
    "        print(np.sqrt(np.maximum(MSE_matrix_var / N_REPEATS - mse_mean ** 2, 0)))"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "Item-bias varying, User-bias = 0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Experiments on semi-synthetic dataset\n",
    "\"\"\"\n",
    "import torch\n",
    "import pickle\n",
    "import numpy as np\n",
    "from MF import MF_UDR, MF_IDR, MF_UIDR\n",
    "\n",
    "\n",
    "def getAccuracy(real, predList):\n",
    "    \"\"\"Return the relative absolute error |real - est| / real of every estimate.\n",
    "\n",
    "    real is the reference value and predList an iterable of estimates; the\n",
    "    result is a NumPy array with one entry per estimate, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.abs(real - estimate) / real for estimate in predList])\n",
    "\n",
    "def getmse(real, predList):\n",
    "    \"\"\"Return the mean squared error between real and every entry of predList.\n",
    "\n",
    "    The result is a NumPy array with one MSE per entry, in the same order.\n",
    "    \"\"\"\n",
    "    return np.array([np.mean((real - estimate) ** 2) for estimate in predList])\n",
    "\n",
    "# Interaction file: the last column is dropped and only the first two columns\n",
    "# (user id, item id) are used, to size the user x item matrix.\n",
    "# The ids are shifted down by one (assumes 1-based ids in the file - TODO confirm).\n",
    "matrix = np.loadtxt(\"data/u.data\", dtype=int)[:, :-1]\n",
    "user = matrix[:, 0] - 1\n",
    "item = matrix[:, 1] - 1\n",
    "user_num = np.max(user)+1\n",
    "item_num = np.max(item)+1\n",
    "\n",
    "# Seven objects are stored back to back in one pickle stream; the load order\n",
    "# below must match the order in which they were dumped.\n",
    "# The context manager closes the file even when one of the loads raises.\n",
    "# NOTE(review): pickle.load can execute arbitrary code - only open a trusted file.\n",
    "with open(\"data/synthetic_data\", \"rb\") as file:\n",
    "    ground_truth = pickle.load(file)\n",
    "    one = pickle.load(file)\n",
    "    three = pickle.load(file)\n",
    "    five = pickle.load(file)\n",
    "    rotate = pickle.load(file)\n",
    "    skew = pickle.load(file)\n",
    "    crs = pickle.load(file)\n",
    "\n",
    "# Observation propensity of every entry, derived from its ground-truth level:\n",
    "# 0.9 -> p**1, 0.7 -> p**2, 0.5 -> p**3, 0.3 -> p**4, 0.1 -> p**4.\n",
    "# The masks are taken on the untouched ground truth. With p = 0.5 the value\n",
    "# p**1 is itself one of the levels, so matching against the array that is being\n",
    "# rewritten would map the 0.9 entries to p**1 and then again to p**3.\n",
    "p = 0.5\n",
    "levels = np.asarray(ground_truth)\n",
    "propensity = np.copy(ground_truth)\n",
    "for level, exponent in ((0.9, 1), (0.7, 2), (0.5, 3), (0.3, 4), (0.1, 4)):\n",
    "    propensity[levels == level] = p ** exponent\n",
    "\n",
    "# Experiment configuration.\n",
    "SEED = 0          # fixed seed so that Restart and Run All reproduces the tables\n",
    "N_REPEATS = 30    # independent draws of observations / labels per setting\n",
    "N_ESTIMATORS = 3  # width of `acc` and `mse_matrix` computed below\n",
    "batch_size_i = 50\n",
    "batch_size_u = 50\n",
    "l2_reg_lambda = 1e-4    # Validated by grid-search\n",
    "gamma = 0.02            # model outputs are clipped to [1, 1/gamma]\n",
    "\n",
    "np.random.seed(SEED)\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "# Dense (user, item) index grid. It does not depend on any loop variable, so it\n",
    "# is built once, vectorised, instead of once per run with Python lists.\n",
    "user_idx, item_idx = np.indices((user_num, item_num))\n",
    "user_flat = user_idx.reshape(-1)\n",
    "item_flat = item_idx.reshape(-1)\n",
    "propensity_udr = propensity.reshape([user_num, item_num])\n",
    "predList = [one, three, five, rotate, skew, crs]\n",
    "\n",
    "for tub in [-0.5, 0, 0.5, 1, 1.5]:\n",
    "    for emb_size in [16]:\n",
    "        print(emb_size, tub)\n",
    "        # Running sums and sums of squares: one row per prediction matrix, one\n",
    "        # column per reported quantity. The width has to equal the length of\n",
    "        # `acc` / `mse_matrix`, otherwise the in-place `+=` cannot broadcast.\n",
    "        res = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        res_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        MSE_matrix_var = np.zeros([len(predList), N_ESTIMATORS])\n",
    "        for rep in range(N_REPEATS):\n",
    "            # Draw which entries are observed and the binary labels.\n",
    "            observation = np.random.binomial(1, propensity)\n",
    "            p_o = np.count_nonzero(observation) / observation.shape[0]\n",
    "            ground_truth_copy = np.random.binomial(1, ground_truth)\n",
    "            o = np.where(observation == 1)\n",
    "            # Baseline weights: a random mix of 1/propensity and 1/p_o per entry.\n",
    "            a = np.random.random((user_num*item_num))\n",
    "            p_hat = a/propensity + (1-a)/p_o\n",
    "            for j, prediction in enumerate(predList):\n",
    "                ce = -ground_truth_copy * np.log(prediction) - (1 - ground_truth_copy) * np.log(1 - prediction)\n",
    "\n",
    "                # Imputed labels for the item-bias sweep: the per-user offset has\n",
    "                # the fixed scale 0.2 * (-0.5 + 1) = 0.1 and the per-item offset\n",
    "                # has the varying scale 0.2 * (tub + 1).\n",
    "                # NOTE(review): the original applied the varying scale to the user\n",
    "                # term, exactly as in the user-bias sweep; swapped here to match\n",
    "                # this section's heading - confirm against the intended setup.\n",
    "                # The item draw is sized by item_num (a count); `item` is an index\n",
    "                # array, not a valid size.\n",
    "                user_offset = 0.2 * (-0.5 + 1) * np.random.random(user_num).reshape([user_num, -1])\n",
    "                item_offset = 0.2 * (tub + 1) * np.random.random(item_num).reshape([-1, item_num])\n",
    "                prediction_hat = (ground_truth.reshape([user_num, item_num]) + user_offset + item_offset).reshape(-1)\n",
    "                ce_hat = -prediction_hat * np.log(prediction) - (1 - prediction_hat) * np.log(1 - prediction)\n",
    "\n",
    "                real_ce = np.mean(ce)\n",
    "                # Baseline estimators, kept for reference; they are not part of\n",
    "                # the tables accumulated below.\n",
    "                naive_ce = np.mean(ce[o])\n",
    "                eib_ce = np.mean(ce_hat*(1-observation)+ce*observation)\n",
    "                ips_ce = np.mean(ce * observation * p_hat)\n",
    "                snips_ce = np.sum(ce * observation * p_hat) / np.sum(p_hat * observation)\n",
    "                dr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * p_hat)\n",
    "\n",
    "                # Proposed methods: train one MF model each and use its clipped\n",
    "                # output as the weight of the correction term.\n",
    "                observation_udr = torch.Tensor(observation.reshape([user_num, item_num]))\n",
    "                r_hat = torch.Tensor(prediction.reshape([user_num, item_num]))\n",
    "\n",
    "                mf_udr = MF_UDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_udr.cuda()\n",
    "                mf_udr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_u, gamma = gamma, L = 2)\n",
    "                propensity_udr_inv = np.clip(mf_udr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                udr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_udr_inv)\n",
    "\n",
    "                mf_uidr = MF_UIDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_uidr.cuda()\n",
    "                mf_uidr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size_u = batch_size_u, gamma = gamma, L = 5)\n",
    "                propensity_uidr_inv = np.clip(mf_uidr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                uidr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_uidr_inv)\n",
    "\n",
    "                mf_idr = MF_IDR(num_users=user_num, num_items=item_num, embedding_k=emb_size, l2_reg_lambda=l2_reg_lambda)\n",
    "                mf_idr.cuda()\n",
    "                mf_idr.fit(user_idx, item_idx, propensity_udr, r_hat = r_hat, obs = observation_udr, batch_size = batch_size_i, gamma = gamma, L = 2)\n",
    "                propensity_idr_inv = np.clip(mf_idr.predict(user_flat, item_flat), 1, 1/gamma)\n",
    "                idr_ce = np.mean(ce_hat + observation * (ce - ce_hat) * propensity_idr_inv)\n",
    "\n",
    "                # Column order of `acc`: UDR, IDR, UIDR.\n",
    "                acc = getAccuracy(real_ce, [udr_ce, idr_ce, uidr_ce])\n",
    "                # Column order of `mse_matrix`: UDR, UIDR, IDR (differs from `acc`).\n",
    "                mse_matrix = getmse(propensity, [1/propensity_udr_inv, 1/propensity_uidr_inv, 1/propensity_idr_inv])\n",
    "\n",
    "                MSE_matrix[j] += mse_matrix\n",
    "                MSE_matrix_var[j] += mse_matrix ** 2\n",
    "\n",
    "                res[j] += acc\n",
    "                res_var[j] += acc ** 2\n",
    "\n",
    "                print(acc)\n",
    "            print()\n",
    "        # Mean and population standard deviation over the repeats. The variance\n",
    "        # is clamped at 0 because rounding can make it slightly negative.\n",
    "        res_mean = res / N_REPEATS\n",
    "        print(res_mean)\n",
    "        print(np.sqrt(np.maximum(res_var / N_REPEATS - res_mean ** 2, 0)))\n",
    "\n",
    "        mse_mean = MSE_matrix / N_REPEATS\n",
    "        print(mse_mean)\n",
    "        print(np.sqrt(np.maximum(MSE_matrix_var / N_REPEATS - mse_mean ** 2, 0)))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:pytorch-gpu]",
   "language": "python",
   "name": "conda-env-pytorch-gpu-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
