{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.7 0.3 0.1 0.5 0.3 0.3 0.7 0.7 0.7 0.5 0.7 0.9 0.3 0.7 0.5 0.3 0.1 0.1\n",
      " 0.5 0.3]\n",
      "[0.6 0.2 0.2 0.2 0.2 0.2 0.6 0.6 0.6 0.2 0.6 0.6 0.2 0.6 0.2 0.2 0.2 0.2\n",
      " 0.2 0.2]\n",
      "[0.7 0.3 0.1 0.5 0.3 0.3 0.7 0.7 0.7 0.5 0.7 0.9 0.3 0.7 0.5 0.3 0.1 0.1\n",
      " 0.5 0.3]\n"
     ]
    }
   ],
   "source": [
    "\"\"\"\n",
    "Convert the rating matrix into conversion rate matrix;\n",
    "Generate the simulated prediction conversion rate matrix.\n",
    "\n",
    "The arrays are pickled to data/synthetic_data in this order:\n",
    "ground_truth, one, three, five, rotate, skew, crs.\n",
    "\"\"\"\n",
    "import pickle\n",
    "import numpy as np\n",
    "\n",
    "# Fixed seed so the random simulations (ONE / THREE / FIVE / SKEW) are reproducible\n",
    "# across kernel restarts. Change SEED to draw a different synthetic data set.\n",
    "SEED = 0\n",
    "rng = np.random.RandomState(SEED)\n",
    "\n",
    "# NOTE(security): pickle.load can execute arbitrary code - only load trusted files.\n",
    "# The context manager closes the file even if unpickling fails.\n",
    "with open(\"data/predicted_matrix\", \"rb\") as file:\n",
    "    # assumes the first pickled object is a flat (1-D) array of predicted ratings - TODO confirm\n",
    "    prediction = np.array(pickle.load(file), dtype=float)\n",
    "    user_num = pickle.load(file)\n",
    "    item_num = pickle.load(file)\n",
    "\n",
    "# CVR = [0.1, 0.3, 0.5, 0.7, 0.9]\n",
    "# ratio = [0.53, 0.24, 0.14, 0.06, 0.03] (the same distribution as in Yahoo R3! MAR test set)\n",
    "total_num = prediction.shape[0]\n",
    "index = np.argsort(prediction)     # positions of the entries in ascending rating order\n",
    "index_inverse = np.argsort(index)  # maps the sorted order back to the original order\n",
    "\n",
    "# Cumulative bucket boundaries (positions in sorted order) derived from the ratios above.\n",
    "end_0_1 = int(total_num*0.53)\n",
    "end_0_3 = int(total_num*0.77)\n",
    "end_0_5 = int(total_num*0.91)\n",
    "end_0_7 = int(total_num*0.98)\n",
    "\n",
    "# Ground truth: replace each rating by the CVR of the bucket its rank falls into.\n",
    "prediction = prediction[index]\n",
    "prediction[:end_0_1] = 0.1\n",
    "prediction[end_0_1:end_0_3] = 0.3\n",
    "prediction[end_0_3:end_0_5] = 0.5\n",
    "prediction[end_0_5:end_0_7] = 0.7\n",
    "prediction[end_0_7:] = 0.9\n",
    "ground_truth = prediction[index_inverse]\n",
    "print(ground_truth[:20])\n",
    "\n",
    "# Bucket sizes. Count on the boolean mask directly: np.count_nonzero(np.where(mask)) counts\n",
    "# non-zero *index values* and is one short whenever position 0 belongs to the bucket.\n",
    "n_0_1 = np.count_nonzero(ground_truth == 0.1)\n",
    "n_0_3 = np.count_nonzero(ground_truth == 0.3)\n",
    "n_0_5 = np.count_nonzero(ground_truth == 0.5)\n",
    "n_0_9 = np.count_nonzero(ground_truth == 0.9)\n",
    "\n",
    "\n",
    "def boost_bucket(start, size):\n",
    "    \"\"\"Return a copy of ground_truth with n_0_9 random entries of one bucket set to 0.9.\n",
    "\n",
    "    start: first sorted-order position of the bucket.\n",
    "    size: number of entries in the bucket.\n",
    "    Raises ValueError (from rng.choice) if size is smaller than n_0_9.\n",
    "    \"\"\"\n",
    "    select = rng.choice(size, n_0_9, replace=False) + start\n",
    "    boosted = ground_truth[index]  # fancy indexing returns a copy, in sorted order\n",
    "    boosted[select] = 0.9\n",
    "    return boosted[index_inverse]\n",
    "\n",
    "\n",
    "# Simulated prediction 1 - ONE\n",
    "# Randomly select n_0.9 0.1, and set 0.1 to 0.9, where n_0.9 denotes the number of the 0.9 in ground_truth\n",
    "one = boost_bucket(0, n_0_1)\n",
    "\n",
    "# Simulated prediction 2 - THREE\n",
    "# Randomly select n_0.9 0.3, and set 0.3 to 0.9, where n_0.9 denotes the number of the 0.9 in ground_truth\n",
    "three = boost_bucket(end_0_1, n_0_3)\n",
    "\n",
    "# Simulated prediction 3 - FIVE\n",
    "# Randomly select n_0.9 0.5, and set 0.5 to 0.9, where n_0.9 denotes the number of the 0.9 in ground_truth\n",
    "five = boost_bucket(end_0_3, n_0_5)\n",
    "\n",
    "# Simulated prediction 4 - Rotate\n",
    "# Shift every bucket one CVR level down; the lowest bucket (0.1) wraps around to 0.9.\n",
    "prediction = ground_truth[index]\n",
    "prediction[:end_0_1] = 0.9\n",
    "prediction[end_0_1:end_0_3] = 0.1\n",
    "prediction[end_0_3:end_0_5] = 0.3\n",
    "prediction[end_0_5:end_0_7] = 0.5\n",
    "prediction[end_0_7:] = 0.7\n",
    "rotate = prediction[index_inverse]\n",
    "\n",
    "# Simulated prediction 5 - SKEW\n",
    "# r ~ N(\\mu=r, \\sigma=(1-r)/2), and then r is clipped to [0.1~0.9]\n",
    "# One vectorised draw (loc and scale broadcast element-wise) replaces the per-element loop.\n",
    "skew = np.clip(rng.normal(loc=ground_truth, scale=(1-ground_truth)/2), 0.1, 0.9)\n",
    "\n",
    "# Simulated prediction 6 - CRS\n",
    "# r:=0.2, if r<=0.6; r:=0.6, else\n",
    "crs = np.where(ground_truth <= 0.6, 0.2, 0.6)\n",
    "print(crs[:20])\n",
    "print(ground_truth[:20])\n",
    "\n",
    "# Readers must unpickle the objects in exactly this order.\n",
    "with open(\"data/synthetic_data\", \"wb\") as file:\n",
    "    pickle.dump(ground_truth, file)\n",
    "    pickle.dump(one, file)\n",
    "    pickle.dump(three, file)\n",
    "    pickle.dump(five, file)\n",
    "    pickle.dump(rotate, file)\n",
    "    pickle.dump(skew, file)\n",
    "    pickle.dump(crs, file)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:pytorch-gpu]",
   "language": "python",
   "name": "conda-env-pytorch-gpu-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
