{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import tqdm\n",
    "from sentence_transformers import SentenceTransformer\n",
    "model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v1')\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "jokes = pd.read_excel(\"./data/benchmark/jester/jester-data-1.xls\",header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "### drop rows with even a single 99\n",
    "jokes = jokes[jokes.iloc[:,:]!=99]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "jokes = jokes.dropna().reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "jokes_text = pd.read_excel(\"./data/benchmark/jester/Dataset4JokeSet.xlsx\",header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "jokes_embeddings = jokes_text.iloc[:,0].apply(lambda x: model.encode(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "jokes_embeddings = np.vstack(jokes_embeddings.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings_subset = jokes_embeddings[:100,:]\n",
    "#### \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cvxpy as cp\n",
    "def get_good_vectors(embeddings):\n",
    "    n = embeddings.shape[0]\n",
    "    d = embeddings.shape[1]\n",
    "    x = cp.Variable(n)\n",
    "    constraints = [cp.sum(x)==1,x>=0]\n",
    "    objective_function = embeddings.T @ cp.diag(x) @ embeddings\n",
    "    objective = cp.Maximize(cp.lambda_min(objective_function))\n",
    "    prob = cp.Problem(objective,constraints)\n",
    "    prob.solve()\n",
    "    return x.value\n",
    "\n",
    "good_vectors = get_good_vectors(embeddings_subset)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "### save ratings and embeddings \n",
    "import pickle\n",
    "\n",
    "with open(\"./data/benchmark/joke s/jokes_ratings.pkl\",\"wb\") as f:\n",
    "    pickle.dump(jokes,f)\n",
    "with open(\"./data/benchmark/jokes/jokes_embeddings.pkl\",\"wb\") as f:\n",
    "    pickle.dump(embeddings_subset,f)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### \n",
    "\n",
    "from sklearn.linear_model import Lasso,LinearRegression,LassoCV\n",
    "N_random = 30\n",
    "top_k = 50\n",
    "N_mc = 10\n",
    "N_users = 7200\n",
    "user_indices = np.random.choice(jokes.index,size=N_users)\n",
    "\n",
    "regret_lasso = np.zeros((N_users,N_mc,N_random+top_k))\n",
    "regret_random = np.zeros((N_users,N_mc,N_random+top_k))\n",
    "regret_lr = np.zeros((N_users,N_mc,N_random+top_k))\n",
    "for j,user_index in tqdm.tqdm(enumerate(user_indices)):\n",
    "    ratings = jokes.iloc[user_index,1:].values\n",
    "    for i in range(N_mc):\n",
    "\n",
    "        random_indices = np.random.choice(embeddings_subset.shape[0], size=N_random, replace=False)\n",
    "        labels_not_random = [x for x in np.arange(ratings.shape[0]) if x not in random_indices]\n",
    "        embeddings_random = embeddings_subset[random_indices,:]\n",
    "        labels_random = ratings[random_indices]\n",
    "        sorted_true = np.sort(ratings)[::-1][:top_k+N_random]\n",
    "\n",
    "        ### LR\n",
    "        model_lr_ = LinearRegression() #LassoCV(max_iter=10000)\n",
    "        model_lr_.fit(embeddings_random,labels_random)\n",
    "        recommend = np.argsort(model_lr_.predict(embeddings_subset))[::-1]\n",
    "        sorted_ratings = ratings[recommend]\n",
    "        sorted_ratings = sorted_ratings[labels_not_random]\n",
    "        sorted_ratings = sorted_ratings[:top_k]\n",
    "        ratings_pred = np.concatenate([labels_random,sorted_ratings])\n",
    "        regret_lr[j,i,:] = np.cumsum(sorted_true-ratings_pred)\n",
    "         ### Lasso\n",
    "        subset_tobe_sampledfrom = np.argsort(good_vectors)[::-1][:60]\n",
    "        random_indices = np.random.choice(subset_tobe_sampledfrom, size=N_random, replace=False)\n",
    "        labels_not_random = [x for x in np.arange(ratings.shape[0]) if x not in random_indices]\n",
    "        embeddings_random = embeddings_subset[random_indices,:]\n",
    "        labels_random = ratings[random_indices]\n",
    "        sorted_true = np.sort(ratings)[::-1][:top_k+N_random]\n",
    "        model = Lasso(alpha=0.001,max_iter=10000)\n",
    "        model.fit(embeddings_random,labels_random)\n",
    "        non_zero_coef = model.coef_!=0\n",
    "        X = embeddings_random[:,non_zero_coef]\n",
    "        y = labels_random\n",
    "        model_lr = LinearRegression()\n",
    "        model_lr.fit(X,y)\n",
    "        recommend = np.argsort(model_lr.predict(embeddings_subset[:,non_zero_coef]))[::-1]\n",
    "        sorted_ratings = ratings[recommend]\n",
    "        sorted_ratings = sorted_ratings[labels_not_random]\n",
    "        sorted_ratings = sorted_ratings[:top_k]\n",
    "        ratings_pred = np.concatenate([y,sorted_ratings])\n",
    "        regret_lasso[j,i,:] = np.cumsum(sorted_true-ratings_pred)\n",
    "\n",
    "        ### Random \n",
    "        random_indices = np.random.choice(ratings.shape[0],size=N_random+top_k,replace=False)\n",
    "        ratings_random = ratings[random_indices]\n",
    "\n",
    "        regret_random[j,i,:] = np.cumsum(sorted_true-ratings_random)\n",
    "plt.plot(regret_lasso.mean(axis=1).mean(axis=0),label=\"Lasso\")\n",
    "plt.plot(regret_random.mean(axis=1).mean(axis=0),label=\"Random\")\n",
    "plt.plot(regret_lr.mean(axis=1).mean(axis=0),label=\"LR\")\n",
    "plt.legend(fontsize=15)\n",
    "regret_lasso_mean = regret_lasso.mean(axis=1).mean(axis=0)\n",
    "regret_lasso_std = regret_lasso.std(axis=1).mean(axis=0)\n",
    "regret_random_mean = regret_random.mean(axis=1).mean(axis=0)\n",
    "regret_random_std = regret_random.std(axis=1).mean(axis=0)\n",
    "regret_lr_mean = regret_lr.mean(axis=1).mean(axis=0)\n",
    "regret_lr_std = regret_lr.std(axis=1).mean(axis=0)\n",
    "\n",
    "plt.fill_between(np.arange(N_random+top_k),regret_lasso_mean-regret_lasso_std,regret_lasso_mean+regret_lasso_std,alpha=0.2)\n",
    "plt.fill_between(np.arange(N_random+top_k),regret_random_mean-regret_random_std,regret_random_mean+regret_random_std,alpha=0.2)\n",
    "plt.fill_between(np.arange(N_random+top_k),regret_lr_mean-regret_lr_std,regret_lr_mean+regret_lr_std,alpha=0.2)\n",
    "plt.xlabel(\"Number of Rounds\",fontsize=15)\n",
    "plt.ylabel(\"Regret\",fontsize=15)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### \n",
    "\n",
    "from sklearn.linear_model import Lasso,LinearRegression,LassoCV\n",
    "N_total = 80\n",
    "\n",
    "N_mc = 100\n",
    "N_users = 5\n",
    "user_indices = np.random.choice(jokes.index,size=N_users)\n",
    "N_randoms = np.arange(20,N_total-5,5)\n",
    "N_randoms_shape = len(N_randoms)\n",
    "regret_lasso = np.zeros((N_randoms_shape,N_users,N_mc,N_total))\n",
    "\n",
    "regret_random = np.zeros((N_randoms_shape,N_users,N_mc,N_total))\n",
    "regret_lr = np.zeros((N_randoms_shape,N_users,N_mc,N_total))\n",
    "for k,N_random in tqdm.tqdm(enumerate(N_randoms)):\n",
    "    top_k = N_total-N_random\n",
    "\n",
    "\n",
    "    for j,user_index in enumerate(user_indices):\n",
    "        ratings = jokes.iloc[user_index,1:].values\n",
    "        for i in range(N_mc):\n",
    "\n",
    "            random_indices = np.random.choice(embeddings_subset.shape[0], size=N_random, replace=False)\n",
    "            labels_not_random = [x for x in np.arange(ratings.shape[0]) if x not in random_indices]\n",
    "            embeddings_random = embeddings_subset[random_indices,:]\n",
    "            labels_random = ratings[random_indices]\n",
    "            sorted_true = np.sort(ratings)[::-1][:top_k+N_random]\n",
    "    ### LR\n",
    "            model_lr_ = LinearRegression() #LassoCV(max_iter=10000)\n",
    "            model_lr_.fit(embeddings_random,labels_random)\n",
    "            recommend = np.argsort(model_lr_.predict(embeddings_subset))[::-1]\n",
    "            sorted_ratings = ratings[recommend]\n",
    "            sorted_ratings = sorted_ratings[labels_not_random]\n",
    "            sorted_ratings = sorted_ratings[:top_k]\n",
    "            ratings_pred = np.concatenate([labels_random,sorted_ratings])\n",
    "            regret_lr[k,j,i,:] = np.cumsum(sorted_true-ratings_pred)\n",
    "            ### Lasso\n",
    "            subset_tobe_sampledfrom = np.argsort(good_vectors)[::-1][:60]\n",
    "            random_indices = np.random.choice(subset_tobe_sampledfrom, size=N_random, replace=False)\n",
    "            labels_not_random = [x for x in np.arange(ratings.shape[0]) if x not in random_indices]\n",
    "            embeddings_random = embeddings_subset[random_indices,:]\n",
    "            labels_random = ratings[random_indices]\n",
    "            model = Lasso(alpha=0.02,max_iter=10000)\n",
    "            model.fit(embeddings_random,labels_random)\n",
    "            non_zero_coef = model.coef_!=0\n",
    "            X = embeddings_random[:,non_zero_coef]\n",
    "            y = labels_random\n",
    "            model_lr = LinearRegression()\n",
    "            model_lr.fit(X,y)\n",
    "            recommend = np.argsort(model_lr.predict(embeddings_subset[:,non_zero_coef]))[::-1]\n",
    "            sorted_ratings = ratings[recommend]\n",
    "            sorted_ratings = sorted_ratings[labels_not_random]\n",
    "            sorted_ratings = sorted_ratings[:top_k]\n",
    "            ratings_pred = np.concatenate([y,sorted_ratings])\n",
    "            regret_lasso[k,j,i,:] = np.cumsum(sorted_true-ratings_pred)\n",
    "\n",
    "            ### Random \n",
    "            random_indices = np.random.choice(ratings.shape[0],size=N_random+top_k,replace=False)\n",
    "            ratings_random = ratings[random_indices]\n",
    "\n",
    "            regret_random[k,j,i,:] = np.cumsum(sorted_true-ratings_random)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "regret_lasso_mean = regret_lasso.mean(axis=2).mean(axis=1).mean(axis=0)\n",
    "regret_lasso_std = regret_lasso.std(axis=2).mean(axis=1).mean(axis=0)\n",
    "regret_random_mean = regret_random.mean(axis=2).mean(axis=1).mean(axis=0)\n",
    "regret_random_std = regret_random.std(axis=2).mean(axis=1).mean(axis=0)\n",
    "regret_lr_mean = regret_lr.mean(axis=2).mean(axis=1).mean(axis=0)\n",
    "regret_lr_std = regret_lr.std(axis=2).mean(axis=1).mean(axis=0)\n",
    "plt.plot(regret_lasso_mean,label=\"Lasso\")\n",
    "plt.plot(regret_random_mean,label=\"Random\")\n",
    "plt.plot(regret_lr_mean,label=\"LR\")\n",
    "plt.legend(fontsize=15)\n",
    "plt.fill_between(np.arange(N_random+top_k),regret_lasso_mean-regret_lasso_std,regret_lasso_mean+regret_lasso_std,alpha=0.2)\n",
    "plt.fill_between(np.arange(N_random+top_k),regret_random_mean-regret_random_std,regret_random_mean+regret_random_std,alpha=0.2)\n",
    "plt.fill_between(np.arange(N_random+top_k),regret_lr_mean-regret_lr_std,regret_lr_mean+regret_lr_std,alpha=0.2)\n",
    "plt.xlabel(\"Number of Rounds\",fontsize=15)\n",
    "plt.ylabel(\"Regret\",fontsize=15)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
