{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Experiments for Section 4.1\n",
    "First, we post-process the dataset and store it as pkl files. \n",
    "Then run our method with given parameters for two different models: linear and NN."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Download and Extract the Webscope Yahoo! LTR Dataset into ./yahoo/ subdirectory from\n",
    "\n",
    "https://webscope.sandbox.yahoo.com/catalog.php?datatype=c&did=44"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's first save the test files into pkl files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_feats = []\n",
    "all_rels = []\n",
    "queries_used= {}\n",
    "with open(\"./yahoo/ltrc_yahoo/set1.test.txt\") as f:\n",
    "    curr_feats = []\n",
    "    curr_rels = []\n",
    "    curr_qid = None\n",
    "    for line in f:\n",
    "        line = line.split()\n",
    "        rel = int(line[0])\n",
    "        qid = int(line[1].split(':')[1])\n",
    "        if curr_qid is None:\n",
    "            curr_qid = qid\n",
    "        feats = np.zeros(700)\n",
    "        for j in range(2, len(line)):\n",
    "            featid = int(line[j].split(\":\")[0]) - 1\n",
    "            feats[featid] = float(line[j].split(\":\")[1])\n",
    "        if qid != curr_qid:\n",
    "            if qid in queries_used:\n",
    "                print(\"The queries arent sorted\")\n",
    "            all_feats.append(np.array(curr_feats))\n",
    "            all_rels.append(np.array(curr_rels))\n",
    "            curr_feats = []\n",
    "            curr_rels = []\n",
    "            queries_used[curr_qid] = True\n",
    "            curr_qid = qid\n",
    "        curr_feats.append(feats)\n",
    "        curr_rels.append(rel)\n",
    "    all_feats.append(np.array(curr_feats))\n",
    "    all_rels.append(np.array(curr_rels))\n",
    "    queries_used[curr_qid] = True\n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle as pkl\n",
    "pkl.dump((all_feats, all_rels), open(\"yahoo/yahoo_test.pkl\", \"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Do the same for Train a"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
