{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import numpy as np\n",
    "from numpy.core.multiarray import result_type\n",
    "import sys\n",
    "\n",
    "code_path = '/home/ec2-user/PycharmProjects/facility_location_with_predictions'\n",
    "if code_path not in sys.path:\n",
    "    sys.path.append(code_path)\n",
    "\n",
    "from run_experiments import run_experiment\n",
    "from itertools import product\n",
    "import polars as pl\n",
    "import multiprocessing as mp\n",
    "import numpy as np\n",
    "import pickle\n",
    "import matplotlib.pyplot as plt"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Set Parameters."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "num_trials = 300\n",
    "num_elems = 100\n",
    "connection_probability = 0.02\n",
    "num_sets_list = np.arange(2000, 22000, 2000)\n",
    "fp = 0.005\n",
    "fn = 0.15"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Run experiment to get a dataframe of results."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [],
   "source": [
    "df_list = []\n",
    "num_proc = 20\n",
    "with mp.Pool(num_proc) as pool:\n",
    "    result_obj_dict = {}\n",
    "    for num_sets in num_sets_list:\n",
    "        result_obj_dict[num_sets] = []\n",
    "        for trail_num in range(num_trials):\n",
    "            result_obj_dict[num_sets].append(pool.apply_async(\n",
    "                run_experiment,\n",
    "                kwds={'seed': trail_num,\n",
    "                      'false_neg_rate': fn,\n",
    "                      'false_pos_rate': fp,\n",
    "                      'num_elems': num_elems,\n",
    "                      'num_sets': num_sets,\n",
    "                      'connection_probability': connection_probability}\n",
    "            ))\n",
    "\n",
    "    # print (result_obj_dict)\n",
    "    i = 0\n",
    "    for num_sets, result_obj_list in result_obj_dict.items():\n",
    "        for result_obj in result_obj_list:\n",
    "            df_list.append(result_obj.get().select([\n",
    "                pl.lit(num_sets).alias('num_sets'),\n",
    "                pl.all()\n",
    "            ]))\n",
    "            print(i)\n",
    "            i += 1\n",
    "result_df = pl.concat(df_list)\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Write results to pickle."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "outputs": [],
   "source": [
    "with open(f'all_results_exp2_{fp}_{fn}_{connection_probability}.pkl', 'wb') as f:\n",
    "    pickle.dump(result_df, f)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Analysis"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Load results from pickle."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "with open(f'all_results_exp2_{fp}_{fn}_{connection_probability}.pkl', 'rb') as f:\n",
    "    result_df = pickle.load(f)"
   ],
   "metadata": {
    "collapsed": false
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "Calculate competitive ratio, and get mean and standard deviation."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "comp_ratio_cols = ['general online comp ratio',\n",
    "                   'prediction online comp ratio',\n",
    "                   'standard combination comp ratio',\n",
    "                   'smooth combination comp ratio']"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "grouped_df_expectation_of_ratio_mean = result_df.select([\n",
    "    pl.col('num_sets'),\n",
    "    (pl.col('general online') / pl.col('offline')).alias('general online comp ratio'),\n",
    "    (pl.col('prediction online') / pl.col('offline')).alias('prediction online comp ratio'),\n",
    "    (pl.col('standard combination') / pl.col('offline')).alias('standard combination comp ratio'),\n",
    "    (pl.col('smooth combination') / pl.col('offline')).alias('smooth combination comp ratio')\n",
    "]).groupby(['num_sets']).agg(\n",
    "    [pl.col(name).mean() for name in comp_ratio_cols]\n",
    ").sort(['num_sets'])"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "grouped_df_expectation_of_ratio_std = result_df.select([\n",
    "    pl.col('num_sets'),\n",
    "    (pl.col('general online') / pl.col('offline')).alias('general online comp ratio'),\n",
    "    (pl.col('prediction online') / pl.col('offline')).alias('prediction online comp ratio'),\n",
    "    (pl.col('standard combination') / pl.col('offline')).alias('standard combination comp ratio'),\n",
    "    (pl.col('smooth combination') / pl.col('offline')).alias('smooth combination comp ratio')\n",
    "]).groupby(['num_sets']).agg(\n",
    "    [pl.col(name).std() for name in comp_ratio_cols]\n",
    ").sort(['num_sets'])"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "plot the results."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "plt.plot(num_sets_list,\n",
    "         grouped_df_expectation_of_ratio_mean['general online comp ratio'].to_numpy(),\n",
    "         marker='o',\n",
    "         label='On')\n",
    "plt.plot(num_sets_list,\n",
    "         grouped_df_expectation_of_ratio_mean['prediction online comp ratio'].to_numpy(),\n",
    "         marker='o',\n",
    "         label='PredOn')\n",
    "plt.plot(num_sets_list,\n",
    "         grouped_df_expectation_of_ratio_mean['standard combination comp ratio'].to_numpy(),\n",
    "         marker='o',\n",
    "         label='BaseMerge')\n",
    "plt.plot(num_sets_list,\n",
    "         grouped_df_expectation_of_ratio_mean['smooth combination comp ratio'].to_numpy(),\n",
    "         marker='o',\n",
    "         label='SmoothMerge')\n",
    "plt.ylabel('Competitive ratio')\n",
    "plt.xlabel('#Sets')\n",
    "plt.legend()\n",
    "plt.savefig(code_path + '/varying_number_of_sets.pdf')"
   ],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
