{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Semi Synthetic Experiments"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Analyze the performance of various algorithms to solve the joint matching + activity task, when the number of volunteers is large and structured"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import random \n",
    "import matplotlib.pyplot as plt\n",
    "import json \n",
    "import argparse \n",
    "import sys\n",
    "import secrets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr0/home/naveenr/miniconda3/envs/food/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "from rmab.simulator import RMABSimulator\n",
    "from rmab.omniscient_policies import *\n",
    "from rmab.fr_dynamics import get_all_transitions\n",
    "from rmab.mcts_policies import *\n",
    "from rmab.utils import get_save_path, delete_duplicate_results, create_prob_distro\n",
    "import resource"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.cuda.set_per_process_memory_fraction(0.5)\n",
    "torch.set_num_threads(1)\n",
    "resource.setrlimit(resource.RLIMIT_AS, (30 * 1024 * 1024 * 1024, -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "is_jupyter = 'ipykernel' in sys.modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "if is_jupyter: \n",
    "    seed        = 43\n",
    "    n_arms      = 10\n",
    "    volunteers_per_arm = 10\n",
    "    budget      = 3\n",
    "    discount    = 0.9\n",
    "    alpha       = 3 \n",
    "    n_episodes  = 525\n",
    "    episode_len = 20 \n",
    "    n_epochs    = 1 \n",
    "    save_with_date = False \n",
    "    TIME_PER_RUN = 0.01 * 1000\n",
    "    lamb = 0.5\n",
    "    prob_distro = 'uniform'\n",
    "    train_iterations = 30\n",
    "    test_iterations = 30\n",
    "    out_folder = 'reward_variation/max_reward'\n",
    "    power = 0.5\n",
    "else:\n",
    "    parser = argparse.ArgumentParser()\n",
    "    parser.add_argument('--n_arms',         '-N', help='num beneficiaries (arms)', type=int, default=2)\n",
    "    parser.add_argument('--volunteers_per_arm',         '-V', help='volunteers per arm', type=int, default=5)\n",
    "    parser.add_argument('--episode_len',    '-H', help='episode length', type=int, default=20)\n",
    "    parser.add_argument('--n_episodes',     '-T', help='num episodes', type=int, default=200)\n",
    "    parser.add_argument('--budget',         '-B', help='budget', type=int, default=3)\n",
    "    parser.add_argument('--n_epochs',       '-E', help='number of epochs (num_repeats)', type=int, default=1)\n",
    "    parser.add_argument('--discount',       '-d', help='discount factor', type=float, default=0.9)\n",
    "    parser.add_argument('--alpha',          '-a', help='alpha: for conf radius', type=float, default=3)\n",
    "    parser.add_argument('--lamb',          '-l', help='lambda for matching-engagement tradeoff', type=float, default=0.5)\n",
    "    parser.add_argument('--seed',           '-s', help='random seed', type=int, default=42)\n",
    "    parser.add_argument('--prob_distro',           '-p', help='which prob distro [uniform,uniform_small,uniform_large,normal]', type=str, default='uniform')\n",
    "    parser.add_argument('--time_per_run',      '-t', help='time per MCTS run', type=float, default=.01*1000)\n",
    "    parser.add_argument('--train_iterations', help='Number of MCTS train iterations', type=int, default=30)\n",
    "    parser.add_argument('--test_iterations', help='Number of MCTS test iterations', type=int, default=30)\n",
    "    parser.add_argument('--power', help='Hyperparameter for the type of submodular function', type=float, default=1)\n",
    "    parser.add_argument('--out_folder', help='Which folder to write results to', type=str, default='reward_variation/max_reward')\n",
    "\n",
    "    parser.add_argument('--use_date', action='store_true')\n",
    "\n",
    "    args = parser.parse_args()\n",
    "\n",
    "    n_arms      = args.n_arms\n",
    "    volunteers_per_arm = args.volunteers_per_arm\n",
    "    budget      = args.budget\n",
    "    discount    = args.discount\n",
    "    alpha       = args.alpha \n",
    "    seed        = args.seed\n",
    "    n_episodes  = args.n_episodes\n",
    "    episode_len = args.episode_len\n",
    "    n_epochs    = args.n_epochs\n",
    "    lamb = args.lamb\n",
    "    save_with_date = args.use_date\n",
    "    TIME_PER_RUN = args.time_per_run\n",
    "    prob_distro = args.prob_distro\n",
    "    out_folder = args.out_folder\n",
    "    train_iterations = args.train_iterations \n",
    "    test_iterations = args.test_iterations \n",
    "    power = args.power\n",
    "\n",
    "save_name = secrets.token_hex(4)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_states = 2\n",
    "n_actions = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_population_size = 100 # number of random arms to generate\n",
    "all_transitions = get_all_transitions(all_population_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_environment(seed):\n",
    "    random.seed(seed)\n",
    "    np.random.seed(seed)\n",
    "\n",
    "    if prob_distro == 'uniform':\n",
    "        match_probabilities = [np.random.random() for i in range(all_population_size * volunteers_per_arm)] \n",
    "    elif prob_distro == 'uniform_small':\n",
    "        match_probabilities = [np.random.random()/4 for i in range(all_population_size * volunteers_per_arm)] \n",
    "    elif prob_distro == 'uniform_large':\n",
    "        match_probabilities = [np.random.random()/4+0.75 for i in range(all_population_size * volunteers_per_arm)] \n",
    "    elif prob_distro == 'normal':\n",
    "        match_probabilities = [np.clip(np.random.normal(0.25, 0.1),0,1) for i in range(all_population_size * volunteers_per_arm)] \n",
    "\n",
    "    all_features = np.arange(all_population_size)\n",
    "    match_probabilities = create_prob_distro(prob_distro,all_population_size*volunteers_per_arm)\n",
    "    match_probabilities = [np.random.randint(0,10) for i in range(all_population_size*volunteers_per_arm)]\n",
    "    \n",
    "    simulator = RMABSimulator(all_population_size, all_features, all_transitions,\n",
    "                n_arms, volunteers_per_arm, episode_len, n_epochs, n_episodes, budget, discount,number_states=n_states, reward_style='submodular',match_probability_list=match_probabilities,TIME_PER_RUN=TIME_PER_RUN)\n",
    "    simulator.power = power \n",
    "\n",
    "    return simulator "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_multi_seed(seed_list,policy,is_mcts=False,per_epoch_function=None,train_iterations=0,test_iterations=0,test_length=500,avg_reward=5, num_samples=256):\n",
    "    memories = []\n",
    "    scores = {\n",
    "        'reward': [],\n",
    "        'time': [], \n",
    "        'match': [], \n",
    "        'active_rate': [],\n",
    "    }\n",
    "\n",
    "    for seed in seed_list:\n",
    "        simulator = create_environment(seed)\n",
    "        if is_mcts:\n",
    "            simulator.mcts_train_iterations = train_iterations\n",
    "            simulator.mcts_test_iterations = test_iterations\n",
    "            simulator.policy_lr = policy_lr\n",
    "            simulator.value_lr = value_lr\n",
    "            simulator.avg_reward = avg_reward\n",
    "            simulator.num_samples = num_samples \n",
    "\n",
    "        if is_mcts:\n",
    "            match, active_rate, memory = run_heterogenous_policy(simulator, n_episodes, n_epochs, discount,policy,seed,lamb=lamb,should_train=True,test_T=test_length,get_memory=True,per_epoch_function=per_epoch_function)\n",
    "        else:\n",
    "            match, active_rate = run_heterogenous_policy(simulator, n_episodes, n_epochs, discount,policy,seed,lamb=lamb,should_train=True,test_T=test_length,per_epoch_function=per_epoch_function)\n",
    "        time_whittle = simulator.time_taken\n",
    "        discounted_reward = get_discounted_reward(match,active_rate,discount,lamb)\n",
    "        scores['reward'].append(discounted_reward)\n",
    "        scores['time'].append(time_whittle)\n",
    "        scores['match'].append(np.mean(match))\n",
    "        scores['active_rate'].append(np.mean(active_rate))\n",
    "        if is_mcts:\n",
    "            memories.append(memory)\n",
    "\n",
    "    return scores, memories, simulator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = {}\n",
    "results['parameters'] = {'seed'      : seed,\n",
    "        'n_arms'    : n_arms,\n",
    "        'volunteers_per_arm': volunteers_per_arm, \n",
    "        'budget'    : budget,\n",
    "        'discount'  : discount, \n",
    "        'alpha'     : alpha, \n",
    "        'n_episodes': n_episodes, \n",
    "        'episode_len': episode_len, \n",
    "        'n_epochs'  : n_epochs, \n",
    "        'lamb': lamb,\n",
    "        'time_per_run': TIME_PER_RUN, \n",
    "        'prob_distro': prob_distro, \n",
    "        'power': power} "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Index Policies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed_list = [seed]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [18 62 57 95 66 52 31 69 30 99]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "instance 0, ep 100\n",
      "instance 0, ep 101\n",
      "instance 0, ep 102\n",
      "instance 0, ep 103\n",
      "instance 0, ep 104\n",
      "instance 0, ep 105\n",
      "instance 0, ep 106\n",
      "instance 0, ep 107\n",
      "instance 0, ep 108\n",
      "instance 0, ep 109\n",
      "instance 0, ep 110\n",
      "instance 0, ep 111\n",
      "instance 0, ep 112\n",
      "instance 0, ep 113\n",
      "instance 0, ep 114\n",
      "instance 0, ep 115\n",
      "instance 0, ep 116\n",
      "instance 0, ep 117\n",
      "instance 0, ep 118\n",
      "instance 0, ep 119\n",
      "instance 0, ep 120\n",
      "instance 0, ep 121\n",
      "instance 0, ep 122\n",
      "instance 0, ep 123\n",
      "instance 0, ep 124\n",
      "instance 0, ep 125\n",
      "instance 0, ep 126\n",
      "instance 0, ep 127\n",
      "instance 0, ep 128\n",
      "instance 0, ep 129\n",
      "instance 0, ep 130\n",
      "instance 0, ep 131\n",
      "instance 0, ep 132\n",
      "instance 0, ep 133\n",
      "instance 0, ep 134\n",
      "instance 0, ep 135\n",
      "instance 0, ep 136\n",
      "instance 0, ep 137\n",
      "instance 0, ep 138\n",
      "instance 0, ep 139\n",
      "instance 0, ep 140\n",
      "instance 0, ep 141\n",
      "instance 0, ep 142\n",
      "instance 0, ep 143\n",
      "instance 0, ep 144\n",
      "instance 0, ep 145\n",
      "instance 0, ep 146\n",
      "instance 0, ep 147\n",
      "instance 0, ep 148\n",
      "instance 0, ep 149\n",
      "instance 0, ep 150\n",
      "instance 0, ep 151\n",
      "instance 0, ep 152\n",
      "instance 0, ep 153\n",
      "instance 0, ep 154\n",
      "instance 0, ep 155\n",
      "instance 0, ep 156\n",
      "instance 0, ep 157\n",
      "instance 0, ep 158\n",
      "instance 0, ep 159\n",
      "instance 0, ep 160\n",
      "instance 0, ep 161\n",
      "instance 0, ep 162\n",
      "instance 0, ep 163\n",
      "instance 0, ep 164\n",
      "instance 0, ep 165\n",
      "instance 0, ep 166\n",
      "instance 0, ep 167\n",
      "instance 0, ep 168\n",
      "instance 0, ep 169\n",
      "instance 0, ep 170\n",
      "instance 0, ep 171\n",
      "instance 0, ep 172\n",
      "instance 0, ep 173\n",
      "instance 0, ep 174\n",
      "instance 0, ep 175\n",
      "instance 0, ep 176\n",
      "instance 0, ep 177\n",
      "instance 0, ep 178\n",
      "instance 0, ep 179\n",
      "instance 0, ep 180\n",
      "instance 0, ep 181\n",
      "instance 0, ep 182\n",
      "instance 0, ep 183\n",
      "instance 0, ep 184\n",
      "instance 0, ep 185\n",
      "instance 0, ep 186\n",
      "instance 0, ep 187\n",
      "instance 0, ep 188\n",
      "instance 0, ep 189\n",
      "instance 0, ep 190\n",
      "instance 0, ep 191\n",
      "instance 0, ep 192\n",
      "instance 0, ep 193\n",
      "instance 0, ep 194\n",
      "instance 0, ep 195\n",
      "instance 0, ep 196\n",
      "instance 0, ep 197\n",
      "instance 0, ep 198\n",
      "instance 0, ep 199\n",
      "instance 0, ep 200\n",
      "instance 0, ep 201\n",
      "instance 0, ep 202\n",
      "instance 0, ep 203\n",
      "instance 0, ep 204\n",
      "instance 0, ep 205\n",
      "instance 0, ep 206\n",
      "instance 0, ep 207\n",
      "instance 0, ep 208\n",
      "instance 0, ep 209\n",
      "instance 0, ep 210\n",
      "instance 0, ep 211\n",
      "instance 0, ep 212\n",
      "instance 0, ep 213\n",
      "instance 0, ep 214\n",
      "instance 0, ep 215\n",
      "instance 0, ep 216\n",
      "instance 0, ep 217\n",
      "instance 0, ep 218\n",
      "instance 0, ep 219\n",
      "instance 0, ep 220\n",
      "instance 0, ep 221\n",
      "instance 0, ep 222\n",
      "instance 0, ep 223\n",
      "instance 0, ep 224\n",
      "instance 0, ep 225\n",
      "instance 0, ep 226\n",
      "instance 0, ep 227\n",
      "instance 0, ep 228\n",
      "instance 0, ep 229\n",
      "instance 0, ep 230\n",
      "instance 0, ep 231\n",
      "instance 0, ep 232\n",
      "instance 0, ep 233\n",
      "instance 0, ep 234\n",
      "instance 0, ep 235\n",
      "instance 0, ep 236\n",
      "instance 0, ep 237\n",
      "instance 0, ep 238\n",
      "instance 0, ep 239\n",
      "instance 0, ep 240\n",
      "instance 0, ep 241\n",
      "instance 0, ep 242\n",
      "instance 0, ep 243\n",
      "instance 0, ep 244\n",
      "instance 0, ep 245\n",
      "instance 0, ep 246\n",
      "instance 0, ep 247\n",
      "instance 0, ep 248\n",
      "instance 0, ep 249\n",
      "instance 0, ep 250\n",
      "instance 0, ep 251\n",
      "instance 0, ep 252\n",
      "instance 0, ep 253\n",
      "instance 0, ep 254\n",
      "instance 0, ep 255\n",
      "instance 0, ep 256\n",
      "instance 0, ep 257\n",
      "instance 0, ep 258\n",
      "instance 0, ep 259\n",
      "instance 0, ep 260\n",
      "instance 0, ep 261\n",
      "instance 0, ep 262\n",
      "instance 0, ep 263\n",
      "instance 0, ep 264\n",
      "instance 0, ep 265\n",
      "instance 0, ep 266\n",
      "instance 0, ep 267\n",
      "instance 0, ep 268\n",
      "instance 0, ep 269\n",
      "instance 0, ep 270\n",
      "instance 0, ep 271\n",
      "instance 0, ep 272\n",
      "instance 0, ep 273\n",
      "instance 0, ep 274\n",
      "instance 0, ep 275\n",
      "instance 0, ep 276\n",
      "instance 0, ep 277\n",
      "instance 0, ep 278\n",
      "instance 0, ep 279\n",
      "instance 0, ep 280\n",
      "instance 0, ep 281\n",
      "instance 0, ep 282\n",
      "instance 0, ep 283\n",
      "instance 0, ep 284\n",
      "instance 0, ep 285\n",
      "instance 0, ep 286\n",
      "instance 0, ep 287\n",
      "instance 0, ep 288\n",
      "instance 0, ep 289\n",
      "instance 0, ep 290\n",
      "instance 0, ep 291\n",
      "instance 0, ep 292\n",
      "instance 0, ep 293\n",
      "instance 0, ep 294\n",
      "instance 0, ep 295\n",
      "instance 0, ep 296\n",
      "instance 0, ep 297\n",
      "instance 0, ep 298\n",
      "instance 0, ep 299\n",
      "instance 0, ep 300\n",
      "instance 0, ep 301\n",
      "instance 0, ep 302\n",
      "instance 0, ep 303\n",
      "instance 0, ep 304\n",
      "instance 0, ep 305\n",
      "instance 0, ep 306\n",
      "instance 0, ep 307\n",
      "instance 0, ep 308\n",
      "instance 0, ep 309\n",
      "instance 0, ep 310\n",
      "instance 0, ep 311\n",
      "instance 0, ep 312\n",
      "instance 0, ep 313\n",
      "instance 0, ep 314\n",
      "instance 0, ep 315\n",
      "instance 0, ep 316\n",
      "instance 0, ep 317\n",
      "instance 0, ep 318\n",
      "instance 0, ep 319\n",
      "instance 0, ep 320\n",
      "instance 0, ep 321\n",
      "instance 0, ep 322\n",
      "instance 0, ep 323\n",
      "instance 0, ep 324\n",
      "instance 0, ep 325\n",
      "instance 0, ep 326\n",
      "instance 0, ep 327\n",
      "instance 0, ep 328\n",
      "instance 0, ep 329\n",
      "instance 0, ep 330\n",
      "instance 0, ep 331\n",
      "instance 0, ep 332\n",
      "instance 0, ep 333\n",
      "instance 0, ep 334\n",
      "instance 0, ep 335\n",
      "instance 0, ep 336\n",
      "instance 0, ep 337\n",
      "instance 0, ep 338\n",
      "instance 0, ep 339\n",
      "instance 0, ep 340\n",
      "instance 0, ep 341\n",
      "instance 0, ep 342\n",
      "instance 0, ep 343\n",
      "instance 0, ep 344\n",
      "instance 0, ep 345\n",
      "instance 0, ep 346\n",
      "instance 0, ep 347\n",
      "instance 0, ep 348\n",
      "instance 0, ep 349\n",
      "instance 0, ep 350\n",
      "instance 0, ep 351\n",
      "instance 0, ep 352\n",
      "instance 0, ep 353\n",
      "instance 0, ep 354\n",
      "instance 0, ep 355\n",
      "instance 0, ep 356\n",
      "instance 0, ep 357\n",
      "instance 0, ep 358\n",
      "instance 0, ep 359\n",
      "instance 0, ep 360\n",
      "instance 0, ep 361\n",
      "instance 0, ep 362\n",
      "instance 0, ep 363\n",
      "instance 0, ep 364\n",
      "instance 0, ep 365\n",
      "instance 0, ep 366\n",
      "instance 0, ep 367\n",
      "instance 0, ep 368\n",
      "instance 0, ep 369\n",
      "instance 0, ep 370\n",
      "instance 0, ep 371\n",
      "instance 0, ep 372\n",
      "instance 0, ep 373\n",
      "instance 0, ep 374\n",
      "instance 0, ep 375\n",
      "instance 0, ep 376\n",
      "instance 0, ep 377\n",
      "instance 0, ep 378\n",
      "instance 0, ep 379\n",
      "instance 0, ep 380\n",
      "instance 0, ep 381\n",
      "instance 0, ep 382\n",
      "instance 0, ep 383\n",
      "instance 0, ep 384\n",
      "instance 0, ep 385\n",
      "instance 0, ep 386\n",
      "instance 0, ep 387\n",
      "instance 0, ep 388\n",
      "instance 0, ep 389\n",
      "instance 0, ep 390\n",
      "instance 0, ep 391\n",
      "instance 0, ep 392\n",
      "instance 0, ep 393\n",
      "instance 0, ep 394\n",
      "instance 0, ep 395\n",
      "instance 0, ep 396\n",
      "instance 0, ep 397\n",
      "instance 0, ep 398\n",
      "instance 0, ep 399\n",
      "instance 0, ep 400\n",
      "instance 0, ep 401\n",
      "instance 0, ep 402\n",
      "instance 0, ep 403\n",
      "instance 0, ep 404\n",
      "instance 0, ep 405\n",
      "instance 0, ep 406\n",
      "instance 0, ep 407\n",
      "instance 0, ep 408\n",
      "instance 0, ep 409\n",
      "instance 0, ep 410\n",
      "instance 0, ep 411\n",
      "instance 0, ep XXXX-5\n",
      "instance 0, ep 413\n",
      "instance 0, ep 414\n",
      "instance 0, ep 415\n",
      "instance 0, ep 416\n",
      "instance 0, ep 417\n",
      "instance 0, ep 418\n",
      "instance 0, ep 419\n",
      "instance 0, ep 420\n",
      "instance 0, ep 421\n",
      "instance 0, ep 422\n",
      "instance 0, ep 423\n",
      "instance 0, ep 424\n",
      "instance 0, ep 425\n",
      "instance 0, ep 426\n",
      "instance 0, ep 427\n",
      "instance 0, ep 428\n",
      "instance 0, ep 429\n",
      "instance 0, ep 430\n",
      "instance 0, ep 431\n",
      "instance 0, ep 432\n",
      "instance 0, ep 433\n",
      "instance 0, ep 434\n",
      "instance 0, ep 435\n",
      "instance 0, ep 436\n",
      "instance 0, ep 437\n",
      "instance 0, ep 438\n",
      "instance 0, ep 439\n",
      "instance 0, ep 440\n",
      "instance 0, ep 441\n",
      "instance 0, ep 442\n",
      "instance 0, ep 443\n",
      "instance 0, ep 444\n",
      "instance 0, ep 445\n",
      "instance 0, ep 446\n",
      "instance 0, ep 447\n",
      "instance 0, ep 448\n",
      "instance 0, ep 449\n",
      "instance 0, ep 450\n",
      "instance 0, ep 451\n",
      "instance 0, ep 452\n",
      "instance 0, ep 453\n",
      "instance 0, ep 454\n",
      "instance 0, ep 455\n",
      "instance 0, ep 456\n",
      "instance 0, ep 457\n",
      "instance 0, ep 458\n",
      "instance 0, ep 459\n",
      "instance 0, ep 460\n",
      "instance 0, ep 461\n",
      "instance 0, ep 462\n",
      "instance 0, ep 463\n",
      "instance 0, ep 464\n",
      "instance 0, ep 465\n",
      "instance 0, ep 466\n",
      "instance 0, ep 467\n",
      "instance 0, ep 468\n",
      "instance 0, ep 469\n",
      "instance 0, ep 470\n",
      "instance 0, ep 471\n",
      "instance 0, ep 472\n",
      "instance 0, ep 473\n",
      "instance 0, ep 474\n",
      "instance 0, ep 475\n",
      "instance 0, ep 476\n",
      "instance 0, ep 477\n",
      "instance 0, ep 478\n",
      "instance 0, ep 479\n",
      "instance 0, ep 480\n",
      "instance 0, ep 481\n",
      "instance 0, ep 482\n",
      "instance 0, ep 483\n",
      "instance 0, ep 484\n",
      "instance 0, ep 485\n",
      "instance 0, ep 486\n",
      "instance 0, ep 487\n",
      "instance 0, ep 488\n",
      "instance 0, ep 489\n",
      "instance 0, ep 490\n",
      "instance 0, ep 491\n",
      "instance 0, ep 492\n",
      "instance 0, ep 493\n",
      "instance 0, ep 494\n",
      "instance 0, ep 495\n",
      "instance 0, ep 496\n",
      "instance 0, ep 497\n",
      "instance 0, ep 498\n",
      "instance 0, ep 499\n",
      "instance 0, ep 500\n",
      "instance 0, ep 501\n",
      "instance 0, ep 502\n",
      "instance 0, ep 503\n",
      "instance 0, ep 504\n",
      "instance 0, ep 505\n",
      "instance 0, ep 506\n",
      "instance 0, ep 507\n",
      "instance 0, ep 508\n",
      "instance 0, ep 509\n",
      "instance 0, ep 510\n",
      "instance 0, ep 511\n",
      "instance 0, ep 512\n",
      "instance 0, ep 513\n",
      "instance 0, ep 514\n",
      "instance 0, ep 515\n",
      "instance 0, ep 516\n",
      "instance 0, ep 517\n",
      "instance 0, ep 518\n",
      "instance 0, ep 519\n",
      "instance 0, ep 520\n",
      "instance 0, ep 521\n",
      "instance 0, ep 522\n",
      "instance 0, ep 523\n",
      "instance 0, ep 524\n",
      "Took 0.9451184272766113 time for inference and 18.867584705352783 time for training\n",
      "46.28202089244902\n"
     ]
    }
   ],
   "source": [
    "policy = greedy_policy\n",
    "name = \"greedy\"\n",
    "\n",
    "rewards, memory, simulator = run_multi_seed(seed_list,policy)\n",
    "results['{}_reward'.format(name)] = rewards['reward']\n",
    "results['{}_match'.format(name)] =  rewards['match'] \n",
    "results['{}_active'.format(name)] = rewards['active_rate']\n",
    "results['{}_time'.format(name)] =  rewards['time']\n",
    "print(np.mean(rewards['reward']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acting should always be good! 0.000 < 0.044\n",
      "acting should always be good! 0.000 < 0.162\n",
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [18 62 57 95 66 52 31 69 30 99]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "Took 0.75174880027771 time for inference and 2.1559197902679443 time for training\n",
      "30.36886936877762\n"
     ]
    }
   ],
   "source": [
    "policy = random_policy\n",
    "name = \"random\"\n",
    "\n",
    "rewards, memory, simulator = run_multi_seed(seed_list,policy)\n",
    "results['{}_reward'.format(name)] = rewards['reward']\n",
    "results['{}_match'.format(name)] =  rewards['match'] \n",
    "results['{}_active'.format(name)] = rewards['active_rate']\n",
    "results['{}_time'.format(name)] =  rewards['time']\n",
    "print(np.mean(rewards['reward']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acting should always be good! 0.000 < 0.044\n",
      "acting should always be good! 0.000 < 0.162\n",
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [18 62 57 95 66 52 31 69 30 99]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "Took 0.9990634918212891 time for inference and 3.6791768074035645 time for training\n",
      "0.756846030966729\n"
     ]
    }
   ],
   "source": [
    "policy = whittle_activity_policy\n",
    "name = \"engagement_whittle\"\n",
    "\n",
    "rewards, memory, simulator = run_multi_seed(seed_list,policy)\n",
    "results['{}_reward'.format(name)] = rewards['reward']\n",
    "results['{}_match'.format(name)] =  rewards['match'] \n",
    "results['{}_active'.format(name)] = rewards['active_rate']\n",
    "results['{}_time'.format(name)] =  rewards['time']\n",
    "print(np.mean(rewards['reward']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [18 62 57 95 66 52 31 69 30 99]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "instance 0, ep 100\n",
      "instance 0, ep 101\n",
      "instance 0, ep 102\n",
      "instance 0, ep 103\n",
      "instance 0, ep 104\n",
      "instance 0, ep 105\n",
      "instance 0, ep 106\n",
      "instance 0, ep 107\n",
      "instance 0, ep 108\n",
      "instance 0, ep 109\n",
      "instance 0, ep 110\n",
      "instance 0, ep 111\n",
      "instance 0, ep 112\n",
      "instance 0, ep 113\n",
      "instance 0, ep 114\n",
      "instance 0, ep 115\n",
      "instance 0, ep 116\n",
      "instance 0, ep 117\n",
      "instance 0, ep 118\n",
      "instance 0, ep 119\n",
      "instance 0, ep 120\n",
      "instance 0, ep 121\n",
      "instance 0, ep 122\n",
      "instance 0, ep 123\n",
      "instance 0, ep 124\n",
      "instance 0, ep 125\n",
      "instance 0, ep 126\n",
      "instance 0, ep 127\n",
      "instance 0, ep 128\n",
      "instance 0, ep 129\n",
      "instance 0, ep 130\n",
      "instance 0, ep 131\n",
      "instance 0, ep 132\n",
      "instance 0, ep 133\n",
      "instance 0, ep 134\n",
      "instance 0, ep 135\n",
      "instance 0, ep 136\n",
      "instance 0, ep 137\n",
      "instance 0, ep 138\n",
      "instance 0, ep 139\n",
      "instance 0, ep 140\n",
      "instance 0, ep 141\n",
      "instance 0, ep 142\n",
      "instance 0, ep 143\n",
      "instance 0, ep 144\n",
      "instance 0, ep 145\n",
      "instance 0, ep 146\n",
      "instance 0, ep 147\n",
      "instance 0, ep 148\n",
      "instance 0, ep 149\n",
      "instance 0, ep 150\n",
      "instance 0, ep 151\n",
      "instance 0, ep 152\n",
      "instance 0, ep 153\n",
      "instance 0, ep 154\n",
      "instance 0, ep 155\n",
      "instance 0, ep 156\n",
      "instance 0, ep 157\n",
      "instance 0, ep 158\n",
      "instance 0, ep 159\n",
      "instance 0, ep 160\n",
      "instance 0, ep 161\n",
      "instance 0, ep 162\n",
      "instance 0, ep 163\n",
      "instance 0, ep 164\n",
      "instance 0, ep 165\n",
      "instance 0, ep 166\n",
      "instance 0, ep 167\n",
      "instance 0, ep 168\n",
      "instance 0, ep 169\n",
      "instance 0, ep 170\n",
      "instance 0, ep 171\n",
      "instance 0, ep 172\n",
      "instance 0, ep 173\n",
      "instance 0, ep 174\n",
      "instance 0, ep 175\n",
      "instance 0, ep 176\n",
      "instance 0, ep 177\n",
      "instance 0, ep 178\n",
      "instance 0, ep 179\n",
      "instance 0, ep 180\n",
      "instance 0, ep 181\n",
      "instance 0, ep 182\n",
      "instance 0, ep 183\n",
      "instance 0, ep 184\n",
      "instance 0, ep 185\n",
      "instance 0, ep 186\n",
      "instance 0, ep 187\n",
      "instance 0, ep 188\n",
      "instance 0, ep 189\n",
      "instance 0, ep 190\n",
      "instance 0, ep 191\n",
      "instance 0, ep 192\n",
      "instance 0, ep 193\n",
      "instance 0, ep 194\n",
      "instance 0, ep 195\n",
      "instance 0, ep 196\n",
      "instance 0, ep 197\n",
      "instance 0, ep 198\n",
      "instance 0, ep 199\n",
      "instance 0, ep 200\n",
      "instance 0, ep 201\n",
      "instance 0, ep 202\n",
      "instance 0, ep 203\n",
      "instance 0, ep 204\n",
      "instance 0, ep 205\n",
      "instance 0, ep 206\n",
      "instance 0, ep 207\n",
      "instance 0, ep 208\n",
      "instance 0, ep 209\n",
      "instance 0, ep 210\n",
      "instance 0, ep 211\n",
      "instance 0, ep 212\n",
      "instance 0, ep 213\n",
      "instance 0, ep 214\n",
      "instance 0, ep 215\n",
      "instance 0, ep 216\n",
      "instance 0, ep 217\n",
      "instance 0, ep 218\n",
      "instance 0, ep 219\n",
      "instance 0, ep 220\n",
      "instance 0, ep 221\n",
      "instance 0, ep 222\n",
      "instance 0, ep 223\n",
      "instance 0, ep 224\n",
      "instance 0, ep 225\n",
      "instance 0, ep 226\n",
      "instance 0, ep 227\n",
      "instance 0, ep 228\n",
      "instance 0, ep 229\n",
      "instance 0, ep 230\n",
      "instance 0, ep 231\n",
      "instance 0, ep 232\n",
      "instance 0, ep 233\n",
      "instance 0, ep 234\n",
      "instance 0, ep 235\n",
      "instance 0, ep 236\n",
      "instance 0, ep 237\n",
      "instance 0, ep 238\n",
      "instance 0, ep 239\n",
      "instance 0, ep 240\n",
      "instance 0, ep 241\n",
      "instance 0, ep 242\n",
      "instance 0, ep 243\n",
      "instance 0, ep 244\n",
      "instance 0, ep 245\n",
      "instance 0, ep 246\n",
      "instance 0, ep 247\n",
      "instance 0, ep 248\n",
      "instance 0, ep 249\n",
      "instance 0, ep 250\n",
      "instance 0, ep 251\n",
      "instance 0, ep 252\n",
      "instance 0, ep 253\n",
      "instance 0, ep 254\n",
      "instance 0, ep 255\n",
      "instance 0, ep 256\n",
      "instance 0, ep 257\n",
      "instance 0, ep 258\n",
      "instance 0, ep 259\n",
      "instance 0, ep 260\n",
      "instance 0, ep 261\n",
      "instance 0, ep 262\n",
      "instance 0, ep 263\n",
      "instance 0, ep 264\n",
      "instance 0, ep 265\n",
      "instance 0, ep 266\n",
      "instance 0, ep 267\n",
      "instance 0, ep 268\n",
      "instance 0, ep 269\n",
      "instance 0, ep 270\n",
      "instance 0, ep 271\n",
      "instance 0, ep 272\n",
      "instance 0, ep 273\n",
      "instance 0, ep 274\n",
      "instance 0, ep 275\n",
      "instance 0, ep 276\n",
      "instance 0, ep 277\n",
      "instance 0, ep 278\n",
      "instance 0, ep 279\n",
      "instance 0, ep 280\n",
      "instance 0, ep 281\n",
      "instance 0, ep 282\n",
      "instance 0, ep 283\n",
      "instance 0, ep 284\n",
      "instance 0, ep 285\n",
      "instance 0, ep 286\n",
      "instance 0, ep 287\n",
      "instance 0, ep 288\n",
      "instance 0, ep 289\n",
      "instance 0, ep 290\n",
      "instance 0, ep 291\n",
      "instance 0, ep 292\n",
      "instance 0, ep 293\n",
      "instance 0, ep 294\n",
      "instance 0, ep 295\n",
      "instance 0, ep 296\n",
      "instance 0, ep 297\n",
      "instance 0, ep 298\n",
      "instance 0, ep 299\n",
      "instance 0, ep 300\n",
      "instance 0, ep 301\n",
      "instance 0, ep 302\n",
      "instance 0, ep 303\n",
      "instance 0, ep 304\n",
      "instance 0, ep 305\n",
      "instance 0, ep 306\n",
      "instance 0, ep 307\n",
      "instance 0, ep 308\n",
      "instance 0, ep 309\n",
      "instance 0, ep 310\n",
      "instance 0, ep 311\n",
      "instance 0, ep 312\n",
      "instance 0, ep 313\n",
      "instance 0, ep 314\n",
      "instance 0, ep 315\n",
      "instance 0, ep 316\n",
      "instance 0, ep 317\n",
      "instance 0, ep 318\n",
      "instance 0, ep 319\n",
      "instance 0, ep 320\n",
      "instance 0, ep 321\n",
      "instance 0, ep 322\n",
      "instance 0, ep 323\n",
      "instance 0, ep 324\n",
      "instance 0, ep 325\n",
      "instance 0, ep 326\n",
      "instance 0, ep 327\n",
      "instance 0, ep 328\n",
      "instance 0, ep 329\n",
      "instance 0, ep 330\n",
      "instance 0, ep 331\n",
      "instance 0, ep 332\n",
      "instance 0, ep 333\n",
      "instance 0, ep 334\n",
      "instance 0, ep 335\n",
      "instance 0, ep 336\n",
      "instance 0, ep 337\n",
      "instance 0, ep 338\n",
      "instance 0, ep 339\n",
      "instance 0, ep 340\n",
      "instance 0, ep 341\n",
      "instance 0, ep 342\n",
      "instance 0, ep 343\n",
      "instance 0, ep 344\n",
      "instance 0, ep 345\n",
      "instance 0, ep 346\n",
      "instance 0, ep 347\n",
      "instance 0, ep 348\n",
      "instance 0, ep 349\n",
      "instance 0, ep 350\n",
      "instance 0, ep 351\n",
      "instance 0, ep 352\n",
      "instance 0, ep 353\n",
      "instance 0, ep 354\n",
      "instance 0, ep 355\n",
      "instance 0, ep 356\n",
      "instance 0, ep 357\n",
      "instance 0, ep 358\n",
      "instance 0, ep 359\n",
      "instance 0, ep 360\n",
      "instance 0, ep 361\n",
      "instance 0, ep 362\n",
      "instance 0, ep 363\n",
      "instance 0, ep 364\n",
      "instance 0, ep 365\n",
      "instance 0, ep 366\n",
      "instance 0, ep 367\n",
      "instance 0, ep 368\n",
      "instance 0, ep 369\n",
      "instance 0, ep 370\n",
      "instance 0, ep 371\n",
      "instance 0, ep 372\n",
      "instance 0, ep 373\n",
      "instance 0, ep 374\n",
      "instance 0, ep 375\n",
      "instance 0, ep 376\n",
      "instance 0, ep 377\n",
      "instance 0, ep 378\n",
      "instance 0, ep 379\n",
      "instance 0, ep 380\n",
      "instance 0, ep 381\n",
      "instance 0, ep 382\n",
      "instance 0, ep 383\n",
      "instance 0, ep 384\n",
      "instance 0, ep 385\n",
      "instance 0, ep 386\n",
      "instance 0, ep 387\n",
      "instance 0, ep 388\n",
      "instance 0, ep 389\n",
      "instance 0, ep 390\n",
      "instance 0, ep 391\n",
      "instance 0, ep 392\n",
      "instance 0, ep 393\n",
      "instance 0, ep 394\n",
      "instance 0, ep 395\n",
      "instance 0, ep 396\n",
      "instance 0, ep 397\n",
      "instance 0, ep 398\n",
      "instance 0, ep 399\n",
      "instance 0, ep 400\n",
      "instance 0, ep 401\n",
      "instance 0, ep 402\n",
      "instance 0, ep 403\n",
      "instance 0, ep 404\n",
      "instance 0, ep 405\n",
      "instance 0, ep 406\n",
      "instance 0, ep 407\n",
      "instance 0, ep 408\n",
      "instance 0, ep 409\n",
      "instance 0, ep 410\n",
      "instance 0, ep 411\n",
      "instance 0, ep XXXX-5\n",
      "instance 0, ep 413\n",
      "instance 0, ep 414\n",
      "instance 0, ep 415\n",
      "instance 0, ep 416\n",
      "instance 0, ep 417\n",
      "instance 0, ep 418\n",
      "instance 0, ep 419\n",
      "instance 0, ep 420\n",
      "instance 0, ep 421\n",
      "instance 0, ep 422\n",
      "instance 0, ep 423\n",
      "instance 0, ep 424\n",
      "instance 0, ep 425\n",
      "instance 0, ep 426\n",
      "instance 0, ep 427\n",
      "instance 0, ep 428\n",
      "instance 0, ep 429\n",
      "instance 0, ep 430\n",
      "instance 0, ep 431\n",
      "instance 0, ep 432\n",
      "instance 0, ep 433\n",
      "instance 0, ep 434\n",
      "instance 0, ep 435\n",
      "instance 0, ep 436\n",
      "instance 0, ep 437\n",
      "instance 0, ep 438\n",
      "instance 0, ep 439\n",
      "instance 0, ep 440\n",
      "instance 0, ep 441\n",
      "instance 0, ep 442\n",
      "instance 0, ep 443\n",
      "instance 0, ep 444\n",
      "instance 0, ep 445\n",
      "instance 0, ep 446\n",
      "instance 0, ep 447\n",
      "instance 0, ep 448\n",
      "instance 0, ep 449\n",
      "instance 0, ep 450\n",
      "instance 0, ep 451\n",
      "instance 0, ep 452\n",
      "instance 0, ep 453\n",
      "instance 0, ep 454\n",
      "instance 0, ep 455\n",
      "instance 0, ep 456\n",
      "instance 0, ep 457\n",
      "instance 0, ep 458\n",
      "instance 0, ep 459\n",
      "instance 0, ep 460\n",
      "instance 0, ep 461\n",
      "instance 0, ep 462\n",
      "instance 0, ep 463\n",
      "instance 0, ep 464\n",
      "instance 0, ep 465\n",
      "instance 0, ep 466\n",
      "instance 0, ep 467\n",
      "instance 0, ep 468\n",
      "instance 0, ep 469\n",
      "instance 0, ep 470\n",
      "instance 0, ep 471\n",
      "instance 0, ep 472\n",
      "instance 0, ep 473\n",
      "instance 0, ep 474\n",
      "instance 0, ep 475\n",
      "instance 0, ep 476\n",
      "instance 0, ep 477\n",
      "instance 0, ep 478\n",
      "instance 0, ep 479\n",
      "instance 0, ep 480\n",
      "instance 0, ep 481\n",
      "instance 0, ep 482\n",
      "instance 0, ep 483\n",
      "instance 0, ep 484\n",
      "instance 0, ep 485\n",
      "instance 0, ep 486\n",
      "instance 0, ep 487\n",
      "instance 0, ep 488\n",
      "instance 0, ep 489\n",
      "instance 0, ep 490\n",
      "instance 0, ep 491\n",
      "instance 0, ep 492\n",
      "instance 0, ep 493\n",
      "instance 0, ep 494\n",
      "instance 0, ep 495\n",
      "instance 0, ep 496\n",
      "instance 0, ep 497\n",
      "instance 0, ep 498\n",
      "instance 0, ep 499\n",
      "instance 0, ep 500\n",
      "instance 0, ep 501\n",
      "instance 0, ep 502\n",
      "instance 0, ep 503\n",
      "instance 0, ep 504\n",
      "instance 0, ep 505\n",
      "instance 0, ep 506\n",
      "instance 0, ep 507\n",
      "instance 0, ep 508\n",
      "instance 0, ep 509\n",
      "instance 0, ep 510\n",
      "instance 0, ep 511\n",
      "instance 0, ep 512\n",
      "instance 0, ep 513\n",
      "instance 0, ep 514\n",
      "instance 0, ep 515\n",
      "instance 0, ep 516\n",
      "instance 0, ep 517\n",
      "instance 0, ep 518\n",
      "instance 0, ep 519\n",
      "instance 0, ep 520\n",
      "instance 0, ep 521\n",
      "instance 0, ep 522\n",
      "instance 0, ep 523\n",
      "instance 0, ep 524\n",
      "Took 1.112030029296875 time for inference and 30.55455207824707 time for training\n",
      "37.75217457430879\n"
     ]
    }
   ],
   "source": [
    "policy = whittle_policy\n",
    "name = \"linear_whittle\"\n",
    "\n",
    "rewards, memory, simulator = run_multi_seed(seed_list,policy)\n",
    "results['{}_reward'.format(name)] = rewards['reward']\n",
    "results['{}_match'.format(name)] =  rewards['match'] \n",
    "results['{}_active'.format(name)] = rewards['active_rate']\n",
    "results['{}_time'.format(name)] =  rewards['time']\n",
    "print(np.mean(rewards['reward']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [18 62 57 95 66 52 31 69 30 99]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "instance 0, ep 100\n",
      "instance 0, ep 101\n",
      "instance 0, ep 102\n",
      "instance 0, ep 103\n",
      "instance 0, ep 104\n",
      "instance 0, ep 105\n",
      "instance 0, ep 106\n",
      "instance 0, ep 107\n",
      "instance 0, ep 108\n",
      "instance 0, ep 109\n",
      "instance 0, ep 110\n",
      "instance 0, ep 111\n",
      "instance 0, ep 112\n",
      "instance 0, ep 113\n",
      "instance 0, ep 114\n",
      "instance 0, ep 115\n",
      "instance 0, ep 116\n",
      "instance 0, ep 117\n",
      "instance 0, ep 118\n",
      "instance 0, ep 119\n",
      "instance 0, ep 120\n",
      "instance 0, ep 121\n",
      "instance 0, ep 122\n",
      "instance 0, ep 123\n",
      "instance 0, ep 124\n",
      "instance 0, ep 125\n",
      "instance 0, ep 126\n",
      "instance 0, ep 127\n",
      "instance 0, ep 128\n",
      "instance 0, ep 129\n",
      "instance 0, ep 130\n",
      "instance 0, ep 131\n",
      "instance 0, ep 132\n",
      "instance 0, ep 133\n",
      "instance 0, ep 134\n",
      "instance 0, ep 135\n",
      "instance 0, ep 136\n",
      "instance 0, ep 137\n",
      "instance 0, ep 138\n",
      "instance 0, ep 139\n",
      "instance 0, ep 140\n",
      "instance 0, ep 141\n",
      "instance 0, ep 142\n",
      "instance 0, ep 143\n",
      "instance 0, ep 144\n",
      "instance 0, ep 145\n",
      "instance 0, ep 146\n",
      "instance 0, ep 147\n",
      "instance 0, ep 148\n",
      "instance 0, ep 149\n",
      "instance 0, ep 150\n",
      "instance 0, ep 151\n",
      "instance 0, ep 152\n",
      "instance 0, ep 153\n",
      "instance 0, ep 154\n",
      "instance 0, ep 155\n",
      "instance 0, ep 156\n",
      "instance 0, ep 157\n",
      "instance 0, ep 158\n",
      "instance 0, ep 159\n",
      "instance 0, ep 160\n",
      "instance 0, ep 161\n",
      "instance 0, ep 162\n",
      "instance 0, ep 163\n",
      "instance 0, ep 164\n",
      "instance 0, ep 165\n",
      "instance 0, ep 166\n",
      "instance 0, ep 167\n",
      "instance 0, ep 168\n",
      "instance 0, ep 169\n",
      "instance 0, ep 170\n",
      "instance 0, ep 171\n",
      "instance 0, ep 172\n",
      "instance 0, ep 173\n",
      "instance 0, ep 174\n",
      "instance 0, ep 175\n",
      "instance 0, ep 176\n",
      "instance 0, ep 177\n",
      "instance 0, ep 178\n",
      "instance 0, ep 179\n",
      "instance 0, ep 180\n",
      "instance 0, ep 181\n",
      "instance 0, ep 182\n",
      "instance 0, ep 183\n",
      "instance 0, ep 184\n",
      "instance 0, ep 185\n",
      "instance 0, ep 186\n",
      "instance 0, ep 187\n",
      "instance 0, ep 188\n",
      "instance 0, ep 189\n",
      "instance 0, ep 190\n",
      "instance 0, ep 191\n",
      "instance 0, ep 192\n",
      "instance 0, ep 193\n",
      "instance 0, ep 194\n",
      "instance 0, ep 195\n",
      "instance 0, ep 196\n",
      "instance 0, ep 197\n",
      "instance 0, ep 198\n",
      "instance 0, ep 199\n",
      "instance 0, ep 200\n",
      "instance 0, ep 201\n",
      "instance 0, ep 202\n",
      "instance 0, ep 203\n",
      "instance 0, ep 204\n",
      "instance 0, ep 205\n",
      "instance 0, ep 206\n",
      "instance 0, ep 207\n",
      "instance 0, ep 208\n",
      "instance 0, ep 209\n",
      "instance 0, ep 210\n",
      "instance 0, ep 211\n",
      "instance 0, ep 212\n",
      "instance 0, ep 213\n",
      "instance 0, ep 214\n",
      "instance 0, ep 215\n",
      "instance 0, ep 216\n",
      "instance 0, ep 217\n",
      "instance 0, ep 218\n",
      "instance 0, ep 219\n",
      "instance 0, ep 220\n",
      "instance 0, ep 221\n",
      "instance 0, ep 222\n",
      "instance 0, ep 223\n",
      "instance 0, ep 224\n",
      "instance 0, ep 225\n",
      "instance 0, ep 226\n",
      "instance 0, ep 227\n",
      "instance 0, ep 228\n",
      "instance 0, ep 229\n",
      "instance 0, ep 230\n",
      "instance 0, ep 231\n",
      "instance 0, ep 232\n",
      "instance 0, ep 233\n",
      "instance 0, ep 234\n",
      "instance 0, ep 235\n",
      "instance 0, ep 236\n",
      "instance 0, ep 237\n",
      "instance 0, ep 238\n",
      "instance 0, ep 239\n",
      "instance 0, ep 240\n",
      "instance 0, ep 241\n",
      "instance 0, ep 242\n",
      "instance 0, ep 243\n",
      "instance 0, ep 244\n",
      "instance 0, ep 245\n",
      "instance 0, ep 246\n",
      "instance 0, ep 247\n",
      "instance 0, ep 248\n",
      "instance 0, ep 249\n",
      "instance 0, ep 250\n",
      "instance 0, ep 251\n",
      "instance 0, ep 252\n",
      "instance 0, ep 253\n",
      "instance 0, ep 254\n",
      "instance 0, ep 255\n",
      "instance 0, ep 256\n",
      "instance 0, ep 257\n",
      "instance 0, ep 258\n",
      "instance 0, ep 259\n",
      "instance 0, ep 260\n",
      "instance 0, ep 261\n",
      "instance 0, ep 262\n",
      "instance 0, ep 263\n",
      "instance 0, ep 264\n",
      "instance 0, ep 265\n",
      "instance 0, ep 266\n",
      "instance 0, ep 267\n",
      "instance 0, ep 268\n",
      "instance 0, ep 269\n",
      "instance 0, ep 270\n",
      "instance 0, ep 271\n",
      "instance 0, ep 272\n",
      "instance 0, ep 273\n",
      "instance 0, ep 274\n",
      "instance 0, ep 275\n",
      "instance 0, ep 276\n",
      "instance 0, ep 277\n",
      "instance 0, ep 278\n",
      "instance 0, ep 279\n",
      "instance 0, ep 280\n",
      "instance 0, ep 281\n",
      "instance 0, ep 282\n",
      "instance 0, ep 283\n",
      "instance 0, ep 284\n",
      "instance 0, ep 285\n",
      "instance 0, ep 286\n",
      "instance 0, ep 287\n",
      "instance 0, ep 288\n",
      "instance 0, ep 289\n",
      "instance 0, ep 290\n",
      "instance 0, ep 291\n",
      "instance 0, ep 292\n",
      "instance 0, ep 293\n",
      "instance 0, ep 294\n",
      "instance 0, ep 295\n",
      "instance 0, ep 296\n",
      "instance 0, ep 297\n",
      "instance 0, ep 298\n",
      "instance 0, ep 299\n",
      "instance 0, ep 300\n",
      "instance 0, ep 301\n",
      "instance 0, ep 302\n",
      "instance 0, ep 303\n",
      "instance 0, ep 304\n",
      "instance 0, ep 305\n",
      "instance 0, ep 306\n",
      "instance 0, ep 307\n",
      "instance 0, ep 308\n",
      "instance 0, ep 309\n",
      "instance 0, ep 310\n",
      "instance 0, ep 311\n",
      "instance 0, ep 312\n",
      "instance 0, ep 313\n",
      "instance 0, ep 314\n",
      "instance 0, ep 315\n",
      "instance 0, ep 316\n",
      "instance 0, ep 317\n",
      "instance 0, ep 318\n",
      "instance 0, ep 319\n",
      "instance 0, ep 320\n",
      "instance 0, ep 321\n",
      "instance 0, ep 322\n",
      "instance 0, ep 323\n",
      "instance 0, ep 324\n",
      "instance 0, ep 325\n",
      "instance 0, ep 326\n",
      "instance 0, ep 327\n",
      "instance 0, ep 328\n",
      "instance 0, ep 329\n",
      "instance 0, ep 330\n",
      "instance 0, ep 331\n",
      "instance 0, ep 332\n",
      "instance 0, ep 333\n",
      "instance 0, ep 334\n",
      "instance 0, ep 335\n",
      "instance 0, ep 336\n",
      "instance 0, ep 337\n",
      "instance 0, ep 338\n",
      "instance 0, ep 339\n",
      "instance 0, ep 340\n",
      "instance 0, ep 341\n",
      "instance 0, ep 342\n",
      "instance 0, ep 343\n",
      "instance 0, ep 344\n",
      "instance 0, ep 345\n",
      "instance 0, ep 346\n",
      "instance 0, ep 347\n",
      "instance 0, ep 348\n",
      "instance 0, ep 349\n",
      "instance 0, ep 350\n",
      "instance 0, ep 351\n",
      "instance 0, ep 352\n",
      "instance 0, ep 353\n",
      "instance 0, ep 354\n",
      "instance 0, ep 355\n",
      "instance 0, ep 356\n",
      "instance 0, ep 357\n",
      "instance 0, ep 358\n",
      "instance 0, ep 359\n",
      "instance 0, ep 360\n",
      "instance 0, ep 361\n",
      "instance 0, ep 362\n",
      "instance 0, ep 363\n",
      "instance 0, ep 364\n",
      "instance 0, ep 365\n",
      "instance 0, ep 366\n",
      "instance 0, ep 367\n",
      "instance 0, ep 368\n",
      "instance 0, ep 369\n",
      "instance 0, ep 370\n",
      "instance 0, ep 371\n",
      "instance 0, ep 372\n",
      "instance 0, ep 373\n",
      "instance 0, ep 374\n",
      "instance 0, ep 375\n",
      "instance 0, ep 376\n",
      "instance 0, ep 377\n",
      "instance 0, ep 378\n",
      "instance 0, ep 379\n",
      "instance 0, ep 380\n",
      "instance 0, ep 381\n",
      "instance 0, ep 382\n",
      "instance 0, ep 383\n",
      "instance 0, ep 384\n",
      "instance 0, ep 385\n",
      "instance 0, ep 386\n",
      "instance 0, ep 387\n",
      "instance 0, ep 388\n",
      "instance 0, ep 389\n",
      "instance 0, ep 390\n",
      "instance 0, ep 391\n",
      "instance 0, ep 392\n",
      "instance 0, ep 393\n",
      "instance 0, ep 394\n",
      "instance 0, ep 395\n",
      "instance 0, ep 396\n",
      "instance 0, ep 397\n",
      "instance 0, ep 398\n",
      "instance 0, ep 399\n",
      "instance 0, ep 400\n",
      "instance 0, ep 401\n",
      "instance 0, ep 402\n",
      "instance 0, ep 403\n",
      "instance 0, ep 404\n",
      "instance 0, ep 405\n",
      "instance 0, ep 406\n",
      "instance 0, ep 407\n",
      "instance 0, ep 408\n",
      "instance 0, ep 409\n",
      "instance 0, ep 410\n",
      "instance 0, ep 411\n",
      "instance 0, ep XXXX-5\n",
      "instance 0, ep 413\n",
      "instance 0, ep 414\n",
      "instance 0, ep 415\n",
      "instance 0, ep 416\n",
      "instance 0, ep 417\n",
      "instance 0, ep 418\n",
      "instance 0, ep 419\n",
      "instance 0, ep 420\n",
      "instance 0, ep 421\n",
      "instance 0, ep 422\n",
      "instance 0, ep 423\n",
      "instance 0, ep 424\n",
      "instance 0, ep 425\n",
      "instance 0, ep 426\n",
      "instance 0, ep 427\n",
      "instance 0, ep 428\n",
      "instance 0, ep 429\n",
      "instance 0, ep 430\n",
      "instance 0, ep 431\n",
      "instance 0, ep 432\n",
      "instance 0, ep 433\n",
      "instance 0, ep 434\n",
      "instance 0, ep 435\n",
      "instance 0, ep 436\n",
      "instance 0, ep 437\n",
      "instance 0, ep 438\n",
      "instance 0, ep 439\n",
      "instance 0, ep 440\n",
      "instance 0, ep 441\n",
      "instance 0, ep 442\n",
      "instance 0, ep 443\n",
      "instance 0, ep 444\n",
      "instance 0, ep 445\n",
      "instance 0, ep 446\n",
      "instance 0, ep 447\n",
      "instance 0, ep 448\n",
      "instance 0, ep 449\n",
      "instance 0, ep 450\n",
      "instance 0, ep 451\n",
      "instance 0, ep 452\n",
      "instance 0, ep 453\n",
      "instance 0, ep 454\n",
      "instance 0, ep 455\n",
      "instance 0, ep 456\n",
      "instance 0, ep 457\n",
      "instance 0, ep 458\n",
      "instance 0, ep 459\n",
      "instance 0, ep 460\n",
      "instance 0, ep 461\n",
      "instance 0, ep 462\n",
      "instance 0, ep 463\n",
      "instance 0, ep 464\n",
      "instance 0, ep 465\n",
      "instance 0, ep 466\n",
      "instance 0, ep 467\n",
      "instance 0, ep 468\n",
      "instance 0, ep 469\n",
      "instance 0, ep 470\n",
      "instance 0, ep 471\n",
      "instance 0, ep 472\n",
      "instance 0, ep 473\n",
      "instance 0, ep 474\n",
      "instance 0, ep 475\n",
      "instance 0, ep 476\n",
      "instance 0, ep 477\n",
      "instance 0, ep 478\n",
      "instance 0, ep 479\n",
      "instance 0, ep 480\n",
      "instance 0, ep 481\n",
      "instance 0, ep 482\n",
      "instance 0, ep 483\n",
      "instance 0, ep 484\n",
      "instance 0, ep 485\n",
      "instance 0, ep 486\n",
      "instance 0, ep 487\n",
      "instance 0, ep 488\n",
      "instance 0, ep 489\n",
      "instance 0, ep 490\n",
      "instance 0, ep 491\n",
      "instance 0, ep 492\n",
      "instance 0, ep 493\n",
      "instance 0, ep 494\n",
      "instance 0, ep 495\n",
      "instance 0, ep 496\n",
      "instance 0, ep 497\n",
      "instance 0, ep 498\n",
      "instance 0, ep 499\n",
      "instance 0, ep 500\n",
      "instance 0, ep 501\n",
      "instance 0, ep 502\n",
      "instance 0, ep 503\n",
      "instance 0, ep 504\n",
      "instance 0, ep 505\n",
      "instance 0, ep 506\n",
      "instance 0, ep 507\n",
      "instance 0, ep 508\n",
      "instance 0, ep 509\n",
      "instance 0, ep 510\n",
      "instance 0, ep 511\n",
      "instance 0, ep 512\n",
      "instance 0, ep 513\n",
      "instance 0, ep 514\n",
      "instance 0, ep 515\n",
      "instance 0, ep 516\n",
      "instance 0, ep 517\n",
      "instance 0, ep 518\n",
      "instance 0, ep 519\n",
      "instance 0, ep 520\n",
      "instance 0, ep 521\n",
      "instance 0, ep 522\n",
      "instance 0, ep 523\n",
      "instance 0, ep 524\n",
      "Took 1.2090377807617188 time for inference and 29.75546407699585 time for training\n",
      "46.296526038940634\n"
     ]
    }
   ],
   "source": [
    "policy = shapley_whittle_submodular_policy \n",
    "name = \"shapley_whittle\"\n",
    "\n",
    "rewards, memory, simulator = run_multi_seed(seed_list,policy)\n",
    "results['{}_reward'.format(name)] = rewards['reward']\n",
    "results['{}_match'.format(name)] =  rewards['match'] \n",
    "results['{}_active'.format(name)] = rewards['active_rate']\n",
    "results['{}_time'.format(name)] =  rewards['time']\n",
    "print(np.mean(rewards['reward']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "acting should always be good! 0.000 < 0.044\n",
      "acting should always be good! 0.000 < 0.162\n",
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [59 30 15 14 68 11 23 49 39 18]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "Took 1.0011789798736572 time for inference and 4.197936296463013 time for training\n",
      "acting should always be good! 0.000 < 0.044\n",
      "acting should always be good! 0.000 < 0.162\n",
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [18 62 57 95 66 52 31 69 30 99]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "Took 0.9926755428314209 time for inference and 3.721172332763672 time for training\n",
      "acting should always be good! 0.000 < 0.044\n",
      "acting should always be good! 0.000 < 0.162\n",
      "acting should always be good! 0.108 < 0.183\n",
      "good start state should always be good! 0.380 < 0.508\n",
      "good start state should always be good! 0.506 < 0.760\n",
      "cohort [55 92 76 29 99 90  4 68 43 42]\n",
      "instance 0, ep 1\n",
      "instance 0, ep 2\n",
      "instance 0, ep 3\n",
      "instance 0, ep 4\n",
      "instance 0, ep 5\n",
      "instance 0, ep 6\n",
      "instance 0, ep 7\n",
      "instance 0, ep 8\n",
      "instance 0, ep 9\n",
      "instance 0, ep 10\n",
      "instance 0, ep 11\n",
      "instance 0, ep 12\n",
      "instance 0, ep 13\n",
      "instance 0, ep 14\n",
      "instance 0, ep 15\n",
      "instance 0, ep 16\n",
      "instance 0, ep 17\n",
      "instance 0, ep 18\n",
      "instance 0, ep 19\n",
      "instance 0, ep 20\n",
      "instance 0, ep 21\n",
      "instance 0, ep 22\n",
      "instance 0, ep 23\n",
      "instance 0, ep 24\n",
      "instance 0, ep 25\n",
      "instance 0, ep 26\n",
      "instance 0, ep 27\n",
      "instance 0, ep 28\n",
      "instance 0, ep 29\n",
      "instance 0, ep 30\n",
      "instance 0, ep 31\n",
      "instance 0, ep 32\n",
      "instance 0, ep 33\n",
      "instance 0, ep 34\n",
      "instance 0, ep 35\n",
      "instance 0, ep 36\n",
      "instance 0, ep 37\n",
      "instance 0, ep 38\n",
      "instance 0, ep 39\n",
      "instance 0, ep 40\n",
      "instance 0, ep 41\n",
      "instance 0, ep 42\n",
      "instance 0, ep 43\n",
      "instance 0, ep 44\n",
      "instance 0, ep 45\n",
      "instance 0, ep 46\n",
      "instance 0, ep 47\n",
      "instance 0, ep 48\n",
      "instance 0, ep 49\n",
      "instance 0, ep 50\n",
      "instance 0, ep 51\n",
      "instance 0, ep 52\n",
      "instance 0, ep 53\n",
      "instance 0, ep 54\n",
      "instance 0, ep 55\n",
      "instance 0, ep 56\n",
      "instance 0, ep 57\n",
      "instance 0, ep 58\n",
      "instance 0, ep 59\n",
      "instance 0, ep 60\n",
      "instance 0, ep 61\n",
      "instance 0, ep 62\n",
      "instance 0, ep 63\n",
      "instance 0, ep 64\n",
      "instance 0, ep 65\n",
      "instance 0, ep 66\n",
      "instance 0, ep 67\n",
      "instance 0, ep 68\n",
      "instance 0, ep 69\n",
      "instance 0, ep 70\n",
      "instance 0, ep 71\n",
      "instance 0, ep 72\n",
      "instance 0, ep 73\n",
      "instance 0, ep 74\n",
      "instance 0, ep 75\n",
      "instance 0, ep 76\n",
      "instance 0, ep 77\n",
      "instance 0, ep 78\n",
      "instance 0, ep 79\n",
      "instance 0, ep 80\n",
      "instance 0, ep 81\n",
      "instance 0, ep 82\n",
      "instance 0, ep 83\n",
      "instance 0, ep 84\n",
      "instance 0, ep 85\n",
      "instance 0, ep 86\n",
      "instance 0, ep 87\n",
      "instance 0, ep 88\n",
      "instance 0, ep 89\n",
      "instance 0, ep 90\n",
      "instance 0, ep 91\n",
      "instance 0, ep 92\n",
      "instance 0, ep 93\n",
      "instance 0, ep 94\n",
      "instance 0, ep 95\n",
      "instance 0, ep 96\n",
      "instance 0, ep 97\n",
      "instance 0, ep 98\n",
      "instance 0, ep 99\n",
      "Took 0.991661548614502 time for inference and 3.6848742961883545 time for training\n",
      "66.76726273516279\n"
     ]
    }
   ],
   "source": [
    "policy = whittle_greedy_policy\n",
    "name = \"greedy_whittle\"\n",
    "\n",
    "rewards, memory, simulator = run_multi_seed(seed_list,policy)\n",
    "results['{}_reward'.format(name)] = rewards['reward']\n",
    "results['{}_match'.format(name)] =  rewards['match'] \n",
    "results['{}_active'.format(name)] = rewards['active_rate']\n",
    "results['{}_time'.format(name)] =  rewards['time']\n",
    "print(np.mean(rewards['reward']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "if n_arms * volunteers_per_arm <= 4:\n",
    "    policy = q_iteration_policy\n",
    "    per_epoch_function = q_iteration_submodular_epoch(power)\n",
    "    name = \"optimal\"\n",
    "\n",
    "    rewards, memory, simulator = run_multi_seed(seed_list,policy,per_epoch_function=per_epoch_function)\n",
    "    results['{}_reward'.format(name)] = rewards['reward']\n",
    "    results['{}_match'.format(name)] =  rewards['match'] \n",
    "    results['{}_active'.format(name)] = rewards['active_rate']\n",
    "    results['{}_time'.format(name)] =  rewards['time']\n",
    "    print(np.mean(rewards['reward']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "save_path = get_save_path(out_folder,save_name,seed,use_date=save_with_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "delete_duplicate_results(out_folder,\"\",results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "json.dump(results,open('../../results/'+save_path,'w'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "food",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
