{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e4edb13b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cube-triple-play-oraclerep-v0            OS                        0.47 [0.41, 0.53]\n",
      "cube-triple-play-oraclerep-v0            DQC h=5 h_a=1             0.95 [0.94, 0.97]\n",
      "cube-triple-play-oraclerep-v0            DQC-naïve h=5 h_a=1       0.16 [0.07, 0.30]\n",
      "cube-triple-play-oraclerep-v0            QC h=5                    0.20 [0.07, 0.36]\n",
      "cube-triple-play-oraclerep-v0            NS n=5                    0.93 [0.91, 0.94]\n",
      "cube-triple-play-oraclerep-v0            DQC h=25 h_a=1            0.76 [0.73, 0.80]\n",
      "cube-triple-play-oraclerep-v0            DQC-naïve h=25 h_a=1      0.14 [0.08, 0.22]\n",
      "cube-triple-play-oraclerep-v0            NS n=25                   0.30 [0.26, 0.35]\n",
      "cube-triple-play-oraclerep-v0            DQC h=25 h_a=5            0.98 [0.98, 0.99]\n",
      "cube-triple-play-oraclerep-v0            DQC-naïve h=25 h_a=5      0.27 [0.18, 0.38]\n",
      "cube-triple-play-oraclerep-v0            QC-NS n=25 h_a=5          0.51 [0.22, 0.80]\n",
      "cube-triple-play-oraclerep-v0            QC h=25                   0.21 [0.13, 0.31]\n",
      "cube-quadruple-play-oraclerep-v0         OS                        0.00 [0.00, 0.00]\n",
      "cube-quadruple-play-oraclerep-v0         DQC h=5 h_a=1             0.84 [0.83, 0.86]\n",
      "cube-quadruple-play-oraclerep-v0         DQC-naïve h=5 h_a=1       0.40 [0.29, 0.49]\n",
      "cube-quadruple-play-oraclerep-v0         QC h=5                    0.35 [0.26, 0.43]\n",
      "cube-quadruple-play-oraclerep-v0         NS n=5                    0.27 [0.11, 0.43]\n",
      "cube-quadruple-play-oraclerep-v0         DQC h=25 h_a=1            0.45 [0.41, 0.49]\n",
      "cube-quadruple-play-oraclerep-v0         DQC-naïve h=25 h_a=1      0.16 [0.09, 0.23]\n",
      "cube-quadruple-play-oraclerep-v0         NS n=25                   0.19 [0.11, 0.28]\n",
      "cube-quadruple-play-oraclerep-v0         DQC h=25 h_a=5            0.92 [0.90, 0.93]\n",
      "cube-quadruple-play-oraclerep-v0         DQC-naïve h=25 h_a=5      0.27 [0.15, 0.39]\n",
      "cube-quadruple-play-oraclerep-v0         QC-NS n=25 h_a=5          0.53 [0.28, 0.77]\n",
      "cube-quadruple-play-oraclerep-v0         QC h=25                   0.12 [0.06, 0.18]\n",
      "cube-octuple-play-oraclerep-v0           OS                        0.00 [0.00, 0.00]\n",
      "cube-octuple-play-oraclerep-v0           DQC h=5 h_a=1             0.00 [0.00, 0.00]\n",
      "cube-octuple-play-oraclerep-v0           DQC-naïve h=5 h_a=1       0.00 [0.00, 0.00]\n",
      "cube-octuple-play-oraclerep-v0           QC h=5                    0.00 [0.00, 0.00]\n",
      "cube-octuple-play-oraclerep-v0           NS n=5                    0.01 [0.00, 0.03]\n",
      "cube-octuple-play-oraclerep-v0           DQC h=25 h_a=1            0.10 [0.08, 0.11]\n",
      "cube-octuple-play-oraclerep-v0           DQC-naïve h=25 h_a=1      0.01 [0.00, 0.02]\n",
      "cube-octuple-play-oraclerep-v0           NS n=25                   0.09 [0.06, 0.12]\n",
      "cube-octuple-play-oraclerep-v0           DQC h=25 h_a=5            0.34 [0.33, 0.35]\n",
      "cube-octuple-play-oraclerep-v0           DQC-naïve h=25 h_a=5      0.03 [0.01, 0.05]\n",
      "cube-octuple-play-oraclerep-v0           QC-NS n=25 h_a=5          0.18 [0.10, 0.25]\n",
      "cube-octuple-play-oraclerep-v0           QC h=25                   0.00 [0.00, 0.00]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 OS                        0.00 [0.00, 0.00]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 DQC h=5 h_a=1             0.19 [0.15, 0.22]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 DQC-naïve h=5 h_a=1       0.80 [0.77, 0.83]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 QC h=5                    0.48 [0.45, 0.52]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 NS n=5                    0.89 [0.87, 0.91]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 DQC h=25 h_a=1            0.92 [0.90, 0.94]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 DQC-naïve h=25 h_a=1      0.22 [0.20, 0.24]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 NS n=25                   0.95 [0.94, 0.97]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 DQC h=25 h_a=5            0.51 [0.48, 0.54]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 DQC-naïve h=25 h_a=5      0.00 [0.00, 0.01]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 QC-NS n=25 h_a=5          0.60 [0.58, 0.61]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 QC h=25                   0.00 [0.00, 0.00]\n",
      "puzzle-4x5-play-oraclerep-v0             OS                        0.19 [0.18, 0.19]\n",
      "puzzle-4x5-play-oraclerep-v0             DQC h=5 h_a=1             0.90 [0.88, 0.92]\n",
      "puzzle-4x5-play-oraclerep-v0             DQC-naïve h=5 h_a=1       0.20 [0.20, 0.20]\n",
      "puzzle-4x5-play-oraclerep-v0             QC h=5                    0.20 [0.20, 0.20]\n",
      "puzzle-4x5-play-oraclerep-v0             NS n=5                    0.93 [0.91, 0.95]\n",
      "puzzle-4x5-play-oraclerep-v0             DQC h=25 h_a=1            0.91 [0.89, 0.92]\n",
      "puzzle-4x5-play-oraclerep-v0             DQC-naïve h=25 h_a=1      0.32 [0.28, 0.36]\n",
      "puzzle-4x5-play-oraclerep-v0             NS n=25                   0.89 [0.87, 0.91]\n",
      "puzzle-4x5-play-oraclerep-v0             DQC h=25 h_a=5            0.96 [0.95, 0.97]\n",
      "puzzle-4x5-play-oraclerep-v0             DQC-naïve h=25 h_a=5      0.33 [0.29, 0.37]\n",
      "puzzle-4x5-play-oraclerep-v0             QC-NS n=25 h_a=5          0.95 [0.94, 0.96]\n",
      "puzzle-4x5-play-oraclerep-v0             QC h=25                   0.30 [0.27, 0.33]\n",
      "puzzle-4x6-play-oraclerep-v0             OS                        0.19 [0.19, 0.20]\n",
      "puzzle-4x6-play-oraclerep-v0             DQC h=5 h_a=1             0.44 [0.42, 0.47]\n",
      "puzzle-4x6-play-oraclerep-v0             DQC-naïve h=5 h_a=1       0.26 [0.25, 0.28]\n",
      "puzzle-4x6-play-oraclerep-v0             QC h=5                    0.28 [0.27, 0.30]\n",
      "puzzle-4x6-play-oraclerep-v0             NS n=5                    0.56 [0.48, 0.63]\n",
      "puzzle-4x6-play-oraclerep-v0             DQC h=25 h_a=1            0.83 [0.80, 0.86]\n",
      "puzzle-4x6-play-oraclerep-v0             DQC-naïve h=25 h_a=1      0.33 [0.29, 0.37]\n",
      "puzzle-4x6-play-oraclerep-v0             NS n=25                   0.91 [0.86, 0.94]\n",
      "puzzle-4x6-play-oraclerep-v0             DQC h=25 h_a=5            0.68 [0.66, 0.71]\n",
      "puzzle-4x6-play-oraclerep-v0             DQC-naïve h=25 h_a=5      0.33 [0.28, 0.38]\n",
      "puzzle-4x6-play-oraclerep-v0             QC-NS n=25 h_a=5          0.95 [0.93, 0.97]\n",
      "puzzle-4x6-play-oraclerep-v0             QC h=25                   0.37 [0.33, 0.42]\n",
      "cube-triple-play-oraclerep-v0            SHARSA                    0.83 [0.81, 0.85]\n",
      "cube-triple-play-oraclerep-v0            HIQL                      0.35 [0.31, 0.39]\n",
      "cube-triple-play-oraclerep-v0            IQL                       0.66 [0.63, 0.67]\n",
      "cube-triple-play-oraclerep-v0            FBC                       0.54 [0.51, 0.56]\n",
      "cube-triple-play-oraclerep-v0            HFBC                      0.56 [0.53, 0.59]\n",
      "cube-quadruple-play-oraclerep-v0         SHARSA                    0.64 [0.62, 0.68]\n",
      "cube-quadruple-play-oraclerep-v0         HIQL                      0.24 [0.21, 0.28]\n",
      "cube-quadruple-play-oraclerep-v0         IQL                       0.53 [0.52, 0.55]\n",
      "cube-quadruple-play-oraclerep-v0         FBC                       0.34 [0.32, 0.37]\n",
      "cube-quadruple-play-oraclerep-v0         HFBC                      0.37 [0.34, 0.40]\n",
      "cube-octuple-play-oraclerep-v0           SHARSA                    0.34 [0.31, 0.36]\n",
      "cube-octuple-play-oraclerep-v0           HIQL                      0.20 [0.17, 0.23]\n",
      "cube-octuple-play-oraclerep-v0           IQL                       0.00 [0.00, 0.00]\n",
      "cube-octuple-play-oraclerep-v0           FBC                       0.00 [0.00, 0.00]\n",
      "cube-octuple-play-oraclerep-v0           HFBC                      0.28 [0.26, 0.29]\n",
      "puzzle-4x5-play-oraclerep-v0             SHARSA                    0.01 [0.01, 0.02]\n",
      "puzzle-4x5-play-oraclerep-v0             HIQL                      0.00 [0.00, 0.00]\n",
      "puzzle-4x5-play-oraclerep-v0             IQL                       0.20 [0.19, 0.20]\n",
      "puzzle-4x5-play-oraclerep-v0             FBC                       0.00 [0.00, 0.00]\n",
      "puzzle-4x5-play-oraclerep-v0             HFBC                      0.00 [0.00, 0.00]\n",
      "puzzle-4x6-play-oraclerep-v0             SHARSA                    0.64 [0.60, 0.68]\n",
      "puzzle-4x6-play-oraclerep-v0             HIQL                      0.09 [0.05, 0.13]\n",
      "puzzle-4x6-play-oraclerep-v0             IQL                       0.06 [0.03, 0.09]\n",
      "puzzle-4x6-play-oraclerep-v0             FBC                       0.01 [0.00, 0.01]\n",
      "puzzle-4x6-play-oraclerep-v0             HFBC                      0.04 [0.03, 0.05]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 SHARSA                    0.19 [0.16, 0.23]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 HIQL                      0.24 [0.22, 0.26]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 IQL                       0.03 [0.02, 0.05]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 FBC                       0.01 [0.01, 0.02]\n",
      "humanoidmaze-giant-navigate-oraclerep-v0 HFBC                      0.06 [0.04, 0.08]\n"
     ]
    }
   ],
   "source": [
    "import pickle as pkl\n",
    "import numpy as np\n",
    "\n",
    "def bootstrap_estimate(data, n_samples=500000, ci=0.95, rng=None):\n",
    "    \"\"\"Percentile-bootstrap confidence interval for the mean of a 1-D sample.\n",
    "\n",
    "    Args:\n",
    "        data: 1-D array-like of observations.\n",
    "        n_samples: number of bootstrap resamples to draw.\n",
    "        ci: confidence level of the interval (e.g. 0.95).\n",
    "        rng: optional seed or np.random.Generator for reproducible resampling.\n",
    "            When None, the global np.random state is used, as before.\n",
    "\n",
    "    Returns:\n",
    "        (mean, ci_low, ci_high, samples): the mean of data, the lower and upper\n",
    "        quantiles of the resampled means, and the array of n_samples resampled means.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if data is not 1-D or is empty.\n",
    "    \"\"\"\n",
    "    data = np.asarray(data)\n",
    "    if data.ndim != 1:\n",
    "        raise ValueError(f\"data must be 1-D, got shape {data.shape}\")\n",
    "    if data.size == 0:\n",
    "        raise ValueError(\"data must contain at least one observation\")\n",
    "\n",
    "    # One column of indices per resample, each as long as the original sample.\n",
    "    shape = (len(data), n_samples)\n",
    "    if rng is None:\n",
    "        # Legacy path: honours any np.random.seed() set by the caller.\n",
    "        indices = np.random.choice(len(data), size=shape)\n",
    "    else:\n",
    "        indices = np.random.default_rng(rng).integers(0, len(data), size=shape)\n",
    "    samples = np.mean(data[indices], axis=0)\n",
    "    mean = np.mean(data)\n",
    "\n",
    "    # Split the excluded probability mass evenly between the two tails.\n",
    "    delta = (1. - ci) * 0.5\n",
    "    ci_low = np.quantile(samples, delta)\n",
    "    ci_high = np.quantile(samples, 1. - delta)\n",
    "    return mean, ci_low, ci_high, samples\n",
    "\n",
    "# Seed the global NumPy RNG so the bootstrap CIs printed below are reproducible.\n",
    "np.random.seed(0)\n",
    "\n",
    "# NOTE: pickle can execute arbitrary code on load; only open trusted files.\n",
    "with open(\"dqc-exp-data.pkl\", \"rb\") as f:\n",
    "    raw = pkl.load(f)\n",
    "\n",
    "# Summaries go into a fresh dict instead of overwriting entries of the dict\n",
    "# that is being iterated.\n",
    "data = {}\n",
    "for (task, name), samples in raw.items():\n",
    "    # evaluated at 250K, 500K, 750K, 1M training steps\n",
    "    #  SHARSA, HIQL, IQL, FBC, HFBC are evaluated with 15 trials per task. Others are done with 50.\n",
    "    # Check the layout before indexing into it.\n",
    "    # NOTE(review): presumably 4 evaluation checkpoints x 10 runs - confirm.\n",
    "    assert samples.shape == (4, 10), samples.shape\n",
    "\n",
    "    mean, ci_low, ci_high, _ = bootstrap_estimate(samples[-1]) # use last step only\n",
    "\n",
    "    data[(task, name)] = {\"mean\": mean, \"ci_low\": ci_low, \"ci_high\": ci_high}\n",
    "    print(f\"{task:40s} {name:25s} {mean:.2f} [{ci_low:.2f}, {ci_high:.2f}]\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "zexp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
