{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "006d2ed2-23d6-44e1-a497-4afdbbeb60ed",
   "metadata": {},
   "source": [
    "# LLaRA evaluation\n",
    "\n",
    "## Load all evaluation results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "104811ee-762c-4c56-8952-5b1a9b198739",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tid</th>\n",
       "      <th>level</th>\n",
       "      <th>task</th>\n",
       "      <th>seed</th>\n",
       "      <th>prompt</th>\n",
       "      <th>step</th>\n",
       "      <th>success</th>\n",
       "      <th>failure</th>\n",
       "      <th>method</th>\n",
       "      <th>prompt_mode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>placement_generalization/sweep_without_exceedi...</td>\n",
       "      <td>L1</td>\n",
       "      <td>sweep_without_exceeding</td>\n",
       "      <td>200000</td>\n",
       "      <td>Sweep two {swept_obj} into {bounds} without ex...</td>\n",
       "      <td>2</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>D-inBC-AuxD-VIMA-80k_prompt003</td>\n",
       "      <td>hso</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>placement_generalization/sweep_without_exceedi...</td>\n",
       "      <td>L1</td>\n",
       "      <td>sweep_without_exceeding</td>\n",
       "      <td>200001</td>\n",
       "      <td>Sweep any {swept_obj} into {bounds} without ex...</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>D-inBC-AuxD-VIMA-80k_prompt003</td>\n",
       "      <td>hso</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>placement_generalization/sweep_without_exceedi...</td>\n",
       "      <td>L1</td>\n",
       "      <td>sweep_without_exceeding</td>\n",
       "      <td>200002</td>\n",
       "      <td>Sweep all {swept_obj} into {bounds} without ex...</td>\n",
       "      <td>2</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>D-inBC-AuxD-VIMA-80k_prompt003</td>\n",
       "      <td>hso</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>placement_generalization/sweep_without_exceedi...</td>\n",
       "      <td>L1</td>\n",
       "      <td>sweep_without_exceeding</td>\n",
       "      <td>200003</td>\n",
       "      <td>Sweep two {swept_obj} into {bounds} without ex...</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>D-inBC-AuxD-VIMA-80k_prompt003</td>\n",
       "      <td>hso</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>placement_generalization/sweep_without_exceedi...</td>\n",
       "      <td>L1</td>\n",
       "      <td>sweep_without_exceeding</td>\n",
       "      <td>200004</td>\n",
       "      <td>Sweep two {swept_obj} into {bounds} without ex...</td>\n",
       "      <td>2</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>D-inBC-AuxD-VIMA-80k_prompt003</td>\n",
       "      <td>hso</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 tid level  \\\n",
       "0  placement_generalization/sweep_without_exceedi...    L1   \n",
       "1  placement_generalization/sweep_without_exceedi...    L1   \n",
       "2  placement_generalization/sweep_without_exceedi...    L1   \n",
       "3  placement_generalization/sweep_without_exceedi...    L1   \n",
       "4  placement_generalization/sweep_without_exceedi...    L1   \n",
       "\n",
       "                      task    seed  \\\n",
       "0  sweep_without_exceeding  200000   \n",
       "1  sweep_without_exceeding  200001   \n",
       "2  sweep_without_exceeding  200002   \n",
       "3  sweep_without_exceeding  200003   \n",
       "4  sweep_without_exceeding  200004   \n",
       "\n",
       "                                              prompt  step  success  failure  \\\n",
       "0  Sweep two {swept_obj} into {bounds} without ex...     2     True    False   \n",
       "1  Sweep any {swept_obj} into {bounds} without ex...     1     True    False   \n",
       "2  Sweep all {swept_obj} into {bounds} without ex...     2     True    False   \n",
       "3  Sweep two {swept_obj} into {bounds} without ex...     1     True    False   \n",
       "4  Sweep two {swept_obj} into {bounds} without ex...     2     True    False   \n",
       "\n",
       "                           method prompt_mode  \n",
       "0  D-inBC-AuxD-VIMA-80k_prompt003         hso  \n",
       "1  D-inBC-AuxD-VIMA-80k_prompt003         hso  \n",
       "2  D-inBC-AuxD-VIMA-80k_prompt003         hso  \n",
       "3  D-inBC-AuxD-VIMA-80k_prompt003         hso  \n",
       "4  D-inBC-AuxD-VIMA-80k_prompt003         hso  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from glob import glob\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import json\n",
    "import re\n",
    "\n",
    "# Generalization levels; a level's position in this list gives its short label 'L1'..'L4'.\n",
    "levels = ['placement_generalization',\n",
    " 'combinatorial_generalization',\n",
    " 'novel_object_generalization',\n",
    " 'novel_task_generalization']\n",
    "\n",
    "\n",
    "def method_label(model_name, prompt_id):\n",
    "    \"\"\"Return the method name under which one result file is reported.\n",
    "\n",
    "    prompt_id < 0       -> model_name unchanged: random user_prompt for action\n",
    "                           generation (default setting reported in paper)\n",
    "    0 <= prompt_id < 15 -> model_name + '_promptNNN': fixed user_prompt\n",
    "    prompt_id >= 15     -> model_name + '_no_prompt': user_prompt omitted\n",
    "    \"\"\"\n",
    "    if prompt_id < 0:\n",
    "        return model_name\n",
    "    if prompt_id < 15:\n",
    "        return model_name + f'_prompt{prompt_id:03d}'\n",
    "    return model_name + '_no_prompt'\n",
    "\n",
    "\n",
    "def load_eval_file(path):\n",
    "    \"\"\"Read one evaluation JSON file and return its list of result records.\n",
    "\n",
    "    The model name is taken from the file name: the last five characters\n",
    "    ('.json') are dropped, any '(...)' part is removed and only the text after\n",
    "    the last ']' is kept. The 'global' entry supplies prompt_mode and\n",
    "    prompt_id; every other entry is one record. Each record has its 'level'\n",
    "    replaced by the short label and gains 'method' and 'prompt_mode'.\n",
    "\n",
    "    Raises ValueError if a record's level is not listed in `levels`.\n",
    "    \"\"\"\n",
    "    model_name = re.sub(r'\\(.*\\)', '', path[:-5]).split(']')[-1]\n",
    "\n",
    "    # Use a context manager so the file handle is closed right after parsing.\n",
    "    with open(path, 'r') as fp:\n",
    "        entries = json.load(fp)\n",
    "\n",
    "    # Defaults used when the file has no 'global' entry.\n",
    "    prompt_mode = ''\n",
    "    prompt_id = -1\n",
    "    records = []\n",
    "    for key, entry in entries.items():\n",
    "        if key == 'global':\n",
    "            prompt_mode = entry.get('prompt_mode', 'N/A')\n",
    "            prompt_id = entry.get('prompt_id', -2)\n",
    "            continue\n",
    "        # Drop the LM histories, which this notebook never reads. pop() removes\n",
    "        # each key independently, so a missing 'lm_prompt_hist' no longer\n",
    "        # leaves 'lm_answer_hist' behind.\n",
    "        entry.pop('lm_prompt_hist', None)\n",
    "        entry.pop('lm_answer_hist', None)\n",
    "        records.append(entry)\n",
    "\n",
    "    method = method_label(model_name, prompt_id)\n",
    "    for record in records:\n",
    "        level_number = levels.index(record['level']) + 1\n",
    "        record['level'] = f'L{level_number}'\n",
    "        record['method'] = method\n",
    "        record['prompt_mode'] = prompt_mode\n",
    "    return records\n",
    "\n",
    "\n",
    "# '[' has no closing ']' in this pattern, so it is matched literally: only\n",
    "# files whose names start with '[' are loaded. sorted() keeps the row order\n",
    "# independent of the directory listing order.\n",
    "files = sorted(glob('[*.json'))\n",
    "result = []\n",
    "for f in files:\n",
    "    result.extend(load_eval_file(f))\n",
    "\n",
    "df = pd.DataFrame(result).fillna('')\n",
    "display(df.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2dbb1e86-8957-467d-8dcd-9b38024cd627",
   "metadata": {},
   "source": [
    "## Show the success rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c8012f26-ac97-4ae9-aebe-5cb2bad36d3e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Please note that results for L4 are not valid because there is no rotation data when the end effector is a spatula.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>level</th>\n",
       "      <th>L1</th>\n",
       "      <th>L2</th>\n",
       "      <th>L3</th>\n",
       "      <th>L4</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>method</th>\n",
       "      <th>prompt_mode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k</th>\n",
       "      <th>hso</th>\n",
       "      <td>234 / 260 (90.0%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>190 / 240 (79.2%)</td>\n",
       "      <td>27 / 80 (33.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_no_prompt</th>\n",
       "      <th>hso</th>\n",
       "      <td>231 / 260 (88.8%)</td>\n",
       "      <td>227 / 260 (87.3%)</td>\n",
       "      <td>187 / 240 (77.9%)</td>\n",
       "      <td>28 / 80 (35.0%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt000</th>\n",
       "      <th>hso</th>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>227 / 260 (87.3%)</td>\n",
       "      <td>189 / 240 (78.8%)</td>\n",
       "      <td>28 / 80 (35.0%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt001</th>\n",
       "      <th>hso</th>\n",
       "      <td>235 / 260 (90.4%)</td>\n",
       "      <td>230 / 260 (88.5%)</td>\n",
       "      <td>191 / 240 (79.6%)</td>\n",
       "      <td>31 / 80 (38.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt002</th>\n",
       "      <th>hso</th>\n",
       "      <td>232 / 260 (89.2%)</td>\n",
       "      <td>232 / 260 (89.2%)</td>\n",
       "      <td>190 / 240 (79.2%)</td>\n",
       "      <td>29 / 80 (36.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt003</th>\n",
       "      <th>hso</th>\n",
       "      <td>231 / 260 (88.8%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>190 / 240 (79.2%)</td>\n",
       "      <td>28 / 80 (35.0%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt004</th>\n",
       "      <th>hso</th>\n",
       "      <td>235 / 260 (90.4%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>191 / 240 (79.6%)</td>\n",
       "      <td>27 / 80 (33.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt005</th>\n",
       "      <th>hso</th>\n",
       "      <td>232 / 260 (89.2%)</td>\n",
       "      <td>232 / 260 (89.2%)</td>\n",
       "      <td>189 / 240 (78.8%)</td>\n",
       "      <td>27 / 80 (33.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt006</th>\n",
       "      <th>hso</th>\n",
       "      <td>230 / 260 (88.5%)</td>\n",
       "      <td>226 / 260 (86.9%)</td>\n",
       "      <td>191 / 240 (79.6%)</td>\n",
       "      <td>29 / 80 (36.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt007</th>\n",
       "      <th>hso</th>\n",
       "      <td>231 / 260 (88.8%)</td>\n",
       "      <td>228 / 260 (87.7%)</td>\n",
       "      <td>192 / 240 (80.0%)</td>\n",
       "      <td>29 / 80 (36.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt008</th>\n",
       "      <th>hso</th>\n",
       "      <td>233 / 260 (89.6%)</td>\n",
       "      <td>230 / 260 (88.5%)</td>\n",
       "      <td>192 / 240 (80.0%)</td>\n",
       "      <td>31 / 80 (38.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt009</th>\n",
       "      <th>hso</th>\n",
       "      <td>236 / 260 (90.8%)</td>\n",
       "      <td>228 / 260 (87.7%)</td>\n",
       "      <td>189 / 240 (78.8%)</td>\n",
       "      <td>31 / 80 (38.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt010</th>\n",
       "      <th>hso</th>\n",
       "      <td>230 / 260 (88.5%)</td>\n",
       "      <td>227 / 260 (87.3%)</td>\n",
       "      <td>192 / 240 (80.0%)</td>\n",
       "      <td>29 / 80 (36.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt011</th>\n",
       "      <th>hso</th>\n",
       "      <td>235 / 260 (90.4%)</td>\n",
       "      <td>228 / 260 (87.7%)</td>\n",
       "      <td>190 / 240 (79.2%)</td>\n",
       "      <td>30 / 80 (37.5%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt012</th>\n",
       "      <th>hso</th>\n",
       "      <td>235 / 260 (90.4%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>188 / 240 (78.3%)</td>\n",
       "      <td>31 / 80 (38.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt013</th>\n",
       "      <th>hso</th>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>190 / 240 (79.2%)</td>\n",
       "      <td>28 / 80 (35.0%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxB-VIMA-80k_prompt014</th>\n",
       "      <th>hso</th>\n",
       "      <td>236 / 260 (90.8%)</td>\n",
       "      <td>230 / 260 (88.5%)</td>\n",
       "      <td>189 / 240 (78.8%)</td>\n",
       "      <td>28 / 80 (35.0%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k</th>\n",
       "      <th>hso</th>\n",
       "      <td>218 / 260 (83.8%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>189 / 240 (78.8%)</td>\n",
       "      <td>25 / 80 (31.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_no_prompt</th>\n",
       "      <th>hso</th>\n",
       "      <td>232 / 260 (89.2%)</td>\n",
       "      <td>232 / 260 (89.2%)</td>\n",
       "      <td>199 / 240 (82.9%)</td>\n",
       "      <td>25 / 80 (31.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt000</th>\n",
       "      <th>hso</th>\n",
       "      <td>221 / 260 (85.0%)</td>\n",
       "      <td>223 / 260 (85.8%)</td>\n",
       "      <td>190 / 240 (79.2%)</td>\n",
       "      <td>22 / 80 (27.5%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt001</th>\n",
       "      <th>hso</th>\n",
       "      <td>221 / 260 (85.0%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>191 / 240 (79.6%)</td>\n",
       "      <td>27 / 80 (33.8%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt002</th>\n",
       "      <th>hso</th>\n",
       "      <td>219 / 260 (84.2%)</td>\n",
       "      <td>223 / 260 (85.8%)</td>\n",
       "      <td>189 / 240 (78.8%)</td>\n",
       "      <td>23 / 80 (28.7%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt003</th>\n",
       "      <th>hso</th>\n",
       "      <td>222 / 260 (85.4%)</td>\n",
       "      <td>223 / 260 (85.8%)</td>\n",
       "      <td>187 / 240 (77.9%)</td>\n",
       "      <td>22 / 80 (27.5%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt004</th>\n",
       "      <th>hso</th>\n",
       "      <td>219 / 260 (84.2%)</td>\n",
       "      <td>224 / 260 (86.2%)</td>\n",
       "      <td>186 / 240 (77.5%)</td>\n",
       "      <td>24 / 80 (30.0%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt005</th>\n",
       "      <th>hso</th>\n",
       "      <td>221 / 260 (85.0%)</td>\n",
       "      <td>223 / 260 (85.8%)</td>\n",
       "      <td>190 / 240 (79.2%)</td>\n",
       "      <td>23 / 80 (28.7%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt006</th>\n",
       "      <th>hso</th>\n",
       "      <td>222 / 260 (85.4%)</td>\n",
       "      <td>223 / 260 (85.8%)</td>\n",
       "      <td>186 / 240 (77.5%)</td>\n",
       "      <td>23 / 80 (28.7%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt007</th>\n",
       "      <th>hso</th>\n",
       "      <td>218 / 260 (83.8%)</td>\n",
       "      <td>226 / 260 (86.9%)</td>\n",
       "      <td>187 / 240 (77.9%)</td>\n",
       "      <td>23 / 80 (28.7%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt008</th>\n",
       "      <th>hso</th>\n",
       "      <td>220 / 260 (84.6%)</td>\n",
       "      <td>225 / 260 (86.5%)</td>\n",
       "      <td>186 / 240 (77.5%)</td>\n",
       "      <td>23 / 80 (28.7%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt009</th>\n",
       "      <th>hso</th>\n",
       "      <td>219 / 260 (84.2%)</td>\n",
       "      <td>225 / 260 (86.5%)</td>\n",
       "      <td>187 / 240 (77.9%)</td>\n",
       "      <td>25 / 80 (31.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt010</th>\n",
       "      <th>hso</th>\n",
       "      <td>220 / 260 (84.6%)</td>\n",
       "      <td>224 / 260 (86.2%)</td>\n",
       "      <td>184 / 240 (76.7%)</td>\n",
       "      <td>24 / 80 (30.0%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt011</th>\n",
       "      <th>hso</th>\n",
       "      <td>222 / 260 (85.4%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>191 / 240 (79.6%)</td>\n",
       "      <td>26 / 80 (32.5%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt012</th>\n",
       "      <th>hso</th>\n",
       "      <td>225 / 260 (86.5%)</td>\n",
       "      <td>229 / 260 (88.1%)</td>\n",
       "      <td>191 / 240 (79.6%)</td>\n",
       "      <td>26 / 80 (32.5%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt013</th>\n",
       "      <th>hso</th>\n",
       "      <td>223 / 260 (85.8%)</td>\n",
       "      <td>223 / 260 (85.8%)</td>\n",
       "      <td>186 / 240 (77.5%)</td>\n",
       "      <td>23 / 80 (28.7%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-AuxD-VIMA-80k_prompt014</th>\n",
       "      <th>hso</th>\n",
       "      <td>217 / 260 (83.5%)</td>\n",
       "      <td>222 / 260 (85.4%)</td>\n",
       "      <td>186 / 240 (77.5%)</td>\n",
       "      <td>25 / 80 (31.2%)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D-inBC-VIMA-80k</th>\n",
       "      <th>hso</th>\n",
       "      <td>227 / 260 (87.3%)</td>\n",
       "      <td>222 / 260 (85.4%)</td>\n",
       "      <td>197 / 240 (82.1%)</td>\n",
       "      <td>20 / 80 (25.0%)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "level                                                      L1  \\\n",
       "method                         prompt_mode                      \n",
       "D-inBC-AuxB-VIMA-80k           hso          234 / 260 (90.0%)   \n",
       "D-inBC-AuxB-VIMA-80k_no_prompt hso          231 / 260 (88.8%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt000 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt001 hso          235 / 260 (90.4%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt002 hso          232 / 260 (89.2%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt003 hso          231 / 260 (88.8%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt004 hso          235 / 260 (90.4%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt005 hso          232 / 260 (89.2%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt006 hso          230 / 260 (88.5%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt007 hso          231 / 260 (88.8%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt008 hso          233 / 260 (89.6%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt009 hso          236 / 260 (90.8%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt010 hso          230 / 260 (88.5%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt011 hso          235 / 260 (90.4%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt012 hso          235 / 260 (90.4%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt013 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt014 hso          236 / 260 (90.8%)   \n",
       "D-inBC-AuxD-VIMA-80k           hso          218 / 260 (83.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_no_prompt hso          232 / 260 (89.2%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt000 hso          221 / 260 (85.0%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt001 hso          221 / 260 (85.0%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt002 hso          219 / 260 (84.2%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt003 hso          222 / 260 (85.4%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt004 hso          219 / 260 (84.2%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt005 hso          221 / 260 (85.0%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt006 hso          222 / 260 (85.4%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt007 hso          218 / 260 (83.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt008 hso          220 / 260 (84.6%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt009 hso          219 / 260 (84.2%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt010 hso          220 / 260 (84.6%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt011 hso          222 / 260 (85.4%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt012 hso          225 / 260 (86.5%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt013 hso          223 / 260 (85.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt014 hso          217 / 260 (83.5%)   \n",
       "D-inBC-VIMA-80k                hso          227 / 260 (87.3%)   \n",
       "\n",
       "level                                                      L2  \\\n",
       "method                         prompt_mode                      \n",
       "D-inBC-AuxB-VIMA-80k           hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxB-VIMA-80k_no_prompt hso          227 / 260 (87.3%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt000 hso          227 / 260 (87.3%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt001 hso          230 / 260 (88.5%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt002 hso          232 / 260 (89.2%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt003 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt004 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt005 hso          232 / 260 (89.2%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt006 hso          226 / 260 (86.9%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt007 hso          228 / 260 (87.7%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt008 hso          230 / 260 (88.5%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt009 hso          228 / 260 (87.7%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt010 hso          227 / 260 (87.3%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt011 hso          228 / 260 (87.7%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt012 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt013 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxB-VIMA-80k_prompt014 hso          230 / 260 (88.5%)   \n",
       "D-inBC-AuxD-VIMA-80k           hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxD-VIMA-80k_no_prompt hso          232 / 260 (89.2%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt000 hso          223 / 260 (85.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt001 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt002 hso          223 / 260 (85.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt003 hso          223 / 260 (85.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt004 hso          224 / 260 (86.2%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt005 hso          223 / 260 (85.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt006 hso          223 / 260 (85.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt007 hso          226 / 260 (86.9%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt008 hso          225 / 260 (86.5%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt009 hso          225 / 260 (86.5%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt010 hso          224 / 260 (86.2%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt011 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt012 hso          229 / 260 (88.1%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt013 hso          223 / 260 (85.8%)   \n",
       "D-inBC-AuxD-VIMA-80k_prompt014 hso          222 / 260 (85.4%)   \n",
       "D-inBC-VIMA-80k                hso          222 / 260 (85.4%)   \n",
       "\n",
       "level                                                      L3               L4  \n",
       "method                         prompt_mode                                      \n",
       "D-inBC-AuxB-VIMA-80k           hso          190 / 240 (79.2%)  27 / 80 (33.8%)  \n",
       "D-inBC-AuxB-VIMA-80k_no_prompt hso          187 / 240 (77.9%)  28 / 80 (35.0%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt000 hso          189 / 240 (78.8%)  28 / 80 (35.0%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt001 hso          191 / 240 (79.6%)  31 / 80 (38.8%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt002 hso          190 / 240 (79.2%)  29 / 80 (36.2%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt003 hso          190 / 240 (79.2%)  28 / 80 (35.0%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt004 hso          191 / 240 (79.6%)  27 / 80 (33.8%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt005 hso          189 / 240 (78.8%)  27 / 80 (33.8%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt006 hso          191 / 240 (79.6%)  29 / 80 (36.2%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt007 hso          192 / 240 (80.0%)  29 / 80 (36.2%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt008 hso          192 / 240 (80.0%)  31 / 80 (38.8%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt009 hso          189 / 240 (78.8%)  31 / 80 (38.8%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt010 hso          192 / 240 (80.0%)  29 / 80 (36.2%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt011 hso          190 / 240 (79.2%)  30 / 80 (37.5%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt012 hso          188 / 240 (78.3%)  31 / 80 (38.8%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt013 hso          190 / 240 (79.2%)  28 / 80 (35.0%)  \n",
       "D-inBC-AuxB-VIMA-80k_prompt014 hso          189 / 240 (78.8%)  28 / 80 (35.0%)  \n",
       "D-inBC-AuxD-VIMA-80k           hso          189 / 240 (78.8%)  25 / 80 (31.2%)  \n",
       "D-inBC-AuxD-VIMA-80k_no_prompt hso          199 / 240 (82.9%)  25 / 80 (31.2%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt000 hso          190 / 240 (79.2%)  22 / 80 (27.5%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt001 hso          191 / 240 (79.6%)  27 / 80 (33.8%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt002 hso          189 / 240 (78.8%)  23 / 80 (28.7%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt003 hso          187 / 240 (77.9%)  22 / 80 (27.5%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt004 hso          186 / 240 (77.5%)  24 / 80 (30.0%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt005 hso          190 / 240 (79.2%)  23 / 80 (28.7%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt006 hso          186 / 240 (77.5%)  23 / 80 (28.7%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt007 hso          187 / 240 (77.9%)  23 / 80 (28.7%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt008 hso          186 / 240 (77.5%)  23 / 80 (28.7%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt009 hso          187 / 240 (77.9%)  25 / 80 (31.2%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt010 hso          184 / 240 (76.7%)  24 / 80 (30.0%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt011 hso          191 / 240 (79.6%)  26 / 80 (32.5%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt012 hso          191 / 240 (79.6%)  26 / 80 (32.5%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt013 hso          186 / 240 (77.5%)  23 / 80 (28.7%)  \n",
       "D-inBC-AuxD-VIMA-80k_prompt014 hso          186 / 240 (77.5%)  25 / 80 (31.2%)  \n",
       "D-inBC-VIMA-80k                hso          197 / 240 (82.1%)  20 / 80 (25.0%)  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def show_results(data, data_total):\n",
    "    \"\"\"Display a (method, prompt_mode) x level table of 'hits / total (rate%)' strings.\n",
    "\n",
    "    data:       rows counted as hits (the numerator of every cell).\n",
    "    data_total: all rows; they define the table's rows, columns and denominators.\n",
    "\n",
    "    A cell whose total is 0 shows 'N/A'. A (row, level) combination that is\n",
    "    absent from `data` counts as 0 hits. The table is rendered with display();\n",
    "    nothing is returned.\n",
    "    \"\"\"\n",
    "    keys = ['method', 'prompt_mode', 'level']\n",
    "    denominators = data_total.groupby(keys).size().unstack(fill_value=0)\n",
    "    hit_counts = data.groupby(keys).size().unstack(fill_value=0)\n",
    "    # Align the hit counts to the full table so absent rows/levels become 0.\n",
    "    numerators = hit_counts.reindex(index=denominators.index,\n",
    "                                    columns=denominators.columns,\n",
    "                                    fill_value=0)\n",
    "\n",
    "    def describe(hits, count):\n",
    "        # One table cell: 'hits / count (pct%)', or 'N/A' when there are no rows.\n",
    "        if count > 0:\n",
    "            return f'{hits} / {count} ({hits / count * 100:.1f}%)'\n",
    "        return 'N/A'\n",
    "\n",
    "    # Start from a string copy of the totals and overwrite it column by column.\n",
    "    table = denominators.copy().astype(str)\n",
    "    for level in denominators.columns:\n",
    "        table[level] = [describe(hits, count)\n",
    "                        for hits, count in zip(numerators[level], denominators[level])]\n",
    "\n",
    "    display(table)\n",
    "    \n",
    "# Numerator: rows whose 'success' flag is set; denominator: every loaded row.\n",
    "successful_rows = df[df['success']]\n",
    "\n",
    "# Print the L4 caveat first so it appears above the table.\n",
    "print('Please note that results for L4 are not valid because there is no rotation data when the end effector is a spatula.')\n",
    "show_results(successful_rows, df)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sel",
   "language": "python",
   "name": "sel"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
