{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da69f1aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from plotly.subplots import make_subplots\n",
    "\n",
    "from misc import data_config, model_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a7c197f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model_name</th>\n",
       "      <th>model_abbr</th>\n",
       "      <th>model_type</th>\n",
       "      <th>model_num_params</th>\n",
       "      <th>model_num_layers</th>\n",
       "      <th>model_num_experts</th>\n",
       "      <th>model_top_k</th>\n",
       "      <th>model_attn</th>\n",
       "      <th>model_group</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>llamamoe</th>\n",
       "      <td>LLaMA-MoE-v1</td>\n",
       "      <td>LL1</td>\n",
       "      <td>causal</td>\n",
       "      <td>6.74</td>\n",
       "      <td>32</td>\n",
       "      <td>16</td>\n",
       "      <td>4</td>\n",
       "      <td>eager</td>\n",
       "      <td>llamamoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>llamamoes</th>\n",
       "      <td>LLaMA-MoE-v1-SFT</td>\n",
       "      <td>LL1-S</td>\n",
       "      <td>causal</td>\n",
       "      <td>6.74</td>\n",
       "      <td>32</td>\n",
       "      <td>16</td>\n",
       "      <td>4</td>\n",
       "      <td>eager</td>\n",
       "      <td>llamamoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>olmoe</th>\n",
       "      <td>OLMoE</td>\n",
       "      <td>OL</td>\n",
       "      <td>causal</td>\n",
       "      <td>6.92</td>\n",
       "      <td>16</td>\n",
       "      <td>64</td>\n",
       "      <td>8</td>\n",
       "      <td>flash_attention_2</td>\n",
       "      <td>olmoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>olmoesft</th>\n",
       "      <td>OLMoE-SFT</td>\n",
       "      <td>OL-S</td>\n",
       "      <td>causal</td>\n",
       "      <td>6.92</td>\n",
       "      <td>16</td>\n",
       "      <td>64</td>\n",
       "      <td>8</td>\n",
       "      <td>flash_attention_2</td>\n",
       "      <td>olmoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>olmoedpo</th>\n",
       "      <td>OLMoE-DPO</td>\n",
       "      <td>OL-D</td>\n",
       "      <td>causal</td>\n",
       "      <td>6.92</td>\n",
       "      <td>16</td>\n",
       "      <td>64</td>\n",
       "      <td>8</td>\n",
       "      <td>flash_attention_2</td>\n",
       "      <td>olmoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>olmoeins</th>\n",
       "      <td>OLMoE-Instruct</td>\n",
       "      <td>OL-I</td>\n",
       "      <td>causal</td>\n",
       "      <td>6.92</td>\n",
       "      <td>16</td>\n",
       "      <td>64</td>\n",
       "      <td>8</td>\n",
       "      <td>flash_attention_2</td>\n",
       "      <td>olmoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>jetmoe</th>\n",
       "      <td>JetMoE</td>\n",
       "      <td>JT</td>\n",
       "      <td>causal</td>\n",
       "      <td>8.52</td>\n",
       "      <td>24</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>flash_attention_2</td>\n",
       "      <td>jetmoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>jetmoesft</th>\n",
       "      <td>JetMoE-SFT</td>\n",
       "      <td>JT-S</td>\n",
       "      <td>causal</td>\n",
       "      <td>8.52</td>\n",
       "      <td>24</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>flash_attention_2</td>\n",
       "      <td>jetmoe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>jetmoechat</th>\n",
       "      <td>JetMoE-Chat</td>\n",
       "      <td>JT-C</td>\n",
       "      <td>causal</td>\n",
       "      <td>8.52</td>\n",
       "      <td>24</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>flash_attention_2</td>\n",
       "      <td>jetmoe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  model_name model_abbr model_type  model_num_params  \\\n",
       "key                                                                    \n",
       "llamamoe        LLaMA-MoE-v1        LL1     causal              6.74   \n",
       "llamamoes   LLaMA-MoE-v1-SFT      LL1-S     causal              6.74   \n",
       "olmoe                  OLMoE         OL     causal              6.92   \n",
       "olmoesft           OLMoE-SFT       OL-S     causal              6.92   \n",
       "olmoedpo           OLMoE-DPO       OL-D     causal              6.92   \n",
       "olmoeins      OLMoE-Instruct       OL-I     causal              6.92   \n",
       "jetmoe                JetMoE         JT     causal              8.52   \n",
       "jetmoesft         JetMoE-SFT       JT-S     causal              8.52   \n",
       "jetmoechat       JetMoE-Chat       JT-C     causal              8.52   \n",
       "\n",
       "            model_num_layers  model_num_experts  model_top_k  \\\n",
       "key                                                            \n",
       "llamamoe                  32                 16            4   \n",
       "llamamoes                 32                 16            4   \n",
       "olmoe                     16                 64            8   \n",
       "olmoesft                  16                 64            8   \n",
       "olmoedpo                  16                 64            8   \n",
       "olmoeins                  16                 64            8   \n",
       "jetmoe                    24                  8            2   \n",
       "jetmoesft                 24                  8            2   \n",
       "jetmoechat                24                  8            2   \n",
       "\n",
       "                   model_attn model_group  \n",
       "key                                        \n",
       "llamamoe                eager    llamamoe  \n",
       "llamamoes               eager    llamamoe  \n",
       "olmoe       flash_attention_2       olmoe  \n",
       "olmoesft    flash_attention_2       olmoe  \n",
       "olmoedpo    flash_attention_2       olmoe  \n",
       "olmoeins    flash_attention_2       olmoe  \n",
       "jetmoe      flash_attention_2      jetmoe  \n",
       "jetmoesft   flash_attention_2      jetmoe  \n",
       "jetmoechat  flash_attention_2      jetmoe  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main_model_config = (\n",
    "    model_config.query(\"main\")\n",
    "    .drop(columns=\"main\")\n",
    "    .rename(columns={k: f\"model_{k}\" for k in model_config.columns})\n",
    ")\n",
    "\n",
    "model_colors = {\n",
    "    key: px.colors.qualitative.Dark24[i] for i, key in enumerate(main_model_config.index.values)\n",
    "}\n",
    "\n",
    "cmp_groups = {\n",
    "    \"llamamoe\": [\"llamamoe\", \"llamamoes\"],\n",
    "    \"olmoe\": [\"olmoe\", \"olmoesft\", \"olmoedpo\", \"olmoeins\"],\n",
    "    \"jetmoe\": [\"jetmoe\", \"jetmoesft\", \"jetmoechat\"],\n",
    "}\n",
    "\n",
    "new_name = {\n",
    "    \"llamamoe\": \"LLaMA-MoE-v1\",\n",
    "    \"llamamoes\": \"LLaMA-MoE-v1-SFT\",\n",
    "    \"olmoe\": \"OLMoE\",\n",
    "    \"olmoesft\": \"OLMoE-SFT\",\n",
    "    \"olmoedpo\": \"OLMoE-DPO\",\n",
    "    \"olmoeins\": \"OLMoE-Instruct\",\n",
    "    \"jetmoe\": \"JetMoE\",\n",
    "    \"jetmoesft\": \"JetMoE-SFT\",\n",
    "    \"jetmoechat\": \"JetMoE-Chat\",\n",
    "}\n",
    "\n",
    "cmp_keys = [v for vs in cmp_groups.values() for v in vs]\n",
    "\n",
    "cmp_model_config = (\n",
    "    model_config.loc[cmp_keys]\n",
    "    .drop(columns=\"main\")\n",
    "    .rename(columns={k: f\"model_{k}\" for k in model_config.columns})\n",
    ")\n",
    "\n",
    "for k, vs in cmp_groups.items():\n",
    "    for v in vs:\n",
    "        cmp_model_config.loc[v, \"model_group\"] = k\n",
    "        cmp_model_config.loc[v, \"model_name\"] = new_name[v]\n",
    "\n",
    "cmp_model_config[\"model_group\"] = cmp_model_config[\"model_group\"].astype(model_config.index.dtype)\n",
    "cmp_model_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "05ed4da3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data_name</th>\n",
       "      <th>data_abbr</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>c4</th>\n",
       "      <td>C4</td>\n",
       "      <td>C4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cc2306</th>\n",
       "      <td>CommonCrawl</td>\n",
       "      <td>CC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>book</th>\n",
       "      <td>Books</td>\n",
       "      <td>BK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wikipedia</th>\n",
       "      <td>Wikipedia</td>\n",
       "      <td>WK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>arxiv</th>\n",
       "      <td>ArXiv</td>\n",
       "      <td>AX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>stackexchange</th>\n",
       "      <td>StackExchange</td>\n",
       "      <td>SE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>github</th>\n",
       "      <td>GitHub</td>\n",
       "      <td>GH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lmarena</th>\n",
       "      <td>LMArena</td>\n",
       "      <td>LM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>math</th>\n",
       "      <td>OpenMath</td>\n",
       "      <td>OM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>code</th>\n",
       "      <td>OpenCode</td>\n",
       "      <td>OC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>science</th>\n",
       "      <td>OpenScience</td>\n",
       "      <td>OS</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   data_name data_abbr\n",
       "key                                   \n",
       "c4                        C4        C4\n",
       "cc2306           CommonCrawl        CC\n",
       "book                   Books        BK\n",
       "wikipedia          Wikipedia        WK\n",
       "arxiv                  ArXiv        AX\n",
       "stackexchange  StackExchange        SE\n",
       "github                GitHub        GH\n",
       "lmarena              LMArena        LM\n",
       "math                OpenMath        OM\n",
       "code                OpenCode        OC\n",
       "science          OpenScience        OS"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main_data_config = data_config.rename(columns={k: f\"data_{k}\" for k in data_config.columns})\n",
    "main_data_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0539e947",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">best_f1</th>\n",
       "      <th colspan=\"4\" halign=\"left\">best_m</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>seg_len</th>\n",
       "      <th>4</th>\n",
       "      <th>16</th>\n",
       "      <th>64</th>\n",
       "      <th>256</th>\n",
       "      <th>4</th>\n",
       "      <th>16</th>\n",
       "      <th>64</th>\n",
       "      <th>256</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>model_group</th>\n",
       "      <th>model_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">llamamoe</th>\n",
       "      <th>LLaMA-MoE-v1</th>\n",
       "      <td>0.557779</td>\n",
       "      <td>0.452880</td>\n",
       "      <td>0.416099</td>\n",
       "      <td>0.406180</td>\n",
       "      <td>1.029198</td>\n",
       "      <td>2.392017</td>\n",
       "      <td>2.923047</td>\n",
       "      <td>3.521086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LLaMA-MoE-v1-SFT</th>\n",
       "      <td>0.557927</td>\n",
       "      <td>0.452827</td>\n",
       "      <td>0.416027</td>\n",
       "      <td>0.406179</td>\n",
       "      <td>1.028538</td>\n",
       "      <td>2.390347</td>\n",
       "      <td>2.922548</td>\n",
       "      <td>3.520766</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">olmoe</th>\n",
       "      <th>OLMoE</th>\n",
       "      <td>0.646949</td>\n",
       "      <td>0.509072</td>\n",
       "      <td>0.455343</td>\n",
       "      <td>0.426438</td>\n",
       "      <td>0.997344</td>\n",
       "      <td>1.056418</td>\n",
       "      <td>1.205091</td>\n",
       "      <td>1.187848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-DPO</th>\n",
       "      <td>0.650923</td>\n",
       "      <td>0.513726</td>\n",
       "      <td>0.460383</td>\n",
       "      <td>0.432470</td>\n",
       "      <td>1.001774</td>\n",
       "      <td>1.065452</td>\n",
       "      <td>1.221628</td>\n",
       "      <td>1.171647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-Instruct</th>\n",
       "      <td>0.650671</td>\n",
       "      <td>0.513379</td>\n",
       "      <td>0.460033</td>\n",
       "      <td>0.432102</td>\n",
       "      <td>1.001423</td>\n",
       "      <td>1.064703</td>\n",
       "      <td>1.220646</td>\n",
       "      <td>1.170233</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-SFT</th>\n",
       "      <td>0.651482</td>\n",
       "      <td>0.514721</td>\n",
       "      <td>0.461546</td>\n",
       "      <td>0.433809</td>\n",
       "      <td>1.002181</td>\n",
       "      <td>1.065605</td>\n",
       "      <td>1.220275</td>\n",
       "      <td>1.170159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">jetmoe</th>\n",
       "      <th>JetMoE</th>\n",
       "      <td>0.602158</td>\n",
       "      <td>0.474540</td>\n",
       "      <td>0.427762</td>\n",
       "      <td>0.410949</td>\n",
       "      <td>1.093253</td>\n",
       "      <td>2.259037</td>\n",
       "      <td>2.687439</td>\n",
       "      <td>3.154335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JetMoE-Chat</th>\n",
       "      <td>0.600186</td>\n",
       "      <td>0.473155</td>\n",
       "      <td>0.426525</td>\n",
       "      <td>0.410049</td>\n",
       "      <td>1.090559</td>\n",
       "      <td>2.264925</td>\n",
       "      <td>2.702572</td>\n",
       "      <td>3.184174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JetMoE-SFT</th>\n",
       "      <td>0.600133</td>\n",
       "      <td>0.473083</td>\n",
       "      <td>0.426441</td>\n",
       "      <td>0.409977</td>\n",
       "      <td>1.090510</td>\n",
       "      <td>2.265280</td>\n",
       "      <td>2.703776</td>\n",
       "      <td>3.186965</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               best_f1                                \\\n",
       "seg_len                            4         16        64        256   \n",
       "model_group model_name                                                 \n",
       "llamamoe    LLaMA-MoE-v1      0.557779  0.452880  0.416099  0.406180   \n",
       "            LLaMA-MoE-v1-SFT  0.557927  0.452827  0.416027  0.406179   \n",
       "olmoe       OLMoE             0.646949  0.509072  0.455343  0.426438   \n",
       "            OLMoE-DPO         0.650923  0.513726  0.460383  0.432470   \n",
       "            OLMoE-Instruct    0.650671  0.513379  0.460033  0.432102   \n",
       "            OLMoE-SFT         0.651482  0.514721  0.461546  0.433809   \n",
       "jetmoe      JetMoE            0.602158  0.474540  0.427762  0.410949   \n",
       "            JetMoE-Chat       0.600186  0.473155  0.426525  0.410049   \n",
       "            JetMoE-SFT        0.600133  0.473083  0.426441  0.409977   \n",
       "\n",
       "                                best_m                                \n",
       "seg_len                            4         16        64        256  \n",
       "model_group model_name                                                \n",
       "llamamoe    LLaMA-MoE-v1      1.029198  2.392017  2.923047  3.521086  \n",
       "            LLaMA-MoE-v1-SFT  1.028538  2.390347  2.922548  3.520766  \n",
       "olmoe       OLMoE             0.997344  1.056418  1.205091  1.187848  \n",
       "            OLMoE-DPO         1.001774  1.065452  1.221628  1.171647  \n",
       "            OLMoE-Instruct    1.001423  1.064703  1.220646  1.170233  \n",
       "            OLMoE-SFT         1.002181  1.065605  1.220275  1.170159  \n",
       "jetmoe      JetMoE            1.093253  2.259037  2.687439  3.154335  \n",
       "            JetMoE-Chat       1.090559  2.264925  2.702572  3.184174  \n",
       "            JetMoE-SFT        1.090510  2.265280  2.703776  3.186965  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "root_dir = Path(\"../output/srp_mpq\")\n",
    "\n",
    "dfs = {\n",
    "    p.stem: pd.merge(pd.read_parquet(p), cmp_model_config, left_on=\"model\", right_index=True)\n",
    "    for p in root_dir.glob(\"*.parquet\")\n",
    "}\n",
    "\n",
    "for key in dfs.keys():\n",
    "    if \"dataset\" in dfs[key].columns:\n",
    "        dfs[key] = pd.merge(dfs[key], main_data_config, left_on=\"dataset\", right_index=True)\n",
    "\n",
    "for df in dfs.values():\n",
    "    df[\"model\"] = df[\"model\"].astype(model_config.index.dtype)\n",
    "    if \"dataset\" in df.columns:\n",
    "        df[\"dataset\"] = df[\"dataset\"].astype(data_config.index.dtype)\n",
    "\n",
    "dfs[\"mg\"].pivot(\n",
    "    index=[\"model_group\", \"model_name\"], columns=\"seg_len\", values=[\"best_f1\", \"best_m\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9bdc647a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataset</th>\n",
       "      <th>c4</th>\n",
       "      <th>cc2306</th>\n",
       "      <th>book</th>\n",
       "      <th>wikipedia</th>\n",
       "      <th>arxiv</th>\n",
       "      <th>stackexchange</th>\n",
       "      <th>github</th>\n",
       "      <th>lmarena</th>\n",
       "      <th>math</th>\n",
       "      <th>code</th>\n",
       "      <th>science</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>model_group</th>\n",
       "      <th>model_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">llamamoe</th>\n",
       "      <th>LLaMA-MoE-v1</th>\n",
       "      <td>0.450365</td>\n",
       "      <td>0.450565</td>\n",
       "      <td>0.448192</td>\n",
       "      <td>0.454389</td>\n",
       "      <td>0.454172</td>\n",
       "      <td>0.455140</td>\n",
       "      <td>0.456557</td>\n",
       "      <td>0.456482</td>\n",
       "      <td>0.451833</td>\n",
       "      <td>0.452922</td>\n",
       "      <td>0.451186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LLaMA-MoE-v1-SFT</th>\n",
       "      <td>0.450131</td>\n",
       "      <td>0.450435</td>\n",
       "      <td>0.448012</td>\n",
       "      <td>0.454107</td>\n",
       "      <td>0.454214</td>\n",
       "      <td>0.455006</td>\n",
       "      <td>0.456492</td>\n",
       "      <td>0.456385</td>\n",
       "      <td>0.452112</td>\n",
       "      <td>0.452437</td>\n",
       "      <td>0.451884</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">olmoe</th>\n",
       "      <th>OLMoE</th>\n",
       "      <td>0.455232</td>\n",
       "      <td>0.454706</td>\n",
       "      <td>0.455410</td>\n",
       "      <td>0.526970</td>\n",
       "      <td>0.535874</td>\n",
       "      <td>0.550433</td>\n",
       "      <td>0.565604</td>\n",
       "      <td>0.523845</td>\n",
       "      <td>0.529720</td>\n",
       "      <td>0.547402</td>\n",
       "      <td>0.488658</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-DPO</th>\n",
       "      <td>0.453336</td>\n",
       "      <td>0.453536</td>\n",
       "      <td>0.454904</td>\n",
       "      <td>0.524818</td>\n",
       "      <td>0.545393</td>\n",
       "      <td>0.558625</td>\n",
       "      <td>0.578345</td>\n",
       "      <td>0.523817</td>\n",
       "      <td>0.541753</td>\n",
       "      <td>0.557496</td>\n",
       "      <td>0.492086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-Instruct</th>\n",
       "      <td>0.453118</td>\n",
       "      <td>0.453374</td>\n",
       "      <td>0.454745</td>\n",
       "      <td>0.524696</td>\n",
       "      <td>0.545053</td>\n",
       "      <td>0.558308</td>\n",
       "      <td>0.577966</td>\n",
       "      <td>0.523748</td>\n",
       "      <td>0.541030</td>\n",
       "      <td>0.557106</td>\n",
       "      <td>0.491492</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-SFT</th>\n",
       "      <td>0.453791</td>\n",
       "      <td>0.453690</td>\n",
       "      <td>0.454565</td>\n",
       "      <td>0.525979</td>\n",
       "      <td>0.546128</td>\n",
       "      <td>0.559705</td>\n",
       "      <td>0.579066</td>\n",
       "      <td>0.524998</td>\n",
       "      <td>0.543172</td>\n",
       "      <td>0.559711</td>\n",
       "      <td>0.493525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">jetmoe</th>\n",
       "      <th>JetMoE</th>\n",
       "      <td>0.475049</td>\n",
       "      <td>0.474884</td>\n",
       "      <td>0.470432</td>\n",
       "      <td>0.480482</td>\n",
       "      <td>0.475177</td>\n",
       "      <td>0.475215</td>\n",
       "      <td>0.470728</td>\n",
       "      <td>0.477087</td>\n",
       "      <td>0.473551</td>\n",
       "      <td>0.473441</td>\n",
       "      <td>0.474037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JetMoE-Chat</th>\n",
       "      <td>0.473541</td>\n",
       "      <td>0.473573</td>\n",
       "      <td>0.469329</td>\n",
       "      <td>0.479443</td>\n",
       "      <td>0.473990</td>\n",
       "      <td>0.473865</td>\n",
       "      <td>0.469668</td>\n",
       "      <td>0.475743</td>\n",
       "      <td>0.471043</td>\n",
       "      <td>0.472132</td>\n",
       "      <td>0.472529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JetMoE-SFT</th>\n",
       "      <td>0.473475</td>\n",
       "      <td>0.473515</td>\n",
       "      <td>0.469279</td>\n",
       "      <td>0.479428</td>\n",
       "      <td>0.473923</td>\n",
       "      <td>0.473807</td>\n",
       "      <td>0.469609</td>\n",
       "      <td>0.475665</td>\n",
       "      <td>0.470966</td>\n",
       "      <td>0.472059</td>\n",
       "      <td>0.472348</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "dataset                             c4    cc2306      book  wikipedia  \\\n",
       "model_group model_name                                                  \n",
       "llamamoe    LLaMA-MoE-v1      0.450365  0.450565  0.448192   0.454389   \n",
       "            LLaMA-MoE-v1-SFT  0.450131  0.450435  0.448012   0.454107   \n",
       "olmoe       OLMoE             0.455232  0.454706  0.455410   0.526970   \n",
       "            OLMoE-DPO         0.453336  0.453536  0.454904   0.524818   \n",
       "            OLMoE-Instruct    0.453118  0.453374  0.454745   0.524696   \n",
       "            OLMoE-SFT         0.453791  0.453690  0.454565   0.525979   \n",
       "jetmoe      JetMoE            0.475049  0.474884  0.470432   0.480482   \n",
       "            JetMoE-Chat       0.473541  0.473573  0.469329   0.479443   \n",
       "            JetMoE-SFT        0.473475  0.473515  0.469279   0.479428   \n",
       "\n",
       "dataset                          arxiv  stackexchange    github   lmarena  \\\n",
       "model_group model_name                                                      \n",
       "llamamoe    LLaMA-MoE-v1      0.454172       0.455140  0.456557  0.456482   \n",
       "            LLaMA-MoE-v1-SFT  0.454214       0.455006  0.456492  0.456385   \n",
       "olmoe       OLMoE             0.535874       0.550433  0.565604  0.523845   \n",
       "            OLMoE-DPO         0.545393       0.558625  0.578345  0.523817   \n",
       "            OLMoE-Instruct    0.545053       0.558308  0.577966  0.523748   \n",
       "            OLMoE-SFT         0.546128       0.559705  0.579066  0.524998   \n",
       "jetmoe      JetMoE            0.475177       0.475215  0.470728  0.477087   \n",
       "            JetMoE-Chat       0.473990       0.473865  0.469668  0.475743   \n",
       "            JetMoE-SFT        0.473923       0.473807  0.469609  0.475665   \n",
       "\n",
       "dataset                           math      code   science  \n",
       "model_group model_name                                      \n",
       "llamamoe    LLaMA-MoE-v1      0.451833  0.452922  0.451186  \n",
       "            LLaMA-MoE-v1-SFT  0.452112  0.452437  0.451884  \n",
       "olmoe       OLMoE             0.529720  0.547402  0.488658  \n",
       "            OLMoE-DPO         0.541753  0.557496  0.492086  \n",
       "            OLMoE-Instruct    0.541030  0.557106  0.491492  \n",
       "            OLMoE-SFT         0.543172  0.559711  0.493525  \n",
       "jetmoe      JetMoE            0.473551  0.473441  0.474037  \n",
       "            JetMoE-Chat       0.471043  0.472132  0.472529  \n",
       "            JetMoE-SFT        0.470966  0.472059  0.472348  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfs[\"md\"].query(\"seg_len == 16\").pivot(\n",
    "    index=[\"model_group\", \"model_name\"], columns=\"dataset\", values=\"best_f1\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ded83fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataset</th>\n",
       "      <th>c4</th>\n",
       "      <th>cc2306</th>\n",
       "      <th>book</th>\n",
       "      <th>wikipedia</th>\n",
       "      <th>arxiv</th>\n",
       "      <th>stackexchange</th>\n",
       "      <th>github</th>\n",
       "      <th>lmarena</th>\n",
       "      <th>math</th>\n",
       "      <th>code</th>\n",
       "      <th>science</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>model_group</th>\n",
       "      <th>model_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">llamamoe</th>\n",
       "      <th>LLaMA-MoE-v1</th>\n",
       "      <td>-0.005554</td>\n",
       "      <td>-0.005112</td>\n",
       "      <td>-0.010352</td>\n",
       "      <td>0.003333</td>\n",
       "      <td>0.002852</td>\n",
       "      <td>0.004990</td>\n",
       "      <td>0.008118</td>\n",
       "      <td>0.007955</td>\n",
       "      <td>-0.002313</td>\n",
       "      <td>0.000093</td>\n",
       "      <td>-0.003741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LLaMA-MoE-v1-SFT</th>\n",
       "      <td>-0.005953</td>\n",
       "      <td>-0.005281</td>\n",
       "      <td>-0.010632</td>\n",
       "      <td>0.002826</td>\n",
       "      <td>0.003063</td>\n",
       "      <td>0.004814</td>\n",
       "      <td>0.008095</td>\n",
       "      <td>0.007859</td>\n",
       "      <td>-0.001577</td>\n",
       "      <td>-0.000860</td>\n",
       "      <td>-0.002081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">olmoe</th>\n",
       "      <th>OLMoE</th>\n",
       "      <td>-0.105762</td>\n",
       "      <td>-0.106794</td>\n",
       "      <td>-0.105412</td>\n",
       "      <td>0.035159</td>\n",
       "      <td>0.052648</td>\n",
       "      <td>0.081247</td>\n",
       "      <td>0.111048</td>\n",
       "      <td>0.029019</td>\n",
       "      <td>0.040560</td>\n",
       "      <td>0.075293</td>\n",
       "      <td>-0.040101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-DPO</th>\n",
       "      <td>-0.117552</td>\n",
       "      <td>-0.117165</td>\n",
       "      <td>-0.114501</td>\n",
       "      <td>0.021591</td>\n",
       "      <td>0.061641</td>\n",
       "      <td>0.087398</td>\n",
       "      <td>0.125784</td>\n",
       "      <td>0.019643</td>\n",
       "      <td>0.054555</td>\n",
       "      <td>0.085201</td>\n",
       "      <td>-0.042125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-Instruct</th>\n",
       "      <td>-0.117380</td>\n",
       "      <td>-0.116883</td>\n",
       "      <td>-0.114211</td>\n",
       "      <td>0.022045</td>\n",
       "      <td>0.061698</td>\n",
       "      <td>0.087517</td>\n",
       "      <td>0.125808</td>\n",
       "      <td>0.020199</td>\n",
       "      <td>0.053861</td>\n",
       "      <td>0.085176</td>\n",
       "      <td>-0.042633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OLMoE-SFT</th>\n",
       "      <td>-0.118375</td>\n",
       "      <td>-0.118571</td>\n",
       "      <td>-0.116871</td>\n",
       "      <td>0.021872</td>\n",
       "      <td>0.061018</td>\n",
       "      <td>0.087396</td>\n",
       "      <td>0.125010</td>\n",
       "      <td>0.019967</td>\n",
       "      <td>0.055275</td>\n",
       "      <td>0.087407</td>\n",
       "      <td>-0.041180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">jetmoe</th>\n",
       "      <th>JetMoE</th>\n",
       "      <td>0.001072</td>\n",
       "      <td>0.000725</td>\n",
       "      <td>-0.008656</td>\n",
       "      <td>0.012522</td>\n",
       "      <td>0.001344</td>\n",
       "      <td>0.001422</td>\n",
       "      <td>-0.008034</td>\n",
       "      <td>0.005367</td>\n",
       "      <td>-0.002085</td>\n",
       "      <td>-0.002316</td>\n",
       "      <td>-0.001060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JetMoE-Chat</th>\n",
       "      <td>0.000816</td>\n",
       "      <td>0.000884</td>\n",
       "      <td>-0.008086</td>\n",
       "      <td>0.013291</td>\n",
       "      <td>0.001765</td>\n",
       "      <td>0.001502</td>\n",
       "      <td>-0.007369</td>\n",
       "      <td>0.005470</td>\n",
       "      <td>-0.004462</td>\n",
       "      <td>-0.002162</td>\n",
       "      <td>-0.001322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JetMoE-SFT</th>\n",
       "      <td>0.000827</td>\n",
       "      <td>0.000912</td>\n",
       "      <td>-0.008043</td>\n",
       "      <td>0.013411</td>\n",
       "      <td>0.001775</td>\n",
       "      <td>0.001529</td>\n",
       "      <td>-0.007343</td>\n",
       "      <td>0.005456</td>\n",
       "      <td>-0.004475</td>\n",
       "      <td>-0.002165</td>\n",
       "      <td>-0.001554</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "dataset                             c4    cc2306      book  wikipedia  \\\n",
       "model_group model_name                                                  \n",
       "llamamoe    LLaMA-MoE-v1     -0.005554 -0.005112 -0.010352   0.003333   \n",
       "            LLaMA-MoE-v1-SFT -0.005953 -0.005281 -0.010632   0.002826   \n",
       "olmoe       OLMoE            -0.105762 -0.106794 -0.105412   0.035159   \n",
       "            OLMoE-DPO        -0.117552 -0.117165 -0.114501   0.021591   \n",
       "            OLMoE-Instruct   -0.117380 -0.116883 -0.114211   0.022045   \n",
       "            OLMoE-SFT        -0.118375 -0.118571 -0.116871   0.021872   \n",
       "jetmoe      JetMoE            0.001072  0.000725 -0.008656   0.012522   \n",
       "            JetMoE-Chat       0.000816  0.000884 -0.008086   0.013291   \n",
       "            JetMoE-SFT        0.000827  0.000912 -0.008043   0.013411   \n",
       "\n",
       "dataset                          arxiv  stackexchange    github   lmarena  \\\n",
       "model_group model_name                                                      \n",
       "llamamoe    LLaMA-MoE-v1      0.002852       0.004990  0.008118  0.007955   \n",
       "            LLaMA-MoE-v1-SFT  0.003063       0.004814  0.008095  0.007859   \n",
       "olmoe       OLMoE             0.052648       0.081247  0.111048  0.029019   \n",
       "            OLMoE-DPO         0.061641       0.087398  0.125784  0.019643   \n",
       "            OLMoE-Instruct    0.061698       0.087517  0.125808  0.020199   \n",
       "            OLMoE-SFT         0.061018       0.087396  0.125010  0.019967   \n",
       "jetmoe      JetMoE            0.001344       0.001422 -0.008034  0.005367   \n",
       "            JetMoE-Chat       0.001765       0.001502 -0.007369  0.005470   \n",
       "            JetMoE-SFT        0.001775       0.001529 -0.007343  0.005456   \n",
       "\n",
       "dataset                           math      code   science  \n",
       "model_group model_name                                      \n",
       "llamamoe    LLaMA-MoE-v1     -0.002313  0.000093 -0.003741  \n",
       "            LLaMA-MoE-v1-SFT -0.001577 -0.000860 -0.002081  \n",
       "olmoe       OLMoE             0.040560  0.075293 -0.040101  \n",
       "            OLMoE-DPO         0.054555  0.085201 -0.042125  \n",
       "            OLMoE-Instruct    0.053861  0.085176 -0.042633  \n",
       "            OLMoE-SFT         0.055275  0.087407 -0.041180  \n",
       "jetmoe      JetMoE           -0.002085 -0.002316 -0.001060  \n",
       "            JetMoE-Chat      -0.004462 -0.002162 -0.001322  \n",
       "            JetMoE-SFT       -0.004475 -0.002165 -0.001554  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample_seg_len = 16\n",
    "\n",
    "# Join the `mg` frame's best F1 (renamed to `gen_best_f1`) with the per-dataset\n",
    "# `md` frame on the columns they share, then keep only the selected seg_len.\n",
    "mdf = (\n",
    "    dfs[\"mg\"]\n",
    "    .drop(columns=[\"best_m\", \"ci_lb\", \"ci_ub\"])\n",
    "    .rename(columns={\"best_f1\": \"gen_best_f1\"})\n",
    "    .merge(dfs[\"md\"].drop(columns=[\"act_r\", \"best_m\", \"ci_lb\", \"ci_ub\"]))\n",
    "    .query(f\"seg_len == {sample_seg_len}\")\n",
    "    .drop(columns=\"seg_len\")\n",
    ")\n",
    "\n",
    "# Relative difference of each dataset's best F1 against `gen_best_f1`.\n",
    "mdf = mdf.assign(\n",
    "    f1_diff=(mdf[\"best_f1\"] - mdf[\"gen_best_f1\"]) / mdf[\"gen_best_f1\"]\n",
    ")\n",
    "mdf.pivot(index=[\"model_group\", \"model_name\"], columns=\"dataset\", values=\"f1_diff\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "85e53324",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#E15F99"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x",
         "y": {
          "bdata": "itVR+ta/dr/z3UQE7+90vxc3LyA2M4W/SPvNjwVNaz+vt/w2cV1nP0F1pezCcHQ/nxWpM1aggD9mvCPykEqAP6GGiwTv8WK/M4esJ3tkGD9BEkV/NaVuvw==",
          "dtype": "f8"
         },
         "yaxis": "y"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#E15F99"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x2",
         "y": {
          "bdata": "8PIF0/hheL8fGeNpBqF1v/mj0fz6xYW/zdLqMX0nZz9QE2za/BZpP1U9dcSHt3M/wbU9fiyUgD9tQXdpVRiAPyhndpKq11m/7uA6G8kvTL+CnpUROA1hvw==",
          "dtype": "f8"
         },
         "yaxis": "y2"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#1CA71C"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x5",
         "y": {
          "bdata": "iEZQhzETu79kH7E53Fa7v8+j0AFF/Lq/c/t34FYAoj/q31FPq/SqP5E4FtSfzLQ/gOMBN6ptvD8baWJPCredPzGF84hKxKQ/KVN+XmZGsz9Rrk0tFoikvw==",
          "dtype": "f8"
         },
         "yaxis": "y5"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#1CA71C"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x6",
         "y": {
          "bdata": "o/o2VsxNvr9lLGKOo1q+v7CykyY+672/aC7D1bhllj8H57RV0D2vPwluUeGcX7Y/aWVA9lEAwD99suJaHnKUP6vRkLLyTKw/stHPrE1gtj9flGW/hhWlvw==",
          "dtype": "f8"
         },
         "yaxis": "y6"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#1CA71C"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x7",
         "y": {
          "bdata": "JnYtsegXvr+ZBOJAgP69vxZ8gNv2T72/ZPII2f4blj9eIDyTb4+vPwlXotO6X7Y/INgawq4ZwD+kGoq1Ux2UP+pjVtum7qs/aeQHuL7PtT9z0FoDYZGlvw==",
          "dtype": "f8"
         },
         "yaxis": "y7"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#1CA71C"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x8",
         "y": {
          "bdata": "f6iSMp8Mvr+4U3tgCuy9v6Hck/rpPL2/QZIQY/6Slj9tHU3e1ZavP3/bj06AZ7Y/9EEFWnsawD+uYBlzCa+UP99ye82tk6s/1U33QhPOtT/0aDuOBNSlvw==",
          "dtype": "f8"
         },
         "yaxis": "y8"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#222A2A"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x9",
         "y": {
          "bdata": "3CkbB06SUT9+u6RBrsFHPzGfJhlHuoG/z/o75CyliT8lvvR+dQNWP7dv12jHTVc/Y1f1QuBzgL/EQAMxOvt1P8Od0EMhFGG/gk0AfEj4Yr8g7VCLkVxRvw==",
          "dtype": "f8"
         },
         "yaxis": "y9"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#222A2A"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x10",
         "y": {
          "bdata": "503dBloXSz+yrCOHpN5NPybfi8W3eIC//1bgp2V3iz8xnnouChVdP+oIsZPUDlk/mKts/RQUfr+lD+jGcFl2P79yCmGNVHK/2BL+sY68Yb/KroLoMndZvw==",
          "dtype": "f8"
         },
         "yaxis": "y10"
        },
        {
         "hoverinfo": "skip",
         "marker": {
          "color": "#222A2A"
         },
         "showlegend": false,
         "type": "bar",
         "x": [
          "C4",
          "CC",
          "BK",
          "WK",
          "AX",
          "SE",
          "GH",
          "LM",
          "OM",
          "OC",
          "OS"
         ],
         "xaxis": "x11",
         "y": {
          "bdata": "yxBDg6e/Sj/YsgyLw/lMPwXILdqPj4C/WDpArjg4iz+2pazvcOpcP3KCiWSHnFg/3flGiDAvfr9nEZ9uGWh2P9fWTi8ZR3K/mkBVwri2Yb+9ZMtK9qhVvw==",
          "dtype": "f8"
         },
         "yaxis": "y11"
        }
       ],
       "layout": {
        "annotations": [
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "LLaMA-MoE-v1",
          "x": 0.123125,
          "xanchor": "center",
          "xref": "paper",
          "y": 1,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "LLaMA-MoE-v1-SFT",
          "x": 0.37437499999999996,
          "xanchor": "center",
          "xref": "paper",
          "y": 1,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "OLMoE",
          "x": 0.123125,
          "xanchor": "center",
          "xref": "paper",
          "y": 0.6333333333333333,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "OLMoE-SFT",
          "x": 0.37437499999999996,
          "xanchor": "center",
          "xref": "paper",
          "y": 0.6333333333333333,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "OLMoE-DPO",
          "x": 0.6256249999999999,
          "xanchor": "center",
          "xref": "paper",
          "y": 0.6333333333333333,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "OLMoE-Instruct",
          "x": 0.8768750000000001,
          "xanchor": "center",
          "xref": "paper",
          "y": 0.6333333333333333,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "JetMoE",
          "x": 0.123125,
          "xanchor": "center",
          "xref": "paper",
          "y": 0.26666666666666666,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "JetMoE-SFT",
          "x": 0.37437499999999996,
          "xanchor": "center",
          "xref": "paper",
          "y": 0.26666666666666666,
          "yanchor": "bottom",
          "yref": "paper"
         },
         {
          "font": {
           "size": 20
          },
          "showarrow": false,
          "text": "JetMoE-Chat",
          "x": 0.6256249999999999,
          "xanchor": "center",
          "xref": "paper",
          "y": 0.26666666666666666,
          "yanchor": "bottom",
          "yref": "paper"
         }
        ],
        "height": 600,
        "margin": {
         "b": 15,
         "l": 60,
         "r": 15,
         "t": 30
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermap": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermap"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "width": 1000,
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          0.24625
         ],
         "matches": "x9",
         "showticklabels": false,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis10": {
         "anchor": "y10",
         "domain": [
          0.25125,
          0.49749999999999994
         ],
         "matches": "x9",
         "showticklabels": true,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis11": {
         "anchor": "y11",
         "domain": [
          0.5025,
          0.7487499999999999
         ],
         "matches": "x9",
         "showticklabels": true,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis12": {
         "anchor": "y12",
         "domain": [
          0.75375,
          1
         ],
         "matches": "x9"
        },
        "xaxis2": {
         "anchor": "y2",
         "domain": [
          0.25125,
          0.49749999999999994
         ],
         "matches": "x9",
         "showticklabels": false,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis3": {
         "anchor": "y3",
         "domain": [
          0.5025,
          0.7487499999999999
         ],
         "matches": "x9",
         "showticklabels": false
        },
        "xaxis4": {
         "anchor": "y4",
         "domain": [
          0.75375,
          1
         ],
         "matches": "x9",
         "showticklabels": false
        },
        "xaxis5": {
         "anchor": "y5",
         "domain": [
          0,
          0.24625
         ],
         "matches": "x9",
         "showticklabels": false,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis6": {
         "anchor": "y6",
         "domain": [
          0.25125,
          0.49749999999999994
         ],
         "matches": "x9",
         "showticklabels": false,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis7": {
         "anchor": "y7",
         "domain": [
          0.5025,
          0.7487499999999999
         ],
         "matches": "x9",
         "showticklabels": false,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis8": {
         "anchor": "y8",
         "domain": [
          0.75375,
          1
         ],
         "matches": "x9",
         "showticklabels": false,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "xaxis9": {
         "anchor": "y9",
         "domain": [
          0,
          0.24625
         ],
         "showticklabels": true,
         "tickangle": 0,
         "tickfont": {
          "size": 12
         }
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0.7333333333333334,
          1
         ],
         "matches": "y9",
         "showticklabels": true,
         "tickfont": {
          "size": 16
         },
         "ticktext": [
          "-10%",
          "SRP<br>(E,16)",
          "+10%"
         ],
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis10": {
         "anchor": "x10",
         "domain": [
          0,
          0.26666666666666666
         ],
         "matches": "y9",
         "showticklabels": false,
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis11": {
         "anchor": "x11",
         "domain": [
          0,
          0.26666666666666666
         ],
         "matches": "y9",
         "showticklabels": false,
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis12": {
         "anchor": "x12",
         "domain": [
          0,
          0.26666666666666666
         ],
         "matches": "y9",
         "showticklabels": false
        },
        "yaxis2": {
         "anchor": "x2",
         "domain": [
          0.7333333333333334,
          1
         ],
         "matches": "y9",
         "showticklabels": false,
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis3": {
         "anchor": "x3",
         "domain": [
          0.7333333333333334,
          1
         ],
         "matches": "y9",
         "showticklabels": false
        },
        "yaxis4": {
         "anchor": "x4",
         "domain": [
          0.7333333333333334,
          1
         ],
         "matches": "y9",
         "showticklabels": false
        },
        "yaxis5": {
         "anchor": "x5",
         "domain": [
          0.3666666666666667,
          0.6333333333333333
         ],
         "matches": "y9",
         "showticklabels": true,
         "tickfont": {
          "size": 16
         },
         "ticktext": [
          "-10%",
          "SRP<br>(E,16)",
          "+10%"
         ],
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis6": {
         "anchor": "x6",
         "domain": [
          0.3666666666666667,
          0.6333333333333333
         ],
         "matches": "y9",
         "showticklabels": false,
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis7": {
         "anchor": "x7",
         "domain": [
          0.3666666666666667,
          0.6333333333333333
         ],
         "matches": "y9",
         "showticklabels": false,
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis8": {
         "anchor": "x8",
         "domain": [
          0.3666666666666667,
          0.6333333333333333
         ],
         "matches": "y9",
         "showticklabels": false,
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        },
        "yaxis9": {
         "anchor": "x9",
         "domain": [
          0,
          0.26666666666666666
         ],
         "showticklabels": true,
         "tickfont": {
          "size": 16
         },
         "ticktext": [
          "-10%",
          "SRP<br>(E,16)",
          "+10%"
         ],
         "tickvals": [
          -0.1,
          0,
          0.1
         ]
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# One subplot row per comparison group; the column count follows the largest group.\n",
    "num_rows = len(cmp_groups)\n",
    "num_cols = max(len(v) for v in cmp_groups.values())\n",
    "\n",
    "fig = make_subplots(\n",
    "    rows=num_rows,\n",
    "    cols=num_cols,\n",
    "    shared_xaxes=\"all\",\n",
    "    shared_yaxes=\"all\",\n",
    "    horizontal_spacing=0.005,\n",
    "    vertical_spacing=0.1,\n",
    "    # Titles are listed row by row; groups shorter than num_cols are padded with \"\".\n",
    "    subplot_titles=[\n",
    "        \"\" if i >= len(group) else cmp_model_config.loc[group[i], \"model_name\"]\n",
    "        for group in cmp_groups.values()\n",
    "        for i in range(num_cols)\n",
    "    ],\n",
    ")\n",
    "\n",
    "# Font sizes used below: [0] x tick labels, [1] y tick labels, [3] subplot titles.\n",
    "font_size = [12, 16, 18, 20]\n",
    "\n",
    "for i, (group_key, group) in enumerate(cmp_groups.items()):\n",
    "    row = i + 1  # plotly subplot coordinates are 1-based\n",
    "    for j, key in enumerate(group):\n",
    "        col = j + 1\n",
    "\n",
    "        # Bind the key with `@` instead of formatting it into the expression,\n",
    "        # so a key containing a quote character cannot break the query string.\n",
    "        model_df = mdf.query(\"model == @key\")\n",
    "        if model_df.empty:\n",
    "            # No rows for this model: the panel gets no trace and keeps its default axes.\n",
    "            continue\n",
    "\n",
    "        fig.add_bar(\n",
    "            x=model_df[\"data_abbr\"],\n",
    "            y=model_df[\"f1_diff\"],\n",
    "            hoverinfo=\"skip\",\n",
    "            marker=go.bar.Marker(color=model_colors[group_key]),\n",
    "            showlegend=False,\n",
    "            row=row,\n",
    "            col=col,\n",
    "        )\n",
    "\n",
    "        # Only the bottom row shows x tick labels.\n",
    "        fig.update_xaxes(\n",
    "            showticklabels=row == num_rows,\n",
    "            tickangle=0,\n",
    "            tickfont=go.layout.xaxis.Tickfont(size=font_size[0]),\n",
    "            row=row,\n",
    "            col=col,\n",
    "        )\n",
    "\n",
    "        # Every populated panel uses the same three ticks; only the first column labels them.\n",
    "        fig.update_yaxes(showticklabels=col == 1, tickvals=[-0.1, 0, 0.1], row=row, col=col)\n",
    "\n",
    "        if col == 1:\n",
    "            # Replace the numeric tick labels with custom text, one entry per tick value.\n",
    "            fig.update_yaxes(\n",
    "                tickfont=go.layout.yaxis.Tickfont(size=font_size[1]),\n",
    "                ticktext=[\"-10%\", \"SRP<br>(E,16)\", \"+10%\"],\n",
    "                row=row,\n",
    "                col=col,\n",
    "            )\n",
    "\n",
    "# Subplot titles are layout annotations, so this sets the title font size.\n",
    "fig.update_annotations(font=go.layout.annotation.Font(size=font_size[3]))\n",
    "fig.update_layout(margin=go.layout.Margin(l=60, r=15, t=30, b=15), width=1000, height=600)\n",
    "\n",
    "# Create the output directory first so the export also works on a fresh checkout.\n",
    "Path(\"./plot\").mkdir(parents=True, exist_ok=True)\n",
    "fig.write_image(\"./plot/msrpddp.pdf\", width=fig.layout.width, height=fig.layout.height)\n",
    "fig.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "moe-lrc",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
