{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_weighted_averages(df, split_col='Split'):\n",
    "    # Compute the total size\n",
    "    total_size = df['Size'].sum()\n",
    "    \n",
    "    # Calculate the weighted averages for each metric\n",
    "    weighted_averages = {}\n",
    "    for col in df.columns[1:-1]:  # Exclude 'Split' and 'Size' columns\n",
    "        weighted_averages[col] = (df[col] * df['Size']).sum() / total_size\n",
    "    \n",
    "    # Create a new row for the 'total' split with the weighted averages\n",
    "    total_row = pd.DataFrame({split_col: ['total'], **weighted_averages, 'Size': [total_size]})\n",
    "    \n",
    "    # Append the total row to the dataframe using concat\n",
    "    df_total = pd.concat([df, total_row], ignore_index=True)\n",
    "    \n",
    "    return df_total\n",
    "\n",
    "def get_percent_decrease(byt5_df, other_df):\n",
    "    return round((byt5_df['Eval Runtime'] - other_df['Eval Runtime']) / byt5_df['Eval Runtime'] * 100, 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Spelling Correction Task"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = 'eval_results/character_tasks/spelling_correction_contextual/{}/{}.csv'\n",
    "\n",
    "# For each model: read its per-split results CSV, then append the\n",
    "# size-weighted 'total' row.\n",
    "\n",
    "# ByT5 (results are stored under the 'T5' directory)\n",
    "byt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('T5', 't5_spelling_correction_contextual_seed45'))\n",
    ")\n",
    "\n",
    "# MrT5\n",
    "mrt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('MrT5', 'mrt5_spelling_correction_contextual_50%_seed392'))\n",
    ")\n",
    "\n",
    "# BPT5\n",
    "bpt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('BPT5', 'bpt5_spelling_correction_contextual_50%_seed941'))\n",
    ")\n",
    "\n",
    "# CanineT5 (kept in the variable `cpt5`, which later cells in this section use)\n",
    "cpt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('CanineT5', 'caninet5_spelling_correction_contextual_50%_seed473'))\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_context_dependent</td>\n",
       "      <td>49.41</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.86</td>\n",
       "      <td>4163</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_context_independent</td>\n",
       "      <td>82.11</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.90</td>\n",
       "      <td>4685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>total</td>\n",
       "      <td>66.73</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.88</td>\n",
       "      <td>8848</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Split  Eval Sequence Accuracy  \\\n",
       "0    test_context_dependent                   49.41   \n",
       "1  test_context_independent                   82.11   \n",
       "2                     total                   66.73   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime  Size  \n",
       "0                          0.0          3.86  4163  \n",
       "1                          0.0          3.90  4685  \n",
       "2                          0.0          3.88  8848  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "byt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_context_dependent</td>\n",
       "      <td>44.20</td>\n",
       "      <td>50.85</td>\n",
       "      <td>2.98</td>\n",
       "      <td>4163</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_context_independent</td>\n",
       "      <td>80.04</td>\n",
       "      <td>50.26</td>\n",
       "      <td>3.04</td>\n",
       "      <td>4685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>total</td>\n",
       "      <td>63.18</td>\n",
       "      <td>50.54</td>\n",
       "      <td>3.01</td>\n",
       "      <td>8848</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Split  Eval Sequence Accuracy  \\\n",
       "0    test_context_dependent                   44.20   \n",
       "1  test_context_independent                   80.04   \n",
       "2                     total                   63.18   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime  Size  \n",
       "0                        50.85          2.98  4163  \n",
       "1                        50.26          3.04  4685  \n",
       "2                        50.54          3.01  8848  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mrt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_context_dependent</td>\n",
       "      <td>36.51</td>\n",
       "      <td>49.34</td>\n",
       "      <td>3.20</td>\n",
       "      <td>4163</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_context_independent</td>\n",
       "      <td>71.08</td>\n",
       "      <td>49.25</td>\n",
       "      <td>3.28</td>\n",
       "      <td>4685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>total</td>\n",
       "      <td>54.81</td>\n",
       "      <td>49.30</td>\n",
       "      <td>3.24</td>\n",
       "      <td>8848</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Split  Eval Sequence Accuracy  \\\n",
       "0    test_context_dependent                   36.51   \n",
       "1  test_context_independent                   71.08   \n",
       "2                     total                   54.81   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime  Size  \n",
       "0                        49.34          3.20  4163  \n",
       "1                        49.25          3.28  4685  \n",
       "2                        49.30          3.24  8848  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bpt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_context_dependent</td>\n",
       "      <td>37.40</td>\n",
       "      <td>50.0</td>\n",
       "      <td>2.94</td>\n",
       "      <td>4163</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_context_independent</td>\n",
       "      <td>74.96</td>\n",
       "      <td>50.0</td>\n",
       "      <td>3.02</td>\n",
       "      <td>4685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>total</td>\n",
       "      <td>57.29</td>\n",
       "      <td>50.0</td>\n",
       "      <td>2.98</td>\n",
       "      <td>8848</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Split  Eval Sequence Accuracy  \\\n",
       "0    test_context_dependent                   37.40   \n",
       "1  test_context_independent                   74.96   \n",
       "2                     total                   57.29   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime  Size  \n",
       "0                         50.0          2.94  4163  \n",
       "1                         50.0          3.02  4685  \n",
       "2                         50.0          2.98  8848  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cpt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    22.59\n",
       "1    22.05\n",
       "2    22.30\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, mrt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    16.99\n",
       "1    16.04\n",
       "2    16.48\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, bpt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    23.72\n",
       "1    22.62\n",
       "2    23.13\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, cpt5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Word Search Task"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = 'eval_results/character_tasks/word_search/{}/{}.csv'\n",
    "\n",
    "# For each model: read its per-split results CSV, then append the\n",
    "# size-weighted 'total' row.\n",
    "\n",
    "# ByT5 (results are stored under the 'T5' directory)\n",
    "byt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('T5', 't5_word_search_seed462')),\n",
    "    split_col='Split',\n",
    ")\n",
    "\n",
    "# MrT5\n",
    "mrt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('MrT5', 'mrt5_word_search_70%_seed54')),\n",
    "    split_col='Split',\n",
    ")\n",
    "\n",
    "# BPT5\n",
    "bpt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('BPT5', 'bpt5_word_search_70%_seed13')),\n",
    "    split_col='Split',\n",
    ")\n",
    "\n",
    "# CanineT5\n",
    "caninet5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('CanineT5', 'caninet5_word_search_75%_seed857')),\n",
    "    split_col='Split',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_oov</td>\n",
       "      <td>78.49</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.56</td>\n",
       "      <td>1367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_paraphrase</td>\n",
       "      <td>85.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.76</td>\n",
       "      <td>6404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>test_overlap</td>\n",
       "      <td>77.31</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.77</td>\n",
       "      <td>5466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>test_paraphrase_overlap</td>\n",
       "      <td>60.37</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.76</td>\n",
       "      <td>4068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total</td>\n",
       "      <td>76.61</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.75</td>\n",
       "      <td>17305</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Split  Eval Sequence Accuracy  \\\n",
       "0                 test_oov                   78.49   \n",
       "1          test_paraphrase                   85.92   \n",
       "2             test_overlap                   77.31   \n",
       "3  test_paraphrase_overlap                   60.37   \n",
       "4                    total                   76.61   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime   Size  \n",
       "0                          0.0          6.56   1367  \n",
       "1                          0.0          6.76   6404  \n",
       "2                          0.0          6.77   5466  \n",
       "3                          0.0          6.76   4068  \n",
       "4                          0.0          6.75  17305  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "byt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_oov</td>\n",
       "      <td>73.96</td>\n",
       "      <td>71.77</td>\n",
       "      <td>2.63</td>\n",
       "      <td>1367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_paraphrase</td>\n",
       "      <td>81.51</td>\n",
       "      <td>71.88</td>\n",
       "      <td>2.75</td>\n",
       "      <td>6404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>test_overlap</td>\n",
       "      <td>72.72</td>\n",
       "      <td>77.69</td>\n",
       "      <td>2.76</td>\n",
       "      <td>5466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>test_paraphrase_overlap</td>\n",
       "      <td>55.48</td>\n",
       "      <td>75.40</td>\n",
       "      <td>2.77</td>\n",
       "      <td>4068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total</td>\n",
       "      <td>72.02</td>\n",
       "      <td>74.53</td>\n",
       "      <td>2.75</td>\n",
       "      <td>17305</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Split  Eval Sequence Accuracy  \\\n",
       "0                 test_oov                   73.96   \n",
       "1          test_paraphrase                   81.51   \n",
       "2             test_overlap                   72.72   \n",
       "3  test_paraphrase_overlap                   55.48   \n",
       "4                    total                   72.02   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime   Size  \n",
       "0                        71.77          2.63   1367  \n",
       "1                        71.88          2.75   6404  \n",
       "2                        77.69          2.76   5466  \n",
       "3                        75.40          2.77   4068  \n",
       "4                        74.53          2.75  17305  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mrt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_oov</td>\n",
       "      <td>78.42</td>\n",
       "      <td>69.95</td>\n",
       "      <td>3.62</td>\n",
       "      <td>1367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_paraphrase</td>\n",
       "      <td>81.84</td>\n",
       "      <td>69.72</td>\n",
       "      <td>3.75</td>\n",
       "      <td>6404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>test_overlap</td>\n",
       "      <td>77.41</td>\n",
       "      <td>69.80</td>\n",
       "      <td>3.80</td>\n",
       "      <td>5466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>test_paraphrase_overlap</td>\n",
       "      <td>57.01</td>\n",
       "      <td>69.78</td>\n",
       "      <td>3.80</td>\n",
       "      <td>4068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total</td>\n",
       "      <td>74.33</td>\n",
       "      <td>69.78</td>\n",
       "      <td>3.77</td>\n",
       "      <td>17305</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Split  Eval Sequence Accuracy  \\\n",
       "0                 test_oov                   78.42   \n",
       "1          test_paraphrase                   81.84   \n",
       "2             test_overlap                   77.41   \n",
       "3  test_paraphrase_overlap                   57.01   \n",
       "4                    total                   74.33   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime   Size  \n",
       "0                        69.95          3.62   1367  \n",
       "1                        69.72          3.75   6404  \n",
       "2                        69.80          3.80   5466  \n",
       "3                        69.78          3.80   4068  \n",
       "4                        69.78          3.77  17305  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bpt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Split</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>test_oov</td>\n",
       "      <td>71.25</td>\n",
       "      <td>75.0</td>\n",
       "      <td>3.02</td>\n",
       "      <td>1367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>test_paraphrase</td>\n",
       "      <td>72.30</td>\n",
       "      <td>75.0</td>\n",
       "      <td>3.14</td>\n",
       "      <td>6404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>test_overlap</td>\n",
       "      <td>74.86</td>\n",
       "      <td>75.0</td>\n",
       "      <td>3.18</td>\n",
       "      <td>5466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>test_paraphrase_overlap</td>\n",
       "      <td>51.89</td>\n",
       "      <td>75.0</td>\n",
       "      <td>3.18</td>\n",
       "      <td>4068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total</td>\n",
       "      <td>68.23</td>\n",
       "      <td>75.0</td>\n",
       "      <td>3.15</td>\n",
       "      <td>17305</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Split  Eval Sequence Accuracy  \\\n",
       "0                 test_oov                   71.25   \n",
       "1          test_paraphrase                   72.30   \n",
       "2             test_overlap                   74.86   \n",
       "3  test_paraphrase_overlap                   51.89   \n",
       "4                    total                   68.23   \n",
       "\n",
       "   Eval Percent Deleted Tokens  Eval Runtime   Size  \n",
       "0                         75.0          3.02   1367  \n",
       "1                         75.0          3.14   6404  \n",
       "2                         75.0          3.18   5466  \n",
       "3                         75.0          3.18   4068  \n",
       "4                         75.0          3.15  17305  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "caninet5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    59.90\n",
       "1    59.39\n",
       "2    59.19\n",
       "3    59.06\n",
       "4    59.29\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, mrt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    44.79\n",
       "1    44.51\n",
       "2    43.92\n",
       "3    43.82\n",
       "4    44.18\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, bpt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    53.98\n",
       "1    53.52\n",
       "2    53.06\n",
       "3    53.00\n",
       "4    53.29\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, caninet5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## XNLI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = 'eval_results/xnli/{}/{}.csv'\n",
    "\n",
    "# For each model: read its per-language results CSV, drop the\n",
    "# 'Language Code' column, then append the size-weighted 'total' row\n",
    "# labelled in the 'Language' column.\n",
    "\n",
    "# ByT5 (results are stored under the 'T5' directory)\n",
    "byt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('T5', 't5_xnli_seed57')).drop(columns='Language Code'),\n",
    "    split_col='Language',\n",
    ")\n",
    "\n",
    "# MrT5\n",
    "mrt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('MrT5', 'mrt5_xnli_50%_seed14')).drop(columns='Language Code'),\n",
    "    split_col='Language',\n",
    ")\n",
    "\n",
    "# BPT5\n",
    "bpt5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('BPT5', 'bpt5_xnli_50%_seed88')).drop(columns='Language Code'),\n",
    "    split_col='Language',\n",
    ")\n",
    "\n",
    "# CanineT5\n",
    "caninet5 = compute_weighted_averages(\n",
    "    pd.read_csv(path.format('CanineT5', 'caninet5_xnli_50%_seed11')).drop(columns='Language Code'),\n",
    "    split_col='Language',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>English</td>\n",
       "      <td>80.30</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.01</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>French</td>\n",
       "      <td>73.93</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.76</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spanish</td>\n",
       "      <td>74.85</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.15</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>German</td>\n",
       "      <td>69.58</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.44</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Greek</td>\n",
       "      <td>64.73</td>\n",
       "      <td>0.0</td>\n",
       "      <td>18.81</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bulgarian</td>\n",
       "      <td>67.47</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17.37</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Russian</td>\n",
       "      <td>64.21</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17.80</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Turkish</td>\n",
       "      <td>61.68</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.77</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>62.97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.00</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Vietnamese</td>\n",
       "      <td>67.37</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.54</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Thai</td>\n",
       "      <td>55.45</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24.95</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Chinese</td>\n",
       "      <td>62.12</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.07</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Hindi</td>\n",
       "      <td>55.41</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24.19</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>60.04</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.14</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Urdu</td>\n",
       "      <td>50.76</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.49</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>total</td>\n",
       "      <td>64.72</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.17</td>\n",
       "      <td>75150</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Language  Eval Sequence Accuracy  Eval Percent Deleted Tokens  \\\n",
       "0      English                   80.30                          0.0   \n",
       "1       French                   73.93                          0.0   \n",
       "2      Spanish                   74.85                          0.0   \n",
       "3       German                   69.58                          0.0   \n",
       "4        Greek                   64.73                          0.0   \n",
       "5    Bulgarian                   67.47                          0.0   \n",
       "6      Russian                   64.21                          0.0   \n",
       "7      Turkish                   61.68                          0.0   \n",
       "8       Arabic                   62.97                          0.0   \n",
       "9   Vietnamese                   67.37                          0.0   \n",
       "10        Thai                   55.45                          0.0   \n",
       "11     Chinese                   62.12                          0.0   \n",
       "12       Hindi                   55.41                          0.0   \n",
       "13     Swahili                   60.04                          0.0   \n",
       "14        Urdu                   50.76                          0.0   \n",
       "15       total                   64.72                          0.0   \n",
       "\n",
       "    Eval Runtime   Size  \n",
       "0           9.01   5010  \n",
       "1          10.76   5010  \n",
       "2          10.15   5010  \n",
       "3          10.44   5010  \n",
       "4          18.81   5010  \n",
       "5          17.37   5010  \n",
       "6          17.80   5010  \n",
       "7           9.77   5010  \n",
       "8          14.00   5010  \n",
       "9          12.54   5010  \n",
       "10         24.95   5010  \n",
       "11          8.07   5010  \n",
       "12         24.19   5010  \n",
       "13          9.14   5010  \n",
       "14         15.49   5010  \n",
       "15         14.17  75150  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "byt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>English</td>\n",
       "      <td>80.20</td>\n",
       "      <td>50.22</td>\n",
       "      <td>5.62</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>French</td>\n",
       "      <td>73.03</td>\n",
       "      <td>50.21</td>\n",
       "      <td>6.67</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spanish</td>\n",
       "      <td>74.25</td>\n",
       "      <td>51.88</td>\n",
       "      <td>6.15</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>German</td>\n",
       "      <td>69.70</td>\n",
       "      <td>47.04</td>\n",
       "      <td>6.77</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Greek</td>\n",
       "      <td>65.03</td>\n",
       "      <td>63.65</td>\n",
       "      <td>9.32</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bulgarian</td>\n",
       "      <td>68.14</td>\n",
       "      <td>63.57</td>\n",
       "      <td>8.63</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Russian</td>\n",
       "      <td>66.25</td>\n",
       "      <td>64.32</td>\n",
       "      <td>8.72</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Turkish</td>\n",
       "      <td>63.11</td>\n",
       "      <td>48.48</td>\n",
       "      <td>6.22</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>63.61</td>\n",
       "      <td>57.98</td>\n",
       "      <td>7.82</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Vietnamese</td>\n",
       "      <td>66.57</td>\n",
       "      <td>51.35</td>\n",
       "      <td>7.64</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Thai</td>\n",
       "      <td>58.02</td>\n",
       "      <td>67.08</td>\n",
       "      <td>11.87</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Chinese</td>\n",
       "      <td>60.34</td>\n",
       "      <td>48.03</td>\n",
       "      <td>4.93</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Hindi</td>\n",
       "      <td>57.15</td>\n",
       "      <td>66.96</td>\n",
       "      <td>11.51</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>57.92</td>\n",
       "      <td>47.34</td>\n",
       "      <td>5.92</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Urdu</td>\n",
       "      <td>56.31</td>\n",
       "      <td>55.87</td>\n",
       "      <td>8.83</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>total</td>\n",
       "      <td>65.31</td>\n",
       "      <td>55.60</td>\n",
       "      <td>7.77</td>\n",
       "      <td>75150</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Language  Eval Sequence Accuracy  Eval Percent Deleted Tokens  \\\n",
       "0      English                   80.20                        50.22   \n",
       "1       French                   73.03                        50.21   \n",
       "2      Spanish                   74.25                        51.88   \n",
       "3       German                   69.70                        47.04   \n",
       "4        Greek                   65.03                        63.65   \n",
       "5    Bulgarian                   68.14                        63.57   \n",
       "6      Russian                   66.25                        64.32   \n",
       "7      Turkish                   63.11                        48.48   \n",
       "8       Arabic                   63.61                        57.98   \n",
       "9   Vietnamese                   66.57                        51.35   \n",
       "10        Thai                   58.02                        67.08   \n",
       "11     Chinese                   60.34                        48.03   \n",
       "12       Hindi                   57.15                        66.96   \n",
       "13     Swahili                   57.92                        47.34   \n",
       "14        Urdu                   56.31                        55.87   \n",
       "15       total                   65.31                        55.60   \n",
       "\n",
       "    Eval Runtime   Size  \n",
       "0           5.62   5010  \n",
       "1           6.67   5010  \n",
       "2           6.15   5010  \n",
       "3           6.77   5010  \n",
       "4           9.32   5010  \n",
       "5           8.63   5010  \n",
       "6           8.72   5010  \n",
       "7           6.22   5010  \n",
       "8           7.82   5010  \n",
       "9           7.64   5010  \n",
       "10         11.87   5010  \n",
       "11          4.93   5010  \n",
       "12         11.51   5010  \n",
       "13          5.92   5010  \n",
       "14          8.83   5010  \n",
       "15          7.77  75150  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mrt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>English</td>\n",
       "      <td>78.90</td>\n",
       "      <td>43.38</td>\n",
       "      <td>7.33</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>French</td>\n",
       "      <td>69.02</td>\n",
       "      <td>48.50</td>\n",
       "      <td>8.21</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spanish</td>\n",
       "      <td>69.78</td>\n",
       "      <td>48.11</td>\n",
       "      <td>7.79</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>German</td>\n",
       "      <td>63.61</td>\n",
       "      <td>47.97</td>\n",
       "      <td>8.01</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Greek</td>\n",
       "      <td>57.33</td>\n",
       "      <td>65.51</td>\n",
       "      <td>10.75</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bulgarian</td>\n",
       "      <td>61.88</td>\n",
       "      <td>60.49</td>\n",
       "      <td>10.78</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Russian</td>\n",
       "      <td>60.48</td>\n",
       "      <td>61.27</td>\n",
       "      <td>10.85</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Turkish</td>\n",
       "      <td>58.88</td>\n",
       "      <td>43.46</td>\n",
       "      <td>8.13</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>57.29</td>\n",
       "      <td>54.38</td>\n",
       "      <td>9.57</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Vietnamese</td>\n",
       "      <td>58.64</td>\n",
       "      <td>53.83</td>\n",
       "      <td>8.75</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Thai</td>\n",
       "      <td>46.19</td>\n",
       "      <td>74.89</td>\n",
       "      <td>12.34</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Chinese</td>\n",
       "      <td>57.92</td>\n",
       "      <td>40.04</td>\n",
       "      <td>6.96</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Hindi</td>\n",
       "      <td>48.22</td>\n",
       "      <td>66.49</td>\n",
       "      <td>14.48</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>55.85</td>\n",
       "      <td>44.03</td>\n",
       "      <td>7.48</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Urdu</td>\n",
       "      <td>46.59</td>\n",
       "      <td>56.87</td>\n",
       "      <td>10.47</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>total</td>\n",
       "      <td>59.37</td>\n",
       "      <td>53.95</td>\n",
       "      <td>9.46</td>\n",
       "      <td>75150</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Language  Eval Sequence Accuracy  Eval Percent Deleted Tokens  \\\n",
       "0      English                   78.90                        43.38   \n",
       "1       French                   69.02                        48.50   \n",
       "2      Spanish                   69.78                        48.11   \n",
       "3       German                   63.61                        47.97   \n",
       "4        Greek                   57.33                        65.51   \n",
       "5    Bulgarian                   61.88                        60.49   \n",
       "6      Russian                   60.48                        61.27   \n",
       "7      Turkish                   58.88                        43.46   \n",
       "8       Arabic                   57.29                        54.38   \n",
       "9   Vietnamese                   58.64                        53.83   \n",
       "10        Thai                   46.19                        74.89   \n",
       "11     Chinese                   57.92                        40.04   \n",
       "12       Hindi                   48.22                        66.49   \n",
       "13     Swahili                   55.85                        44.03   \n",
       "14        Urdu                   46.59                        56.87   \n",
       "15       total                   59.37                        53.95   \n",
       "\n",
       "    Eval Runtime   Size  \n",
       "0           7.33   5010  \n",
       "1           8.21   5010  \n",
       "2           7.79   5010  \n",
       "3           8.01   5010  \n",
       "4          10.75   5010  \n",
       "5          10.78   5010  \n",
       "6          10.85   5010  \n",
       "7           8.13   5010  \n",
       "8           9.57   5010  \n",
       "9           8.75   5010  \n",
       "10         12.34   5010  \n",
       "11          6.96   5010  \n",
       "12         14.48   5010  \n",
       "13          7.48   5010  \n",
       "14         10.47   5010  \n",
       "15          9.46  75150  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bpt5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Sequence Accuracy</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>English</td>\n",
       "      <td>73.53</td>\n",
       "      <td>50.10</td>\n",
       "      <td>5.90</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>French</td>\n",
       "      <td>55.19</td>\n",
       "      <td>50.08</td>\n",
       "      <td>7.02</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spanish</td>\n",
       "      <td>59.78</td>\n",
       "      <td>50.09</td>\n",
       "      <td>6.63</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>German</td>\n",
       "      <td>50.66</td>\n",
       "      <td>50.08</td>\n",
       "      <td>6.82</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Greek</td>\n",
       "      <td>46.53</td>\n",
       "      <td>50.05</td>\n",
       "      <td>11.79</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bulgarian</td>\n",
       "      <td>52.79</td>\n",
       "      <td>50.05</td>\n",
       "      <td>10.96</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Russian</td>\n",
       "      <td>51.46</td>\n",
       "      <td>50.06</td>\n",
       "      <td>11.20</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Turkish</td>\n",
       "      <td>47.31</td>\n",
       "      <td>50.08</td>\n",
       "      <td>6.40</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>49.34</td>\n",
       "      <td>50.06</td>\n",
       "      <td>8.96</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Vietnamese</td>\n",
       "      <td>47.92</td>\n",
       "      <td>50.07</td>\n",
       "      <td>8.10</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Thai</td>\n",
       "      <td>43.55</td>\n",
       "      <td>50.04</td>\n",
       "      <td>15.42</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Chinese</td>\n",
       "      <td>44.17</td>\n",
       "      <td>50.12</td>\n",
       "      <td>5.29</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Hindi</td>\n",
       "      <td>41.24</td>\n",
       "      <td>50.04</td>\n",
       "      <td>14.96</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>44.57</td>\n",
       "      <td>50.10</td>\n",
       "      <td>6.01</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Urdu</td>\n",
       "      <td>41.08</td>\n",
       "      <td>50.06</td>\n",
       "      <td>9.85</td>\n",
       "      <td>5010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>total</td>\n",
       "      <td>49.94</td>\n",
       "      <td>50.07</td>\n",
       "      <td>9.02</td>\n",
       "      <td>75150</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Language  Eval Sequence Accuracy  Eval Percent Deleted Tokens  \\\n",
       "0      English                   73.53                        50.10   \n",
       "1       French                   55.19                        50.08   \n",
       "2      Spanish                   59.78                        50.09   \n",
       "3       German                   50.66                        50.08   \n",
       "4        Greek                   46.53                        50.05   \n",
       "5    Bulgarian                   52.79                        50.05   \n",
       "6      Russian                   51.46                        50.06   \n",
       "7      Turkish                   47.31                        50.08   \n",
       "8       Arabic                   49.34                        50.06   \n",
       "9   Vietnamese                   47.92                        50.07   \n",
       "10        Thai                   43.55                        50.04   \n",
       "11     Chinese                   44.17                        50.12   \n",
       "12       Hindi                   41.24                        50.04   \n",
       "13     Swahili                   44.57                        50.10   \n",
       "14        Urdu                   41.08                        50.06   \n",
       "15       total                   49.94                        50.07   \n",
       "\n",
       "    Eval Runtime   Size  \n",
       "0           5.90   5010  \n",
       "1           7.02   5010  \n",
       "2           6.63   5010  \n",
       "3           6.82   5010  \n",
       "4          11.79   5010  \n",
       "5          10.96   5010  \n",
       "6          11.20   5010  \n",
       "7           6.40   5010  \n",
       "8           8.96   5010  \n",
       "9           8.10   5010  \n",
       "10         15.42   5010  \n",
       "11          5.29   5010  \n",
       "12         14.96   5010  \n",
       "13          6.01   5010  \n",
       "14          9.85   5010  \n",
       "15          9.02  75150  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "caninet5.round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     37.64\n",
       "1     38.04\n",
       "2     39.44\n",
       "3     35.12\n",
       "4     50.48\n",
       "5     50.30\n",
       "6     50.99\n",
       "7     36.37\n",
       "8     44.15\n",
       "9     39.10\n",
       "10    52.45\n",
       "11    38.89\n",
       "12    52.43\n",
       "13    35.23\n",
       "14    43.01\n",
       "15    45.13\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, mrt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     18.63\n",
       "1     23.69\n",
       "2     23.29\n",
       "3     23.28\n",
       "4     42.85\n",
       "5     37.96\n",
       "6     39.03\n",
       "7     16.85\n",
       "8     31.63\n",
       "9     30.20\n",
       "10    50.57\n",
       "11    13.67\n",
       "12    40.14\n",
       "13    18.16\n",
       "14    32.39\n",
       "15    33.22\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, bpt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     34.50\n",
       "1     34.75\n",
       "2     34.63\n",
       "3     34.66\n",
       "4     37.32\n",
       "5     36.89\n",
       "6     37.07\n",
       "7     34.49\n",
       "8     36.01\n",
       "9     35.41\n",
       "10    38.23\n",
       "11    34.44\n",
       "12    38.17\n",
       "13    34.23\n",
       "14    36.44\n",
       "15    36.32\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, caninet5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TyDiQA-GoldP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = 'eval_results/tydiqa/{}/{}.csv'\n",
    "\n",
    "# ByT5 DF\n",
    "byt5 = pd.read_csv(path.format('T5', 't5_tydiqa_seed25'))\n",
    "byt5 = byt5.drop(columns='Language Code')\n",
    "byt5 = compute_weighted_averages(byt5, split_col='Language')\n",
    "\n",
    "# MrT5 DF\n",
    "mrt5 = pd.read_csv(path.format('MrT5', 'mrt5_tydiqa_50%_seed73'))\n",
    "mrt5 = mrt5.drop(columns='Language Code')\n",
    "mrt5 = compute_weighted_averages(mrt5, split_col='Language')\n",
    "\n",
    "# BPT5\n",
    "bpt5 = pd.read_csv(path.format('BPT5', 'bpt5_tydiqa_50%_seed960'))\n",
    "bpt5 = bpt5.drop(columns='Language Code')\n",
    "bpt5 = compute_weighted_averages(bpt5, split_col='Language')\n",
    "\n",
    "# CanineT5\n",
    "caninet5 = pd.read_csv(path.format('CanineT5', 'caninet5_tydiqa_50%_seed25'))\n",
    "caninet5 = caninet5.drop(columns='Language Code')\n",
    "caninet5 = compute_weighted_averages(caninet5, split_col='Language')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Exact Match</th>\n",
       "      <th>Eval F1</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Russian</td>\n",
       "      <td>65.02</td>\n",
       "      <td>75.64</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45.87</td>\n",
       "      <td>812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>69.27</td>\n",
       "      <td>81.84</td>\n",
       "      <td>0.0</td>\n",
       "      <td>39.13</td>\n",
       "      <td>921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Bengali</td>\n",
       "      <td>55.75</td>\n",
       "      <td>67.22</td>\n",
       "      <td>0.0</td>\n",
       "      <td>77.87</td>\n",
       "      <td>113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Telugu</td>\n",
       "      <td>77.88</td>\n",
       "      <td>85.41</td>\n",
       "      <td>0.0</td>\n",
       "      <td>57.22</td>\n",
       "      <td>669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Finnish</td>\n",
       "      <td>69.18</td>\n",
       "      <td>78.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>26.31</td>\n",
       "      <td>782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>77.96</td>\n",
       "      <td>85.28</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19.00</td>\n",
       "      <td>499</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Korean</td>\n",
       "      <td>58.70</td>\n",
       "      <td>66.26</td>\n",
       "      <td>0.0</td>\n",
       "      <td>31.37</td>\n",
       "      <td>276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Indonesian</td>\n",
       "      <td>75.58</td>\n",
       "      <td>84.23</td>\n",
       "      <td>0.0</td>\n",
       "      <td>28.76</td>\n",
       "      <td>565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>English</td>\n",
       "      <td>63.64</td>\n",
       "      <td>73.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>31.05</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>total</td>\n",
       "      <td>69.90</td>\n",
       "      <td>79.58</td>\n",
       "      <td>0.0</td>\n",
       "      <td>37.22</td>\n",
       "      <td>5077</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Language  Eval Exact Match  Eval F1  Eval Percent Deleted Tokens  \\\n",
       "0     Russian             65.02    75.64                          0.0   \n",
       "1      Arabic             69.27    81.84                          0.0   \n",
       "2     Bengali             55.75    67.22                          0.0   \n",
       "3      Telugu             77.88    85.41                          0.0   \n",
       "4     Finnish             69.18    78.92                          0.0   \n",
       "5     Swahili             77.96    85.28                          0.0   \n",
       "6      Korean             58.70    66.26                          0.0   \n",
       "7  Indonesian             75.58    84.23                          0.0   \n",
       "8     English             63.64    73.50                          0.0   \n",
       "9       total             69.90    79.58                          0.0   \n",
       "\n",
       "   Eval Runtime  Size  \n",
       "0         45.87   812  \n",
       "1         39.13   921  \n",
       "2         77.87   113  \n",
       "3         57.22   669  \n",
       "4         26.31   782  \n",
       "5         19.00   499  \n",
       "6         31.37   276  \n",
       "7         28.76   565  \n",
       "8         31.05   440  \n",
       "9         37.22  5077  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "byt5.round(2) # ByT5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Exact Match</th>\n",
       "      <th>Eval F1</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Russian</td>\n",
       "      <td>60.59</td>\n",
       "      <td>71.41</td>\n",
       "      <td>54.70</td>\n",
       "      <td>28.44</td>\n",
       "      <td>812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>68.62</td>\n",
       "      <td>80.98</td>\n",
       "      <td>56.09</td>\n",
       "      <td>24.90</td>\n",
       "      <td>921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Bengali</td>\n",
       "      <td>56.64</td>\n",
       "      <td>66.08</td>\n",
       "      <td>66.68</td>\n",
       "      <td>38.88</td>\n",
       "      <td>113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Telugu</td>\n",
       "      <td>77.43</td>\n",
       "      <td>84.87</td>\n",
       "      <td>65.33</td>\n",
       "      <td>29.96</td>\n",
       "      <td>669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Finnish</td>\n",
       "      <td>67.90</td>\n",
       "      <td>76.97</td>\n",
       "      <td>31.19</td>\n",
       "      <td>22.28</td>\n",
       "      <td>782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>76.55</td>\n",
       "      <td>82.77</td>\n",
       "      <td>40.68</td>\n",
       "      <td>17.04</td>\n",
       "      <td>499</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Korean</td>\n",
       "      <td>57.61</td>\n",
       "      <td>65.66</td>\n",
       "      <td>32.77</td>\n",
       "      <td>25.28</td>\n",
       "      <td>276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Indonesian</td>\n",
       "      <td>73.10</td>\n",
       "      <td>83.19</td>\n",
       "      <td>39.27</td>\n",
       "      <td>22.16</td>\n",
       "      <td>565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>English</td>\n",
       "      <td>62.50</td>\n",
       "      <td>70.93</td>\n",
       "      <td>40.48</td>\n",
       "      <td>23.07</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>total</td>\n",
       "      <td>68.27</td>\n",
       "      <td>77.73</td>\n",
       "      <td>47.48</td>\n",
       "      <td>24.83</td>\n",
       "      <td>5077</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Language  Eval Exact Match  Eval F1  Eval Percent Deleted Tokens  \\\n",
       "0     Russian             60.59    71.41                        54.70   \n",
       "1      Arabic             68.62    80.98                        56.09   \n",
       "2     Bengali             56.64    66.08                        66.68   \n",
       "3      Telugu             77.43    84.87                        65.33   \n",
       "4     Finnish             67.90    76.97                        31.19   \n",
       "5     Swahili             76.55    82.77                        40.68   \n",
       "6      Korean             57.61    65.66                        32.77   \n",
       "7  Indonesian             73.10    83.19                        39.27   \n",
       "8     English             62.50    70.93                        40.48   \n",
       "9       total             68.27    77.73                        47.48   \n",
       "\n",
       "   Eval Runtime  Size  \n",
       "0         28.44   812  \n",
       "1         24.90   921  \n",
       "2         38.88   113  \n",
       "3         29.96   669  \n",
       "4         22.28   782  \n",
       "5         17.04   499  \n",
       "6         25.28   276  \n",
       "7         22.16   565  \n",
       "8         23.07   440  \n",
       "9         24.83  5077  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mrt5.round(2) # MrT5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Exact Match</th>\n",
       "      <th>Eval F1</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Russian</td>\n",
       "      <td>31.90</td>\n",
       "      <td>43.27</td>\n",
       "      <td>30.60</td>\n",
       "      <td>36.87</td>\n",
       "      <td>812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>40.17</td>\n",
       "      <td>59.72</td>\n",
       "      <td>25.37</td>\n",
       "      <td>33.29</td>\n",
       "      <td>921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Bengali</td>\n",
       "      <td>23.01</td>\n",
       "      <td>31.41</td>\n",
       "      <td>43.21</td>\n",
       "      <td>57.42</td>\n",
       "      <td>113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Telugu</td>\n",
       "      <td>29.90</td>\n",
       "      <td>42.40</td>\n",
       "      <td>36.34</td>\n",
       "      <td>44.84</td>\n",
       "      <td>669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Finnish</td>\n",
       "      <td>45.65</td>\n",
       "      <td>58.88</td>\n",
       "      <td>13.88</td>\n",
       "      <td>25.97</td>\n",
       "      <td>782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>63.33</td>\n",
       "      <td>71.95</td>\n",
       "      <td>5.39</td>\n",
       "      <td>20.84</td>\n",
       "      <td>499</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Korean</td>\n",
       "      <td>31.88</td>\n",
       "      <td>40.07</td>\n",
       "      <td>17.00</td>\n",
       "      <td>29.42</td>\n",
       "      <td>276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Indonesian</td>\n",
       "      <td>50.09</td>\n",
       "      <td>65.11</td>\n",
       "      <td>16.61</td>\n",
       "      <td>27.04</td>\n",
       "      <td>565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>English</td>\n",
       "      <td>36.82</td>\n",
       "      <td>51.20</td>\n",
       "      <td>21.48</td>\n",
       "      <td>28.45</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>total</td>\n",
       "      <td>40.59</td>\n",
       "      <td>54.04</td>\n",
       "      <td>22.55</td>\n",
       "      <td>32.24</td>\n",
       "      <td>5077</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Language  Eval Exact Match  Eval F1  Eval Percent Deleted Tokens  \\\n",
       "0     Russian             31.90    43.27                        30.60   \n",
       "1      Arabic             40.17    59.72                        25.37   \n",
       "2     Bengali             23.01    31.41                        43.21   \n",
       "3      Telugu             29.90    42.40                        36.34   \n",
       "4     Finnish             45.65    58.88                        13.88   \n",
       "5     Swahili             63.33    71.95                         5.39   \n",
       "6      Korean             31.88    40.07                        17.00   \n",
       "7  Indonesian             50.09    65.11                        16.61   \n",
       "8     English             36.82    51.20                        21.48   \n",
       "9       total             40.59    54.04                        22.55   \n",
       "\n",
       "   Eval Runtime  Size  \n",
       "0         36.87   812  \n",
       "1         33.29   921  \n",
       "2         57.42   113  \n",
       "3         44.84   669  \n",
       "4         25.97   782  \n",
       "5         20.84   499  \n",
       "6         29.42   276  \n",
       "7         27.04   565  \n",
       "8         28.45   440  \n",
       "9         32.24  5077  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bpt5.round(2) # BP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Language</th>\n",
       "      <th>Eval Exact Match</th>\n",
       "      <th>Eval F1</th>\n",
       "      <th>Eval Percent Deleted Tokens</th>\n",
       "      <th>Eval Runtime</th>\n",
       "      <th>Size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Russian</td>\n",
       "      <td>49.63</td>\n",
       "      <td>60.66</td>\n",
       "      <td>50.03</td>\n",
       "      <td>29.60</td>\n",
       "      <td>812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Arabic</td>\n",
       "      <td>60.69</td>\n",
       "      <td>75.44</td>\n",
       "      <td>50.04</td>\n",
       "      <td>26.12</td>\n",
       "      <td>921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Bengali</td>\n",
       "      <td>38.94</td>\n",
       "      <td>51.24</td>\n",
       "      <td>50.01</td>\n",
       "      <td>47.51</td>\n",
       "      <td>113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Telugu</td>\n",
       "      <td>59.04</td>\n",
       "      <td>68.78</td>\n",
       "      <td>50.03</td>\n",
       "      <td>35.70</td>\n",
       "      <td>669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Finnish</td>\n",
       "      <td>54.22</td>\n",
       "      <td>65.76</td>\n",
       "      <td>50.05</td>\n",
       "      <td>19.71</td>\n",
       "      <td>782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Swahili</td>\n",
       "      <td>60.92</td>\n",
       "      <td>67.89</td>\n",
       "      <td>50.09</td>\n",
       "      <td>16.92</td>\n",
       "      <td>499</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Korean</td>\n",
       "      <td>38.04</td>\n",
       "      <td>44.67</td>\n",
       "      <td>50.04</td>\n",
       "      <td>22.30</td>\n",
       "      <td>276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Indonesian</td>\n",
       "      <td>61.06</td>\n",
       "      <td>72.10</td>\n",
       "      <td>50.06</td>\n",
       "      <td>20.81</td>\n",
       "      <td>565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>English</td>\n",
       "      <td>48.41</td>\n",
       "      <td>57.79</td>\n",
       "      <td>50.04</td>\n",
       "      <td>21.48</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>total</td>\n",
       "      <td>54.99</td>\n",
       "      <td>65.85</td>\n",
       "      <td>50.05</td>\n",
       "      <td>25.32</td>\n",
       "      <td>5077</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Language  Eval Exact Match  Eval F1  Eval Percent Deleted Tokens  \\\n",
       "0     Russian             49.63    60.66                        50.03   \n",
       "1      Arabic             60.69    75.44                        50.04   \n",
       "2     Bengali             38.94    51.24                        50.01   \n",
       "3      Telugu             59.04    68.78                        50.03   \n",
       "4     Finnish             54.22    65.76                        50.05   \n",
       "5     Swahili             60.92    67.89                        50.09   \n",
       "6      Korean             38.04    44.67                        50.04   \n",
       "7  Indonesian             61.06    72.10                        50.06   \n",
       "8     English             48.41    57.79                        50.04   \n",
       "9       total             54.99    65.85                        50.05   \n",
       "\n",
       "   Eval Runtime  Size  \n",
       "0         29.60   812  \n",
       "1         26.12   921  \n",
       "2         47.51   113  \n",
       "3         35.70   669  \n",
       "4         19.71   782  \n",
       "5         16.92   499  \n",
       "6         22.30   276  \n",
       "7         20.81   565  \n",
       "8         21.48   440  \n",
       "9         25.32  5077  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "caninet5.round(2) # CP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    38.00\n",
       "1    36.36\n",
       "2    50.06\n",
       "3    47.65\n",
       "4    15.31\n",
       "5    10.33\n",
       "6    19.43\n",
       "7    22.93\n",
       "8    25.69\n",
       "9    33.31\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, mrt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    19.62\n",
       "1    14.94\n",
       "2    26.26\n",
       "3    21.63\n",
       "4     1.30\n",
       "5    -9.68\n",
       "6     6.22\n",
       "7     5.98\n",
       "8     8.36\n",
       "9    13.38\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, bpt5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    35.46\n",
       "1    33.27\n",
       "2    38.99\n",
       "3    37.60\n",
       "4    25.09\n",
       "5    10.93\n",
       "6    28.91\n",
       "7    27.64\n",
       "8    30.80\n",
       "9    31.97\n",
       "Name: Eval Runtime, dtype: float64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_percent_decrease(byt5, caninet5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "charlm-env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
