{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Other Models: Computing Inverted Computational Complexity Ratio and Estimated Inference Time\n",
    "This notebook illustrates the computations of the above two metrics for other models compared against P-BERT. The results are as depicted in the paper Table 5."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "BERT_COMPUTATIONAL_COMPLEXITY = 1165824\n",
    "\n",
    "def compute_inverted_computational_complexity_ratio(bit_size,num_layers, num_hidden_states):\n",
    "    computational_complexity = 0\n",
    "    for i in range(1, num_layers):\n",
    "        computational_complexity += i * bit_size * num_hidden_states\n",
    "\n",
    "    return round(BERT_COMPUTATIONAL_COMPLEXITY / computational_complexity, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUMBER_OF_EXAMPLES = [3000 + 2490 + 277, 1725 + 3668 + 408, 1379 + 5749 + 1500, 1063 + 8551 + 1043] # rte, mrpc, stsb, cola\n",
    "# taken from https://www.tensorflow.org/datasets/catalog/glue#gluecola_default_config\n",
    "FP32 = 19.5\n",
    "INT8 = 624\n",
    "\n",
    "def self_attention_layer(s, i, k):\n",
    "    return 3*s*i*k/1000000 + 2*(s**2)*k/1000000 + 2*(s**2)/1000000\n",
    "\n",
    "def add_norm_later(s, k): \n",
    "    return (s**2)*k/1000000\n",
    "\n",
    "def feed_forward_layer(s, k, d):\n",
    "    return 2*(2*s*k*d/1000000 + 2*(s**2)*k/1000000)\n",
    "\n",
    "def layer(s, i, k, d):\n",
    "    return self_attention_layer(s, i, k) + \\\n",
    "        add_norm_later(s, k) + \\\n",
    "            feed_forward_layer(s, k, d)\n",
    "            \n",
    "def compute_estimated_inference_time(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size):\n",
    "    estimated_inference_time = []\n",
    "    for i in range(len(NUMBER_OF_EXAMPLES)):\n",
    "        operations = num_layers * layer(sequence_length, embedding_size, num_hidden_states, intermediate_size) * \\\n",
    "            NUMBER_OF_EXAMPLES[i]\n",
    "        estimated_inference_time.append(round(operations / FP32 / 1000000, 1))\n",
    "    return estimated_inference_time\n",
    "\n",
    "def compute_estimated_inference_time_int(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size):\n",
    "    estimated_inference_time = []\n",
    "    for i in range(len(NUMBER_OF_EXAMPLES)):\n",
    "        operations = num_layers * layer(sequence_length, embedding_size, num_hidden_states, intermediate_size) * \\\n",
    "            NUMBER_OF_EXAMPLES[i]\n",
    "        estimated_inference_time.append(round(operations / INT8 / 1000000, 1))\n",
    "    return estimated_inference_time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "final_table = {\"Models\": [], \n",
    "               \"Inverted Computational Complexity Ratio\": [], \n",
    "               \"Estimated Inference Time (RTE)\": [],\n",
    "               \"Estimated Inference Time (MRPC)\": [],\n",
    "               \"Estimated Inference Time (STSB)\": [],\n",
    "               \"Estimated Inference Time (CoLA)\": [],\n",
    "               }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inverted Computational Complexity Ratio & Estimated Inference Time Results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### BERT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "bit_size = 23\n",
    "num_layers = 12\n",
    "num_hidden_states = 768\n",
    "embedding_size = num_hidden_states\n",
    "intermediate_size = 3072 \n",
    "sequence_length = 512\n",
    "\n",
    "\n",
    "\n",
    "inverted_computational_complexity_ratio = compute_inverted_computational_complexity_ratio(bit_size, num_layers, num_hidden_states)\n",
    "rte, mrpc, stsb, cola = compute_estimated_inference_time(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size)\n",
    "\n",
    "final_table[\"Models\"].append(\"BERT\")\n",
    "final_table[\"Inverted Computational Complexity Ratio\"].append(inverted_computational_complexity_ratio)\n",
    "final_table[\"Estimated Inference Time (RTE)\"].append(rte)\n",
    "final_table[\"Estimated Inference Time (MRPC)\"].append(mrpc)\n",
    "final_table[\"Estimated Inference Time (STSB)\"].append(stsb)\n",
    "final_table[\"Estimated Inference Time (CoLA)\"].append(cola)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### RoBERTa (BASE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "bit_size = 23\n",
    "num_layers = 12\n",
    "num_hidden_states = 768\n",
    "embedding_size = num_hidden_states\n",
    "intermediate_size = 3072 \n",
    "sequence_length = 512\n",
    "\n",
    "inverted_computational_complexity_ratio = compute_inverted_computational_complexity_ratio(bit_size, num_layers, num_hidden_states)\n",
    "rte, mrpc, stsb, cola = compute_estimated_inference_time(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size)\n",
    "\n",
    "final_table[\"Models\"].append(\"RoBERTa\")\n",
    "final_table[\"Inverted Computational Complexity Ratio\"].append(inverted_computational_complexity_ratio)\n",
    "final_table[\"Estimated Inference Time (RTE)\"].append(rte)\n",
    "final_table[\"Estimated Inference Time (MRPC)\"].append(mrpc)\n",
    "final_table[\"Estimated Inference Time (STSB)\"].append(stsb)\n",
    "final_table[\"Estimated Inference Time (CoLA)\"].append(cola)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DistilBERT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "bit_size = 23\n",
    "num_layers = 6\n",
    "num_hidden_states = 768\n",
    "embedding_size = num_hidden_states\n",
    "intermediate_size = 3072 \n",
    "sequence_length = 512\n",
    "\n",
    "inverted_computational_complexity_ratio = compute_inverted_computational_complexity_ratio(bit_size, num_layers, num_hidden_states)\n",
    "rte, mrpc, stsb, cola = compute_estimated_inference_time(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size)\n",
    "\n",
    "final_table[\"Models\"].append(\"DistilBERT\")\n",
    "final_table[\"Inverted Computational Complexity Ratio\"].append(inverted_computational_complexity_ratio)\n",
    "final_table[\"Estimated Inference Time (RTE)\"].append(rte)\n",
    "final_table[\"Estimated Inference Time (MRPC)\"].append(mrpc)\n",
    "final_table[\"Estimated Inference Time (STSB)\"].append(stsb)\n",
    "final_table[\"Estimated Inference Time (CoLA)\"].append(cola)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ALBERT (BASE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "bit_size = 23\n",
    "num_layers = 12\n",
    "num_hidden_states = 768\n",
    "embedding_size = 128\n",
    "intermediate_size = 3072 \n",
    "sequence_length = 512\n",
    "\n",
    "inverted_computational_complexity_ratio = compute_inverted_computational_complexity_ratio(bit_size, num_layers, num_hidden_states)\n",
    "rte, mrpc, stsb, cola = compute_estimated_inference_time(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size)\n",
    "\n",
    "final_table[\"Models\"].append(\"ALBERT\")\n",
    "final_table[\"Inverted Computational Complexity Ratio\"].append(inverted_computational_complexity_ratio)\n",
    "final_table[\"Estimated Inference Time (RTE)\"].append(rte)\n",
    "final_table[\"Estimated Inference Time (MRPC)\"].append(mrpc)\n",
    "final_table[\"Estimated Inference Time (STSB)\"].append(stsb)\n",
    "final_table[\"Estimated Inference Time (CoLA)\"].append(cola)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TinyBERT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "bit_size = 23\n",
    "num_layers = 4\n",
    "num_hidden_states = 312\n",
    "embedding_size = num_hidden_states\n",
    "intermediate_size = 1200 \n",
    "sequence_length = 512\n",
    "\n",
    "inverted_computational_complexity_ratio = compute_inverted_computational_complexity_ratio(bit_size, num_layers, num_hidden_states)\n",
    "rte, mrpc, stsb, cola = compute_estimated_inference_time(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size)\n",
    "\n",
    "final_table[\"Models\"].append(\"TinyBERT\")\n",
    "final_table[\"Inverted Computational Complexity Ratio\"].append(inverted_computational_complexity_ratio)\n",
    "final_table[\"Estimated Inference Time (RTE)\"].append(rte)\n",
    "final_table[\"Estimated Inference Time (MRPC)\"].append(mrpc)\n",
    "final_table[\"Estimated Inference Time (STSB)\"].append(stsb)\n",
    "final_table[\"Estimated Inference Time (CoLA)\"].append(cola)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### I-BERT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "bit_size = 8\n",
    "num_layers = 12\n",
    "num_hidden_states = 768\n",
    "embedding_size = num_hidden_states\n",
    "intermediate_size = 3072 \n",
    "sequence_length = 512\n",
    "\n",
    "inverted_computational_complexity_ratio = compute_inverted_computational_complexity_ratio(bit_size, num_layers, num_hidden_states)\n",
    "rte, mrpc, stsb, cola = compute_estimated_inference_time_int(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size)\n",
    "\n",
    "final_table[\"Models\"].append(\"I-BERT\")\n",
    "final_table[\"Inverted Computational Complexity Ratio\"].append(inverted_computational_complexity_ratio)\n",
    "final_table[\"Estimated Inference Time (RTE)\"].append(rte)\n",
    "final_table[\"Estimated Inference Time (MRPC)\"].append(mrpc)\n",
    "final_table[\"Estimated Inference Time (STSB)\"].append(stsb)\n",
    "final_table[\"Estimated Inference Time (CoLA)\"].append(cola)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MiniLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "bit_size = 23\n",
    "num_layers = 12\n",
    "num_hidden_states = 384\n",
    "embedding_size = num_hidden_states\n",
    "intermediate_size = 4 * num_hidden_states \n",
    "sequence_length = 512\n",
    "\n",
    "inverted_computational_complexity_ratio = compute_inverted_computational_complexity_ratio(bit_size, num_layers, num_hidden_states)\n",
    "rte, mrpc, stsb, cola = compute_estimated_inference_time(num_layers, sequence_length, embedding_size, num_hidden_states, intermediate_size)\n",
    "\n",
    "final_table[\"Models\"].append(\"MiniLM\")\n",
    "final_table[\"Inverted Computational Complexity Ratio\"].append(inverted_computational_complexity_ratio)\n",
    "final_table[\"Estimated Inference Time (RTE)\"].append(rte)\n",
    "final_table[\"Estimated Inference Time (MRPC)\"].append(mrpc)\n",
    "final_table[\"Estimated Inference Time (STSB)\"].append(stsb)\n",
    "final_table[\"Estimated Inference Time (CoLA)\"].append(cola)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Models</th>\n",
       "      <th>Inverted Computational Complexity Ratio</th>\n",
       "      <th>Estimated Inference Time (RTE)</th>\n",
       "      <th>Estimated Inference Time (MRPC)</th>\n",
       "      <th>Estimated Inference Time (STSB)</th>\n",
       "      <th>Estimated Inference Time (CoLA)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BERT</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25.4</td>\n",
       "      <td>25.5</td>\n",
       "      <td>38.0</td>\n",
       "      <td>46.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>RoBERTa</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25.4</td>\n",
       "      <td>25.5</td>\n",
       "      <td>38.0</td>\n",
       "      <td>46.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>DistilBERT</td>\n",
       "      <td>4.4</td>\n",
       "      <td>12.7</td>\n",
       "      <td>12.8</td>\n",
       "      <td>19.0</td>\n",
       "      <td>23.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ALBERT</td>\n",
       "      <td>1.0</td>\n",
       "      <td>22.7</td>\n",
       "      <td>22.8</td>\n",
       "      <td>33.9</td>\n",
       "      <td>41.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>TinyBERT</td>\n",
       "      <td>27.1</td>\n",
       "      <td>1.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2.6</td>\n",
       "      <td>3.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>I-BERT</td>\n",
       "      <td>2.9</td>\n",
       "      <td>0.8</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>MiniLM</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.6</td>\n",
       "      <td>7.6</td>\n",
       "      <td>11.4</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Models  Inverted Computational Complexity Ratio  \\\n",
       "0        BERT                                      1.0   \n",
       "1     RoBERTa                                      1.0   \n",
       "2  DistilBERT                                      4.4   \n",
       "3      ALBERT                                      1.0   \n",
       "4    TinyBERT                                     27.1   \n",
       "5      I-BERT                                      2.9   \n",
       "6      MiniLM                                      2.0   \n",
       "\n",
       "   Estimated Inference Time (RTE)  Estimated Inference Time (MRPC)  \\\n",
       "0                            25.4                             25.5   \n",
       "1                            25.4                             25.5   \n",
       "2                            12.7                             12.8   \n",
       "3                            22.7                             22.8   \n",
       "4                             1.8                              1.8   \n",
       "5                             0.8                              0.8   \n",
       "6                             7.6                              7.6   \n",
       "\n",
       "   Estimated Inference Time (STSB)  Estimated Inference Time (CoLA)  \n",
       "0                             38.0                             46.9  \n",
       "1                             38.0                             46.9  \n",
       "2                             19.0                             23.4  \n",
       "3                             33.9                             41.9  \n",
       "4                              2.6                              3.3  \n",
       "5                              1.2                              1.5  \n",
       "6                             11.4                             14.0  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "final_df = pd.DataFrame(final_table)\n",
    "final_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
