{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "a1648a5d-023c-4322-9ced-16ff25bf8874"
    }
   },
   "source": [
    "# Edgeprobe Aggregate Analysis\n",
    "\n",
    "This notebook is intended to be run on the output of the [`analyze_runs.py`](analyze_runs.py) script; run that on a folder of experiments to produce a `scores.tsv` file that can be loaded here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "nbpresent": {
     "id": "a527345e-7434-4e96-ae52-51b5b79be75e"
    }
   },
   "outputs": [],
   "source": [
    "import sys, os, re, json\n",
    "from importlib import reload\n",
    "import itertools\n",
    "import collections\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import analysis\n",
    "reload(analysis)\n",
    "\n",
    "# Short aliases for the shared constants defined in the local `analysis` module.\n",
    "tasks, exp_types, palette = (\n",
    "    analysis.TASKS,\n",
    "    analysis.EXP_TYPES,\n",
    "    analysis.EXP_PALETTE,\n",
    ")\n",
    "\n",
    "# Sort keys re-exported from `analysis` so they can be used without the module prefix.\n",
    "task_sort_key, exp_type_sort_key = (\n",
    "    analysis.task_sort_key,\n",
    "    analysis.exp_type_sort_key,\n",
    ")\n",
    "\n",
    "from scipy.special import logsumexp\n",
    "from scipy.stats import entropy\n",
    "\n",
    "def softmax(x, axis=None):\n",
    "    \"\"\"Softmax of `x` over `axis`, evaluated in log space via `logsumexp`.\"\"\"\n",
    "    log_normalizer = logsumexp(x, axis=axis, keepdims=True)\n",
    "    return np.exp(x - log_normalizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "nbpresent": {
     "id": "1bca01a3-7e20-4ffa-b8d7-764ef2f41929"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div class=\"bk-root\">\n",
       "        <a href=\"https://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
       "        <span id=\"1001\">Loading BokehJS ...</span>\n",
       "    </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "\n",
       "(function(root) {\n",
       "  function now() {\n",
       "    return new Date();\n",
       "  }\n",
       "\n",
       "  var force = true;\n",
       "\n",
       "  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n",
       "    root._bokeh_onload_callbacks = [];\n",
       "    root._bokeh_is_loading = undefined;\n",
       "  }\n",
       "\n",
       "  var JS_MIME_TYPE = 'application/javascript';\n",
       "  var HTML_MIME_TYPE = 'text/html';\n",
       "  var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
       "  var CLASS_NAME = 'output_bokeh rendered_html';\n",
       "\n",
       "  /**\n",
       "   * Render data to the DOM node\n",
       "   */\n",
       "  function render(props, node) {\n",
       "    var script = document.createElement(\"script\");\n",
       "    node.appendChild(script);\n",
       "  }\n",
       "\n",
       "  /**\n",
       "   * Handle when an output is cleared or removed\n",
       "   */\n",
       "  function handleClearOutput(event, handle) {\n",
       "    var cell = handle.cell;\n",
       "\n",
       "    var id = cell.output_area._bokeh_element_id;\n",
       "    var server_id = cell.output_area._bokeh_server_id;\n",
       "    // Clean up Bokeh references\n",
       "    if (id != null && id in Bokeh.index) {\n",
       "      Bokeh.index[id].model.document.clear();\n",
       "      delete Bokeh.index[id];\n",
       "    }\n",
       "\n",
       "    if (server_id !== undefined) {\n",
       "      // Clean up Bokeh references\n",
       "      var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
       "      cell.notebook.kernel.execute(cmd, {\n",
       "        iopub: {\n",
       "          output: function(msg) {\n",
       "            var id = msg.content.text.trim();\n",
       "            if (id in Bokeh.index) {\n",
       "              Bokeh.index[id].model.document.clear();\n",
       "              delete Bokeh.index[id];\n",
       "            }\n",
       "          }\n",
       "        }\n",
       "      });\n",
       "      // Destroy server and session\n",
       "      var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
       "      cell.notebook.kernel.execute(cmd);\n",
       "    }\n",
       "  }\n",
       "\n",
       "  /**\n",
       "   * Handle when a new output is added\n",
       "   */\n",
       "  function handleAddOutput(event, handle) {\n",
       "    var output_area = handle.output_area;\n",
       "    var output = handle.output;\n",
       "\n",
       "    // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
       "    if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
       "      return\n",
       "    }\n",
       "\n",
       "    var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
       "\n",
       "    if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
       "      toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
       "      // store reference to embed id on output_area\n",
       "      output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
       "    }\n",
       "    if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
       "      var bk_div = document.createElement(\"div\");\n",
       "      bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
       "      var script_attrs = bk_div.children[0].attributes;\n",
       "      for (var i = 0; i < script_attrs.length; i++) {\n",
       "        toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
       "      }\n",
       "      // store reference to server id on output_area\n",
       "      output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
       "    }\n",
       "  }\n",
       "\n",
       "  function register_renderer(events, OutputArea) {\n",
       "\n",
       "    function append_mime(data, metadata, element) {\n",
       "      // create a DOM node to render to\n",
       "      var toinsert = this.create_output_subarea(\n",
       "        metadata,\n",
       "        CLASS_NAME,\n",
       "        EXEC_MIME_TYPE\n",
       "      );\n",
       "      this.keyboard_manager.register_events(toinsert);\n",
       "      // Render to node\n",
       "      var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
       "      render(props, toinsert[toinsert.length - 1]);\n",
       "      element.append(toinsert);\n",
       "      return toinsert\n",
       "    }\n",
       "\n",
       "    /* Handle when an output is cleared or removed */\n",
       "    events.on('clear_output.CodeCell', handleClearOutput);\n",
       "    events.on('delete.Cell', handleClearOutput);\n",
       "\n",
       "    /* Handle when a new output is added */\n",
       "    events.on('output_added.OutputArea', handleAddOutput);\n",
       "\n",
       "    /**\n",
       "     * Register the mime type and append_mime function with output_area\n",
       "     */\n",
       "    OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
       "      /* Is output safe? */\n",
       "      safe: true,\n",
       "      /* Index of renderer in `output_area.display_order` */\n",
       "      index: 0\n",
       "    });\n",
       "  }\n",
       "\n",
       "  // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
       "  if (root.Jupyter !== undefined) {\n",
       "    var events = require('base/js/events');\n",
       "    var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
       "\n",
       "    if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
       "      register_renderer(events, OutputArea);\n",
       "    }\n",
       "  }\n",
       "\n",
       "  \n",
       "  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
       "    root._bokeh_timeout = Date.now() + 5000;\n",
       "    root._bokeh_failed_load = false;\n",
       "  }\n",
       "\n",
       "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
       "     \"<div style='background-color: #fdd'>\\n\"+\n",
       "     \"<p>\\n\"+\n",
       "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
       "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
       "     \"</p>\\n\"+\n",
       "     \"<ul>\\n\"+\n",
       "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
       "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
       "     \"</ul>\\n\"+\n",
       "     \"<code>\\n\"+\n",
       "     \"from bokeh.resources import INLINE\\n\"+\n",
       "     \"output_notebook(resources=INLINE)\\n\"+\n",
       "     \"</code>\\n\"+\n",
       "     \"</div>\"}};\n",
       "\n",
       "  function display_loaded() {\n",
       "    var el = document.getElementById(\"1001\");\n",
       "    if (el != null) {\n",
       "      el.textContent = \"BokehJS is loading...\";\n",
       "    }\n",
       "    if (root.Bokeh !== undefined) {\n",
       "      if (el != null) {\n",
       "        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
       "      }\n",
       "    } else if (Date.now() < root._bokeh_timeout) {\n",
       "      setTimeout(display_loaded, 100)\n",
       "    }\n",
       "  }\n",
       "\n",
       "\n",
       "  function run_callbacks() {\n",
       "    try {\n",
       "      root._bokeh_onload_callbacks.forEach(function(callback) {\n",
       "        if (callback != null)\n",
       "          callback();\n",
       "      });\n",
       "    } finally {\n",
       "      delete root._bokeh_onload_callbacks\n",
       "    }\n",
       "    console.debug(\"Bokeh: all callbacks have finished\");\n",
       "  }\n",
       "\n",
       "  function load_libs(css_urls, js_urls, callback) {\n",
       "    if (css_urls == null) css_urls = [];\n",
       "    if (js_urls == null) js_urls = [];\n",
       "\n",
       "    root._bokeh_onload_callbacks.push(callback);\n",
       "    if (root._bokeh_is_loading > 0) {\n",
       "      console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
       "      return null;\n",
       "    }\n",
       "    if (js_urls == null || js_urls.length === 0) {\n",
       "      run_callbacks();\n",
       "      return null;\n",
       "    }\n",
       "    console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
       "    root._bokeh_is_loading = css_urls.length + js_urls.length;\n",
       "\n",
       "    function on_load() {\n",
       "      root._bokeh_is_loading--;\n",
       "      if (root._bokeh_is_loading === 0) {\n",
       "        console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n",
       "        run_callbacks()\n",
       "      }\n",
       "    }\n",
       "\n",
       "    function on_error() {\n",
       "      console.error(\"failed to load \" + url);\n",
       "    }\n",
       "\n",
       "    for (var i = 0; i < css_urls.length; i++) {\n",
       "      var url = css_urls[i];\n",
       "      const element = document.createElement(\"link\");\n",
       "      element.onload = on_load;\n",
       "      element.onerror = on_error;\n",
       "      element.rel = \"stylesheet\";\n",
       "      element.type = \"text/css\";\n",
       "      element.href = url;\n",
       "      console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n",
       "      document.body.appendChild(element);\n",
       "    }\n",
       "\n",
       "    for (var i = 0; i < js_urls.length; i++) {\n",
       "      var url = js_urls[i];\n",
       "      var element = document.createElement('script');\n",
       "      element.onload = on_load;\n",
       "      element.onerror = on_error;\n",
       "      element.async = false;\n",
       "      element.src = url;\n",
       "      console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
       "      document.head.appendChild(element);\n",
       "    }\n",
       "  };var element = document.getElementById(\"1001\");\n",
       "  if (element == null) {\n",
       "    console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n",
       "    return false;\n",
       "  }\n",
       "\n",
       "  function inject_raw_css(css) {\n",
       "    const element = document.createElement(\"style\");\n",
       "    element.appendChild(document.createTextNode(css));\n",
       "    document.body.appendChild(element);\n",
       "  }\n",
       "\n",
       "  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n",
       "  var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n",
       "\n",
       "  var inline_js = [\n",
       "    function(Bokeh) {\n",
       "      Bokeh.set_log_level(\"info\");\n",
       "    },\n",
       "    \n",
       "    function(Bokeh) {\n",
       "      \n",
       "    },\n",
       "    function(Bokeh) {} // ensure no trailing comma for IE\n",
       "  ];\n",
       "\n",
       "  function run_inline_js() {\n",
       "    \n",
       "    if ((root.Bokeh !== undefined) || (force === true)) {\n",
       "      for (var i = 0; i < inline_js.length; i++) {\n",
       "        inline_js[i].call(root, root.Bokeh);\n",
       "      }if (force === true) {\n",
       "        display_loaded();\n",
       "      }} else if (Date.now() < root._bokeh_timeout) {\n",
       "      setTimeout(run_inline_js, 100);\n",
       "    } else if (!root._bokeh_failed_load) {\n",
       "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
       "      root._bokeh_failed_load = true;\n",
       "    } else if (force !== true) {\n",
       "      var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n",
       "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
       "    }\n",
       "\n",
       "  }\n",
       "\n",
       "  if (root._bokeh_is_loading === 0) {\n",
       "    console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
       "    run_inline_js();\n",
       "  } else {\n",
       "    load_libs(css_urls, js_urls, function() {\n",
       "      console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n",
       "      run_inline_js();\n",
       "    });\n",
       "  }\n",
       "}(window));"
      ],
      "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n  function now() {\n    return new Date();\n  }\n\n  var force = true;\n\n  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n    root._bokeh_onload_callbacks = [];\n    root._bokeh_is_loading = undefined;\n  }\n\n  \n\n  \n  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n    root._bokeh_timeout = Date.now() + 5000;\n    root._bokeh_failed_load = false;\n  }\n\n  var NB_LOAD_WARNING = {'data': {'text/html':\n     \"<div style='background-color: #fdd'>\\n\"+\n     \"<p>\\n\"+\n     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n     \"</p>\\n\"+\n     \"<ul>\\n\"+\n     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n     \"</ul>\\n\"+\n     \"<code>\\n\"+\n     \"from bokeh.resources import INLINE\\n\"+\n     \"output_notebook(resources=INLINE)\\n\"+\n     \"</code>\\n\"+\n     \"</div>\"}};\n\n  function display_loaded() {\n    var el = document.getElementById(\"1001\");\n    if (el != null) {\n      el.textContent = \"BokehJS is loading...\";\n    }\n    if (root.Bokeh !== undefined) {\n      if (el != null) {\n        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n      }\n    } else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(display_loaded, 100)\n    }\n  }\n\n\n  function run_callbacks() {\n    try {\n      root._bokeh_onload_callbacks.forEach(function(callback) {\n        if (callback != null)\n          callback();\n      });\n    } finally {\n      delete root._bokeh_onload_callbacks\n    }\n    console.debug(\"Bokeh: all callbacks have finished\");\n  }\n\n  function load_libs(css_urls, js_urls, callback) {\n    if (css_urls == null) css_urls = [];\n    if (js_urls 
== null) js_urls = [];\n\n    root._bokeh_onload_callbacks.push(callback);\n    if (root._bokeh_is_loading > 0) {\n      console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n      return null;\n    }\n    if (js_urls == null || js_urls.length === 0) {\n      run_callbacks();\n      return null;\n    }\n    console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n    root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n    function on_load() {\n      root._bokeh_is_loading--;\n      if (root._bokeh_is_loading === 0) {\n        console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n        run_callbacks()\n      }\n    }\n\n    function on_error() {\n      console.error(\"failed to load \" + url);\n    }\n\n    for (var i = 0; i < css_urls.length; i++) {\n      var url = css_urls[i];\n      const element = document.createElement(\"link\");\n      element.onload = on_load;\n      element.onerror = on_error;\n      element.rel = \"stylesheet\";\n      element.type = \"text/css\";\n      element.href = url;\n      console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n      document.body.appendChild(element);\n    }\n\n    for (var i = 0; i < js_urls.length; i++) {\n      var url = js_urls[i];\n      var element = document.createElement('script');\n      element.onload = on_load;\n      element.onerror = on_error;\n      element.async = false;\n      element.src = url;\n      console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n      document.head.appendChild(element);\n    }\n  };var element = document.getElementById(\"1001\");\n  if (element == null) {\n    console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. 
\")\n    return false;\n  }\n\n  function inject_raw_css(css) {\n    const element = document.createElement(\"style\");\n    element.appendChild(document.createTextNode(css));\n    document.body.appendChild(element);\n  }\n\n  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n  var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n\n  var inline_js = [\n    function(Bokeh) {\n      Bokeh.set_log_level(\"info\");\n    },\n    \n    function(Bokeh) {\n      \n    },\n    function(Bokeh) {} // ensure no trailing comma for IE\n  ];\n\n  function run_inline_js() {\n    \n    if ((root.Bokeh !== undefined) || (force === true)) {\n      for (var i = 0; i < inline_js.length; i++) {\n        inline_js[i].call(root, root.Bokeh);\n      }if (force === true) {\n        display_loaded();\n      }} else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(run_inline_js, 100);\n    } else if (!root._bokeh_failed_load) {\n      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n      root._bokeh_failed_load = true;\n    } else if (force !== true) {\n      var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n    }\n\n  }\n\n  if (root._bokeh_is_loading === 0) {\n    console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n    run_inline_js();\n  } else {\n    load_libs(css_urls, js_urls, function() {\n      console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n      run_inline_js();\n    });\n  }\n}(window));"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import bokeh\n",
    "import bokeh.plotting as bp\n",
    "bp.output_notebook()\n",
    "\n",
    "import datetime\n",
    "import socket\n",
    "def get_compact_timestamp():\n",
    "    \"\"\"Return the current local time formatted as `%Y%m%d.%H%M%S`.\"\"\"\n",
    "    return datetime.datetime.now().strftime(\"%Y%m%d.%H%M%S\")\n",
    "\n",
    "def _save_figure_to_bucket(fig, name, title=None, export_format=\"html\"):\n",
    "    \"\"\"Write a Bokeh figure under /tmp, copy it to GCS, and return its public URL.\n",
    "\n",
    "    Args:\n",
    "        fig: the Bokeh object to export.\n",
    "        name: base file name; a timestamp and `export_format` are appended.\n",
    "        title: title passed to `bp.save`; falls back to `name`. Unused for PNG.\n",
    "        export_format: file extension. 'png' goes through `bokeh.io.export_png`;\n",
    "            anything else is written with `bp.save` using CDN resources.\n",
    "\n",
    "    Returns:\n",
    "        The https://storage.googleapis.com/... URL of the uploaded file.\n",
    "    \"\"\"\n",
    "    now = get_compact_timestamp()\n",
    "    fname = f\"{name}.{now:s}.{export_format}\"\n",
    "    local_path = os.path.join(\"/tmp\", fname)\n",
    "    title = title or name\n",
    "    # Always export the figure passed in as `fig`, never a notebook-global.\n",
    "    if fname.endswith('.png'):\n",
    "        bokeh.io.export_png(fig, local_path)\n",
    "    else:\n",
    "        bp.save(fig, local_path, title=title,\n",
    "                resources=bokeh.resources.CDN)\n",
    "    hostname = socket.gethostname()\n",
    "    GCP_PROJECT=\"edge-probing\"\n",
    "    # Upload, then grant AllUsers read access so the URL below can be shared.\n",
    "    !gsutil cp /tmp/$fname gs://$GCP_PROJECT/$hostname/plots/$fname\n",
    "    !gsutil acl ch -u AllUsers:R gs://$GCP_PROJECT/$hostname/plots/$fname\n",
    "    url = f\"https://storage.googleapis.com/{GCP_PROJECT}/{hostname}/plots/{fname}\"\n",
    "    print(f\"Public URL: {url}\")\n",
    "    return url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "nbpresent": {
     "id": "e51ba443-98b7-41bb-8e01-9126d3b77c2e"
    }
   },
   "outputs": [],
   "source": [
    "# Columns that together identify one (run, task, split) group of rows.\n",
    "ID_COLS = ['run', 'task', 'split']\n",
    "\n",
    "def agg_label_group(df, task_predicate, label_predicate, group_name):\n",
    "    \"\"\"Sum the `*_count` columns over rows accepted by both predicates.\n",
    "\n",
    "    A row is kept when `task_predicate` accepts its 'task' and\n",
    "    `label_predicate` accepts its 'label'. Kept rows are grouped by ID_COLS\n",
    "    and the result's 'label' column is set to `group_name`.\n",
    "    \"\"\"\n",
    "    count_cols = [col for col in df.columns if col.endswith(\"_count\")]\n",
    "    task_ok = df['task'].map(task_predicate)\n",
    "    label_ok = df['label'].map(label_predicate)\n",
    "    selected = df[task_ok & label_ok]\n",
    "    totals = selected.groupby(by=ID_COLS).agg(dict.fromkeys(count_cols, \"sum\"))\n",
    "    totals = totals.reset_index()\n",
    "    totals['label'] = group_name\n",
    "    return totals\n",
    "\n",
    "def agg_stratifier_group(df, stratifier, key_predicate, group_name):\n",
    "    \"\"\"Sum the `*_count` columns over rows belonging to selected strata.\n",
    "\n",
    "    A row is selected when its 'stratifier' equals `stratifier` and\n",
    "    `key_predicate` accepts its 'stratum_key'. Selected rows are grouped by\n",
    "    ID_COLS and the result's 'label' column is set to `group_name`.\n",
    "    \"\"\"\n",
    "    count_cols = [col for col in df.columns if col.endswith(\"_count\")]\n",
    "    # `and` short-circuits, so key_predicate is only called on keys that\n",
    "    # belong to the requested stratifier.\n",
    "    row_mask = []\n",
    "    for row_stratifier, row_key in zip(df['stratifier'], df['stratum_key']):\n",
    "        row_mask.append(row_stratifier == stratifier and key_predicate(row_key))\n",
    "    selected = df[row_mask]\n",
    "    totals = selected.groupby(by=ID_COLS).agg(dict.fromkeys(count_cols, \"sum\"))\n",
    "    totals = totals.reset_index()\n",
    "    totals['label'] = group_name\n",
    "    return totals\n",
    "\n",
    "def load_scores_file(filename, tag=None, seed=None):\n",
    "    \"\"\"Load a scores TSV and add aggregate rows plus experiment metadata.\n",
    "\n",
    "    Args:\n",
    "        filename: path to a tab-separated scores file with a header row.\n",
    "        tag: value for a leading 'tag' column; the column is omitted when None.\n",
    "        seed: value stored in the 'seed' column of every row.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame holding the file's rows followed by the custom aggregate\n",
    "        rows, with 'exp_name', 'exp_type', 'layer_num' and 'seed' (and\n",
    "        optionally 'tag') columns inserted at the front.\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(filename, sep=\"\\t\", header=0)\n",
    "    # Drop the saved index column when present; a file written without one\n",
    "    # should still load instead of raising KeyError.\n",
    "    df = df.drop(columns=['Unnamed: 0'], errors='ignore')\n",
    "    df['task'] = df['task'].map(analysis.clean_task_name)\n",
    "    # Provide placeholder stratification columns so the aggregations below\n",
    "    # can read them unconditionally.\n",
    "    if \"stratifier\" not in df.columns:\n",
    "        df[\"stratifier\"] = None\n",
    "    if \"stratum_key\" not in df.columns:\n",
    "        df[\"stratum_key\"] = 0\n",
    "\n",
    "    ###\n",
    "    # Add additional custom aggregations\n",
    "    extra_groups = [\n",
    "        # SRL core, non-core, and cleaned micro F1\n",
    "        agg_label_group(df, analysis.is_srl_task, analysis.is_core_role, \"_core_\"),\n",
    "        agg_label_group(df, analysis.is_srl_task, analysis.is_non_core_role, \"_non_core_\"),\n",
    "        agg_label_group(df, analysis.is_srl_task, analysis.is_core_or_noncore, \"_clean_micro_\"),\n",
    "        # Constituents: split into POS, nonterminals\n",
    "        agg_stratifier_group(df, 'info.height', lambda x: int(x) == 1, \"_pos_\"),\n",
    "        agg_stratifier_group(df, 'info.height', lambda x: int(x) > 1, \"_nonterminal_\"),\n",
    "        # Relations: ignore negative class (no_relation)\n",
    "        agg_label_group(df, analysis.is_relation_task, analysis.is_positive_relation, \"_clean_micro_\"),\n",
    "    ]\n",
    "    df = pd.concat([df] + extra_groups, ignore_index=True, sort=False)\n",
    "\n",
    "    # The experiment name is the name of the directory containing the run directory.\n",
    "    df.insert(0, \"exp_name\", df['run'].map(lambda p: os.path.basename(os.path.dirname(p.strip(\"/\")))))\n",
    "    df.insert(1, \"exp_type\", df['exp_name'].map(analysis.get_exp_type))\n",
    "    df.insert(1, \"layer_num\", df['exp_name'].map(analysis.get_layer_num))\n",
    "    if tag is not None:\n",
    "        df.insert(0, \"tag\", tag)\n",
    "    df.insert(1, \"seed\", seed)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "a5dbfdad-dd87-4c19-9507-1231e5251cb2"
    }
   },
   "source": [
    "## Specify score files and load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "nbpresent": {
     "id": "3de2e9a1-4976-42ab-b927-5a9081f01054"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['rel-semeval' 'coref-ontonotes']\n",
      "['bert-base-uncased-mix']\n"
     ]
    }
   ],
   "source": [
    "# Add (tag, path/to/scores.tsv) tuples here; results will be concatenated.\n",
    "score_files = [\n",
    "#     (\"base\", \"/nfs/jiant/exp/iftenney/20190721-test-ep-bert/stats.tsv\"),\n",
    "#     (\"base\", \"/nfs/jiant/exp/iftenney/20190721-test-ep-bert-medium/stats.tsv\"),\n",
    "    (\"base\", \"/nfs/jiant/exp/iftenney/20190721-bert-base-layers/scores.tsv\"),\n",
    "]\n",
    "\n",
    "# Load every listed file with its tag attached, then stack them into one frame.\n",
    "dfs = []\n",
    "for tag, score_file in score_files:\n",
    "    dfs.append(load_scores_file(score_file, tag=tag))\n",
    "\n",
    "df = pd.concat(dfs, ignore_index=True, sort=False)\n",
    "def _format_display_col(exp_type, layer_num, tag):\n",
    "    \"\"\"Build a display label of the form `exp_type[-layer_num][ (tag)]`.\n",
    "\n",
    "    The layer and tag parts are only appended when their value is truthy.\n",
    "    \"\"\"\n",
    "    suffix = \"\"\n",
    "    if layer_num:\n",
    "        suffix += f\"-{layer_num}\"\n",
    "    if tag:\n",
    "        suffix += f\" ({tag})\"\n",
    "    # Return exp_type untouched when there is nothing to append.\n",
    "    return exp_type + suffix if suffix else exp_type\n",
    "\n",
    "# One display label per row, built from its experiment type, layer and tag.\n",
    "df['display_col'] = [\n",
    "    _format_display_col(exp_type, layer_num, tag)\n",
    "    for exp_type, layer_num, tag in zip(df.exp_type, df.layer_num, df.tag)\n",
    "]\n",
    "print(df['task'].unique())\n",
    "print(df['exp_type'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "nbpresent": {
     "id": "975c73b5-a1e4-4ff8-933f-a97fe7de3220"
    }
   },
   "outputs": [],
   "source": [
    "# NOTE(review): presumably adds the f1_score / f1_errn95 columns read below\n",
    "# to `df` in place (the return value is unused) -- confirm in analysis.py.\n",
    "analysis.score_from_confusion_matrix(df)\n",
    "\n",
    "def _get_final_score(row):\n",
    "    \"\"\"Return the (score, error) pair for a row: its 'f1_score' and 'f1_errn95'.\"\"\"\n",
    "    return row['f1_score'], row['f1_errn95']\n",
    "\n",
    "# Collect the per-row pairs first so that a zero-row frame yields two empty\n",
    "# columns instead of failing to unpack `zip(*...)`.\n",
    "_final_scores = [_get_final_score(row) for _, row in df.iterrows()]\n",
    "df['score'] = [score for score, _ in _final_scores]\n",
    "df['score_errn95'] = [err for _, err in _final_scores]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "nbpresent": {
     "id": "6c374262-2146-420c-9592-4d92dbb92889"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tag</th>\n",
       "      <th>seed</th>\n",
       "      <th>exp_name</th>\n",
       "      <th>layer_num</th>\n",
       "      <th>exp_type</th>\n",
       "      <th>run</th>\n",
       "      <th>task</th>\n",
       "      <th>split</th>\n",
       "      <th>label</th>\n",
       "      <th>fn_count</th>\n",
       "      <th>...</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>f1_score</th>\n",
       "      <th>accuracy_errn95</th>\n",
       "      <th>precision_errn95</th>\n",
       "      <th>recall_errn95</th>\n",
       "      <th>f1_errn95</th>\n",
       "      <th>score</th>\n",
       "      <th>score_errn95</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-rel-semeval</td>\n",
       "      <td>00</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>val</td>\n",
       "      <td>Cause-Effect(e1,e2)</td>\n",
       "      <td>32</td>\n",
       "      <td>...</td>\n",
       "      <td>0.961706</td>\n",
       "      <td>0.571429</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.421053</td>\n",
       "      <td>0.011096</td>\n",
       "      <td>0.183303</td>\n",
       "      <td>0.133361</td>\n",
       "      <td>0.154394</td>\n",
       "      <td>0.421053</td>\n",
       "      <td>0.154394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-rel-semeval</td>\n",
       "      <td>00</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>val</td>\n",
       "      <td>Cause-Effect(e2,e1)</td>\n",
       "      <td>40</td>\n",
       "      <td>...</td>\n",
       "      <td>0.950392</td>\n",
       "      <td>0.776316</td>\n",
       "      <td>0.595960</td>\n",
       "      <td>0.674286</td>\n",
       "      <td>0.012555</td>\n",
       "      <td>0.093688</td>\n",
       "      <td>0.096663</td>\n",
       "      <td>0.095152</td>\n",
       "      <td>0.674286</td>\n",
       "      <td>0.095152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-rel-semeval</td>\n",
       "      <td>00</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>val</td>\n",
       "      <td>Component-Whole(e1,e2)</td>\n",
       "      <td>32</td>\n",
       "      <td>...</td>\n",
       "      <td>0.964317</td>\n",
       "      <td>0.808511</td>\n",
       "      <td>0.542857</td>\n",
       "      <td>0.649573</td>\n",
       "      <td>0.010726</td>\n",
       "      <td>0.112492</td>\n",
       "      <td>0.116701</td>\n",
       "      <td>0.114558</td>\n",
       "      <td>0.649573</td>\n",
       "      <td>0.114558</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-rel-semeval</td>\n",
       "      <td>00</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>val</td>\n",
       "      <td>Component-Whole(e2,e1)</td>\n",
       "      <td>37</td>\n",
       "      <td>...</td>\n",
       "      <td>0.956484</td>\n",
       "      <td>0.717391</td>\n",
       "      <td>0.471429</td>\n",
       "      <td>0.568966</td>\n",
       "      <td>0.011797</td>\n",
       "      <td>0.130121</td>\n",
       "      <td>0.116941</td>\n",
       "      <td>0.123180</td>\n",
       "      <td>0.568966</td>\n",
       "      <td>0.123180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-rel-semeval</td>\n",
       "      <td>00</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>val</td>\n",
       "      <td>Content-Container(e1,e2)</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>0.981723</td>\n",
       "      <td>0.756757</td>\n",
       "      <td>0.700000</td>\n",
       "      <td>0.727273</td>\n",
       "      <td>0.007745</td>\n",
       "      <td>0.138246</td>\n",
       "      <td>0.142015</td>\n",
       "      <td>0.140106</td>\n",
       "      <td>0.727273</td>\n",
       "      <td>0.140106</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    tag  seed                                    exp_name layer_num  \\\n",
       "0  base  None  bert-base-uncased-mix_00-edges-rel-semeval        00   \n",
       "1  base  None  bert-base-uncased-mix_00-edges-rel-semeval        00   \n",
       "2  base  None  bert-base-uncased-mix_00-edges-rel-semeval        00   \n",
       "3  base  None  bert-base-uncased-mix_00-edges-rel-semeval        00   \n",
       "4  base  None  bert-base-uncased-mix_00-edges-rel-semeval        00   \n",
       "\n",
       "                exp_type                                                run  \\\n",
       "0  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "1  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "2  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "3  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "4  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "\n",
       "          task split                     label  fn_count      ...       \\\n",
       "0  rel-semeval   val       Cause-Effect(e1,e2)        32      ...        \n",
       "1  rel-semeval   val       Cause-Effect(e2,e1)        40      ...        \n",
       "2  rel-semeval   val    Component-Whole(e1,e2)        32      ...        \n",
       "3  rel-semeval   val    Component-Whole(e2,e1)        37      ...        \n",
       "4  rel-semeval   val  Content-Container(e1,e2)        12      ...        \n",
       "\n",
       "   accuracy  precision    recall  f1_score  accuracy_errn95 precision_errn95  \\\n",
       "0  0.961706   0.571429  0.333333  0.421053         0.011096         0.183303   \n",
       "1  0.950392   0.776316  0.595960  0.674286         0.012555         0.093688   \n",
       "2  0.964317   0.808511  0.542857  0.649573         0.010726         0.112492   \n",
       "3  0.956484   0.717391  0.471429  0.568966         0.011797         0.130121   \n",
       "4  0.981723   0.756757  0.700000  0.727273         0.007745         0.138246   \n",
       "\n",
       "   recall_errn95  f1_errn95     score  score_errn95  \n",
       "0       0.133361   0.154394  0.421053      0.154394  \n",
       "1       0.096663   0.095152  0.674286      0.095152  \n",
       "2       0.116701   0.114558  0.649573      0.114558  \n",
       "3       0.116941   0.123180  0.568966      0.123180  \n",
       "4       0.142015   0.140106  0.727273      0.140106  \n",
       "\n",
       "[5 rows x 29 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "da90cd74-743a-4995-8144-e561826d6206"
    }
   },
   "source": [
    "For DPR, we need to average across multiple runs to get a good estimate of performance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "nbpresent": {
     "id": "7193ffc4-31d2-42d3-b198-39ce7dfd2b5e"
    }
   },
   "outputs": [],
   "source": [
    "# Average DPR scores over seeds: append one synthetic row per group with seed == \"_mean_\".\n",
    "# Mean rows left over from an earlier execution of this cell are kept out of the\n",
    "# average and replaced below, so re-running the cell neither skews nor duplicates them.\n",
    "_stale_means = (df['task'] == 'dpr') & df['seed'].isin([\"_mean_\"])\n",
    "\n",
    "mask = df['task'] == 'dpr'\n",
    "mask &= df['label'] != \"__run_info__\"\n",
    "mask &= df['seed'].notnull()\n",
    "mask &= ~_stale_means\n",
    "gb_cols = [\"tag\", \"exp_name\", \"exp_type\", \"task\", \"label\", \"split\", \"display_col\"]\n",
    "gb = df[mask].groupby(by=gb_cols)\n",
    "new_rows = []\n",
    "for key, idxs in gb.groups.items():\n",
    "    _group_scores = df.loc[idxs, \"score\"]\n",
    "    new_row = dict(zip(gb_cols, key))\n",
    "    new_row[\"seed\"] = \"_mean_\"\n",
    "    new_row[\"score\"] = _group_scores.mean()\n",
    "    # 1.96 * standard error of the mean (sample variance / group size).\n",
    "    new_row[\"score_errn95\"] = 1.96 * np.sqrt(_group_scores.var() / len(idxs))\n",
    "    new_rows.append(new_row)\n",
    "\n",
    "agg_df = pd.DataFrame.from_records(new_rows)\n",
    "df = pd.concat([df[~_stale_means], agg_df], ignore_index=True, sort=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "8ae1f12b-87dc-41f6-9351-7ee325abb10e"
    }
   },
   "source": [
    "For SemEval 2010 Task 8, the official metric is macro-averaged F1 over non-Other labels. Compute this so we can compare to SOTA."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "nbpresent": {
     "id": "371b0ce3-4892-4052-aaf1-118a25897307"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "run,split,score\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_00-edges-rel-semeval/run,test,0.5293\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_00-edges-rel-semeval/run,val,0.5096\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_01-edges-rel-semeval/run,test,0.5858\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_01-edges-rel-semeval/run,val,0.5713\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_02-edges-rel-semeval/run,test,0.6075\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_02-edges-rel-semeval/run,val,0.5922\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_03-edges-rel-semeval/run,test,0.6082\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_03-edges-rel-semeval/run,val,0.6006\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_04-edges-rel-semeval/run,test,0.6655\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_04-edges-rel-semeval/run,val,0.6420\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_05-edges-rel-semeval/run,test,0.6826\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_05-edges-rel-semeval/run,val,0.6725\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_06-edges-rel-semeval/run,test,0.6752\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_06-edges-rel-semeval/run,val,0.6608\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_07-edges-rel-semeval/run,test,0.7071\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_07-edges-rel-semeval/run,val,0.7076\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_08-edges-rel-semeval/run,test,0.7260\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_08-edges-rel-semeval/run,val,0.7334\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_09-edges-rel-semeval/run,test,0.7313\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_09-edges-rel-semeval/run,val,0.7260\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_10-edges-rel-semeval/run,test,0.7344\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_10-edges-rel-semeval/run,val,0.7381\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_11-edges-rel-semeval/run,test,0.7406\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_11-edges-rel-semeval/run,val,0.7458\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_12-edges-rel-semeval/run,test,0.7375\n",
      "/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_12-edges-rel-semeval/run,val,0.7431\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Unweighted mean of the per-label scores for each (run, split) of rel-semeval,\n",
    "# restricted to rows with a split and a label accepted by analysis.is_positive_relation.\n",
    "mask = (\n",
    "    (df['task'] == 'rel-semeval')\n",
    "    & df['split'].notnull()\n",
    "    & df['label'].map(analysis.is_positive_relation)\n",
    ")\n",
    "_id_cols = ['run', 'split']\n",
    "_agg_cols = ['score']\n",
    "gb = df.loc[mask, _id_cols + _agg_cols].groupby(_id_cols)\n",
    "afd = gb.mean().reset_index()\n",
    "\n",
    "# Emit as CSV text with scores rounded to four decimals.\n",
    "csv_args = {'float_format': \"%.4f\"}\n",
    "print(afd.to_csv(index=False, **csv_args))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "6a0daefb-44bf-4062-94ec-fba2bc96d001"
    }
   },
   "source": [
    "## Compute clean metrics for each task\n",
    "\n",
    "For most tasks this is just the micro or macro average F1, but we need to ignore the 0 label for coref, and drop references and continuations for SRL."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "nbpresent": {
     "id": "57b4606b-ac23-48ed-85aa-4b33b4075f12"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tag</th>\n",
       "      <th>seed</th>\n",
       "      <th>exp_name</th>\n",
       "      <th>layer_num</th>\n",
       "      <th>exp_type</th>\n",
       "      <th>run</th>\n",
       "      <th>task</th>\n",
       "      <th>split</th>\n",
       "      <th>label</th>\n",
       "      <th>fn_count</th>\n",
       "      <th>...</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>f1_score</th>\n",
       "      <th>accuracy_errn95</th>\n",
       "      <th>precision_errn95</th>\n",
       "      <th>recall_errn95</th>\n",
       "      <th>f1_errn95</th>\n",
       "      <th>score</th>\n",
       "      <th>score_errn95</th>\n",
       "      <th>display_row</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-rel-semeval</td>\n",
       "      <td>00</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>1152</td>\n",
       "      <td>...</td>\n",
       "      <td>0.721897</td>\n",
       "      <td>0.490941</td>\n",
       "      <td>0.584429</td>\n",
       "      <td>0.001567</td>\n",
       "      <td>0.022386</td>\n",
       "      <td>0.020597</td>\n",
       "      <td>0.021455</td>\n",
       "      <td>0.584429</td>\n",
       "      <td>0.021455</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_01-edges-rel-semeval</td>\n",
       "      <td>01</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>987</td>\n",
       "      <td>...</td>\n",
       "      <td>0.755477</td>\n",
       "      <td>0.563853</td>\n",
       "      <td>0.645749</td>\n",
       "      <td>0.001478</td>\n",
       "      <td>0.020498</td>\n",
       "      <td>0.020432</td>\n",
       "      <td>0.020465</td>\n",
       "      <td>0.645749</td>\n",
       "      <td>0.020465</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_02-edges-rel-semeval</td>\n",
       "      <td>02</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>933</td>\n",
       "      <td>...</td>\n",
       "      <td>0.786052</td>\n",
       "      <td>0.587715</td>\n",
       "      <td>0.672566</td>\n",
       "      <td>0.001423</td>\n",
       "      <td>0.019540</td>\n",
       "      <td>0.020281</td>\n",
       "      <td>0.019904</td>\n",
       "      <td>0.672566</td>\n",
       "      <td>0.019904</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_03-edges-rel-semeval</td>\n",
       "      <td>03</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>880</td>\n",
       "      <td>...</td>\n",
       "      <td>0.799422</td>\n",
       "      <td>0.611136</td>\n",
       "      <td>0.692712</td>\n",
       "      <td>0.001386</td>\n",
       "      <td>0.018870</td>\n",
       "      <td>0.020085</td>\n",
       "      <td>0.019459</td>\n",
       "      <td>0.692712</td>\n",
       "      <td>0.019459</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_04-edges-rel-semeval</td>\n",
       "      <td>04</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>747</td>\n",
       "      <td>...</td>\n",
       "      <td>0.795383</td>\n",
       "      <td>0.669907</td>\n",
       "      <td>0.727273</td>\n",
       "      <td>0.001336</td>\n",
       "      <td>0.018111</td>\n",
       "      <td>0.019375</td>\n",
       "      <td>0.018722</td>\n",
       "      <td>0.727273</td>\n",
       "      <td>0.018722</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_05-edges-rel-semeval</td>\n",
       "      <td>05</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>674</td>\n",
       "      <td>...</td>\n",
       "      <td>0.813204</td>\n",
       "      <td>0.702165</td>\n",
       "      <td>0.753616</td>\n",
       "      <td>0.001278</td>\n",
       "      <td>0.017281</td>\n",
       "      <td>0.018842</td>\n",
       "      <td>0.018028</td>\n",
       "      <td>0.753616</td>\n",
       "      <td>0.018028</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_06-edges-rel-semeval</td>\n",
       "      <td>06</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>693</td>\n",
       "      <td>...</td>\n",
       "      <td>0.825013</td>\n",
       "      <td>0.693769</td>\n",
       "      <td>0.753721</td>\n",
       "      <td>0.001270</td>\n",
       "      <td>0.017071</td>\n",
       "      <td>0.018991</td>\n",
       "      <td>0.017980</td>\n",
       "      <td>0.753721</td>\n",
       "      <td>0.017980</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_07-edges-rel-semeval</td>\n",
       "      <td>07</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>626</td>\n",
       "      <td>...</td>\n",
       "      <td>0.833928</td>\n",
       "      <td>0.723376</td>\n",
       "      <td>0.774728</td>\n",
       "      <td>0.001224</td>\n",
       "      <td>0.016463</td>\n",
       "      <td>0.018431</td>\n",
       "      <td>0.017391</td>\n",
       "      <td>0.774728</td>\n",
       "      <td>0.017391</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_08-edges-rel-semeval</td>\n",
       "      <td>08</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>548</td>\n",
       "      <td>...</td>\n",
       "      <td>0.841924</td>\n",
       "      <td>0.757844</td>\n",
       "      <td>0.797674</td>\n",
       "      <td>0.001172</td>\n",
       "      <td>0.015843</td>\n",
       "      <td>0.017650</td>\n",
       "      <td>0.016698</td>\n",
       "      <td>0.797674</td>\n",
       "      <td>0.016698</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_09-edges-rel-semeval</td>\n",
       "      <td>09</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>541</td>\n",
       "      <td>...</td>\n",
       "      <td>0.855440</td>\n",
       "      <td>0.760937</td>\n",
       "      <td>0.805426</td>\n",
       "      <td>0.001146</td>\n",
       "      <td>0.015362</td>\n",
       "      <td>0.017573</td>\n",
       "      <td>0.016393</td>\n",
       "      <td>0.805426</td>\n",
       "      <td>0.016393</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_10-edges-rel-semeval</td>\n",
       "      <td>10</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>502</td>\n",
       "      <td>...</td>\n",
       "      <td>0.851959</td>\n",
       "      <td>0.778171</td>\n",
       "      <td>0.813395</td>\n",
       "      <td>0.001130</td>\n",
       "      <td>0.015310</td>\n",
       "      <td>0.017118</td>\n",
       "      <td>0.016164</td>\n",
       "      <td>0.813395</td>\n",
       "      <td>0.016164</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_11-edges-rel-semeval</td>\n",
       "      <td>11</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>505</td>\n",
       "      <td>...</td>\n",
       "      <td>0.860499</td>\n",
       "      <td>0.776845</td>\n",
       "      <td>0.816535</td>\n",
       "      <td>0.001117</td>\n",
       "      <td>0.015024</td>\n",
       "      <td>0.017155</td>\n",
       "      <td>0.016019</td>\n",
       "      <td>0.816535</td>\n",
       "      <td>0.016019</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_12-edges-rel-semeval</td>\n",
       "      <td>12</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>test</td>\n",
       "      <td>_clean_micro_</td>\n",
       "      <td>512</td>\n",
       "      <td>...</td>\n",
       "      <td>0.859597</td>\n",
       "      <td>0.773752</td>\n",
       "      <td>0.814419</td>\n",
       "      <td>0.001123</td>\n",
       "      <td>0.015087</td>\n",
       "      <td>0.017239</td>\n",
       "      <td>0.016091</td>\n",
       "      <td>0.814419</td>\n",
       "      <td>0.016091</td>\n",
       "      <td>rel-semeval-_clean_micro_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-coref-ontonotes</td>\n",
       "      <td>00</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>1496</td>\n",
       "      <td>...</td>\n",
       "      <td>0.763575</td>\n",
       "      <td>0.751660</td>\n",
       "      <td>0.757571</td>\n",
       "      <td>0.003592</td>\n",
       "      <td>0.010814</td>\n",
       "      <td>0.010911</td>\n",
       "      <td>0.010862</td>\n",
       "      <td>0.757571</td>\n",
       "      <td>0.010862</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_01-edges-coref-ontonotes</td>\n",
       "      <td>01</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>1068</td>\n",
       "      <td>...</td>\n",
       "      <td>0.830290</td>\n",
       "      <td>0.822709</td>\n",
       "      <td>0.826482</td>\n",
       "      <td>0.003094</td>\n",
       "      <td>0.009523</td>\n",
       "      <td>0.009645</td>\n",
       "      <td>0.009583</td>\n",
       "      <td>0.826482</td>\n",
       "      <td>0.009583</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_02-edges-coref-ontonotes</td>\n",
       "      <td>02</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>888</td>\n",
       "      <td>...</td>\n",
       "      <td>0.835258</td>\n",
       "      <td>0.852590</td>\n",
       "      <td>0.843835</td>\n",
       "      <td>0.002967</td>\n",
       "      <td>0.009272</td>\n",
       "      <td>0.008953</td>\n",
       "      <td>0.009109</td>\n",
       "      <td>0.843835</td>\n",
       "      <td>0.009109</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_03-edges-coref-ontonotes</td>\n",
       "      <td>03</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>787</td>\n",
       "      <td>...</td>\n",
       "      <td>0.841015</td>\n",
       "      <td>0.869356</td>\n",
       "      <td>0.854951</td>\n",
       "      <td>0.002875</td>\n",
       "      <td>0.009082</td>\n",
       "      <td>0.008511</td>\n",
       "      <td>0.008787</td>\n",
       "      <td>0.854951</td>\n",
       "      <td>0.008787</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_04-edges-coref-ontonotes</td>\n",
       "      <td>04</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>707</td>\n",
       "      <td>...</td>\n",
       "      <td>0.851129</td>\n",
       "      <td>0.882636</td>\n",
       "      <td>0.866596</td>\n",
       "      <td>0.002767</td>\n",
       "      <td>0.008827</td>\n",
       "      <td>0.008128</td>\n",
       "      <td>0.008463</td>\n",
       "      <td>0.866596</td>\n",
       "      <td>0.008463</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_05-edges-coref-ontonotes</td>\n",
       "      <td>05</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>699</td>\n",
       "      <td>...</td>\n",
       "      <td>0.868254</td>\n",
       "      <td>0.883964</td>\n",
       "      <td>0.876038</td>\n",
       "      <td>0.002662</td>\n",
       "      <td>0.008465</td>\n",
       "      <td>0.008088</td>\n",
       "      <td>0.008272</td>\n",
       "      <td>0.876038</td>\n",
       "      <td>0.008272</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_06-edges-coref-ontonotes</td>\n",
       "      <td>06</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>673</td>\n",
       "      <td>...</td>\n",
       "      <td>0.880243</td>\n",
       "      <td>0.888280</td>\n",
       "      <td>0.884244</td>\n",
       "      <td>0.002572</td>\n",
       "      <td>0.008162</td>\n",
       "      <td>0.007955</td>\n",
       "      <td>0.008057</td>\n",
       "      <td>0.884244</td>\n",
       "      <td>0.008057</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_07-edges-coref-ontonotes</td>\n",
       "      <td>07</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>619</td>\n",
       "      <td>...</td>\n",
       "      <td>0.888396</td>\n",
       "      <td>0.897244</td>\n",
       "      <td>0.892798</td>\n",
       "      <td>0.002480</td>\n",
       "      <td>0.007912</td>\n",
       "      <td>0.007668</td>\n",
       "      <td>0.007788</td>\n",
       "      <td>0.892798</td>\n",
       "      <td>0.007788</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_08-edges-coref-ontonotes</td>\n",
       "      <td>08</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>548</td>\n",
       "      <td>...</td>\n",
       "      <td>0.888528</td>\n",
       "      <td>0.909031</td>\n",
       "      <td>0.898663</td>\n",
       "      <td>0.002422</td>\n",
       "      <td>0.007857</td>\n",
       "      <td>0.007262</td>\n",
       "      <td>0.007548</td>\n",
       "      <td>0.898663</td>\n",
       "      <td>0.007548</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_09-edges-coref-ontonotes</td>\n",
       "      <td>09</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>571</td>\n",
       "      <td>...</td>\n",
       "      <td>0.897761</td>\n",
       "      <td>0.905212</td>\n",
       "      <td>0.901471</td>\n",
       "      <td>0.002381</td>\n",
       "      <td>0.007619</td>\n",
       "      <td>0.007397</td>\n",
       "      <td>0.007507</td>\n",
       "      <td>0.901471</td>\n",
       "      <td>0.007507</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_10-edges-coref-ontonotes</td>\n",
       "      <td>10</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>530</td>\n",
       "      <td>...</td>\n",
       "      <td>0.898006</td>\n",
       "      <td>0.912019</td>\n",
       "      <td>0.904958</td>\n",
       "      <td>0.002345</td>\n",
       "      <td>0.007584</td>\n",
       "      <td>0.007153</td>\n",
       "      <td>0.007362</td>\n",
       "      <td>0.904958</td>\n",
       "      <td>0.007362</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_11-edges-coref-ontonotes</td>\n",
       "      <td>11</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>507</td>\n",
       "      <td>...</td>\n",
       "      <td>0.892574</td>\n",
       "      <td>0.915837</td>\n",
       "      <td>0.904056</td>\n",
       "      <td>0.002361</td>\n",
       "      <td>0.007720</td>\n",
       "      <td>0.007011</td>\n",
       "      <td>0.007348</td>\n",
       "      <td>0.904056</td>\n",
       "      <td>0.007348</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>base</td>\n",
       "      <td>None</td>\n",
       "      <td>bert-base-uncased-mix_12-edges-coref-ontonotes</td>\n",
       "      <td>12</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>test</td>\n",
       "      <td>1</td>\n",
       "      <td>589</td>\n",
       "      <td>...</td>\n",
       "      <td>0.902075</td>\n",
       "      <td>0.902224</td>\n",
       "      <td>0.902150</td>\n",
       "      <td>0.002369</td>\n",
       "      <td>0.007505</td>\n",
       "      <td>0.007500</td>\n",
       "      <td>0.007503</td>\n",
       "      <td>0.902150</td>\n",
       "      <td>0.007503</td>\n",
       "      <td>coref-ontonotes-1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>26 rows × 30 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     tag  seed                                        exp_name layer_num  \\\n",
       "0   base  None      bert-base-uncased-mix_00-edges-rel-semeval        00   \n",
       "1   base  None      bert-base-uncased-mix_01-edges-rel-semeval        01   \n",
       "2   base  None      bert-base-uncased-mix_02-edges-rel-semeval        02   \n",
       "3   base  None      bert-base-uncased-mix_03-edges-rel-semeval        03   \n",
       "4   base  None      bert-base-uncased-mix_04-edges-rel-semeval        04   \n",
       "5   base  None      bert-base-uncased-mix_05-edges-rel-semeval        05   \n",
       "6   base  None      bert-base-uncased-mix_06-edges-rel-semeval        06   \n",
       "7   base  None      bert-base-uncased-mix_07-edges-rel-semeval        07   \n",
       "8   base  None      bert-base-uncased-mix_08-edges-rel-semeval        08   \n",
       "9   base  None      bert-base-uncased-mix_09-edges-rel-semeval        09   \n",
       "10  base  None      bert-base-uncased-mix_10-edges-rel-semeval        10   \n",
       "11  base  None      bert-base-uncased-mix_11-edges-rel-semeval        11   \n",
       "12  base  None      bert-base-uncased-mix_12-edges-rel-semeval        12   \n",
       "13  base  None  bert-base-uncased-mix_00-edges-coref-ontonotes        00   \n",
       "14  base  None  bert-base-uncased-mix_01-edges-coref-ontonotes        01   \n",
       "15  base  None  bert-base-uncased-mix_02-edges-coref-ontonotes        02   \n",
       "16  base  None  bert-base-uncased-mix_03-edges-coref-ontonotes        03   \n",
       "17  base  None  bert-base-uncased-mix_04-edges-coref-ontonotes        04   \n",
       "18  base  None  bert-base-uncased-mix_05-edges-coref-ontonotes        05   \n",
       "19  base  None  bert-base-uncased-mix_06-edges-coref-ontonotes        06   \n",
       "20  base  None  bert-base-uncased-mix_07-edges-coref-ontonotes        07   \n",
       "21  base  None  bert-base-uncased-mix_08-edges-coref-ontonotes        08   \n",
       "22  base  None  bert-base-uncased-mix_09-edges-coref-ontonotes        09   \n",
       "23  base  None  bert-base-uncased-mix_10-edges-coref-ontonotes        10   \n",
       "24  base  None  bert-base-uncased-mix_11-edges-coref-ontonotes        11   \n",
       "25  base  None  bert-base-uncased-mix_12-edges-coref-ontonotes        12   \n",
       "\n",
       "                 exp_type                                                run  \\\n",
       "0   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "1   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "2   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "3   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "4   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "5   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "6   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "7   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "8   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "9   bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "10  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "11  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "12  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "13  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "14  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "15  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "16  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "17  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "18  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "19  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "20  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "21  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "22  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "23  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "24  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "25  bert-base-uncased-mix  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "\n",
       "               task split          label  fn_count            ...              \\\n",
       "0       rel-semeval  test  _clean_micro_      1152            ...               \n",
       "1       rel-semeval  test  _clean_micro_       987            ...               \n",
       "2       rel-semeval  test  _clean_micro_       933            ...               \n",
       "3       rel-semeval  test  _clean_micro_       880            ...               \n",
       "4       rel-semeval  test  _clean_micro_       747            ...               \n",
       "5       rel-semeval  test  _clean_micro_       674            ...               \n",
       "6       rel-semeval  test  _clean_micro_       693            ...               \n",
       "7       rel-semeval  test  _clean_micro_       626            ...               \n",
       "8       rel-semeval  test  _clean_micro_       548            ...               \n",
       "9       rel-semeval  test  _clean_micro_       541            ...               \n",
       "10      rel-semeval  test  _clean_micro_       502            ...               \n",
       "11      rel-semeval  test  _clean_micro_       505            ...               \n",
       "12      rel-semeval  test  _clean_micro_       512            ...               \n",
       "13  coref-ontonotes  test              1      1496            ...               \n",
       "14  coref-ontonotes  test              1      1068            ...               \n",
       "15  coref-ontonotes  test              1       888            ...               \n",
       "16  coref-ontonotes  test              1       787            ...               \n",
       "17  coref-ontonotes  test              1       707            ...               \n",
       "18  coref-ontonotes  test              1       699            ...               \n",
       "19  coref-ontonotes  test              1       673            ...               \n",
       "20  coref-ontonotes  test              1       619            ...               \n",
       "21  coref-ontonotes  test              1       548            ...               \n",
       "22  coref-ontonotes  test              1       571            ...               \n",
       "23  coref-ontonotes  test              1       530            ...               \n",
       "24  coref-ontonotes  test              1       507            ...               \n",
       "25  coref-ontonotes  test              1       589            ...               \n",
       "\n",
       "    precision    recall  f1_score accuracy_errn95  precision_errn95  \\\n",
       "0    0.721897  0.490941  0.584429        0.001567          0.022386   \n",
       "1    0.755477  0.563853  0.645749        0.001478          0.020498   \n",
       "2    0.786052  0.587715  0.672566        0.001423          0.019540   \n",
       "3    0.799422  0.611136  0.692712        0.001386          0.018870   \n",
       "4    0.795383  0.669907  0.727273        0.001336          0.018111   \n",
       "5    0.813204  0.702165  0.753616        0.001278          0.017281   \n",
       "6    0.825013  0.693769  0.753721        0.001270          0.017071   \n",
       "7    0.833928  0.723376  0.774728        0.001224          0.016463   \n",
       "8    0.841924  0.757844  0.797674        0.001172          0.015843   \n",
       "9    0.855440  0.760937  0.805426        0.001146          0.015362   \n",
       "10   0.851959  0.778171  0.813395        0.001130          0.015310   \n",
       "11   0.860499  0.776845  0.816535        0.001117          0.015024   \n",
       "12   0.859597  0.773752  0.814419        0.001123          0.015087   \n",
       "13   0.763575  0.751660  0.757571        0.003592          0.010814   \n",
       "14   0.830290  0.822709  0.826482        0.003094          0.009523   \n",
       "15   0.835258  0.852590  0.843835        0.002967          0.009272   \n",
       "16   0.841015  0.869356  0.854951        0.002875          0.009082   \n",
       "17   0.851129  0.882636  0.866596        0.002767          0.008827   \n",
       "18   0.868254  0.883964  0.876038        0.002662          0.008465   \n",
       "19   0.880243  0.888280  0.884244        0.002572          0.008162   \n",
       "20   0.888396  0.897244  0.892798        0.002480          0.007912   \n",
       "21   0.888528  0.909031  0.898663        0.002422          0.007857   \n",
       "22   0.897761  0.905212  0.901471        0.002381          0.007619   \n",
       "23   0.898006  0.912019  0.904958        0.002345          0.007584   \n",
       "24   0.892574  0.915837  0.904056        0.002361          0.007720   \n",
       "25   0.902075  0.902224  0.902150        0.002369          0.007505   \n",
       "\n",
       "   recall_errn95  f1_errn95     score  score_errn95                display_row  \n",
       "0       0.020597   0.021455  0.584429      0.021455  rel-semeval-_clean_micro_  \n",
       "1       0.020432   0.020465  0.645749      0.020465  rel-semeval-_clean_micro_  \n",
       "2       0.020281   0.019904  0.672566      0.019904  rel-semeval-_clean_micro_  \n",
       "3       0.020085   0.019459  0.692712      0.019459  rel-semeval-_clean_micro_  \n",
       "4       0.019375   0.018722  0.727273      0.018722  rel-semeval-_clean_micro_  \n",
       "5       0.018842   0.018028  0.753616      0.018028  rel-semeval-_clean_micro_  \n",
       "6       0.018991   0.017980  0.753721      0.017980  rel-semeval-_clean_micro_  \n",
       "7       0.018431   0.017391  0.774728      0.017391  rel-semeval-_clean_micro_  \n",
       "8       0.017650   0.016698  0.797674      0.016698  rel-semeval-_clean_micro_  \n",
       "9       0.017573   0.016393  0.805426      0.016393  rel-semeval-_clean_micro_  \n",
       "10      0.017118   0.016164  0.813395      0.016164  rel-semeval-_clean_micro_  \n",
       "11      0.017155   0.016019  0.816535      0.016019  rel-semeval-_clean_micro_  \n",
       "12      0.017239   0.016091  0.814419      0.016091  rel-semeval-_clean_micro_  \n",
       "13      0.010911   0.010862  0.757571      0.010862          coref-ontonotes-1  \n",
       "14      0.009645   0.009583  0.826482      0.009583          coref-ontonotes-1  \n",
       "15      0.008953   0.009109  0.843835      0.009109          coref-ontonotes-1  \n",
       "16      0.008511   0.008787  0.854951      0.008787          coref-ontonotes-1  \n",
       "17      0.008128   0.008463  0.866596      0.008463          coref-ontonotes-1  \n",
       "18      0.008088   0.008272  0.876038      0.008272          coref-ontonotes-1  \n",
       "19      0.007955   0.008057  0.884244      0.008057          coref-ontonotes-1  \n",
       "20      0.007668   0.007788  0.892798      0.007788          coref-ontonotes-1  \n",
       "21      0.007262   0.007548  0.898663      0.007548          coref-ontonotes-1  \n",
       "22      0.007397   0.007507  0.901471      0.007507          coref-ontonotes-1  \n",
       "23      0.007153   0.007362  0.904958      0.007362          coref-ontonotes-1  \n",
       "24      0.007011   0.007348  0.904056      0.007348          coref-ontonotes-1  \n",
       "25      0.007500   0.007503  0.902150      0.007503          coref-ontonotes-1  \n",
       "\n",
       "[26 rows x 30 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluation split to report on.\n",
    "SPLIT = \"test\"\n",
    "# SPLIT = \"val\"\n",
    "mask = df['split'] == SPLIT\n",
    "\n",
    "# Gather, task by task, the rows that carry that task's headline metric(s).\n",
    "final_scores = []\n",
    "for task in df['task'].unique():\n",
    "    task_scores = df[mask & (df['task'] == task)]\n",
    "    if analysis.is_coref_task(task):\n",
    "        _headline_labels = [\"1\"]\n",
    "        # For GAP coref, have stratified by gender\n",
    "        if task.startswith(\"coref-gap\"):\n",
    "            _headline_labels += [\"_info.pronoun_gender_MASCULINE_1_\",\n",
    "                                 \"_info.pronoun_gender_FEMININE_1_\"]\n",
    "    elif task == \"dpr\":\n",
    "        # DPR filters on the seed field as well as the label, so append it directly.\n",
    "        dpr_mask = (task_scores['seed'] == \"_mean_\") & (task_scores['label'] == \"_micro_avg_\")\n",
    "        final_scores.append(task_scores[dpr_mask])\n",
    "        continue\n",
    "    elif analysis.is_srl_task(task):\n",
    "        # Core roles, non-core roles, then the clean version\n",
    "        # (average only over core or noncore roles).\n",
    "        _headline_labels = ['_core_', '_non_core_', '_clean_micro_']\n",
    "    elif analysis.is_relation_task(task):\n",
    "        # Relation tasks include specific \"no_relation\" label\n",
    "        _headline_labels = ['_clean_micro_']\n",
    "    elif task == \"noun-verb\":\n",
    "        # Noun-verb reports accuracy on VERB class\n",
    "        _headline_labels = ['VERB']\n",
    "    else:\n",
    "        _headline_labels = ['_micro_avg_']\n",
    "    # One frame per label, appended in the order listed above.\n",
    "    final_scores.extend(task_scores[task_scores['label'] == _label]\n",
    "                        for _label in _headline_labels)\n",
    "\n",
    "fdf = pd.concat(final_scores, axis=0, sort=False, ignore_index=True)\n",
    "# fdf['task_and_metric'] = [\"%s-%s\" % tl for tl in zip(fdf.task, fdf.label)]\n",
    "def format_display_row(task, label, seed):\n",
    "    \"\"\"Return the row label `<task>-<label>`, with `:<seed>` appended when a seed is set.\n",
    "\n",
    "    A seed counts as set only if it is truthy and not NaN. NaN needs its own\n",
    "    check because it is truthy and would otherwise produce a `:nan` suffix.\n",
    "    \"\"\"\n",
    "    row_name = f\"{task}-{label}\"\n",
    "    if seed and not pd.isna(seed):\n",
    "        row_name += f\":{seed}\"\n",
    "    return row_name\n",
    "\n",
    "# Label every selected row for the pivot tables, then show the selection.\n",
    "fdf['display_row'] = [\n",
    "    format_display_row(row_task, row_label, row_seed)\n",
    "    for row_task, row_label, row_seed in zip(fdf.task, fdf.label, fdf.seed)\n",
    "]\n",
    "print(len(fdf))\n",
    "fdf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "17221f4c-e3b1-4987-8824-643ab9accbd5"
    }
   },
   "source": [
    "Pivot DataFrame to present each task on a row, and each experiment on a column.\n",
    "\n",
    "This form is suitable to copy-paste into a spreadsheet."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "nbpresent": {
     "id": "9b06c697-bb71-4231-bd7e-e9028f85ffa4"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "display_row,bert-base-uncased-mix-00 (base),bert-base-uncased-mix-01 (base),bert-base-uncased-mix-02 (base),bert-base-uncased-mix-03 (base),bert-base-uncased-mix-04 (base),bert-base-uncased-mix-05 (base),bert-base-uncased-mix-06 (base),bert-base-uncased-mix-07 (base),bert-base-uncased-mix-08 (base),bert-base-uncased-mix-09 (base),bert-base-uncased-mix-10 (base),bert-base-uncased-mix-11 (base),bert-base-uncased-mix-12 (base)\n",
      "coref-ontonotes-1,75.7571,82.6482,84.3835,85.4951,86.6596,87.6038,88.4244,89.2798,89.8663,90.1471,90.4958,90.4056,90.2150\n",
      "rel-semeval-_clean_micro_,58.4429,64.5749,67.2566,69.2712,72.7273,75.3616,75.3721,77.4728,79.7674,80.5426,81.3395,81.6535,81.4419\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Pivot to wide-form for spreadsheet, and sort in (mostly) stable order.\n",
    "sheet_df = (\n",
    "    fdf.pivot(index=\"display_row\", columns=\"display_col\", values=\"score\")\n",
    "    # Columns are ordered by exp_type_sort_key, rows by task_sort_key.\n",
    "    .pipe(lambda wide: wide.reindex(sorted(wide.columns, key=exp_type_sort_key), axis=1))\n",
    "    .pipe(lambda wide: wide.reindex(sorted(wide.index, key=task_sort_key), axis=0))\n",
    ")\n",
    "# Values are multiplied by 100 before being written out as CSV text.\n",
    "print((100*sheet_df).to_csv(**csv_args))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "7256b0b5-f94e-4452-a395-24d43d8cc742"
    }
   },
   "source": [
    "Print the same format, but show the 95% confidence intervals for each score."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "nbpresent": {
     "id": "a9b7c866-7614-458b-a585-47f22802c81c"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "display_row,bert-base-uncased-mix-00 (base),bert-base-uncased-mix-01 (base),bert-base-uncased-mix-02 (base),bert-base-uncased-mix-03 (base),bert-base-uncased-mix-04 (base),bert-base-uncased-mix-05 (base),bert-base-uncased-mix-06 (base),bert-base-uncased-mix-07 (base),bert-base-uncased-mix-08 (base),bert-base-uncased-mix-09 (base),bert-base-uncased-mix-10 (base),bert-base-uncased-mix-11 (base),bert-base-uncased-mix-12 (base)\n",
      "coref-ontonotes-1,1.0862,0.9583,0.9109,0.8787,0.8463,0.8272,0.8057,0.7788,0.7548,0.7507,0.7362,0.7348,0.7503\n",
      "rel-semeval-_clean_micro_,2.1455,2.0465,1.9904,1.9459,1.8722,1.8028,1.7980,1.7391,1.6698,1.6393,1.6164,1.6019,1.6091\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Same wide-form layout as the score table, holding the score_errn95 values instead.\n",
    "sheet_df = (\n",
    "    fdf.pivot(index=\"display_row\", columns=\"display_col\", values=\"score_errn95\")\n",
    "    # Columns are ordered by exp_type_sort_key, rows by task_sort_key.\n",
    "    .pipe(lambda wide: wide.reindex(sorted(wide.columns, key=exp_type_sort_key), axis=1))\n",
    "    .pipe(lambda wide: wide.reindex(sorted(wide.index, key=task_sort_key), axis=0))\n",
    ")\n",
    "# Values are multiplied by 100 before being written out as CSV text.\n",
    "print((100*sheet_df).to_csv(**csv_args))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nbpresent": {
     "id": "aab22d14-0ec5-4235-a9bf-a69c3e5d328e"
    }
   },
   "source": [
    "## Load scalar mixing weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "nbpresent": {
     "id": "45c7a519-9fdf-486d-a7f0-e551f2a36e11"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['coref-ontonotes' 'rel-semeval']\n",
      "['bert-base-uncased-mix']\n",
      "26\n",
      "Scalar sets: ['sent_encoder._text_field_embedder.scalar_mix.']\n"
     ]
    }
   ],
   "source": [
    "# (tag, path) pairs. Each path is read with load_scalars_file (defined below) and the\n",
    "# tag is stored in that file's 'tag' column.\n",
    "# NOTE(review): the path is a hard-coded absolute location; edit it for your environment.\n",
    "scalar_files = [\n",
    "    (\"base\", \"/nfs/jiant/exp/iftenney/20190721-bert-base-layers/scalars.tsv\"),\n",
    "]\n",
    "\n",
    "def load_scalars_file(filename, tag=None):\n",
    "    \"\"\"Read a tab-separated scalars file and add experiment metadata columns.\n",
    "\n",
    "    Args:\n",
    "        filename: path to a TSV file with a header row. It must contain a 'run'\n",
    "            column of path strings.\n",
    "        tag: optional value; when not None it is stored in a leading 'tag' column.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame whose leading columns are ('tag',) 'exp_name', 'exp_type', 'task',\n",
    "        followed by the columns read from the file.\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(filename, sep=\"\\t\", header=0)\n",
    "    # Discard the unnamed saved-index column when present. errors='ignore' keeps\n",
    "    # a file written without that column from raising KeyError.\n",
    "    df = df.drop(['Unnamed: 0'], axis='columns', errors='ignore')\n",
    "\n",
    "    # The experiment name is the name of the parent directory of each run path.\n",
    "    exp_names = df['run'].map(lambda p: os.path.basename(os.path.dirname(p.strip(\"/\"))))\n",
    "    df.insert(0, \"exp_name\", exp_names)\n",
    "    df.insert(1, \"exp_type\", df['exp_name'].map(analysis.get_exp_type))\n",
    "    # The task is the part of the experiment name after \"-edges-\", passed through clean_task_name.\n",
    "    df.insert(2, \"task\", df['exp_name'].map(lambda name: analysis.clean_task_name(name.split(\"-edges-\")[1])))\n",
    "    if tag is not None:\n",
    "        df.insert(0, \"tag\", tag)\n",
    "\n",
    "    return df\n",
    "\n",
    "# Load each tagged scalars file and stack the results into a single frame.\n",
    "dfs = []\n",
    "for tag, scalar_file in scalar_files:\n",
    "    dfs.append(load_scalars_file(scalar_file, tag=tag))\n",
    "scalar_df = pd.concat(dfs, sort=False, ignore_index=True)\n",
    "scalar_df['display_col'] = [\n",
    "    \"%s (%s)\" % (row_exp_type, row_tag)\n",
    "    for row_exp_type, row_tag in zip(scalar_df.exp_type, scalar_df.tag)\n",
    "]\n",
    "# ELMo models also have 'scalar_mix_0.', which is for pretraining and not used by edge probing.\n",
    "mask = scalar_df['scalar_set'].map(lambda s: s.endswith((\"scalar_mix.\", \"scalar_mix_1.\")))\n",
    "scalar_df = scalar_df[mask].copy()\n",
    "# Quick summary of what survived the filter.\n",
    "print(scalar_df['task'].unique())\n",
    "print(scalar_df['exp_type'].unique())\n",
    "print(len(scalar_df))\n",
    "print(\"Scalar sets:\", scalar_df['scalar_set'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/iftenney/.conda/envs/jiant/lib/python3.6/site-packages/ipykernel_launcher.py:42: RuntimeWarning: invalid value encountered in double_scalars\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tag</th>\n",
       "      <th>exp_name</th>\n",
       "      <th>exp_type</th>\n",
       "      <th>task</th>\n",
       "      <th>checkpoint</th>\n",
       "      <th>gamma</th>\n",
       "      <th>label</th>\n",
       "      <th>run</th>\n",
       "      <th>scalar_parameters.0</th>\n",
       "      <th>scalar_set</th>\n",
       "      <th>...</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>weight_entropy</th>\n",
       "      <th>weight_kl_unif</th>\n",
       "      <th>weight_exp_layer</th>\n",
       "      <th>weight_exp_layer_oneplus</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>base</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-coref-ontonotes</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>/edges-coref-ontonotes/model_state_target_trai...</td>\n",
       "      <td>1.731095</td>\n",
       "      <td>__scalar_mix__</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>sent_encoder._text_field_embedder.scalar_mix.</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>base</td>\n",
       "      <td>bert-base-uncased-mix_00-edges-rel-semeval</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>/edges-rel-semeval/model_state_target_train_va...</td>\n",
       "      <td>1.099132</td>\n",
       "      <td>__scalar_mix__</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>sent_encoder._text_field_embedder.scalar_mix.</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>base</td>\n",
       "      <td>bert-base-uncased-mix_01-edges-coref-ontonotes</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>/edges-coref-ontonotes/model_state_target_trai...</td>\n",
       "      <td>1.835713</td>\n",
       "      <td>__scalar_mix__</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>-0.709879</td>\n",
       "      <td>sent_encoder._text_field_embedder.scalar_mix.</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.7112</td>\n",
       "      <td>0.288800</td>\n",
       "      <td>0.805300</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>base</td>\n",
       "      <td>bert-base-uncased-mix_01-edges-rel-semeval</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>rel-semeval</td>\n",
       "      <td>/edges-rel-semeval/model_state_target_train_va...</td>\n",
       "      <td>1.097378</td>\n",
       "      <td>__scalar_mix__</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>-0.178499</td>\n",
       "      <td>sent_encoder._text_field_embedder.scalar_mix.</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.977378</td>\n",
       "      <td>0.022622</td>\n",
       "      <td>0.588313</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>base</td>\n",
       "      <td>bert-base-uncased-mix_02-edges-coref-ontonotes</td>\n",
       "      <td>bert-base-uncased-mix</td>\n",
       "      <td>coref-ontonotes</td>\n",
       "      <td>/edges-coref-ontonotes/model_state_target_trai...</td>\n",
       "      <td>1.777263</td>\n",
       "      <td>__scalar_mix__</td>\n",
       "      <td>/nfs/jiant/exp/iftenney/20190721-bert-base-lay...</td>\n",
       "      <td>-0.585102</td>\n",
       "      <td>sent_encoder._text_field_embedder.scalar_mix.</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.19747</td>\n",
       "      <td>0.387494</td>\n",
       "      <td>1.550549</td>\n",
       "      <td>1.805902</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 40 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    tag                                        exp_name  \\\n",
       "0  base  bert-base-uncased-mix_00-edges-coref-ontonotes   \n",
       "1  base      bert-base-uncased-mix_00-edges-rel-semeval   \n",
       "2  base  bert-base-uncased-mix_01-edges-coref-ontonotes   \n",
       "3  base      bert-base-uncased-mix_01-edges-rel-semeval   \n",
       "4  base  bert-base-uncased-mix_02-edges-coref-ontonotes   \n",
       "\n",
       "                exp_type             task  \\\n",
       "0  bert-base-uncased-mix  coref-ontonotes   \n",
       "1  bert-base-uncased-mix      rel-semeval   \n",
       "2  bert-base-uncased-mix  coref-ontonotes   \n",
       "3  bert-base-uncased-mix      rel-semeval   \n",
       "4  bert-base-uncased-mix  coref-ontonotes   \n",
       "\n",
       "                                          checkpoint     gamma  \\\n",
       "0  /edges-coref-ontonotes/model_state_target_trai...  1.731095   \n",
       "1  /edges-rel-semeval/model_state_target_train_va...  1.099132   \n",
       "2  /edges-coref-ontonotes/model_state_target_trai...  1.835713   \n",
       "3  /edges-rel-semeval/model_state_target_train_va...  1.097378   \n",
       "4  /edges-coref-ontonotes/model_state_target_trai...  1.777263   \n",
       "\n",
       "            label                                                run  \\\n",
       "0  __scalar_mix__  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "1  __scalar_mix__  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "2  __scalar_mix__  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "3  __scalar_mix__  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "4  __scalar_mix__  /nfs/jiant/exp/iftenney/20190721-bert-base-lay...   \n",
       "\n",
       "   scalar_parameters.0                                     scalar_set  \\\n",
       "0             0.000000  sent_encoder._text_field_embedder.scalar_mix.   \n",
       "1             0.000000  sent_encoder._text_field_embedder.scalar_mix.   \n",
       "2            -0.709879  sent_encoder._text_field_embedder.scalar_mix.   \n",
       "3            -0.178499  sent_encoder._text_field_embedder.scalar_mix.   \n",
       "4            -0.585102  sent_encoder._text_field_embedder.scalar_mix.   \n",
       "\n",
       "             ...             7  8  9  10  11  12  weight_entropy  \\\n",
       "0            ...             0  0  0   0   0   0               0   \n",
       "1            ...             0  0  0   0   0   0               0   \n",
       "2            ...             0  0  0   0   0   0          0.7112   \n",
       "3            ...             0  0  0   0   0   0        0.977378   \n",
       "4            ...             0  0  0   0   0   0         1.19747   \n",
       "\n",
       "   weight_kl_unif  weight_exp_layer  weight_exp_layer_oneplus  \n",
       "0        0.000000          0.000000                       NaN  \n",
       "1        0.000000          0.000000                       NaN  \n",
       "2        0.288800          0.805300                  1.000000  \n",
       "3        0.022622          0.588313                  1.000000  \n",
       "4        0.387494          1.550549                  1.805902  \n",
       "\n",
       "[5 rows x 40 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count total scalar columns\n",
    "scalar_columns = collections.OrderedDict(sorted(\n",
    "    [(int(m.group(1)), m.group(0)) for m in \n",
    "     (re.match(\"^scalar_parameters\\.(\\d+)$\", str(name)) for name in scalar_df.columns)\n",
    "     if m]\n",
    "))\n",
    "\n",
    "# Fill NaN with -inf for scalar columns\n",
    "for name in scalar_columns.values():\n",
    "    scalar_df[name].fillna(value=-np.inf, inplace=True)\n",
    "\n",
    "# Pre-fill number columns\n",
    "for number in scalar_columns.keys():\n",
    "    scalar_df[number] = None\n",
    "scalar_df[\"weight_entropy\"] = None\n",
    "    \n",
    "# Softmax over parameters in each row\n",
    "num_scalars = max(scalar_columns.keys()) + 1\n",
    "scalars = {}\n",
    "masks = {}\n",
    "for i, row in scalar_df.iterrows():\n",
    "    arr = np.zeros(num_scalars, dtype=np.float32)\n",
    "    for j, col in scalar_columns.items():\n",
    "        arr[j] = float(row[col])\n",
    "        if np.isnan(arr[j]):\n",
    "            arr[j] = -np.inf\n",
    "    # Softmax over row\n",
    "    scalars[i] = softmax(arr)\n",
    "    masks[i] = np.isfinite(arr)\n",
    "\n",
    "# Add softmax weights back to DataFrame, with numeric column names.\n",
    "# This way, we can convert to long-form for easy plotting.\n",
    "for i in scalar_df.index:\n",
    "    for j in scalar_columns.keys():\n",
    "        scalar_df.loc[i, j] = scalars[i][j]\n",
    "    # Compute entropy\n",
    "    scalar_df.loc[i, \"weight_entropy\"] = entropy(scalars[i], base=2)\n",
    "    scalar_df.loc[i, \"weight_kl_unif\"] = entropy(scalars[i], qk=masks[i], base=2)\n",
    "    # Compute expectation\n",
    "    weighted_layers = scalars[i] * np.arange(len(scalars[i])) * masks[i]\n",
    "    scalar_df.loc[i, \"weight_exp_layer\"] = np.sum(weighted_layers)\n",
    "    scalar_df.loc[i, \"weight_exp_layer_oneplus\"] = np.sum(weighted_layers[1:]) / np.sum(scalars[i][1:] * masks[i][1:])\n",
    "\n",
    "scalar_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print scalars from the top layer, in spreadsheet-friendly form:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ",tag,exp_name,exp_type,task,checkpoint,gamma,label,run,scalar_parameters.0,scalar_set,scalar_parameters.1,scalar_parameters.2,scalar_parameters.3,scalar_parameters.4,scalar_parameters.5,scalar_parameters.6,scalar_parameters.7,scalar_parameters.8,scalar_parameters.9,scalar_parameters.10,scalar_parameters.11,scalar_parameters.12,display_col,0,1,2,3,4,5,6,7,8,9,10,11,12,weight_entropy,weight_kl_unif,weight_exp_layer,weight_exp_layer_oneplus\n",
      "24,base,bert-base-uncased-mix_12-edges-coref-ontonotes,bert-base-uncased-mix,coref-ontonotes,/edges-coref-ontonotes/model_state_target_train_val_73.best.th,1.6825,__scalar_mix__,/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_12-edges-coref-ontonotes/run,-0.4338,sent_encoder._text_field_embedder.scalar_mix.,-0.6981,-0.6875,-0.7092,-0.6423,-0.3593,0.0113,0.2223,0.8736,1.3477,0.9775,0.4410,-0.0540,bert-base-uncased-mix (base),0.03805815,0.029217781,0.029529793,0.02889403,0.030893512,0.0410006,0.05939578,0.073342934,0.14067322,0.22599995,0.15608208,0.09127266,0.05563934,3.3368135834156445,0.3636,7.7652,8.0724\n",
      "25,base,bert-base-uncased-mix_12-edges-rel-semeval,bert-base-uncased-mix,rel-semeval,/edges-rel-semeval/model_state_target_train_val_33.best.th,1.1048,__scalar_mix__,/nfs/jiant/exp/iftenney/20190721-bert-base-layers//bert-base-uncased-mix_12-edges-rel-semeval/run,-0.0888,sent_encoder._text_field_embedder.scalar_mix.,-0.1982,-0.2256,-0.2404,-0.2409,-0.1616,-0.0567,0.1238,0.2615,0.2689,0.2391,0.1682,0.1238,bert-base-uncased-mix (base),0.069189705,0.062019564,0.06033947,0.059453472,0.059423145,0.06433094,0.07144345,0.08557492,0.09821061,0.09894507,0.09603209,0.08946312,0.08557441,3.6727534867213993,0.0277,6.5956,7.0859\n",
      "\n"
     ]
    }
   ],
   "source": [
    "matcher = \"_12\"\n",
    "sheet_df = scalar_df[scalar_df.exp_name.map(lambda s: matcher in s)]\n",
    "print(sheet_df.to_csv(**csv_args))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "jiant",
   "language": "python",
   "name": "jiant"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
