{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import plotly.express as px\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from analysis_utils import get_success_rate_vs_num_attempts, get_bin_width_mean_error, scatter_num_attempts, scatter_with_error_bands"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>test_ID</th>\n",
       "      <th>test_file</th>\n",
       "      <th>has_requires</th>\n",
       "      <th>has_ensures</th>\n",
       "      <th>#_char</th>\n",
       "      <th>#_hint_char</th>\n",
       "      <th>#_lemma</th>\n",
       "      <th>#_function</th>\n",
       "      <th>#_method</th>\n",
       "      <th>verifies_without_hints</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>630-dafny_tmp_tmpz2kokaiq_Solution.dfy</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>781</td>\n",
       "      <td>89</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>703FinalProject_tmp_tmpr_10rn4z_DP-GD.dfy</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>1452</td>\n",
       "      <td>231</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>703FinalProject_tmp_tmpr_10rn4z_gaussian.dfy</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>1028</td>\n",
       "      <td>274</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>AssertivePrograming_tmp_tmpwf43uz0e_DivMode_Un...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>9500</td>\n",
       "      <td>1745</td>\n",
       "      <td>14</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>AssertivePrograming_tmp_tmpwf43uz0e_Find_Subst...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>10254</td>\n",
       "      <td>3595</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>777</th>\n",
       "      <td>777</td>\n",
       "      <td>verified-using-dafny_tmp_tmp7jatpjyn_longestZe...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>2274</td>\n",
       "      <td>652</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>778</th>\n",
       "      <td>778</td>\n",
       "      <td>vfag_tmp_tmpc29dxm1j_Verificacion_torneo.dfy</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>8315</td>\n",
       "      <td>6019</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779</th>\n",
       "      <td>779</td>\n",
       "      <td>vfag_tmp_tmpc29dxm1j_mergesort.dfy</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>2528</td>\n",
       "      <td>239</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>780</th>\n",
       "      <td>780</td>\n",
       "      <td>vfag_tmp_tmpc29dxm1j_sumar_componentes.dfy</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>1507</td>\n",
       "      <td>779</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>781</th>\n",
       "      <td>781</td>\n",
       "      <td>vmware-verification-2023_tmp_tmpoou5u54i_demos...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>5084</td>\n",
       "      <td>299</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>782 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     test_ID                                          test_file  has_requires  \\\n",
       "0          0             630-dafny_tmp_tmpz2kokaiq_Solution.dfy          True   \n",
       "1          1          703FinalProject_tmp_tmpr_10rn4z_DP-GD.dfy          True   \n",
       "2          2       703FinalProject_tmp_tmpr_10rn4z_gaussian.dfy          True   \n",
       "3          3  AssertivePrograming_tmp_tmpwf43uz0e_DivMode_Un...          True   \n",
       "4          4  AssertivePrograming_tmp_tmpwf43uz0e_Find_Subst...          True   \n",
       "..       ...                                                ...           ...   \n",
       "777      777  verified-using-dafny_tmp_tmp7jatpjyn_longestZe...          True   \n",
       "778      778       vfag_tmp_tmpc29dxm1j_Verificacion_torneo.dfy          True   \n",
       "779      779                 vfag_tmp_tmpc29dxm1j_mergesort.dfy          True   \n",
       "780      780         vfag_tmp_tmpc29dxm1j_sumar_componentes.dfy          True   \n",
       "781      781  vmware-verification-2023_tmp_tmpoou5u54i_demos...          True   \n",
       "\n",
       "     has_ensures  #_char  #_hint_char  #_lemma  #_function  #_method  \\\n",
       "0           True     781           89        0           1         1   \n",
       "1          False    1452          231        0           0         1   \n",
       "2          False    1028          274        0           1         1   \n",
       "3           True    9500         1745       14           7         3   \n",
       "4           True   10254         3595        5           0         2   \n",
       "..           ...     ...          ...      ...         ...       ...   \n",
       "777         True    2274          652        0           1         2   \n",
       "778         True    8315         6019        0           0         1   \n",
       "779        False    2528          239        0           0         3   \n",
       "780         True    1507          779        0           1         1   \n",
       "781         True    5084          299        3           2         0   \n",
       "\n",
       "     verifies_without_hints  \n",
       "0                     False  \n",
       "1                      True  \n",
       "2                      True  \n",
       "3                     False  \n",
       "4                     False  \n",
       "..                      ...  \n",
       "777                   False  \n",
       "778                    True  \n",
       "779                   False  \n",
       "780                   False  \n",
       "781                   False  \n",
       "\n",
       "[782 rows x 10 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the benchmark's ground-truth metadata table and display it as a sanity check.\n",
    "metadata_csv_path = \"../../DafnyBench/metadata/ground_truth_metadata.csv\"\n",
    "metadata_df = pd.read_csv(metadata_csv_path)\n",
    "metadata_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Identifiers passed to the analysis helper, in the order the results are unpacked below.\n",
    "model_ids = [\"gpt-4o\", \"gpt-4-turbo\", \"gpt-3.5-turbo\", \"claude-3-opus\", \"codellama-7b\"]\n",
    "\n",
    "# Each call yields a (mean, error) pair; the targets line up one-to-one with model_ids.\n",
    "(\n",
    "    (gpt4o_num_attempts_mean, gpt4o_num_attempts_error),\n",
    "    (gpt4_num_attempts_mean, gpt4_num_attempts_error),\n",
    "    (gpt35_num_attempts_mean, gpt35_num_attempts_error),\n",
    "    (claude3_num_attempts_mean, claude3_num_attempts_error),\n",
    "    (codellama_num_attempts_mean, codellama_num_attempts_error),\n",
    ") = [get_success_rate_vs_num_attempts(model_id) for model_id in model_ids]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th># of attempts given</th>\n",
       "      <th>Success rate</th>\n",
       "      <th>Error</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>0</td>\n",
       "      <td>0.265985</td>\n",
       "      <td>0.015801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>1</td>\n",
       "      <td>0.508951</td>\n",
       "      <td>0.017877</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>2</td>\n",
       "      <td>0.552430</td>\n",
       "      <td>0.017781</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>3</td>\n",
       "      <td>0.566496</td>\n",
       "      <td>0.017721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>4</td>\n",
       "      <td>0.572890</td>\n",
       "      <td>0.017689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>5</td>\n",
       "      <td>0.581841</td>\n",
       "      <td>0.017639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>6</td>\n",
       "      <td>0.586957</td>\n",
       "      <td>0.017607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>7</td>\n",
       "      <td>0.588235</td>\n",
       "      <td>0.017599</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>8</td>\n",
       "      <td>0.590793</td>\n",
       "      <td>0.017583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>9</td>\n",
       "      <td>0.593350</td>\n",
       "      <td>0.017566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>10</td>\n",
       "      <td>0.593350</td>\n",
       "      <td>0.017566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>0</td>\n",
       "      <td>0.265985</td>\n",
       "      <td>0.015801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>1</td>\n",
       "      <td>0.508951</td>\n",
       "      <td>0.017877</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>2</td>\n",
       "      <td>0.556266</td>\n",
       "      <td>0.017766</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>3</td>\n",
       "      <td>0.572890</td>\n",
       "      <td>0.017689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>4</td>\n",
       "      <td>0.578005</td>\n",
       "      <td>0.017661</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>5</td>\n",
       "      <td>0.584399</td>\n",
       "      <td>0.017623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>6</td>\n",
       "      <td>0.589514</td>\n",
       "      <td>0.017591</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>7</td>\n",
       "      <td>0.593350</td>\n",
       "      <td>0.017566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>8</td>\n",
       "      <td>0.595908</td>\n",
       "      <td>0.017548</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>9</td>\n",
       "      <td>0.595908</td>\n",
       "      <td>0.017548</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>10</td>\n",
       "      <td>0.598465</td>\n",
       "      <td>0.017530</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>0</td>\n",
       "      <td>0.265985</td>\n",
       "      <td>0.015801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>1</td>\n",
       "      <td>0.411765</td>\n",
       "      <td>0.017599</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>2</td>\n",
       "      <td>0.428389</td>\n",
       "      <td>0.017696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>3</td>\n",
       "      <td>0.430946</td>\n",
       "      <td>0.017709</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>4</td>\n",
       "      <td>0.434783</td>\n",
       "      <td>0.017727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>5</td>\n",
       "      <td>0.436061</td>\n",
       "      <td>0.017733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>6</td>\n",
       "      <td>0.437340</td>\n",
       "      <td>0.017739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>7</td>\n",
       "      <td>0.437340</td>\n",
       "      <td>0.017739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>8</td>\n",
       "      <td>0.438619</td>\n",
       "      <td>0.017745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>9</td>\n",
       "      <td>0.439898</td>\n",
       "      <td>0.017750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>10</td>\n",
       "      <td>0.439898</td>\n",
       "      <td>0.017750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>0</td>\n",
       "      <td>0.265985</td>\n",
       "      <td>0.015801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>1</td>\n",
       "      <td>0.538363</td>\n",
       "      <td>0.017827</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>2</td>\n",
       "      <td>0.585678</td>\n",
       "      <td>0.017616</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>3</td>\n",
       "      <td>0.625320</td>\n",
       "      <td>0.017309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>4</td>\n",
       "      <td>0.639386</td>\n",
       "      <td>0.017171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>5</td>\n",
       "      <td>0.649616</td>\n",
       "      <td>0.017061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>6</td>\n",
       "      <td>0.653453</td>\n",
       "      <td>0.017017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>7</td>\n",
       "      <td>0.657289</td>\n",
       "      <td>0.016972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>8</td>\n",
       "      <td>0.668798</td>\n",
       "      <td>0.016830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>9</td>\n",
       "      <td>0.676471</td>\n",
       "      <td>0.016729</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>10</td>\n",
       "      <td>0.677749</td>\n",
       "      <td>0.016712</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>0</td>\n",
       "      <td>0.265985</td>\n",
       "      <td>0.015801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>1</td>\n",
       "      <td>0.273657</td>\n",
       "      <td>0.015943</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>2</td>\n",
       "      <td>0.277494</td>\n",
       "      <td>0.016012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>3</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>4</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>5</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>6</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>7</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>8</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>9</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>10</td>\n",
       "      <td>0.280051</td>\n",
       "      <td>0.016057</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Model  # of attempts given  Success rate     Error\n",
       "0          GPT-4o                    0      0.265985  0.015801\n",
       "1          GPT-4o                    1      0.508951  0.017877\n",
       "2          GPT-4o                    2      0.552430  0.017781\n",
       "3          GPT-4o                    3      0.566496  0.017721\n",
       "4          GPT-4o                    4      0.572890  0.017689\n",
       "5          GPT-4o                    5      0.581841  0.017639\n",
       "6          GPT-4o                    6      0.586957  0.017607\n",
       "7          GPT-4o                    7      0.588235  0.017599\n",
       "8          GPT-4o                    8      0.590793  0.017583\n",
       "9          GPT-4o                    9      0.593350  0.017566\n",
       "10         GPT-4o                   10      0.593350  0.017566\n",
       "11    GPT-4-turbo                    0      0.265985  0.015801\n",
       "12    GPT-4-turbo                    1      0.508951  0.017877\n",
       "13    GPT-4-turbo                    2      0.556266  0.017766\n",
       "14    GPT-4-turbo                    3      0.572890  0.017689\n",
       "15    GPT-4-turbo                    4      0.578005  0.017661\n",
       "16    GPT-4-turbo                    5      0.584399  0.017623\n",
       "17    GPT-4-turbo                    6      0.589514  0.017591\n",
       "18    GPT-4-turbo                    7      0.593350  0.017566\n",
       "19    GPT-4-turbo                    8      0.595908  0.017548\n",
       "20    GPT-4-turbo                    9      0.595908  0.017548\n",
       "21    GPT-4-turbo                   10      0.598465  0.017530\n",
       "22  GPT-3.5-turbo                    0      0.265985  0.015801\n",
       "23  GPT-3.5-turbo                    1      0.411765  0.017599\n",
       "24  GPT-3.5-turbo                    2      0.428389  0.017696\n",
       "25  GPT-3.5-turbo                    3      0.430946  0.017709\n",
       "26  GPT-3.5-turbo                    4      0.434783  0.017727\n",
       "27  GPT-3.5-turbo                    5      0.436061  0.017733\n",
       "28  GPT-3.5-turbo                    6      0.437340  0.017739\n",
       "29  GPT-3.5-turbo                    7      0.437340  0.017739\n",
       "30  GPT-3.5-turbo                    8      0.438619  0.017745\n",
       "31  GPT-3.5-turbo                    9      0.439898  0.017750\n",
       "32  GPT-3.5-turbo                   10      0.439898  0.017750\n",
       "33  Claude-3-Opus                    0      0.265985  0.015801\n",
       "34  Claude-3-Opus                    1      0.538363  0.017827\n",
       "35  Claude-3-Opus                    2      0.585678  0.017616\n",
       "36  Claude-3-Opus                    3      0.625320  0.017309\n",
       "37  Claude-3-Opus                    4      0.639386  0.017171\n",
       "38  Claude-3-Opus                    5      0.649616  0.017061\n",
       "39  Claude-3-Opus                    6      0.653453  0.017017\n",
       "40  Claude-3-Opus                    7      0.657289  0.016972\n",
       "41  Claude-3-Opus                    8      0.668798  0.016830\n",
       "42  Claude-3-Opus                    9      0.676471  0.016729\n",
       "43  Claude-3-Opus                   10      0.677749  0.016712\n",
       "44   CodeLlama-7b                    0      0.265985  0.015801\n",
       "45   CodeLlama-7b                    1      0.273657  0.015943\n",
       "46   CodeLlama-7b                    2      0.277494  0.016012\n",
       "47   CodeLlama-7b                    3      0.280051  0.016057\n",
       "48   CodeLlama-7b                    4      0.280051  0.016057\n",
       "49   CodeLlama-7b                    5      0.280051  0.016057\n",
       "50   CodeLlama-7b                    6      0.280051  0.016057\n",
       "51   CodeLlama-7b                    7      0.280051  0.016057\n",
       "52   CodeLlama-7b                    8      0.280051  0.016057\n",
       "53   CodeLlama-7b                    9      0.280051  0.016057\n",
       "54   CodeLlama-7b                   10      0.280051  0.016057"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (display label, success rate keyed by # of attempts, error keyed by # of attempts),\n",
    "# listed in the order the models should appear in the table.\n",
    "curves_by_model = [\n",
    "    (\"GPT-4o\", gpt4o_num_attempts_mean, gpt4o_num_attempts_error),\n",
    "    (\"GPT-4-turbo\", gpt4_num_attempts_mean, gpt4_num_attempts_error),\n",
    "    (\"GPT-3.5-turbo\", gpt35_num_attempts_mean, gpt35_num_attempts_error),\n",
    "    (\"Claude-3-Opus\", claude3_num_attempts_mean, claude3_num_attempts_error),\n",
    "    (\"CodeLlama-7b\", codellama_num_attempts_mean, codellama_num_attempts_error),\n",
    "]\n",
    "\n",
    "# Build one long-format frame per model and stack them, so each model is named exactly once\n",
    "# instead of being repeated across four parallel list concatenations.\n",
    "# Means and errors are paired by position; the DataFrame constructor raises if a model's\n",
    "# error mapping does not have the same length as its mean mapping.\n",
    "num_attempts_df = pd.concat(\n",
    "    [\n",
    "        pd.DataFrame(\n",
    "            {\n",
    "                \"Model\": [label] * len(mean),\n",
    "                \"# of attempts given\": list(mean.keys()),\n",
    "                \"Success rate\": list(mean.values()),\n",
    "                \"Error\": list(error.values()),\n",
    "            }\n",
    "        )\n",
    "        for label, mean, error in curves_by_model\n",
    "    ],\n",
    "    ignore_index=True,  # continuous 0..N-1 row index across models\n",
    ")\n",
    "num_attempts_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "fill": "toself",
         "fillcolor": "rgba(99,110,250,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-4o",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          10,
          9,
          8,
          7,
          6,
          5,
          4,
          3,
          2,
          1,
          0
         ],
         "xaxis": "x",
         "y": [
          0.2817853968139228,
          0.5268285045562658,
          0.570211059915582,
          0.5842173006646639,
          0.5905789778753304,
          0.5994802488414293,
          0.6045640127094494,
          0.6058346479214657,
          0.6083755502598321,
          0.6109159606505556,
          0.6109159606505556,
          0.5757848066128715,
          0.5757848066128715,
          0.5732101274895284,
          0.5706359403138285,
          0.5693490307688116,
          0.5642026156086986,
          0.5552010732755647,
          0.548775026701065,
          0.5346482751227811,
          0.4910743087429669,
          0.25018391264899287
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-4o<br># of attempts given=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-4o",
         "line": {
          "color": "#636efa",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-4o",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10
         ],
         "xaxis": "x",
         "y": [
          0.2659846547314578,
          0.5089514066496164,
          0.5524296675191815,
          0.5664961636828645,
          0.5728900255754475,
          0.5818414322250639,
          0.5869565217391305,
          0.5882352941176471,
          0.5907928388746803,
          0.5933503836317136,
          0.5933503836317136
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(239,85,59,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-4-turbo",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          10,
          9,
          8,
          7,
          6,
          5,
          4,
          3,
          2,
          1,
          0
         ],
         "xaxis": "x",
         "y": [
          0.2817853968139228,
          0.5268285045562658,
          0.574032376494124,
          0.5905789778753304,
          0.5956661462893991,
          0.6020223750495226,
          0.6071051604958713,
          0.6109159606505556,
          0.613455877652577,
          0.613455877652577,
          0.6159952997774613,
          0.5809356465140988,
          0.5783599791249167,
          0.5783599791249167,
          0.5757848066128715,
          0.5719229724964561,
          0.5667755789146718,
          0.560344083889629,
          0.5552010732755647,
          0.5384995928153389,
          0.4910743087429669,
          0.25018391264899287
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-4-turbo<br># of attempts given=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-4-turbo",
         "line": {
          "color": "#EF553B",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-4-turbo",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10
         ],
         "xaxis": "x",
         "y": [
          0.2659846547314578,
          0.5089514066496164,
          0.5562659846547314,
          0.5728900255754475,
          0.578005115089514,
          0.5843989769820972,
          0.5895140664961637,
          0.5933503836317136,
          0.5959079283887468,
          0.5959079283887468,
          0.59846547314578
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(0,204,150,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-3.5-turbo",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          10,
          9,
          8,
          7,
          6,
          5,
          4,
          3,
          2,
          1,
          0
         ],
         "xaxis": "x",
         "y": [
          0.2817853968139228,
          0.42936405968617153,
          0.44608437706747855,
          0.44865491594728474,
          0.4525098217218741,
          0.45379455014173387,
          0.4550791586793907,
          0.4550791586793907,
          0.4563636474531309,
          0.45764801657866244,
          0.45764801657866244,
          0.42214737984077494,
          0.42214737984077494,
          0.4208742042092732,
          0.4196011482259801,
          0.4196011482259801,
          0.41832821200660375,
          0.41705539566943023,
          0.4132376671729198,
          0.41069311653865953,
          0.3941653520785343,
          0.25018391264899287
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-3.5-turbo<br># of attempts given=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-3.5-turbo",
         "line": {
          "color": "#00cc96",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-3.5-turbo",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10
         ],
         "xaxis": "x",
         "y": [
          0.2659846547314578,
          0.4117647058823529,
          0.42838874680306904,
          0.4309462915601023,
          0.43478260869565216,
          0.4360613810741688,
          0.4373401534526854,
          0.4373401534526854,
          0.43861892583120204,
          0.4398976982097187,
          0.4398976982097187
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(171,99,250,.3)",
         "hoverinfo": "skip",
         "legendgroup": "Claude-3-Opus",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          10,
          9,
          8,
          7,
          6,
          5,
          4,
          3,
          2,
          1,
          0
         ],
         "xaxis": "x",
         "y": [
          0.2817853968139228,
          0.556190428105404,
          0.6032932550297729,
          0.6426289374988441,
          0.6565573410250836,
          0.666677074637679,
          0.6704697624975495,
          0.6742612293695944,
          0.6856282051175275,
          0.6931998958527801,
          0.6944613470927838,
          0.6610373741348377,
          0.6597412806178082,
          0.6519677028108612,
          0.6403167757454951,
          0.6364356083464402,
          0.632555661935211,
          0.6222150374915405,
          0.6080104486904143,
          0.5680622436914546,
          0.5205359146055935,
          0.25018391264899287
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=Claude-3-Opus<br># of attempts given=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "Claude-3-Opus",
         "line": {
          "color": "#ab63fa",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "Claude-3-Opus",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10
         ],
         "xaxis": "x",
         "y": [
          0.2659846547314578,
          0.5383631713554987,
          0.5856777493606138,
          0.6253196930946292,
          0.639386189258312,
          0.649616368286445,
          0.6534526854219949,
          0.6572890025575447,
          0.6687979539641944,
          0.6764705882352942,
          0.6777493606138107
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(255,161,90,.3)",
         "hoverinfo": "skip",
         "legendgroup": "CodeLlama-7b",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          10,
          9,
          8,
          7,
          6,
          5,
          4,
          3,
          2,
          1,
          0
         ],
         "xaxis": "x",
         "y": [
          0.2817853968139228,
          0.28960032126501145,
          0.2935055464034881,
          0.2961082144099295,
          0.2961082144099295,
          0.2961082144099295,
          0.2961082144099295,
          0.2961082144099295,
          0.2961082144099295,
          0.2961082144099295,
          0.2961082144099295,
          0.2639940873803518,
          0.2639940873803518,
          0.2639940873803518,
          0.2639940873803518,
          0.2639940873803518,
          0.2639940873803518,
          0.2639940873803518,
          0.2639940873803518,
          0.26148166587272675,
          0.2577142567401036,
          0.25018391264899287
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=CodeLlama-7b<br># of attempts given=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "CodeLlama-7b",
         "line": {
          "color": "#FFA15A",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "CodeLlama-7b",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10
         ],
         "xaxis": "x",
         "y": [
          0.2659846547314578,
          0.27365728900255754,
          0.2774936061381074,
          0.28005115089514065,
          0.28005115089514065,
          0.28005115089514065,
          0.28005115089514065,
          0.28005115089514065,
          0.28005115089514065,
          0.28005115089514065,
          0.28005115089514065
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "font": {
         "size": 20
        },
        "height": 600,
        "legend": {
         "title": {},
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "width": 900,
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "# of attempts given"
         }
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "range": [
          0,
          0.8
         ],
         "tickformat": ",.0%",
         "title": {
          "text": "Success rate"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Success rate vs. number of attempts given, one line per model. The 'Error'\n",
    "# column is passed as error_y and rendered with error_y_mode='band'.\n",
    "num_attempts_fig = scatter_num_attempts(\n",
    "    data_frame=num_attempts_df,\n",
    "    x='# of attempts given',\n",
    "    y='Success rate',\n",
    "    error_y='Error',\n",
    "    error_y_mode='band',\n",
    "    color='Model',\n",
    ")\n",
    "num_attempts_fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/wendysun/Desktop/DafnyBench/stats/lib/python3.10/site-packages/numpy/core/fromnumeric.py:3504: RuntimeWarning:\n",
      "\n",
      "Mean of empty slice.\n",
      "\n",
      "/Users/wendysun/Desktop/DafnyBench/stats/lib/python3.10/site-packages/numpy/core/_methods.py:129: RuntimeWarning:\n",
      "\n",
      "invalid value encountered in scalar divide\n",
      "\n",
      "/Users/wendysun/Desktop/DafnyBench/stats/lib/python3.10/site-packages/numpy/core/_methods.py:206: RuntimeWarning:\n",
      "\n",
      "Degrees of freedom <= 0 for slice\n",
      "\n",
      "/Users/wendysun/Desktop/DafnyBench/stats/lib/python3.10/site-packages/numpy/core/_methods.py:163: RuntimeWarning:\n",
      "\n",
      "invalid value encountered in divide\n",
      "\n",
      "/Users/wendysun/Desktop/DafnyBench/stats/lib/python3.10/site-packages/numpy/core/_methods.py:198: RuntimeWarning:\n",
      "\n",
      "invalid value encountered in scalar divide\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# get_bin_width_mean_error returns a 3-tuple per model. Its 2nd and 3rd items are\n",
    "# each unpacked below into (num_char, num_hint_char, num_lemma) components.\n",
    "# The 1st item is kept only from the gpt-4o call; the other calls discard it into `_`.\n",
    "# NOTE(review): the recorded run emitted NumPy 'Mean of empty slice' warnings, so\n",
    "# presumably at least one bin was empty for some model -- verify in analysis_utils.\n",
    "\n",
    "# GPT-4o\n",
    "bin_num_programs, gpt4o_bin_mean, gpt4o_bin_error = get_bin_width_mean_error(\"gpt-4o\")\n",
    "(gpt4o_bin_num_char_mean,\n",
    " gpt4o_bin_num_hint_char_mean,\n",
    " gpt4o_bin_num_lemma_mean) = gpt4o_bin_mean\n",
    "(gpt4o_bin_num_char_error,\n",
    " gpt4o_bin_num_hint_char_error,\n",
    " gpt4o_bin_num_lemma_error) = gpt4o_bin_error\n",
    "\n",
    "# GPT-4-turbo\n",
    "_, gpt4_bin_mean, gpt4_bin_error = get_bin_width_mean_error(\"gpt-4-turbo\")\n",
    "(gpt4_bin_num_char_mean,\n",
    " gpt4_bin_num_hint_char_mean,\n",
    " gpt4_bin_num_lemma_mean) = gpt4_bin_mean\n",
    "(gpt4_bin_num_char_error,\n",
    " gpt4_bin_num_hint_char_error,\n",
    " gpt4_bin_num_lemma_error) = gpt4_bin_error\n",
    "\n",
    "# GPT-3.5-turbo\n",
    "_, gpt35_bin_mean, gpt35_bin_error = get_bin_width_mean_error(\"gpt-3.5-turbo\")\n",
    "(gpt35_bin_num_char_mean,\n",
    " gpt35_bin_num_hint_char_mean,\n",
    " gpt35_bin_num_lemma_mean) = gpt35_bin_mean\n",
    "(gpt35_bin_num_char_error,\n",
    " gpt35_bin_num_hint_char_error,\n",
    " gpt35_bin_num_lemma_error) = gpt35_bin_error\n",
    "\n",
    "# Claude-3-Opus\n",
    "_, claude3_bin_mean, claude3_bin_error = get_bin_width_mean_error(\"claude-3-opus\")\n",
    "(claude3_bin_num_char_mean,\n",
    " claude3_bin_num_hint_char_mean,\n",
    " claude3_bin_num_lemma_mean) = claude3_bin_mean\n",
    "(claude3_bin_num_char_error,\n",
    " claude3_bin_num_hint_char_error,\n",
    " claude3_bin_num_lemma_error) = claude3_bin_error\n",
    "\n",
    "# CodeLlama-7b\n",
    "_, codellama_bin_mean, codellama_bin_error = get_bin_width_mean_error(\"codellama-7b\")\n",
    "(codellama_bin_num_char_mean,\n",
    " codellama_bin_num_hint_char_mean,\n",
    " codellama_bin_num_lemma_mean) = codellama_bin_mean\n",
    "(codellama_bin_num_char_error,\n",
    " codellama_bin_num_hint_char_error,\n",
    " codellama_bin_num_lemma_error) = codellama_bin_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Unpack bin_num_programs into its three components, in the same\n",
    "# (num_char, num_hint_char, num_lemma) order used for the mean/error tuples.\n",
    "(bin_num_char_num_programs,\n",
    " bin_num_hint_char_num_programs,\n",
    " bin_num_lemma_num_programs) = bin_num_programs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "alignmentgroup": "True",
         "hovertemplate": "Program length (characters)=%{x}<br># of test files=%{y}<extra></extra>",
         "legendgroup": "",
         "marker": {
          "color": "#636efa",
          "pattern": {
           "shape": ""
          }
         },
         "name": "",
         "offsetgroup": "",
         "orientation": "v",
         "showlegend": false,
         "textposition": "auto",
         "type": "bar",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75
         ],
         "xaxis": "x",
         "y": [
          78,
          79,
          78,
          77,
          79,
          78,
          78,
          78,
          78,
          79
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "barmode": "relative",
        "height": 450,
        "legend": {
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "width": 800,
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "ticktext": [
          "153",
          "314",
          "443",
          "574",
          "759",
          "1019",
          "1445",
          "2094",
          "3540",
          "16673"
         ],
         "tickvals": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75
         ],
         "title": {
          "text": "Program length (characters)"
         },
         "type": "category"
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "# of test files"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "num_char_num_programs_df = pd.DataFrame(columns=[\"Program length (characters)\", \"# of test files\"])\n",
    "x_ticks_num_char = [key[1] for key in list(bin_num_char_num_programs.keys())]\n",
    "num_char_num_programs_df[\"Program length (characters)\"] = x_ticks_num_char\n",
    "num_char_num_programs_df[\"# of test files\"] = list(bin_num_char_num_programs.values())\n",
    "\n",
    "num_char_bar_fig = px.bar(num_char_num_programs_df, x=\"Program length (characters)\", y=\"# of test files\")\n",
    "num_char_bar_fig.update_layout(xaxis_type='category', width=800, height=450)\n",
    "num_char_bar_fig.update_xaxes(tickvals=x_ticks_num_char, ticktext=[str(int(x)) for x in x_ticks_num_char])\n",
    "num_char_bar_fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "alignmentgroup": "True",
         "hovertemplate": "Hint quantity (characters)=%{x}<br># of test files=%{y}<extra></extra>",
         "legendgroup": "",
         "marker": {
          "color": "#636efa",
          "pattern": {
           "shape": ""
          }
         },
         "name": "",
         "offsetgroup": "",
         "orientation": "v",
         "showlegend": false,
         "textposition": "auto",
         "type": "bar",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4
         ],
         "xaxis": "x",
         "y": [
          0,
          157,
          74,
          82,
          78,
          78,
          78,
          78,
          78,
          79
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "barmode": "relative",
        "height": 450,
        "legend": {
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "width": 800,
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "ticktext": [
          "0",
          "19",
          "55",
          "85",
          "114",
          "156",
          "207",
          "279",
          "440",
          "3286"
         ],
         "tickvals": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4
         ],
         "title": {
          "text": "Hint quantity (characters)"
         },
         "type": "category"
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "# of test files"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart: number of test files falling in each hint-quantity bin.\n",
    "# Each bin is labelled by element [1] of its key\n",
    "# (presumably the bin's right endpoint -- confirm where the dict is built).\n",
    "x_ticks_num_hint_char = [bin_key[1] for bin_key in bin_num_hint_char_num_programs.keys()]\n",
    "num_hint_char_num_programs_df = pd.DataFrame(\n",
    "    {\n",
    "        \"Hint quantity (characters)\": x_ticks_num_hint_char,\n",
    "        \"# of test files\": list(bin_num_hint_char_num_programs.values()),\n",
    "    }\n",
    ")\n",
    "\n",
    "num_hint_char_bar_fig = px.bar(\n",
    "    num_hint_char_num_programs_df,\n",
    "    x=\"Hint quantity (characters)\",\n",
    "    y=\"# of test files\",\n",
    ")\n",
    "# Treat every bin label as a discrete category instead of a numeric position.\n",
    "num_hint_char_bar_fig.update_layout(xaxis_type='category', width=800, height=450)\n",
    "# Show the bin labels as whole numbers (fractional part is truncated by int()).\n",
    "num_hint_char_bar_fig.update_xaxes(\n",
    "    tickvals=x_ticks_num_hint_char,\n",
    "    ticktext=[str(int(edge)) for edge in x_ticks_num_hint_char],\n",
    ")\n",
    "num_hint_char_bar_fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "alignmentgroup": "True",
         "hovertemplate": "# of lemmas=%{x}<br># of test files=%{y}<extra></extra>",
         "legendgroup": "",
         "marker": {
          "color": "#636efa",
          "pattern": {
           "shape": ""
          }
         },
         "name": "",
         "offsetgroup": "",
         "orientation": "v",
         "showlegend": false,
         "textposition": "auto",
         "type": "bar",
         "x": [
          0,
          0.5,
          2.5,
          23
         ],
         "xaxis": "x",
         "y": [
          0,
          624,
          70,
          88
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "barmode": "relative",
        "height": 450,
        "legend": {
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "width": 800,
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "ticktext": [
          "0",
          "0",
          "2",
          "23"
         ],
         "tickvals": [
          0,
          0.5,
          2.5,
          23
         ],
         "title": {
          "text": "# of lemmas"
         },
         "type": "category"
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "# of test files"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart: number of test files falling in each lemma-count bin.\n",
    "# Each bin is labelled by element [1] of its key\n",
    "# (presumably the bin's right endpoint -- confirm where the dict is built).\n",
    "x_ticks_num_lemma = [bin_key[1] for bin_key in bin_num_lemma_num_programs.keys()]\n",
    "num_lemma_num_programs_df = pd.DataFrame(\n",
    "    {\n",
    "        \"# of lemmas\": x_ticks_num_lemma,\n",
    "        \"# of test files\": list(bin_num_lemma_num_programs.values()),\n",
    "    }\n",
    ")\n",
    "\n",
    "num_lemma_bar_fig = px.bar(num_lemma_num_programs_df, x=\"# of lemmas\", y=\"# of test files\")\n",
    "# Treat every bin label as a discrete category instead of a numeric position.\n",
    "num_lemma_bar_fig.update_layout(xaxis_type='category', width=800, height=450)\n",
    "# Format labels with ':g' rather than str(int(x)): truncating to int collapsed the\n",
    "# distinct bin edges 0 and 0.5 into two identical \"0\" tick labels (and 2.5 into \"2\").\n",
    "num_lemma_bar_fig.update_xaxes(\n",
    "    tickvals=x_ticks_num_lemma,\n",
    "    ticktext=[f\"{edge:g}\" for edge in x_ticks_num_lemma],\n",
    ")\n",
    "num_lemma_bar_fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Program length (characters)</th>\n",
       "      <th>Left endpoints</th>\n",
       "      <th>Right endpoints</th>\n",
       "      <th>Bin half width</th>\n",
       "      <th>Success rate</th>\n",
       "      <th>Error</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>153.50</td>\n",
       "      <td>70.0</td>\n",
       "      <td>237.0</td>\n",
       "      <td>83.50</td>\n",
       "      <td>0.961538</td>\n",
       "      <td>0.021775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>314.10</td>\n",
       "      <td>237.0</td>\n",
       "      <td>391.2</td>\n",
       "      <td>77.10</td>\n",
       "      <td>0.860759</td>\n",
       "      <td>0.038950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>443.40</td>\n",
       "      <td>391.2</td>\n",
       "      <td>495.6</td>\n",
       "      <td>52.20</td>\n",
       "      <td>0.794872</td>\n",
       "      <td>0.045721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>574.30</td>\n",
       "      <td>495.6</td>\n",
       "      <td>653.0</td>\n",
       "      <td>78.70</td>\n",
       "      <td>0.805195</td>\n",
       "      <td>0.045134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>759.75</td>\n",
       "      <td>653.0</td>\n",
       "      <td>866.5</td>\n",
       "      <td>106.75</td>\n",
       "      <td>0.632911</td>\n",
       "      <td>0.054230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>1019.45</td>\n",
       "      <td>866.5</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>152.95</td>\n",
       "      <td>0.474359</td>\n",
       "      <td>0.056539</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>1445.10</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>272.70</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.056614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>2094.10</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>376.30</td>\n",
       "      <td>0.410256</td>\n",
       "      <td>0.055694</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>3540.95</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>1070.55</td>\n",
       "      <td>0.269231</td>\n",
       "      <td>0.050223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>16673.75</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>28736.0</td>\n",
       "      <td>12062.25</td>\n",
       "      <td>0.227848</td>\n",
       "      <td>0.047191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>153.50</td>\n",
       "      <td>70.0</td>\n",
       "      <td>237.0</td>\n",
       "      <td>83.50</td>\n",
       "      <td>0.948718</td>\n",
       "      <td>0.024975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>314.10</td>\n",
       "      <td>237.0</td>\n",
       "      <td>391.2</td>\n",
       "      <td>77.10</td>\n",
       "      <td>0.886076</td>\n",
       "      <td>0.035746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>443.40</td>\n",
       "      <td>391.2</td>\n",
       "      <td>495.6</td>\n",
       "      <td>52.20</td>\n",
       "      <td>0.923077</td>\n",
       "      <td>0.030172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>574.30</td>\n",
       "      <td>495.6</td>\n",
       "      <td>653.0</td>\n",
       "      <td>78.70</td>\n",
       "      <td>0.831169</td>\n",
       "      <td>0.042690</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>759.75</td>\n",
       "      <td>653.0</td>\n",
       "      <td>866.5</td>\n",
       "      <td>106.75</td>\n",
       "      <td>0.632911</td>\n",
       "      <td>0.054230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>1019.45</td>\n",
       "      <td>866.5</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>152.95</td>\n",
       "      <td>0.461538</td>\n",
       "      <td>0.056446</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>1445.10</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>272.70</td>\n",
       "      <td>0.461538</td>\n",
       "      <td>0.056446</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>2094.10</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>376.30</td>\n",
       "      <td>0.384615</td>\n",
       "      <td>0.055086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>3540.95</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>1070.55</td>\n",
       "      <td>0.256410</td>\n",
       "      <td>0.049441</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>16673.75</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>28736.0</td>\n",
       "      <td>12062.25</td>\n",
       "      <td>0.202532</td>\n",
       "      <td>0.045216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>153.50</td>\n",
       "      <td>70.0</td>\n",
       "      <td>237.0</td>\n",
       "      <td>83.50</td>\n",
       "      <td>0.871795</td>\n",
       "      <td>0.037854</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>314.10</td>\n",
       "      <td>237.0</td>\n",
       "      <td>391.2</td>\n",
       "      <td>77.10</td>\n",
       "      <td>0.708861</td>\n",
       "      <td>0.051111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>443.40</td>\n",
       "      <td>391.2</td>\n",
       "      <td>495.6</td>\n",
       "      <td>52.20</td>\n",
       "      <td>0.615385</td>\n",
       "      <td>0.055086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>574.30</td>\n",
       "      <td>495.6</td>\n",
       "      <td>653.0</td>\n",
       "      <td>78.70</td>\n",
       "      <td>0.493506</td>\n",
       "      <td>0.056975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>759.75</td>\n",
       "      <td>653.0</td>\n",
       "      <td>866.5</td>\n",
       "      <td>106.75</td>\n",
       "      <td>0.392405</td>\n",
       "      <td>0.054936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>1019.45</td>\n",
       "      <td>866.5</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>152.95</td>\n",
       "      <td>0.269231</td>\n",
       "      <td>0.050223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>1445.10</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>272.70</td>\n",
       "      <td>0.384615</td>\n",
       "      <td>0.055086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>2094.10</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>376.30</td>\n",
       "      <td>0.269231</td>\n",
       "      <td>0.050223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>3540.95</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>1070.55</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.042197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>16673.75</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>28736.0</td>\n",
       "      <td>12062.25</td>\n",
       "      <td>0.227848</td>\n",
       "      <td>0.047191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>153.50</td>\n",
       "      <td>70.0</td>\n",
       "      <td>237.0</td>\n",
       "      <td>83.50</td>\n",
       "      <td>0.935897</td>\n",
       "      <td>0.027733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>314.10</td>\n",
       "      <td>237.0</td>\n",
       "      <td>391.2</td>\n",
       "      <td>77.10</td>\n",
       "      <td>0.949367</td>\n",
       "      <td>0.024667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>443.40</td>\n",
       "      <td>391.2</td>\n",
       "      <td>495.6</td>\n",
       "      <td>52.20</td>\n",
       "      <td>0.858974</td>\n",
       "      <td>0.039409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>574.30</td>\n",
       "      <td>495.6</td>\n",
       "      <td>653.0</td>\n",
       "      <td>78.70</td>\n",
       "      <td>0.857143</td>\n",
       "      <td>0.039878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>759.75</td>\n",
       "      <td>653.0</td>\n",
       "      <td>866.5</td>\n",
       "      <td>106.75</td>\n",
       "      <td>0.759494</td>\n",
       "      <td>0.048085</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>1019.45</td>\n",
       "      <td>866.5</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>152.95</td>\n",
       "      <td>0.628205</td>\n",
       "      <td>0.054721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>1445.10</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>272.70</td>\n",
       "      <td>0.679487</td>\n",
       "      <td>0.052840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>2094.10</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>376.30</td>\n",
       "      <td>0.551282</td>\n",
       "      <td>0.056315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>3540.95</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>1070.55</td>\n",
       "      <td>0.294872</td>\n",
       "      <td>0.051630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>16673.75</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>28736.0</td>\n",
       "      <td>12062.25</td>\n",
       "      <td>0.265823</td>\n",
       "      <td>0.049703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>153.50</td>\n",
       "      <td>70.0</td>\n",
       "      <td>237.0</td>\n",
       "      <td>83.50</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.053376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>314.10</td>\n",
       "      <td>237.0</td>\n",
       "      <td>391.2</td>\n",
       "      <td>77.10</td>\n",
       "      <td>0.455696</td>\n",
       "      <td>0.056033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>443.40</td>\n",
       "      <td>391.2</td>\n",
       "      <td>495.6</td>\n",
       "      <td>52.20</td>\n",
       "      <td>0.307692</td>\n",
       "      <td>0.052259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>574.30</td>\n",
       "      <td>495.6</td>\n",
       "      <td>653.0</td>\n",
       "      <td>78.70</td>\n",
       "      <td>0.259740</td>\n",
       "      <td>0.049971</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>759.75</td>\n",
       "      <td>653.0</td>\n",
       "      <td>866.5</td>\n",
       "      <td>106.75</td>\n",
       "      <td>0.151899</td>\n",
       "      <td>0.040382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>1019.45</td>\n",
       "      <td>866.5</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>152.95</td>\n",
       "      <td>0.153846</td>\n",
       "      <td>0.040853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>1445.10</td>\n",
       "      <td>1172.4</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>272.70</td>\n",
       "      <td>0.217949</td>\n",
       "      <td>0.046746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>2094.10</td>\n",
       "      <td>1717.8</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>376.30</td>\n",
       "      <td>0.192308</td>\n",
       "      <td>0.044625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>3540.95</td>\n",
       "      <td>2470.4</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>1070.55</td>\n",
       "      <td>0.192308</td>\n",
       "      <td>0.044625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>16673.75</td>\n",
       "      <td>4611.5</td>\n",
       "      <td>28736.0</td>\n",
       "      <td>12062.25</td>\n",
       "      <td>0.202532</td>\n",
       "      <td>0.045216</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Model  Program length (characters)  Left endpoints  \\\n",
       "0          GPT-4o                       153.50            70.0   \n",
       "1          GPT-4o                       314.10           237.0   \n",
       "2          GPT-4o                       443.40           391.2   \n",
       "3          GPT-4o                       574.30           495.6   \n",
       "4          GPT-4o                       759.75           653.0   \n",
       "5          GPT-4o                      1019.45           866.5   \n",
       "6          GPT-4o                      1445.10          1172.4   \n",
       "7          GPT-4o                      2094.10          1717.8   \n",
       "8          GPT-4o                      3540.95          2470.4   \n",
       "9          GPT-4o                     16673.75          4611.5   \n",
       "10    GPT-4-turbo                       153.50            70.0   \n",
       "11    GPT-4-turbo                       314.10           237.0   \n",
       "12    GPT-4-turbo                       443.40           391.2   \n",
       "13    GPT-4-turbo                       574.30           495.6   \n",
       "14    GPT-4-turbo                       759.75           653.0   \n",
       "15    GPT-4-turbo                      1019.45           866.5   \n",
       "16    GPT-4-turbo                      1445.10          1172.4   \n",
       "17    GPT-4-turbo                      2094.10          1717.8   \n",
       "18    GPT-4-turbo                      3540.95          2470.4   \n",
       "19    GPT-4-turbo                     16673.75          4611.5   \n",
       "20  GPT-3.5-turbo                       153.50            70.0   \n",
       "21  GPT-3.5-turbo                       314.10           237.0   \n",
       "22  GPT-3.5-turbo                       443.40           391.2   \n",
       "23  GPT-3.5-turbo                       574.30           495.6   \n",
       "24  GPT-3.5-turbo                       759.75           653.0   \n",
       "25  GPT-3.5-turbo                      1019.45           866.5   \n",
       "26  GPT-3.5-turbo                      1445.10          1172.4   \n",
       "27  GPT-3.5-turbo                      2094.10          1717.8   \n",
       "28  GPT-3.5-turbo                      3540.95          2470.4   \n",
       "29  GPT-3.5-turbo                     16673.75          4611.5   \n",
       "30  Claude-3-Opus                       153.50            70.0   \n",
       "31  Claude-3-Opus                       314.10           237.0   \n",
       "32  Claude-3-Opus                       443.40           391.2   \n",
       "33  Claude-3-Opus                       574.30           495.6   \n",
       "34  Claude-3-Opus                       759.75           653.0   \n",
       "35  Claude-3-Opus                      1019.45           866.5   \n",
       "36  Claude-3-Opus                      1445.10          1172.4   \n",
       "37  Claude-3-Opus                      2094.10          1717.8   \n",
       "38  Claude-3-Opus                      3540.95          2470.4   \n",
       "39  Claude-3-Opus                     16673.75          4611.5   \n",
       "40   CodeLlama-7b                       153.50            70.0   \n",
       "41   CodeLlama-7b                       314.10           237.0   \n",
       "42   CodeLlama-7b                       443.40           391.2   \n",
       "43   CodeLlama-7b                       574.30           495.6   \n",
       "44   CodeLlama-7b                       759.75           653.0   \n",
       "45   CodeLlama-7b                      1019.45           866.5   \n",
       "46   CodeLlama-7b                      1445.10          1172.4   \n",
       "47   CodeLlama-7b                      2094.10          1717.8   \n",
       "48   CodeLlama-7b                      3540.95          2470.4   \n",
       "49   CodeLlama-7b                     16673.75          4611.5   \n",
       "\n",
       "    Right endpoints  Bin half width  Success rate     Error  \n",
       "0             237.0           83.50      0.961538  0.021775  \n",
       "1             391.2           77.10      0.860759  0.038950  \n",
       "2             495.6           52.20      0.794872  0.045721  \n",
       "3             653.0           78.70      0.805195  0.045134  \n",
       "4             866.5          106.75      0.632911  0.054230  \n",
       "5            1172.4          152.95      0.474359  0.056539  \n",
       "6            1717.8          272.70      0.500000  0.056614  \n",
       "7            2470.4          376.30      0.410256  0.055694  \n",
       "8            4611.5         1070.55      0.269231  0.050223  \n",
       "9           28736.0        12062.25      0.227848  0.047191  \n",
       "10            237.0           83.50      0.948718  0.024975  \n",
       "11            391.2           77.10      0.886076  0.035746  \n",
       "12            495.6           52.20      0.923077  0.030172  \n",
       "13            653.0           78.70      0.831169  0.042690  \n",
       "14            866.5          106.75      0.632911  0.054230  \n",
       "15           1172.4          152.95      0.461538  0.056446  \n",
       "16           1717.8          272.70      0.461538  0.056446  \n",
       "17           2470.4          376.30      0.384615  0.055086  \n",
       "18           4611.5         1070.55      0.256410  0.049441  \n",
       "19          28736.0        12062.25      0.202532  0.045216  \n",
       "20            237.0           83.50      0.871795  0.037854  \n",
       "21            391.2           77.10      0.708861  0.051111  \n",
       "22            495.6           52.20      0.615385  0.055086  \n",
       "23            653.0           78.70      0.493506  0.056975  \n",
       "24            866.5          106.75      0.392405  0.054936  \n",
       "25           1172.4          152.95      0.269231  0.050223  \n",
       "26           1717.8          272.70      0.384615  0.055086  \n",
       "27           2470.4          376.30      0.269231  0.050223  \n",
       "28           4611.5         1070.55      0.166667  0.042197  \n",
       "29          28736.0        12062.25      0.227848  0.047191  \n",
       "30            237.0           83.50      0.935897  0.027733  \n",
       "31            391.2           77.10      0.949367  0.024667  \n",
       "32            495.6           52.20      0.858974  0.039409  \n",
       "33            653.0           78.70      0.857143  0.039878  \n",
       "34            866.5          106.75      0.759494  0.048085  \n",
       "35           1172.4          152.95      0.628205  0.054721  \n",
       "36           1717.8          272.70      0.679487  0.052840  \n",
       "37           2470.4          376.30      0.551282  0.056315  \n",
       "38           4611.5         1070.55      0.294872  0.051630  \n",
       "39          28736.0        12062.25      0.265823  0.049703  \n",
       "40            237.0           83.50      0.666667  0.053376  \n",
       "41            391.2           77.10      0.455696  0.056033  \n",
       "42            495.6           52.20      0.307692  0.052259  \n",
       "43            653.0           78.70      0.259740  0.049971  \n",
       "44            866.5          106.75      0.151899  0.040382  \n",
       "45           1172.4          152.95      0.153846  0.040853  \n",
       "46           1717.8          272.70      0.217949  0.046746  \n",
       "47           2470.4          376.30      0.192308  0.044625  \n",
       "48           4611.5         1070.55      0.192308  0.044625  \n",
       "49          28736.0        12062.25      0.202532  0.045216  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_char_df = pd.DataFrame(columns=[\"Model\", \"Program length (characters)\", \"Left endpoints\", \"Right endpoints\", \"Bin half width\", \"Success rate\", \"Error\"])\n",
    "\n",
    "bin_left_num_char, bin_mid_num_char, bin_right_num_char = [], [], []\n",
    "for i in [list(gpt4o_bin_num_char_mean.keys()), list(gpt4_bin_num_char_mean.keys()), list(gpt35_bin_num_char_mean.keys()), list(claude3_bin_num_char_mean.keys()), list(codellama_bin_num_char_mean.keys())]:\n",
    "    for j in i:\n",
    "        bin_left_num_char.append(j[0])\n",
    "        bin_mid_num_char.append(j[1])\n",
    "        bin_right_num_char.append(j[2])\n",
    "\n",
    "num_char_df[\"Model\"] = [\"GPT-4o\"] * len(gpt4o_bin_num_char_mean.keys()) + [\"GPT-4-turbo\"] * len(gpt4_bin_num_char_mean.keys()) + [\"GPT-3.5-turbo\"] * len(gpt35_bin_num_char_mean.keys()) + [\"Claude-3-Opus\"] * len(claude3_bin_num_char_mean.keys()) + [\"CodeLlama-7b\"] * len(codellama_bin_num_char_mean.keys())\n",
    "num_char_df[\"Program length (characters)\"] = bin_mid_num_char\n",
    "num_char_df[\"Left endpoints\"] = bin_left_num_char\n",
    "num_char_df[\"Right endpoints\"] = bin_right_num_char\n",
    "num_char_df[\"Bin half width\"] = (np.array(bin_right_num_char) - np.array(bin_left_num_char)) / 2.0\n",
    "num_char_df[\"Success rate\"] = list(gpt4o_bin_num_char_mean.values()) + list(gpt4_bin_num_char_mean.values()) + list(gpt35_bin_num_char_mean.values()) + list(claude3_bin_num_char_mean.values()) + list(codellama_bin_num_char_mean.values())\n",
    "num_char_df[\"Error\"] = list(gpt4o_bin_num_char_error.values()) + list(gpt4_bin_num_char_error.values()) + list(gpt35_bin_num_char_error.values()) + list(claude3_bin_num_char_error.values()) + list(codellama_bin_num_char_error.values())\n",
    "num_char_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Interior bin boundaries: the distinct left endpoints in ascending order, minus the smallest one.\n",
    "# Sorting gives a fixed order (set iteration order is arbitrary); the slice also tolerates an empty table.\n",
    "num_char_bin_endpoints = sorted(set(num_char_df['Left endpoints']))[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Hint quantity (characters)</th>\n",
       "      <th>Left endpoints</th>\n",
       "      <th>Right endpoints</th>\n",
       "      <th>Bin half width</th>\n",
       "      <th>Success rate</th>\n",
       "      <th>Error</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.2</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.904459</td>\n",
       "      <td>0.023461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>55.60</td>\n",
       "      <td>38.2</td>\n",
       "      <td>73.0</td>\n",
       "      <td>17.40</td>\n",
       "      <td>0.689189</td>\n",
       "      <td>0.053802</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>85.20</td>\n",
       "      <td>73.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>12.20</td>\n",
       "      <td>0.707317</td>\n",
       "      <td>0.050246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>114.70</td>\n",
       "      <td>97.4</td>\n",
       "      <td>132.0</td>\n",
       "      <td>17.30</td>\n",
       "      <td>0.717949</td>\n",
       "      <td>0.050952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>156.80</td>\n",
       "      <td>132.0</td>\n",
       "      <td>181.6</td>\n",
       "      <td>24.80</td>\n",
       "      <td>0.705128</td>\n",
       "      <td>0.051630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>207.15</td>\n",
       "      <td>181.6</td>\n",
       "      <td>232.7</td>\n",
       "      <td>25.55</td>\n",
       "      <td>0.589744</td>\n",
       "      <td>0.055694</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>279.65</td>\n",
       "      <td>232.7</td>\n",
       "      <td>326.6</td>\n",
       "      <td>46.95</td>\n",
       "      <td>0.269231</td>\n",
       "      <td>0.050223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>440.20</td>\n",
       "      <td>326.6</td>\n",
       "      <td>553.8</td>\n",
       "      <td>113.60</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.053376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>3286.40</td>\n",
       "      <td>553.8</td>\n",
       "      <td>6019.0</td>\n",
       "      <td>2732.60</td>\n",
       "      <td>0.113924</td>\n",
       "      <td>0.035746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.2</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.872611</td>\n",
       "      <td>0.026609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>55.60</td>\n",
       "      <td>38.2</td>\n",
       "      <td>73.0</td>\n",
       "      <td>17.40</td>\n",
       "      <td>0.837838</td>\n",
       "      <td>0.042849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>85.20</td>\n",
       "      <td>73.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>12.20</td>\n",
       "      <td>0.719512</td>\n",
       "      <td>0.049610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>114.70</td>\n",
       "      <td>97.4</td>\n",
       "      <td>132.0</td>\n",
       "      <td>17.30</td>\n",
       "      <td>0.717949</td>\n",
       "      <td>0.050952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>156.80</td>\n",
       "      <td>132.0</td>\n",
       "      <td>181.6</td>\n",
       "      <td>24.80</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.053376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>207.15</td>\n",
       "      <td>181.6</td>\n",
       "      <td>232.7</td>\n",
       "      <td>25.55</td>\n",
       "      <td>0.576923</td>\n",
       "      <td>0.055940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>279.65</td>\n",
       "      <td>232.7</td>\n",
       "      <td>326.6</td>\n",
       "      <td>46.95</td>\n",
       "      <td>0.307692</td>\n",
       "      <td>0.052259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>440.20</td>\n",
       "      <td>326.6</td>\n",
       "      <td>553.8</td>\n",
       "      <td>113.60</td>\n",
       "      <td>0.307692</td>\n",
       "      <td>0.052259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>3286.40</td>\n",
       "      <td>553.8</td>\n",
       "      <td>6019.0</td>\n",
       "      <td>2732.60</td>\n",
       "      <td>0.113924</td>\n",
       "      <td>0.035746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.2</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.815287</td>\n",
       "      <td>0.030971</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>55.60</td>\n",
       "      <td>38.2</td>\n",
       "      <td>73.0</td>\n",
       "      <td>17.40</td>\n",
       "      <td>0.567568</td>\n",
       "      <td>0.057591</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>85.20</td>\n",
       "      <td>73.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>12.20</td>\n",
       "      <td>0.426829</td>\n",
       "      <td>0.054621</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>114.70</td>\n",
       "      <td>97.4</td>\n",
       "      <td>132.0</td>\n",
       "      <td>17.30</td>\n",
       "      <td>0.487179</td>\n",
       "      <td>0.056595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>156.80</td>\n",
       "      <td>132.0</td>\n",
       "      <td>181.6</td>\n",
       "      <td>24.80</td>\n",
       "      <td>0.461538</td>\n",
       "      <td>0.056446</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>207.15</td>\n",
       "      <td>181.6</td>\n",
       "      <td>232.7</td>\n",
       "      <td>25.55</td>\n",
       "      <td>0.397436</td>\n",
       "      <td>0.055410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>279.65</td>\n",
       "      <td>232.7</td>\n",
       "      <td>326.6</td>\n",
       "      <td>46.95</td>\n",
       "      <td>0.179487</td>\n",
       "      <td>0.043452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>440.20</td>\n",
       "      <td>326.6</td>\n",
       "      <td>553.8</td>\n",
       "      <td>113.60</td>\n",
       "      <td>0.179487</td>\n",
       "      <td>0.043452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>3286.40</td>\n",
       "      <td>553.8</td>\n",
       "      <td>6019.0</td>\n",
       "      <td>2732.60</td>\n",
       "      <td>0.075949</td>\n",
       "      <td>0.029806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.2</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.910828</td>\n",
       "      <td>0.022745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>55.60</td>\n",
       "      <td>38.2</td>\n",
       "      <td>73.0</td>\n",
       "      <td>17.40</td>\n",
       "      <td>0.824324</td>\n",
       "      <td>0.044237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>85.20</td>\n",
       "      <td>73.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>12.20</td>\n",
       "      <td>0.731707</td>\n",
       "      <td>0.048929</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>114.70</td>\n",
       "      <td>97.4</td>\n",
       "      <td>132.0</td>\n",
       "      <td>17.30</td>\n",
       "      <td>0.769231</td>\n",
       "      <td>0.047706</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>156.80</td>\n",
       "      <td>132.0</td>\n",
       "      <td>181.6</td>\n",
       "      <td>24.80</td>\n",
       "      <td>0.820513</td>\n",
       "      <td>0.043452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>207.15</td>\n",
       "      <td>181.6</td>\n",
       "      <td>232.7</td>\n",
       "      <td>25.55</td>\n",
       "      <td>0.692308</td>\n",
       "      <td>0.052259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>279.65</td>\n",
       "      <td>232.7</td>\n",
       "      <td>326.6</td>\n",
       "      <td>46.95</td>\n",
       "      <td>0.589744</td>\n",
       "      <td>0.055694</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>440.20</td>\n",
       "      <td>326.6</td>\n",
       "      <td>553.8</td>\n",
       "      <td>113.60</td>\n",
       "      <td>0.397436</td>\n",
       "      <td>0.055410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>3286.40</td>\n",
       "      <td>553.8</td>\n",
       "      <td>6019.0</td>\n",
       "      <td>2732.60</td>\n",
       "      <td>0.139241</td>\n",
       "      <td>0.038950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.2</td>\n",
       "      <td>19.10</td>\n",
       "      <td>0.687898</td>\n",
       "      <td>0.036979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>55.60</td>\n",
       "      <td>38.2</td>\n",
       "      <td>73.0</td>\n",
       "      <td>17.40</td>\n",
       "      <td>0.270270</td>\n",
       "      <td>0.051626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>85.20</td>\n",
       "      <td>73.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>12.20</td>\n",
       "      <td>0.207317</td>\n",
       "      <td>0.044767</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>114.70</td>\n",
       "      <td>97.4</td>\n",
       "      <td>132.0</td>\n",
       "      <td>17.30</td>\n",
       "      <td>0.179487</td>\n",
       "      <td>0.043452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>156.80</td>\n",
       "      <td>132.0</td>\n",
       "      <td>181.6</td>\n",
       "      <td>24.80</td>\n",
       "      <td>0.269231</td>\n",
       "      <td>0.050223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>207.15</td>\n",
       "      <td>181.6</td>\n",
       "      <td>232.7</td>\n",
       "      <td>25.55</td>\n",
       "      <td>0.179487</td>\n",
       "      <td>0.043452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>279.65</td>\n",
       "      <td>232.7</td>\n",
       "      <td>326.6</td>\n",
       "      <td>46.95</td>\n",
       "      <td>0.115385</td>\n",
       "      <td>0.036175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>440.20</td>\n",
       "      <td>326.6</td>\n",
       "      <td>553.8</td>\n",
       "      <td>113.60</td>\n",
       "      <td>0.128205</td>\n",
       "      <td>0.037854</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>3286.40</td>\n",
       "      <td>553.8</td>\n",
       "      <td>6019.0</td>\n",
       "      <td>2732.60</td>\n",
       "      <td>0.075949</td>\n",
       "      <td>0.029806</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Model  Hint quantity (characters)  Left endpoints  \\\n",
       "0          GPT-4o                        0.00             0.0   \n",
       "1          GPT-4o                       19.10             0.0   \n",
       "2          GPT-4o                       55.60            38.2   \n",
       "3          GPT-4o                       85.20            73.0   \n",
       "4          GPT-4o                      114.70            97.4   \n",
       "5          GPT-4o                      156.80           132.0   \n",
       "6          GPT-4o                      207.15           181.6   \n",
       "7          GPT-4o                      279.65           232.7   \n",
       "8          GPT-4o                      440.20           326.6   \n",
       "9          GPT-4o                     3286.40           553.8   \n",
       "10    GPT-4-turbo                        0.00             0.0   \n",
       "11    GPT-4-turbo                       19.10             0.0   \n",
       "12    GPT-4-turbo                       55.60            38.2   \n",
       "13    GPT-4-turbo                       85.20            73.0   \n",
       "14    GPT-4-turbo                      114.70            97.4   \n",
       "15    GPT-4-turbo                      156.80           132.0   \n",
       "16    GPT-4-turbo                      207.15           181.6   \n",
       "17    GPT-4-turbo                      279.65           232.7   \n",
       "18    GPT-4-turbo                      440.20           326.6   \n",
       "19    GPT-4-turbo                     3286.40           553.8   \n",
       "20  GPT-3.5-turbo                        0.00             0.0   \n",
       "21  GPT-3.5-turbo                       19.10             0.0   \n",
       "22  GPT-3.5-turbo                       55.60            38.2   \n",
       "23  GPT-3.5-turbo                       85.20            73.0   \n",
       "24  GPT-3.5-turbo                      114.70            97.4   \n",
       "25  GPT-3.5-turbo                      156.80           132.0   \n",
       "26  GPT-3.5-turbo                      207.15           181.6   \n",
       "27  GPT-3.5-turbo                      279.65           232.7   \n",
       "28  GPT-3.5-turbo                      440.20           326.6   \n",
       "29  GPT-3.5-turbo                     3286.40           553.8   \n",
       "30  Claude-3-Opus                        0.00             0.0   \n",
       "31  Claude-3-Opus                       19.10             0.0   \n",
       "32  Claude-3-Opus                       55.60            38.2   \n",
       "33  Claude-3-Opus                       85.20            73.0   \n",
       "34  Claude-3-Opus                      114.70            97.4   \n",
       "35  Claude-3-Opus                      156.80           132.0   \n",
       "36  Claude-3-Opus                      207.15           181.6   \n",
       "37  Claude-3-Opus                      279.65           232.7   \n",
       "38  Claude-3-Opus                      440.20           326.6   \n",
       "39  Claude-3-Opus                     3286.40           553.8   \n",
       "40   CodeLlama-7b                        0.00             0.0   \n",
       "41   CodeLlama-7b                       19.10             0.0   \n",
       "42   CodeLlama-7b                       55.60            38.2   \n",
       "43   CodeLlama-7b                       85.20            73.0   \n",
       "44   CodeLlama-7b                      114.70            97.4   \n",
       "45   CodeLlama-7b                      156.80           132.0   \n",
       "46   CodeLlama-7b                      207.15           181.6   \n",
       "47   CodeLlama-7b                      279.65           232.7   \n",
       "48   CodeLlama-7b                      440.20           326.6   \n",
       "49   CodeLlama-7b                     3286.40           553.8   \n",
       "\n",
       "    Right endpoints  Bin half width  Success rate     Error  \n",
       "0               0.0            0.00      1.000000  0.000000  \n",
       "1              38.2           19.10      0.904459  0.023461  \n",
       "2              73.0           17.40      0.689189  0.053802  \n",
       "3              97.4           12.20      0.707317  0.050246  \n",
       "4             132.0           17.30      0.717949  0.050952  \n",
       "5             181.6           24.80      0.705128  0.051630  \n",
       "6             232.7           25.55      0.589744  0.055694  \n",
       "7             326.6           46.95      0.269231  0.050223  \n",
       "8             553.8          113.60      0.333333  0.053376  \n",
       "9            6019.0         2732.60      0.113924  0.035746  \n",
       "10              0.0            0.00      1.000000  0.000000  \n",
       "11             38.2           19.10      0.872611  0.026609  \n",
       "12             73.0           17.40      0.837838  0.042849  \n",
       "13             97.4           12.20      0.719512  0.049610  \n",
       "14            132.0           17.30      0.717949  0.050952  \n",
       "15            181.6           24.80      0.666667  0.053376  \n",
       "16            232.7           25.55      0.576923  0.055940  \n",
       "17            326.6           46.95      0.307692  0.052259  \n",
       "18            553.8          113.60      0.307692  0.052259  \n",
       "19           6019.0         2732.60      0.113924  0.035746  \n",
       "20              0.0            0.00      1.000000  0.000000  \n",
       "21             38.2           19.10      0.815287  0.030971  \n",
       "22             73.0           17.40      0.567568  0.057591  \n",
       "23             97.4           12.20      0.426829  0.054621  \n",
       "24            132.0           17.30      0.487179  0.056595  \n",
       "25            181.6           24.80      0.461538  0.056446  \n",
       "26            232.7           25.55      0.397436  0.055410  \n",
       "27            326.6           46.95      0.179487  0.043452  \n",
       "28            553.8          113.60      0.179487  0.043452  \n",
       "29           6019.0         2732.60      0.075949  0.029806  \n",
       "30              0.0            0.00      1.000000  0.000000  \n",
       "31             38.2           19.10      0.910828  0.022745  \n",
       "32             73.0           17.40      0.824324  0.044237  \n",
       "33             97.4           12.20      0.731707  0.048929  \n",
       "34            132.0           17.30      0.769231  0.047706  \n",
       "35            181.6           24.80      0.820513  0.043452  \n",
       "36            232.7           25.55      0.692308  0.052259  \n",
       "37            326.6           46.95      0.589744  0.055694  \n",
       "38            553.8          113.60      0.397436  0.055410  \n",
       "39           6019.0         2732.60      0.139241  0.038950  \n",
       "40              0.0            0.00      1.000000  0.000000  \n",
       "41             38.2           19.10      0.687898  0.036979  \n",
       "42             73.0           17.40      0.270270  0.051626  \n",
       "43             97.4           12.20      0.207317  0.044767  \n",
       "44            132.0           17.30      0.179487  0.043452  \n",
       "45            181.6           24.80      0.269231  0.050223  \n",
       "46            232.7           25.55      0.179487  0.043452  \n",
       "47            326.6           46.95      0.115385  0.036175  \n",
       "48            553.8          113.60      0.128205  0.037854  \n",
       "49           6019.0         2732.60      0.075949  0.029806  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gather the binned success rates (binned by hint length in characters) of all five models\n",
    "# into one long-format table. Each entry is (display name, bin -> mean dict, bin -> error dict);\n",
    "# every bin key is indexed as key[0] = left edge, key[1] = midpoint, key[2] = right edge.\n",
    "hint_char_results = [\n",
    "    (\"GPT-4o\", gpt4o_bin_num_hint_char_mean, gpt4o_bin_num_hint_char_error),\n",
    "    (\"GPT-4-turbo\", gpt4_bin_num_hint_char_mean, gpt4_bin_num_hint_char_error),\n",
    "    (\"GPT-3.5-turbo\", gpt35_bin_num_hint_char_mean, gpt35_bin_num_hint_char_error),\n",
    "    (\"Claude-3-Opus\", claude3_bin_num_hint_char_mean, claude3_bin_num_hint_char_error),\n",
    "    (\"CodeLlama-7b\", codellama_bin_num_hint_char_mean, codellama_bin_num_hint_char_error),\n",
    "]\n",
    "\n",
    "bin_left_num_hint_char, bin_mid_num_hint_char, bin_right_num_hint_char = [], [], []\n",
    "hint_char_models, hint_char_rates, hint_char_errors = [], [], []\n",
    "for model_name, bin_mean, bin_error in hint_char_results:\n",
    "    bin_keys = list(bin_mean.keys())\n",
    "    bin_left_num_hint_char += [key[0] for key in bin_keys]\n",
    "    bin_mid_num_hint_char += [key[1] for key in bin_keys]\n",
    "    bin_right_num_hint_char += [key[2] for key in bin_keys]\n",
    "    # One model label per bin, so the label column lines up with the bin columns.\n",
    "    hint_char_models += [model_name] * len(bin_keys)\n",
    "    hint_char_rates += list(bin_mean.values())\n",
    "    hint_char_errors += list(bin_error.values())\n",
    "\n",
    "num_hint_char_df = pd.DataFrame({\n",
    "    \"Model\": hint_char_models,\n",
    "    \"Hint quantity (characters)\": bin_mid_num_hint_char,\n",
    "    \"Left endpoints\": bin_left_num_hint_char,\n",
    "    \"Right endpoints\": bin_right_num_hint_char,\n",
    "    \"Bin half width\": (np.array(bin_right_num_hint_char) - np.array(bin_left_num_hint_char)) / 2.0,\n",
    "    \"Success rate\": hint_char_rates,\n",
    "    \"Error\": hint_char_errors,\n",
    "})\n",
    "\n",
    "# Zero-width bins get their success rate forced to 1.0 and their error to 0.0.\n",
    "# NOTE(review): presumably because a program that needs no hints verifies trivially; confirm.\n",
    "zero_width_hint_char_bins = num_hint_char_df[\"Bin half width\"] == 0\n",
    "num_hint_char_df.loc[zero_width_hint_char_bins, \"Success rate\"] = 1.0\n",
    "num_hint_char_df.loc[zero_width_hint_char_bins, \"Error\"] = 0.0\n",
    "num_hint_char_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct left bin edges of the hint-length table, with the smallest edge dropped.\n",
    "# NOTE(review): presumably only the interior bin boundaries are needed downstream; confirm.\n",
    "unique_left_endpoints = set(num_hint_char_df[\"Left endpoints\"])\n",
    "lowest_left_endpoint = min(unique_left_endpoints)\n",
    "num_hint_char_bin_endpoints = [edge for edge in unique_left_endpoints if edge != lowest_left_endpoint]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th># of lemmas</th>\n",
       "      <th>Left endpoints</th>\n",
       "      <th>Right endpoints</th>\n",
       "      <th>Bin half width</th>\n",
       "      <th>Success rate</th>\n",
       "      <th>Error</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.655449</td>\n",
       "      <td>0.019024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.385714</td>\n",
       "      <td>0.058179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GPT-4o</td>\n",
       "      <td>23.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.318182</td>\n",
       "      <td>0.049651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.658654</td>\n",
       "      <td>0.018982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.428571</td>\n",
       "      <td>0.059148</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>GPT-4-turbo</td>\n",
       "      <td>23.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.306818</td>\n",
       "      <td>0.049161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.475962</td>\n",
       "      <td>0.019993</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.328571</td>\n",
       "      <td>0.056139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>GPT-3.5-turbo</td>\n",
       "      <td>23.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.272727</td>\n",
       "      <td>0.047476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.740385</td>\n",
       "      <td>0.017551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.471429</td>\n",
       "      <td>0.059664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Claude-3-Opus</td>\n",
       "      <td>23.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.397727</td>\n",
       "      <td>0.052173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.288462</td>\n",
       "      <td>0.018136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.257143</td>\n",
       "      <td>0.052239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>CodeLlama-7b</td>\n",
       "      <td>23.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.238636</td>\n",
       "      <td>0.045438</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Model  # of lemmas  Left endpoints  Right endpoints  \\\n",
       "0          GPT-4o          0.0             0.0              0.0   \n",
       "1          GPT-4o          0.5             0.0              1.0   \n",
       "2          GPT-4o          2.5             1.0              4.0   \n",
       "3          GPT-4o         23.0             4.0             42.0   \n",
       "4     GPT-4-turbo          0.0             0.0              0.0   \n",
       "5     GPT-4-turbo          0.5             0.0              1.0   \n",
       "6     GPT-4-turbo          2.5             1.0              4.0   \n",
       "7     GPT-4-turbo         23.0             4.0             42.0   \n",
       "8   GPT-3.5-turbo          0.0             0.0              0.0   \n",
       "9   GPT-3.5-turbo          0.5             0.0              1.0   \n",
       "10  GPT-3.5-turbo          2.5             1.0              4.0   \n",
       "11  GPT-3.5-turbo         23.0             4.0             42.0   \n",
       "12  Claude-3-Opus          0.0             0.0              0.0   \n",
       "13  Claude-3-Opus          0.5             0.0              1.0   \n",
       "14  Claude-3-Opus          2.5             1.0              4.0   \n",
       "15  Claude-3-Opus         23.0             4.0             42.0   \n",
       "16   CodeLlama-7b          0.0             0.0              0.0   \n",
       "17   CodeLlama-7b          0.5             0.0              1.0   \n",
       "18   CodeLlama-7b          2.5             1.0              4.0   \n",
       "19   CodeLlama-7b         23.0             4.0             42.0   \n",
       "\n",
       "    Bin half width  Success rate     Error  \n",
       "0              0.0           NaN       NaN  \n",
       "1              0.5      0.655449  0.019024  \n",
       "2              1.5      0.385714  0.058179  \n",
       "3             19.0      0.318182  0.049651  \n",
       "4              0.0           NaN       NaN  \n",
       "5              0.5      0.658654  0.018982  \n",
       "6              1.5      0.428571  0.059148  \n",
       "7             19.0      0.306818  0.049161  \n",
       "8              0.0           NaN       NaN  \n",
       "9              0.5      0.475962  0.019993  \n",
       "10             1.5      0.328571  0.056139  \n",
       "11            19.0      0.272727  0.047476  \n",
       "12             0.0           NaN       NaN  \n",
       "13             0.5      0.740385  0.017551  \n",
       "14             1.5      0.471429  0.059664  \n",
       "15            19.0      0.397727  0.052173  \n",
       "16             0.0           NaN       NaN  \n",
       "17             0.5      0.288462  0.018136  \n",
       "18             1.5      0.257143  0.052239  \n",
       "19            19.0      0.238636  0.045438  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gather the binned success rates (binned by number of lemmas) of all five models into one\n",
    "# long-format table. Each entry is (display name, bin -> mean dict, bin -> error dict);\n",
    "# every bin key is indexed as key[0] = left edge, key[1] = midpoint, key[2] = right edge.\n",
    "lemma_results = [\n",
    "    (\"GPT-4o\", gpt4o_bin_num_lemma_mean, gpt4o_bin_num_lemma_error),\n",
    "    (\"GPT-4-turbo\", gpt4_bin_num_lemma_mean, gpt4_bin_num_lemma_error),\n",
    "    (\"GPT-3.5-turbo\", gpt35_bin_num_lemma_mean, gpt35_bin_num_lemma_error),\n",
    "    (\"Claude-3-Opus\", claude3_bin_num_lemma_mean, claude3_bin_num_lemma_error),\n",
    "    (\"CodeLlama-7b\", codellama_bin_num_lemma_mean, codellama_bin_num_lemma_error),\n",
    "]\n",
    "\n",
    "bin_left_num_lemma, bin_mid_num_lemma, bin_right_num_lemma = [], [], []\n",
    "lemma_models, lemma_rates, lemma_errors = [], [], []\n",
    "for model_name, bin_mean, bin_error in lemma_results:\n",
    "    bin_keys = list(bin_mean.keys())\n",
    "    bin_left_num_lemma += [key[0] for key in bin_keys]\n",
    "    bin_mid_num_lemma += [key[1] for key in bin_keys]\n",
    "    bin_right_num_lemma += [key[2] for key in bin_keys]\n",
    "    # One model label per bin, so the label column lines up with the bin columns.\n",
    "    lemma_models += [model_name] * len(bin_keys)\n",
    "    lemma_rates += list(bin_mean.values())\n",
    "    lemma_errors += list(bin_error.values())\n",
    "\n",
    "num_lemma_df = pd.DataFrame({\n",
    "    \"Model\": lemma_models,\n",
    "    \"# of lemmas\": bin_mid_num_lemma,\n",
    "    \"Left endpoints\": bin_left_num_lemma,\n",
    "    \"Right endpoints\": bin_right_num_lemma,\n",
    "    \"Bin half width\": (np.array(bin_right_num_lemma) - np.array(bin_left_num_lemma)) / 2.0,\n",
    "    \"Success rate\": lemma_rates,\n",
    "    \"Error\": lemma_errors,\n",
    "})\n",
    "num_lemma_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "fill": "toself",
         "fillcolor": "rgba(99,110,250,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-4o",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75,
          16673.75,
          3540.9500000000003,
          2094.1000000000004,
          1445.1000000000004,
          1019.4500000000004,
          759.75,
          574.3000000000001,
          443.4000000000001,
          314.1,
          153.5
         ],
         "xaxis": "x",
         "y": [
          0.9833130198873961,
          0.8997097358219226,
          0.8405926065764988,
          0.850328967520681,
          0.6871418618835778,
          0.5308983341237998,
          0.5566138517072298,
          0.4659508707022942,
          0.319454060656849,
          0.27503924825998993,
          0.18065695427165565,
          0.2190074778046894,
          0.3545619498105263,
          0.44338614829277023,
          0.4178196145941489,
          0.5786809229265488,
          0.7600606428689295,
          0.7491509831670908,
          0.8218092515198496,
          0.9397639031895271
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-4o<br>Program length (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-4o",
         "line": {
          "color": "#636efa",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-4o",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75
         ],
         "xaxis": "x",
         "y": [
          0.9615384615384616,
          0.8607594936708861,
          0.7948717948717948,
          0.8051948051948052,
          0.6329113924050633,
          0.47435897435897434,
          0.5,
          0.41025641025641024,
          0.2692307692307692,
          0.22784810126582278
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(239,85,59,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-4-turbo",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75,
          16673.75,
          3540.9500000000003,
          2094.1000000000004,
          1445.1000000000004,
          1019.4500000000004,
          759.75,
          574.3000000000001,
          443.4000000000001,
          314.1,
          153.5
         ],
         "xaxis": "x",
         "y": [
          0.9736928598464465,
          0.9218221293560228,
          0.9532486361751052,
          0.873858774213679,
          0.6871418618835778,
          0.5179845681311802,
          0.5179845681311802,
          0.43970114415687944,
          0.30585123868644504,
          0.24774738413576614,
          0.15731590700347436,
          0.20696927413406777,
          0.32952962507388983,
          0.4050923549457429,
          0.4050923549457429,
          0.5786809229265488,
          0.7884788881239835,
          0.892905209978741,
          0.8503297693781543,
          0.9237430375894509
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-4-turbo<br>Program length (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-4-turbo",
         "line": {
          "color": "#EF553B",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-4-turbo",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75
         ],
         "xaxis": "x",
         "y": [
          0.9487179487179487,
          0.8860759493670886,
          0.9230769230769231,
          0.8311688311688312,
          0.6329113924050633,
          0.46153846153846156,
          0.46153846153846156,
          0.38461538461538464,
          0.2564102564102564,
          0.20253164556962025
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(0,204,150,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-3.5-turbo",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75,
          16673.75,
          3540.9500000000003,
          2094.1000000000004,
          1445.1000000000004,
          1019.4500000000004,
          759.75,
          574.3000000000001,
          443.4000000000001,
          314.1,
          153.5
         ],
         "xaxis": "x",
         "y": [
          0.909648939304365,
          0.759972102724725,
          0.6704703749261103,
          0.5504819763145683,
          0.44734154325037573,
          0.319454060656849,
          0.43970114415687944,
          0.319454060656849,
          0.20886414029515277,
          0.27503924825998993,
          0.18065695427165565,
          0.12446919303818055,
          0.2190074778046894,
          0.32952962507388983,
          0.2190074778046894,
          0.3374685833319027,
          0.43653101069841865,
          0.5602988558431206,
          0.6577494162626167,
          0.8339408042853786
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-3.5-turbo<br>Program length (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-3.5-turbo",
         "line": {
          "color": "#00cc96",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-3.5-turbo",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75
         ],
         "xaxis": "x",
         "y": [
          0.8717948717948718,
          0.7088607594936709,
          0.6153846153846154,
          0.4935064935064935,
          0.3924050632911392,
          0.2692307692307692,
          0.38461538461538464,
          0.2692307692307692,
          0.16666666666666666,
          0.22784810126582278
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(171,99,250,.3)",
         "hoverinfo": "skip",
         "legendgroup": "Claude-3-Opus",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75,
          16673.75,
          3540.9500000000003,
          2094.1000000000004,
          1445.1000000000004,
          1019.4500000000004,
          759.75,
          574.3000000000001,
          443.4000000000001,
          314.1,
          153.5
         ],
         "xaxis": "x",
         "y": [
          0.9636309261070299,
          0.9740342962960112,
          0.8983830345776702,
          0.8970207518742201,
          0.807578910966013,
          0.6829262680171984,
          0.7323275720107775,
          0.607597344010411,
          0.3465019415340916,
          0.31552583108178356,
          0.21611973853846955,
          0.24324164820949817,
          0.4949667585536917,
          0.6266467869635816,
          0.573483988393058,
          0.711408430806139,
          0.817264962411494,
          0.8195656833710476,
          0.9246998809191785,
          0.9081639456878419
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=Claude-3-Opus<br>Program length (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "Claude-3-Opus",
         "line": {
          "color": "#ab63fa",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "Claude-3-Opus",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75
         ],
         "xaxis": "x",
         "y": [
          0.9358974358974359,
          0.9493670886075949,
          0.8589743589743589,
          0.8571428571428571,
          0.759493670886076,
          0.6282051282051282,
          0.6794871794871795,
          0.5512820512820513,
          0.2948717948717949,
          0.26582278481012656
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(255,161,90,.3)",
         "hoverinfo": "skip",
         "legendgroup": "CodeLlama-7b",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75,
          16673.75,
          3540.9500000000003,
          2094.1000000000004,
          1445.1000000000004,
          1019.4500000000004,
          759.75,
          574.3000000000001,
          443.4000000000001,
          314.1,
          153.5
         ],
         "xaxis": "x",
         "y": [
          0.720042717935029,
          0.5117293271753801,
          0.35995124772975057,
          0.3097110638409715,
          0.19228069354256166,
          0.1946988465033688,
          0.264695042950746,
          0.23693225777810165,
          0.23693225777810165,
          0.24774738413576614,
          0.15731590700347436,
          0.147683126837283,
          0.147683126837283,
          0.1712023929466899,
          0.11299346118893891,
          0.11151677481186872,
          0.2097694556395479,
          0.25543336765486485,
          0.399663077887911,
          0.6132906153983042
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=CodeLlama-7b<br>Program length (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "CodeLlama-7b",
         "line": {
          "color": "#FFA15A",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "CodeLlama-7b",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          153.5,
          314.1,
          443.4000000000001,
          574.3000000000001,
          759.75,
          1019.4500000000004,
          1445.1000000000004,
          2094.1000000000004,
          3540.9500000000003,
          16673.75
         ],
         "xaxis": "x",
         "y": [
          0.6666666666666666,
          0.45569620253164556,
          0.3076923076923077,
          0.2597402597402597,
          0.1518987341772152,
          0.15384615384615385,
          0.21794871794871795,
          0.19230769230769232,
          0.19230769230769232,
          0.20253164556962025
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "font": {
         "size": 20
        },
        "height": 600,
        "legend": {
         "title": {},
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "shapes": [
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 866.5,
          "x1": 866.5,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 4611.5,
          "x1": 4611.5,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 391.20000000000005,
          "x1": 391.20000000000005,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 2470.4000000000005,
          "x1": 2470.4000000000005,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 653,
          "x1": 653,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 237,
          "x1": 237,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 495.6000000000001,
          "x1": 495.6000000000001,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 1172.4000000000008,
          "x1": 1172.4000000000008,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 1717.8000000000002,
          "x1": 1717.8000000000002,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         }
        ],
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "width": 900,
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "dtick": 1,
         "minor": {
          "showgrid": false,
          "ticklen": 6,
          "ticks": "inside"
         },
         "showgrid": true,
         "title": {
          "text": "Program length (characters)"
         },
         "type": "log"
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "range": [
          0,
          1
         ],
         "tickformat": ",.0%",
         "title": {
          "text": "Success rate"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "num_char_fig = scatter_with_error_bands(num_char_bin_endpoints,\n",
    "                                        data_frame=num_char_df,\n",
    "                                        x=\"Program length (characters)\",\n",
    "                                        y=\"Success rate\",\n",
    "                                        error_y=\"Error\",\n",
    "                                        error_y_mode=\"band\",\n",
    "                                        color=\"Model\")\n",
    "num_char_fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "fill": "toself",
         "fillcolor": "rgba(99,110,250,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-4o",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4,
          3286.4,
          440.20000000000005,
          279.6500000000001,
          207.15000000000006,
          156.80000000000004,
          114.70000000000002,
          85.20000000000002,
          55.60000000000001,
          19.10000000000001,
          0
         ],
         "xaxis": "x",
         "y": [
          1,
          0.9279192639927866,
          0.7429915616243132,
          0.7575627660054222,
          0.7689009776988759,
          0.7567583517905019,
          0.6454380501894736,
          0.319454060656849,
          0.3867093846016957,
          0.14967023062184562,
          0.07817787064397716,
          0.27995728206497095,
          0.2190074778046894,
          0.5340491292977059,
          0.6534980584659085,
          0.66699645819856,
          0.6570713803360412,
          0.635386816754065,
          0.8809979334594428,
          1
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-4o<br>Hint quantity (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-4o",
         "line": {
          "color": "#636efa",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-4o",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4
         ],
         "xaxis": "x",
         "y": [
          1,
          0.9044585987261147,
          0.6891891891891891,
          0.7073170731707317,
          0.717948717948718,
          0.7051282051282052,
          0.5897435897435898,
          0.2692307692307692,
          0.3333333333333333,
          0.11392405063291139
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(239,85,59,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-4-turbo",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4,
          3286.4,
          440.20000000000005,
          279.6500000000001,
          207.15000000000006,
          156.80000000000004,
          114.70000000000002,
          85.20000000000002,
          55.60000000000001,
          19.10000000000001,
          0
         ],
         "xaxis": "x",
         "y": [
          1,
          0.8992202964332536,
          0.880686659973886,
          0.7691221857799047,
          0.7689009776988759,
          0.720042717935029,
          0.6328629301810326,
          0.35995124772975057,
          0.35995124772975057,
          0.14967023062184562,
          0.07817787064397716,
          0.25543336765486485,
          0.25543336765486485,
          0.5209832236651212,
          0.6132906153983042,
          0.66699645819856,
          0.6699022044639977,
          0.7949890157017897,
          0.8460026335030522,
          1
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-4-turbo<br>Hint quantity (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-4-turbo",
         "line": {
          "color": "#EF553B",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-4-turbo",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4
         ],
         "xaxis": "x",
         "y": [
          1,
          0.8726114649681529,
          0.8378378378378378,
          0.7195121951219512,
          0.717948717948718,
          0.6666666666666666,
          0.5769230769230769,
          0.3076923076923077,
          0.3076923076923077,
          0.11392405063291139
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(0,204,150,.3)",
         "hoverinfo": "skip",
         "legendgroup": "GPT-3.5-turbo",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4,
          3286.4,
          440.20000000000005,
          279.6500000000001,
          207.15000000000006,
          156.80000000000004,
          114.70000000000002,
          85.20000000000002,
          55.60000000000001,
          19.10000000000001,
          0
         ],
         "xaxis": "x",
         "y": [
          1,
          0.8462575726413821,
          0.6251582274490197,
          0.48145058594178936,
          0.543774725093116,
          0.5179845681311802,
          0.45284586192730203,
          0.22293941260186273,
          0.22293941260186273,
          0.10575486754254165,
          0.04614386663467355,
          0.13603494637249625,
          0.13603494637249625,
          0.3420259329444928,
          0.4050923549457429,
          0.43058424926585837,
          0.3722079506435765,
          0.5099769076861154,
          0.7843156757662613,
          1
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=GPT-3.5-turbo<br>Hint quantity (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "GPT-3.5-turbo",
         "line": {
          "color": "#00cc96",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "GPT-3.5-turbo",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4
         ],
         "xaxis": "x",
         "y": [
          1,
          0.8152866242038217,
          0.5675675675675675,
          0.4268292682926829,
          0.48717948717948717,
          0.46153846153846156,
          0.3974358974358974,
          0.1794871794871795,
          0.1794871794871795,
          0.0759493670886076
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(171,99,250,.3)",
         "hoverinfo": "skip",
         "legendgroup": "Claude-3-Opus",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4,
          3286.4,
          440.20000000000005,
          279.6500000000001,
          207.15000000000006,
          156.80000000000004,
          114.70000000000002,
          85.20000000000002,
          55.60000000000001,
          19.10000000000001,
          0
         ],
         "xaxis": "x",
         "y": [
          1,
          0.9335728482920107,
          0.8685616652607926,
          0.7806362998571759,
          0.8169364363804649,
          0.8639650536275038,
          0.7445666323451352,
          0.6454380501894736,
          0.45284586192730203,
          0.1781907484801504,
          0.10029026417807743,
          0.3420259329444928,
          0.5340491292977059,
          0.6400487522702494,
          0.7770605873981372,
          0.7215251020810737,
          0.6827783342891656,
          0.7800869833878561,
          0.8880832026634033,
          1
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=Claude-3-Opus<br>Hint quantity (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "Claude-3-Opus",
         "line": {
          "color": "#ab63fa",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "Claude-3-Opus",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4
         ],
         "xaxis": "x",
         "y": [
          1,
          0.910828025477707,
          0.8243243243243243,
          0.7317073170731707,
          0.7692307692307693,
          0.8205128205128205,
          0.6923076923076923,
          0.5897435897435898,
          0.3974358974358974,
          0.13924050632911392
         ],
         "yaxis": "y"
        },
        {
         "fill": "toself",
         "fillcolor": "rgba(255,161,90,.3)",
         "hoverinfo": "skip",
         "legendgroup": "CodeLlama-7b",
         "line": {
          "color": "rgba(255,255,255,0)"
         },
         "showlegend": false,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4,
          3286.4,
          440.20000000000005,
          279.6500000000001,
          207.15000000000006,
          156.80000000000004,
          114.70000000000002,
          85.20000000000002,
          55.60000000000001,
          19.10000000000001,
          0
         ],
         "xaxis": "x",
         "y": [
          1,
          0.7248775384557933,
          0.3218957735108389,
          0.25208431734010356,
          0.22293941260186273,
          0.319454060656849,
          0.22293941260186273,
          0.1515592285817677,
          0.16605919571462135,
          0.10575486754254165,
          0.04614386663467355,
          0.09035106069563502,
          0.07921000218746307,
          0.13603494637249625,
          0.2190074778046894,
          0.13603494637249625,
          0.1625498290013598,
          0.21864476702970165,
          0.6509186398881557,
          1
         ],
         "yaxis": "y"
        },
        {
         "hovertemplate": "Model=CodeLlama-7b<br>Hint quantity (characters)=%{x}<br>Success rate=%{y}<extra></extra>",
         "legendgroup": "CodeLlama-7b",
         "line": {
          "color": "#FFA15A",
          "dash": "solid"
         },
         "marker": {
          "symbol": "circle"
         },
         "mode": "lines",
         "name": "CodeLlama-7b",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          0,
          19.10000000000001,
          55.60000000000001,
          85.20000000000002,
          114.70000000000002,
          156.80000000000004,
          207.15000000000006,
          279.6500000000001,
          440.20000000000005,
          3286.4
         ],
         "xaxis": "x",
         "y": [
          1,
          0.6878980891719745,
          0.2702702702702703,
          0.2073170731707317,
          0.1794871794871795,
          0.2692307692307692,
          0.1794871794871795,
          0.11538461538461539,
          0.1282051282051282,
          0.0759493670886076
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "font": {
         "size": 20
        },
        "height": 600,
        "legend": {
         "title": {},
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "shapes": [
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 97.40000000000003,
          "x1": 97.40000000000003,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 132,
          "x1": 132,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 38.20000000000002,
          "x1": 38.20000000000002,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 326.60000000000014,
          "x1": 326.60000000000014,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 232.70000000000005,
          "x1": 232.70000000000005,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 73,
          "x1": 73,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 553.8,
          "x1": 553.8,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         },
         {
          "line": {
           "color": "blue",
           "dash": "dash",
           "width": 1
          },
          "type": "line",
          "x0": 181.60000000000008,
          "x1": 181.60000000000008,
          "xref": "x",
          "y0": 0,
          "y1": 1,
          "yref": "y domain"
         }
        ],
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "width": 900,
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "dtick": 1,
         "minor": {
          "showgrid": false,
          "ticklen": 6,
          "ticks": "inside"
         },
         "showgrid": true,
         "title": {
          "text": "Hint quantity (characters)"
         },
         "type": "log"
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "range": [
          0,
          1
         ],
         "tickformat": ",.0%",
         "title": {
          "text": "Success rate"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Success rate against hint quantity (characters), coloured by the Model column.\n",
    "# The Error column is supplied as error_y and requested in \"band\" mode.\n",
    "num_hint_char_fig = scatter_with_error_bands(\n",
    "    num_hint_char_bin_endpoints,\n",
    "    data_frame=num_hint_char_df,\n",
    "    x=\"Hint quantity (characters)\",\n",
    "    y=\"Success rate\",\n",
    "    error_y=\"Error\",\n",
    "    error_y_mode=\"band\",\n",
    "    color=\"Model\",\n",
    ")\n",
    "num_hint_char_fig.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "stats",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
