{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "32aa1868-cfe2-4492-a564-9a79ffa04fff",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sympy as sp\n",
    "import re\n",
    "import pickle\n",
    "import networkx as nx\n",
    "import pandas as pd\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6359d7f4-0b3a-4e78-bd43-18b14de14a52",
   "metadata": {},
   "source": [
    "# HP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "3667f1d1-88f5-4790-93dc-d3ca14088bb3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pickled log file into `logs` (the file is opened in binary mode and closed by the `with` block).\n",
    "# NOTE: pickle.load can execute arbitrary code while unpickling -- only open files from a trusted source.\n",
    "with open(\"data/logs/tot_test_split/hp_3/mistralai/Mistral-Small-24B-Instruct-2501/1_1_1_1_1_3_10_3_20_5/0.dat\", \"rb\") as f:\n",
    "    logs = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "62311530-02d5-4a49-9a70-7687853fbf08",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5649"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The graph is read from the second-to-last entry of the loaded logs.\n",
    "G = logs[-2][\"graph\"]\n",
    "\n",
    "# verify nodes and edges to our best abilities:\n",
    "# build one row per edge, holding both endpoints with their node attributes,\n",
    "# the edge key (stored in the \"move\" column) and the edge attributes.\n",
    "edges = pd.DataFrame(\n",
    "    [\n",
    "        (parent, G.nodes[parent], child, G.nodes[child], move, edge_data)\n",
    "        for parent, child, move, edge_data in G.edges(keys=True, data=True)\n",
    "    ],\n",
    "    columns=[\"parent\", \"parent_data\", \"child\", \"child_data\", \"move\", \"edge_data\"],\n",
    ")\n",
    "len(edges)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "b572ffe2-23bd-48bd-9472-8a442bda17b6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import re\n",
    "from collections import Counter\n",
    "from functools import lru_cache\n",
    "\n",
    "TOKEN_REGEX = re.compile(r'[^\\s]+')  # kept in case another cell uses it; no longer used by extract_numbers_from_eq\n",
    "\n",
    "# One token per match: a numeric literal (optional fraction / exponent), an operator or a parenthesis.\n",
    "_EQ_TOKEN_REGEX = re.compile(r'(?:\\d+(?:\\.\\d+)?|\\.\\d+)(?:[eE][-+]?\\d+)?|[-+*/()]')\n",
    "# A '-' that directly follows one of these tokens (None = start of the expression) is a sign, not a subtraction.\n",
    "_UNARY_CONTEXT = (None, '+', '-', '*', '/', '(')\n",
    "\n",
    "@lru_cache(maxsize=None)\n",
    "def extract_numbers_from_eq(s):\n",
    "    \"\"\"Return the numeric operands that appear in the expression string ``s``.\n",
    "\n",
    "    The expression is tokenised with a regex instead of being split on whitespace,\n",
    "    so compact input such as '6-3' yields [6, 3] rather than the evaluated value [3].\n",
    "    A minus sign at the start, after an operator or after '(' is folded into the\n",
    "    following literal ('6 / -146' -> [6, -146]); any other minus is a subtraction.\n",
    "    Characters that are not part of a number, operator or parenthesis are ignored.\n",
    "\n",
    "    Args:\n",
    "        s: expression text (must be hashable because results are memoised).\n",
    "\n",
    "    Returns:\n",
    "        list of ``sp.nsimplify`` results, one per numeric literal, in order of\n",
    "        appearance. The list object is shared through the cache: do not mutate it.\n",
    "    \"\"\"\n",
    "    numbers = []\n",
    "    prev = None       # previously seen token, None while at the start of the expression\n",
    "    negative = False  # True when a unary minus is waiting for its literal\n",
    "    for match in _EQ_TOKEN_REGEX.finditer(s):\n",
    "        token = match.group()\n",
    "        if token[-1].isdigit():\n",
    "            # Numeric literal: convert exactly like the parent/child numbers are converted.\n",
    "            literal = '-' + token if negative else token\n",
    "            try:\n",
    "                numbers.append(sp.nsimplify(literal))\n",
    "            except (sp.SympifyError, TypeError, ValueError, ArithmeticError):\n",
    "                pass  # best effort: skip literals that cannot be converted\n",
    "            negative = False\n",
    "        elif token == '-' and prev in _UNARY_CONTEXT:\n",
    "            negative = not negative\n",
    "        else:\n",
    "            # Binary operator or parenthesis: a pending sign does not carry across it.\n",
    "            negative = False\n",
    "        prev = token\n",
    "    return numbers\n",
    "\n",
    "@lru_cache(maxsize=None)\n",
    "def nsimplify_cache(x):\n",
    "    \"\"\"Memoised ``sp.nsimplify``: calls with an equal (hashable) ``x`` reuse the first result.\"\"\"\n",
    "    simplified = sp.nsimplify(x)\n",
    "    return simplified\n",
    "\n",
    "# Characters stripped from node labels before they are split into numbers.\n",
    "unwanted = ',\\\\[]()'\n",
    "table = str.maketrans('', '', unwanted)\n",
    "\n",
    "def parse_row(row, debug=False):\n",
    "    \"\"\"Validate one edge (parent --move--> child) of the search graph.\n",
    "\n",
    "    Checks, in order: the move parses as ``lhs = rhs`` (or a bare ``lhs``), the\n",
    "    arithmetic is right, every number used on the left-hand side is available in\n",
    "    the parent state, and -- unless the edge is flagged ``is_error`` -- the child\n",
    "    state equals the unused parent numbers plus the result.\n",
    "\n",
    "    Args:\n",
    "        row: object exposing ``parent``, ``move``, ``child`` (strings) and\n",
    "            ``child_data``, ``edge_data`` (dicts), e.g. a row of ``edges``.\n",
    "        debug: unused; kept so existing callers keep working.\n",
    "\n",
    "    Returns:\n",
    "        ``(True, None)`` when all checks pass, otherwise ``(False, reason)`` where\n",
    "        reason is 'Failed to parse EQ', 'math error', 'selection error' or\n",
    "        'leftovers error'.\n",
    "\n",
    "    Raises:\n",
    "        Any unexpected exception is re-raised after printing the offending row.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        pn = [nsimplify_cache(x) for x in row.parent.translate(table).split()]\n",
    "        split_eq = [part.strip() for part in row.move.split(\"=\")]\n",
    "        if len(split_eq) > 2:\n",
    "            return False, \"Failed to parse EQ\"\n",
    "        left = split_eq[0]\n",
    "\n",
    "        used_numbers = extract_numbers_from_eq(left)\n",
    "        try:\n",
    "            eq_result_true = nsimplify_cache(left)\n",
    "            if len(split_eq) == 2:\n",
    "                eq_result_pred = nsimplify_cache(split_eq[1])\n",
    "            else:\n",
    "                # no \"=\" found, need to recompute the right-hand side from the left one\n",
    "                eq_result_pred = nsimplify_cache(str(eq_result_true))\n",
    "        except (sp.SympifyError, TypeError):\n",
    "            # sympify raises TypeError for text such as '2(3)'; treat it as a parse failure too\n",
    "            return False, \"Failed to parse EQ\"\n",
    "\n",
    "        if eq_result_pred != eq_result_true:\n",
    "            # model made a math mistake in the eq...\n",
    "            return False, \"math error\"\n",
    "\n",
    "        pn_pred = pn.copy()\n",
    "        try:\n",
    "            for number in used_numbers:\n",
    "                pn_pred.remove(number)\n",
    "        except ValueError:\n",
    "            # some of the used numbers weren't in the parent\n",
    "            return False, \"selection error\"\n",
    "\n",
    "        # try to get the child node label\n",
    "        if row.child_data.get(\"is_error\", False):\n",
    "            child_label = row.child_data[\"label\"]\n",
    "        else:\n",
    "            child_label = row.child\n",
    "        cn = [nsimplify_cache(x) for x in child_label.translate(table).split()]\n",
    "\n",
    "        # two cases: if the model deemed the move incorrect (edge flagged is_error), the\n",
    "        #    leftovers are not compared; having got this far, the selection and math checks\n",
    "        #    passed, so it is actually a correct edge that was thought incorrect (false negative)\n",
    "        # second case: move deemed correct, but wasn't correct (false positive)\n",
    "        if not row.edge_data.get(\"is_error\", False):\n",
    "            if Counter(pn_pred + [eq_result_pred]) != Counter(cn):\n",
    "                # the new node isn't what it was supposed to be..\n",
    "                return False, \"leftovers error\"\n",
    "    except (sp.SympifyError, AttributeError):\n",
    "        return False, \"Failed to parse EQ\"\n",
    "    except Exception:\n",
    "        # unexpected failure: show the row that triggered it, then propagate\n",
    "        print(row)\n",
    "        raise\n",
    "    return True, None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "c1b2ac03-8cc4-4724-8b87-16641a1ce128",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 5649/5649 [01:06<00:00, 84.85it/s] \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>parent</th>\n",
       "      <th>parent_data</th>\n",
       "      <th>child</th>\n",
       "      <th>child_data</th>\n",
       "      <th>move</th>\n",
       "      <th>edge_data</th>\n",
       "      <th>errors</th>\n",
       "      <th>deemed_error</th>\n",
       "      <th>is_error</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>293</th>\n",
       "      <td>1 2 8 13</td>\n",
       "      <td>{'is_root': True, 'expedience': 1, 'verified':...</td>\n",
       "      <td>1 2 1.625</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'expedie...</td>\n",
       "      <td>13 / 8 = 1.625```</td>\n",
       "      <td>{'label': '13 / 8 = 1.625```', 'is_shortcut': ...</td>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>311</th>\n",
       "      <td>1 2 8 13</td>\n",
       "      <td>{'is_root': True, 'expedience': 1, 'verified':...</td>\n",
       "      <td>8 3 13</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'expedie...</td>\n",
       "      <td>```1 + 2 = 3</td>\n",
       "      <td>{'label': '```1 + 2 = 3', 'is_shortcut': False}</td>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546</th>\n",
       "      <td>7 9 9 13</td>\n",
       "      <td>{'is_root': True, 'expedience': 1, 'verified':...</td>\n",
       "      <td>9 1 13</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'expedie...</td>\n",
       "      <td>9 / 7 = 1.2857142857142858</td>\n",
       "      <td>{'label': '9 / 7 = 1.2857142857142858', 'is_sh...</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>727</th>\n",
       "      <td>6 11 12 13</td>\n",
       "      <td>{'is_root': True, 'expedience': 1, 'verified':...</td>\n",
       "      <td>1 6 13</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'expedie...</td>\n",
       "      <td>12 / 11 = 1.0909090909090908</td>\n",
       "      <td>{'label': '12 / 11 = 1.0909090909090908', 'is_...</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1324</th>\n",
       "      <td>3 4 6 6</td>\n",
       "      <td>{'is_root': True, 'expedience': 1, 'verified':...</td>\n",
       "      <td>4 6 3</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'expedie...</td>\n",
       "      <td>6-3=3</td>\n",
       "      <td>{'label': '6-3=3', 'is_shortcut': True}</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2608</th>\n",
       "      <td>6 4 8</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'expedie...</td>\n",
       "      <td>4 6</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>8-4=4</td>\n",
       "      <td>{'label': '8-4=4', 'is_shortcut': True}</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2894</th>\n",
       "      <td>7 13 24</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>24 1.85714285714</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'expedie...</td>\n",
       "      <td>13 / 7 = 1.85714285714 (Note: For exact divisi...</td>\n",
       "      <td>{'label': '13 / 7 = 1.85714285714 (Note: For e...</td>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2958</th>\n",
       "      <td>6 12 130</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>12 22</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>130 / 6 = 21.666666666666668</td>\n",
       "      <td>{'label': '130 / 6 = 21.666666666666668', 'is_...</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3039</th>\n",
       "      <td>10 13 18</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>1 18</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>13 / 10 = 1.3</td>\n",
       "      <td>{'label': '13 / 10 = 1.3', 'is_shortcut': False}</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3252</th>\n",
       "      <td>11 59</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>5</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>59 / 11 = 5.363636363636364</td>\n",
       "      <td>{'label': '59 / 11 = 5.363636363636364', 'is_s...</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3383</th>\n",
       "      <td>12 3.25</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>3.69230769231</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>12 / 3.25 = 3.69230769231</td>\n",
       "      <td>{'label': '12 / 3.25 = 3.69230769231', 'is_sho...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3421</th>\n",
       "      <td>6 130</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>21.6666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>130 / 6 = 21.6666666667</td>\n",
       "      <td>{'label': '130 / 6 = 21.6666666667', 'is_short...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3519</th>\n",
       "      <td>13 180</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>0.07222222222222223</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>13 / 180 ≈ 0.07222222222222223</td>\n",
       "      <td>{'label': '13 / 180 ≈ 0.07222222222222223', 'i...</td>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3600</th>\n",
       "      <td>10 1.5833333333333333</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>6.316666666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>10 / 1.5833333333333333 = 6.316666666666667</td>\n",
       "      <td>{'label': '10 / 1.5833333333333333 = 6.3166666...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3639</th>\n",
       "      <td>12 1.2307692307692308</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>9.746793687869236</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>12 / 1.2307692307692308 = 9.746793687869236</td>\n",
       "      <td>{'label': '12 / 1.2307692307692308 = 9.7467936...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3677</th>\n",
       "      <td>16 1.0833333333333333</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>14.777777777777779</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>16 / 1.0833333333333333 = 14.777777777777779</td>\n",
       "      <td>{'label': '16 / 1.0833333333333333 = 14.777777...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3751</th>\n",
       "      <td>23 72</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>3</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>72 / 23 = 3.1304347826086957</td>\n",
       "      <td>{'label': '72 / 23 = 3.1304347826086957', 'is_...</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3761</th>\n",
       "      <td>6 35</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>5</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>35 / 6 = 5.833333333333333</td>\n",
       "      <td>{'label': '35 / 6 = 5.833333333333333', 'is_sh...</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3807</th>\n",
       "      <td>13 0.2727272727272727</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>47.67129629629629</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>13 / 0.2727272727272727 = 47.67129629629629</td>\n",
       "      <td>{'label': '13 / 0.2727272727272727 = 47.671296...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3829</th>\n",
       "      <td>6 11.8</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>1.96666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>11.8 / 6 = 1.96666666667</td>\n",
       "      <td>{'label': '11.8 / 6 = 1.96666666667', 'is_shor...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3846</th>\n",
       "      <td>6 10.833333333333334</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>0.5540540540540541</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>6 / 10.833333333333334 = 0.5540540540540541</td>\n",
       "      <td>{'label': '6 / 10.833333333333334 = 0.55405405...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4269</th>\n",
       "      <td>12 -10</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>2</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>12 - 10 = 2</td>\n",
       "      <td>{'label': '12 - 10 = 2', 'is_shortcut': False}</td>\n",
       "      <td>selection error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4270</th>\n",
       "      <td>12 -10</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>-22</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>-12 - 10 = -22</td>\n",
       "      <td>{'label': '-12 - 10 = -22', 'is_shortcut': False}</td>\n",
       "      <td>selection error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4665</th>\n",
       "      <td>19 0.23076923076923078</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>82.35294117647059</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>19 / 0.23076923076923078 = 82.35294117647059</td>\n",
       "      <td>{'label': '19 / 0.23076923076923078 = 82.35294...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4669</th>\n",
       "      <td>19 0.23076923076923078</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>0.012145736842105263</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>0.23076923076923078 / 19 = 0.012145736842105263</td>\n",
       "      <td>{'label': '0.23076923076923078 / 19 = 0.012145...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4759</th>\n",
       "      <td>0.9 16</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>17.78</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>16 / 0.9 ≈ 17.78</td>\n",
       "      <td>{'label': '16 / 0.9 ≈ 17.78', 'is_shortcut': F...</td>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4868</th>\n",
       "      <td>13 0.6923076923076923</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>18.77919727840826</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>13 / 0.6923076923076923 = 18.77919727840826</td>\n",
       "      <td>{'label': '13 / 0.6923076923076923 = 18.779197...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4885</th>\n",
       "      <td>10 39</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>Invalid step</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>39 / 10 = 3.9</td>\n",
       "      <td>{'label': '39 / 10 = 3.9', 'is_shortcut': False}</td>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5053</th>\n",
       "      <td>12 0.21666666666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>55.405405405405406</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>12 / 0.21666666666666667 = 55.405405405405406</td>\n",
       "      <td>{'label': '12 / 0.21666666666666667 = 55.40540...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5158</th>\n",
       "      <td>0.6 25</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>41.6667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>25 / 0.6 ≈ 41.6667</td>\n",
       "      <td>{'label': '25 / 0.6 ≈ 41.6667', 'is_shortcut':...</td>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5207</th>\n",
       "      <td>10 12.461538461538461</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>0.8024096385542169</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>10 / 12.461538461538461 = 0.8024096385542169</td>\n",
       "      <td>{'label': '10 / 12.461538461538461 = 0.8024096...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5214</th>\n",
       "      <td>12 -9.538461538461538</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>-114.46153846153847</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>-9.538461538461538 * 12 = -114.46153846153847</td>\n",
       "      <td>{'label': '-9.538461538461538 * 12 = -114.4615...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5215</th>\n",
       "      <td>12 -9.538461538461538</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>-114.46153846153847</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>12 * (-9.538461538461538) = -114.46153846153847</td>\n",
       "      <td>{'label': '12 * (-9.538461538461538) = -114.46...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5220</th>\n",
       "      <td>12 -9.538461538461538</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>-1.2589285714285712</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>12 / (-9.538461538461538) = -1.2589285714285712</td>\n",
       "      <td>{'label': '12 / (-9.538461538461538) = -1.2589...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5228</th>\n",
       "      <td>0.46153846153846156 22</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>0.021</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>0.46153846153846156 / 22 = 0.021</td>\n",
       "      <td>{'label': '0.46153846153846156 / 22 = 0.021', ...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5356</th>\n",
       "      <td>6 -146</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>-0.04109589041</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>6 / (-146) = -0.04109589041</td>\n",
       "      <td>{'label': '6 / (-146) = -0.04109589041', 'is_s...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5357</th>\n",
       "      <td>6 -146</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>-0.04109589041</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>6 / -146 = -0.04109589041</td>\n",
       "      <td>{'label': '6 / -146 = -0.04109589041', 'is_sho...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5468</th>\n",
       "      <td>9 1.8571428571428572</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>4.84573107621699</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>9 / 1.8571428571428572 = 4.84573107621699</td>\n",
       "      <td>{'label': '9 / 1.8571428571428572 = 4.84573107...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5490</th>\n",
       "      <td>13 1.66666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>21.66666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>1.66666666667 * 13 = 21.66666666667</td>\n",
       "      <td>{'label': '1.66666666667 * 13 = 21.66666666667...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5491</th>\n",
       "      <td>13 1.66666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>21.66666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>13 * 1.66666666667 = 21.66666666667</td>\n",
       "      <td>{'label': '13 * 1.66666666667 = 21.66666666667...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5495</th>\n",
       "      <td>13 1.66666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>0.1282051282051282</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>1.66666666667 / 13 = 0.1282051282051282</td>\n",
       "      <td>{'label': '1.66666666667 / 13 = 0.128205128205...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5496</th>\n",
       "      <td>13 1.66666666667</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>7.8</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>13 / 1.66666666667 = 7.8</td>\n",
       "      <td>{'label': '13 / 1.66666666667 = 7.8', 'is_shor...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5507</th>\n",
       "      <td>1.8 13</td>\n",
       "      <td>{'verified': True, 'num_expanded': 2, 'expedie...</td>\n",
       "      <td>7.222222222222222</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>13 / 1.8 ≈ 7.222222222222222</td>\n",
       "      <td>{'label': '13 / 1.8 ≈ 7.222222222222222', 'is_...</td>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5541</th>\n",
       "      <td>6 1.6923076923076923</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>10.15384515384574</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>1.6923076923076923 * 6 = 10.15384515384574</td>\n",
       "      <td>{'label': '1.6923076923076923 * 6 = 10.1538451...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5543</th>\n",
       "      <td>6 1.6923076923076923</td>\n",
       "      <td>{'verified': True, 'num_expanded': 1, 'expedie...</td>\n",
       "      <td>3.548387096774194</td>\n",
       "      <td>{'verified': True, 'num_expanded': 0, 'is_fini...</td>\n",
       "      <td>6 / 1.6923076923076923 = 3.548387096774194</td>\n",
       "      <td>{'label': '6 / 1.6923076923076923 = 3.54838709...</td>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      parent  \\\n",
       "293                 1 2 8 13   \n",
       "311                 1 2 8 13   \n",
       "546                 7 9 9 13   \n",
       "727               6 11 12 13   \n",
       "1324                 3 4 6 6   \n",
       "2608                   6 4 8   \n",
       "2894                 7 13 24   \n",
       "2958                6 12 130   \n",
       "3039                10 13 18   \n",
       "3252                   11 59   \n",
       "3383                 12 3.25   \n",
       "3421                   6 130   \n",
       "3519                  13 180   \n",
       "3600   10 1.5833333333333333   \n",
       "3639   12 1.2307692307692308   \n",
       "3677   16 1.0833333333333333   \n",
       "3751                   23 72   \n",
       "3761                    6 35   \n",
       "3807   13 0.2727272727272727   \n",
       "3829                  6 11.8   \n",
       "3846    6 10.833333333333334   \n",
       "4269                  12 -10   \n",
       "4270                  12 -10   \n",
       "4665  19 0.23076923076923078   \n",
       "4669  19 0.23076923076923078   \n",
       "4759                  0.9 16   \n",
       "4868   13 0.6923076923076923   \n",
       "4885                   10 39   \n",
       "5053  12 0.21666666666666667   \n",
       "5158                  0.6 25   \n",
       "5207   10 12.461538461538461   \n",
       "5214   12 -9.538461538461538   \n",
       "5215   12 -9.538461538461538   \n",
       "5220   12 -9.538461538461538   \n",
       "5228  0.46153846153846156 22   \n",
       "5356                  6 -146   \n",
       "5357                  6 -146   \n",
       "5468    9 1.8571428571428572   \n",
       "5490        13 1.66666666667   \n",
       "5491        13 1.66666666667   \n",
       "5495        13 1.66666666667   \n",
       "5496        13 1.66666666667   \n",
       "5507                  1.8 13   \n",
       "5541    6 1.6923076923076923   \n",
       "5543    6 1.6923076923076923   \n",
       "\n",
       "                                            parent_data                 child  \\\n",
       "293   {'is_root': True, 'expedience': 1, 'verified':...             1 2 1.625   \n",
       "311   {'is_root': True, 'expedience': 1, 'verified':...                8 3 13   \n",
       "546   {'is_root': True, 'expedience': 1, 'verified':...                9 1 13   \n",
       "727   {'is_root': True, 'expedience': 1, 'verified':...                1 6 13   \n",
       "1324  {'is_root': True, 'expedience': 1, 'verified':...                 4 6 3   \n",
       "2608  {'verified': True, 'num_expanded': 0, 'expedie...                   4 6   \n",
       "2894  {'verified': True, 'num_expanded': 1, 'expedie...      24 1.85714285714   \n",
       "2958  {'verified': True, 'num_expanded': 1, 'expedie...                 12 22   \n",
       "3039  {'verified': True, 'num_expanded': 2, 'expedie...                  1 18   \n",
       "3252  {'verified': True, 'num_expanded': 1, 'expedie...                     5   \n",
       "3383  {'verified': True, 'num_expanded': 1, 'expedie...         3.69230769231   \n",
       "3421  {'verified': True, 'num_expanded': 1, 'expedie...         21.6666666667   \n",
       "3519  {'verified': True, 'num_expanded': 1, 'expedie...   0.07222222222222223   \n",
       "3600  {'verified': True, 'num_expanded': 1, 'expedie...     6.316666666666667   \n",
       "3639  {'verified': True, 'num_expanded': 2, 'expedie...     9.746793687869236   \n",
       "3677  {'verified': True, 'num_expanded': 1, 'expedie...    14.777777777777779   \n",
       "3751  {'verified': True, 'num_expanded': 1, 'expedie...                     3   \n",
       "3761  {'verified': True, 'num_expanded': 2, 'expedie...                     5   \n",
       "3807  {'verified': True, 'num_expanded': 1, 'expedie...     47.67129629629629   \n",
       "3829  {'verified': True, 'num_expanded': 1, 'expedie...         1.96666666667   \n",
       "3846  {'verified': True, 'num_expanded': 1, 'expedie...    0.5540540540540541   \n",
       "4269  {'verified': True, 'num_expanded': 1, 'expedie...                     2   \n",
       "4270  {'verified': True, 'num_expanded': 1, 'expedie...                   -22   \n",
       "4665  {'verified': True, 'num_expanded': 1, 'expedie...     82.35294117647059   \n",
       "4669  {'verified': True, 'num_expanded': 1, 'expedie...  0.012145736842105263   \n",
       "4759  {'verified': True, 'num_expanded': 1, 'expedie...                 17.78   \n",
       "4868  {'verified': True, 'num_expanded': 1, 'expedie...     18.77919727840826   \n",
       "4885  {'verified': True, 'num_expanded': 2, 'expedie...          Invalid step   \n",
       "5053  {'verified': True, 'num_expanded': 1, 'expedie...    55.405405405405406   \n",
       "5158  {'verified': True, 'num_expanded': 1, 'expedie...               41.6667   \n",
       "5207  {'verified': True, 'num_expanded': 1, 'expedie...    0.8024096385542169   \n",
       "5214  {'verified': True, 'num_expanded': 1, 'expedie...   -114.46153846153847   \n",
       "5215  {'verified': True, 'num_expanded': 1, 'expedie...   -114.46153846153847   \n",
       "5220  {'verified': True, 'num_expanded': 1, 'expedie...   -1.2589285714285712   \n",
       "5228  {'verified': True, 'num_expanded': 1, 'expedie...                 0.021   \n",
       "5356  {'verified': True, 'num_expanded': 1, 'expedie...        -0.04109589041   \n",
       "5357  {'verified': True, 'num_expanded': 1, 'expedie...        -0.04109589041   \n",
       "5468  {'verified': True, 'num_expanded': 1, 'expedie...      4.84573107621699   \n",
       "5490  {'verified': True, 'num_expanded': 2, 'expedie...        21.66666666667   \n",
       "5491  {'verified': True, 'num_expanded': 2, 'expedie...        21.66666666667   \n",
       "5495  {'verified': True, 'num_expanded': 2, 'expedie...    0.1282051282051282   \n",
       "5496  {'verified': True, 'num_expanded': 2, 'expedie...                   7.8   \n",
       "5507  {'verified': True, 'num_expanded': 2, 'expedie...     7.222222222222222   \n",
       "5541  {'verified': True, 'num_expanded': 1, 'expedie...     10.15384515384574   \n",
       "5543  {'verified': True, 'num_expanded': 1, 'expedie...     3.548387096774194   \n",
       "\n",
       "                                             child_data  \\\n",
       "293   {'verified': True, 'num_expanded': 0, 'expedie...   \n",
       "311   {'verified': True, 'num_expanded': 0, 'expedie...   \n",
       "546   {'verified': True, 'num_expanded': 0, 'expedie...   \n",
       "727   {'verified': True, 'num_expanded': 0, 'expedie...   \n",
       "1324  {'verified': True, 'num_expanded': 0, 'expedie...   \n",
       "2608  {'verified': True, 'num_expanded': 1, 'expedie...   \n",
       "2894  {'verified': True, 'num_expanded': 0, 'expedie...   \n",
       "2958  {'verified': True, 'num_expanded': 2, 'expedie...   \n",
       "3039  {'verified': True, 'num_expanded': 1, 'expedie...   \n",
       "3252  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3383  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3421  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3519  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3600  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3639  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3677  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3751  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3761  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3807  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3829  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "3846  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "4269  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "4270  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "4665  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "4669  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "4759  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "4868  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "4885  {'verified': True, 'num_expanded': 1, 'expedie...   \n",
       "5053  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5158  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5207  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5214  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5215  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5220  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5228  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5356  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5357  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5468  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5490  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5491  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5495  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5496  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5507  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5541  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "5543  {'verified': True, 'num_expanded': 0, 'is_fini...   \n",
       "\n",
       "                                                   move  \\\n",
       "293                                   13 / 8 = 1.625```   \n",
       "311                                        ```1 + 2 = 3   \n",
       "546                          9 / 7 = 1.2857142857142858   \n",
       "727                        12 / 11 = 1.0909090909090908   \n",
       "1324                                              6-3=3   \n",
       "2608                                              8-4=4   \n",
       "2894  13 / 7 = 1.85714285714 (Note: For exact divisi...   \n",
       "2958                       130 / 6 = 21.666666666666668   \n",
       "3039                                      13 / 10 = 1.3   \n",
       "3252                        59 / 11 = 5.363636363636364   \n",
       "3383                          12 / 3.25 = 3.69230769231   \n",
       "3421                            130 / 6 = 21.6666666667   \n",
       "3519                     13 / 180 ≈ 0.07222222222222223   \n",
       "3600        10 / 1.5833333333333333 = 6.316666666666667   \n",
       "3639        12 / 1.2307692307692308 = 9.746793687869236   \n",
       "3677       16 / 1.0833333333333333 = 14.777777777777779   \n",
       "3751                       72 / 23 = 3.1304347826086957   \n",
       "3761                         35 / 6 = 5.833333333333333   \n",
       "3807        13 / 0.2727272727272727 = 47.67129629629629   \n",
       "3829                           11.8 / 6 = 1.96666666667   \n",
       "3846        6 / 10.833333333333334 = 0.5540540540540541   \n",
       "4269                                        12 - 10 = 2   \n",
       "4270                                     -12 - 10 = -22   \n",
       "4665       19 / 0.23076923076923078 = 82.35294117647059   \n",
       "4669    0.23076923076923078 / 19 = 0.012145736842105263   \n",
       "4759                                   16 / 0.9 ≈ 17.78   \n",
       "4868        13 / 0.6923076923076923 = 18.77919727840826   \n",
       "4885                                      39 / 10 = 3.9   \n",
       "5053      12 / 0.21666666666666667 = 55.405405405405406   \n",
       "5158                                 25 / 0.6 ≈ 41.6667   \n",
       "5207       10 / 12.461538461538461 = 0.8024096385542169   \n",
       "5214      -9.538461538461538 * 12 = -114.46153846153847   \n",
       "5215    12 * (-9.538461538461538) = -114.46153846153847   \n",
       "5220    12 / (-9.538461538461538) = -1.2589285714285712   \n",
       "5228                   0.46153846153846156 / 22 = 0.021   \n",
       "5356                        6 / (-146) = -0.04109589041   \n",
       "5357                          6 / -146 = -0.04109589041   \n",
       "5468          9 / 1.8571428571428572 = 4.84573107621699   \n",
       "5490                1.66666666667 * 13 = 21.66666666667   \n",
       "5491                13 * 1.66666666667 = 21.66666666667   \n",
       "5495            1.66666666667 / 13 = 0.1282051282051282   \n",
       "5496                           13 / 1.66666666667 = 7.8   \n",
       "5507                       13 / 1.8 ≈ 7.222222222222222   \n",
       "5541         1.6923076923076923 * 6 = 10.15384515384574   \n",
       "5543         6 / 1.6923076923076923 = 3.548387096774194   \n",
       "\n",
       "                                              edge_data              errors  \\\n",
       "293   {'label': '13 / 8 = 1.625```', 'is_shortcut': ...  Failed to parse EQ   \n",
       "311     {'label': '```1 + 2 = 3', 'is_shortcut': False}  Failed to parse EQ   \n",
       "546   {'label': '9 / 7 = 1.2857142857142858', 'is_sh...     leftovers error   \n",
       "727   {'label': '12 / 11 = 1.0909090909090908', 'is_...     leftovers error   \n",
       "1324            {'label': '6-3=3', 'is_shortcut': True}     leftovers error   \n",
       "2608            {'label': '8-4=4', 'is_shortcut': True}     leftovers error   \n",
       "2894  {'label': '13 / 7 = 1.85714285714 (Note: For e...  Failed to parse EQ   \n",
       "2958  {'label': '130 / 6 = 21.666666666666668', 'is_...     leftovers error   \n",
       "3039   {'label': '13 / 10 = 1.3', 'is_shortcut': False}     leftovers error   \n",
       "3252  {'label': '59 / 11 = 5.363636363636364', 'is_s...     leftovers error   \n",
       "3383  {'label': '12 / 3.25 = 3.69230769231', 'is_sho...          math error   \n",
       "3421  {'label': '130 / 6 = 21.6666666667', 'is_short...          math error   \n",
       "3519  {'label': '13 / 180 ≈ 0.07222222222222223', 'i...  Failed to parse EQ   \n",
       "3600  {'label': '10 / 1.5833333333333333 = 6.3166666...          math error   \n",
       "3639  {'label': '12 / 1.2307692307692308 = 9.7467936...          math error   \n",
       "3677  {'label': '16 / 1.0833333333333333 = 14.777777...          math error   \n",
       "3751  {'label': '72 / 23 = 3.1304347826086957', 'is_...     leftovers error   \n",
       "3761  {'label': '35 / 6 = 5.833333333333333', 'is_sh...     leftovers error   \n",
       "3807  {'label': '13 / 0.2727272727272727 = 47.671296...          math error   \n",
       "3829  {'label': '11.8 / 6 = 1.96666666667', 'is_shor...          math error   \n",
       "3846  {'label': '6 / 10.833333333333334 = 0.55405405...          math error   \n",
       "4269     {'label': '12 - 10 = 2', 'is_shortcut': False}     selection error   \n",
       "4270  {'label': '-12 - 10 = -22', 'is_shortcut': False}     selection error   \n",
       "4665  {'label': '19 / 0.23076923076923078 = 82.35294...          math error   \n",
       "4669  {'label': '0.23076923076923078 / 19 = 0.012145...          math error   \n",
       "4759  {'label': '16 / 0.9 ≈ 17.78', 'is_shortcut': F...  Failed to parse EQ   \n",
       "4868  {'label': '13 / 0.6923076923076923 = 18.779197...          math error   \n",
       "4885   {'label': '39 / 10 = 3.9', 'is_shortcut': False}     leftovers error   \n",
       "5053  {'label': '12 / 0.21666666666666667 = 55.40540...          math error   \n",
       "5158  {'label': '25 / 0.6 ≈ 41.6667', 'is_shortcut':...  Failed to parse EQ   \n",
       "5207  {'label': '10 / 12.461538461538461 = 0.8024096...          math error   \n",
       "5214  {'label': '-9.538461538461538 * 12 = -114.4615...          math error   \n",
       "5215  {'label': '12 * (-9.538461538461538) = -114.46...          math error   \n",
       "5220  {'label': '12 / (-9.538461538461538) = -1.2589...          math error   \n",
       "5228  {'label': '0.46153846153846156 / 22 = 0.021', ...          math error   \n",
       "5356  {'label': '6 / (-146) = -0.04109589041', 'is_s...          math error   \n",
       "5357  {'label': '6 / -146 = -0.04109589041', 'is_sho...          math error   \n",
       "5468  {'label': '9 / 1.8571428571428572 = 4.84573107...          math error   \n",
       "5490  {'label': '1.66666666667 * 13 = 21.66666666667...          math error   \n",
       "5491  {'label': '13 * 1.66666666667 = 21.66666666667...          math error   \n",
       "5495  {'label': '1.66666666667 / 13 = 0.128205128205...          math error   \n",
       "5496  {'label': '13 / 1.66666666667 = 7.8', 'is_shor...          math error   \n",
       "5507  {'label': '13 / 1.8 ≈ 7.222222222222222', 'is_...  Failed to parse EQ   \n",
       "5541  {'label': '1.6923076923076923 * 6 = 10.1538451...          math error   \n",
       "5543  {'label': '6 / 1.6923076923076923 = 3.54838709...          math error   \n",
       "\n",
       "      deemed_error  is_error  \n",
       "293          False      True  \n",
       "311          False      True  \n",
       "546          False      True  \n",
       "727          False      True  \n",
       "1324         False      True  \n",
       "2608         False      True  \n",
       "2894         False      True  \n",
       "2958         False      True  \n",
       "3039         False      True  \n",
       "3252         False      True  \n",
       "3383         False      True  \n",
       "3421         False      True  \n",
       "3519         False      True  \n",
       "3600         False      True  \n",
       "3639         False      True  \n",
       "3677         False      True  \n",
       "3751         False      True  \n",
       "3761         False      True  \n",
       "3807         False      True  \n",
       "3829         False      True  \n",
       "3846         False      True  \n",
       "4269         False      True  \n",
       "4270         False      True  \n",
       "4665         False      True  \n",
       "4669         False      True  \n",
       "4759         False      True  \n",
       "4868         False      True  \n",
       "4885         False      True  \n",
       "5053         False      True  \n",
       "5158         False      True  \n",
       "5207         False      True  \n",
       "5214         False      True  \n",
       "5215         False      True  \n",
       "5220         False      True  \n",
       "5228         False      True  \n",
       "5356         False      True  \n",
       "5357         False      True  \n",
       "5468         False      True  \n",
       "5490         False      True  \n",
       "5491         False      True  \n",
       "5495         False      True  \n",
       "5496         False      True  \n",
       "5507         False      True  \n",
       "5541         False      True  \n",
       "5543         False      True  "
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-check every edge with parse_row; the complete results stay available in `errors`.\n",
    "errors = [parse_row(row) for _, row in tqdm(edges.iterrows(), total=len(edges))]\n",
    "\n",
    "# Item 1 of each result is stored as the error label; a missing value means no error was found.\n",
    "edges[\"errors\"] = [result[1] for result in errors]\n",
    "\n",
    "\n",
    "def _flagged_in_graph(row):\n",
    "    \"\"\"Return the `is_error` flag found on the parent node data, else the one on the child.\"\"\"\n",
    "    return row.parent_data.get(\"is_error\", False) or row.child_data.get(\"is_error\", False)\n",
    "\n",
    "\n",
    "# deemed_error: flag recorded on the graph nodes; is_error: outcome of the re-check above.\n",
    "edges[\"deemed_error\"] = edges.apply(_flagged_in_graph, axis=1)\n",
    "edges[\"is_error\"] = edges[\"errors\"].notna()\n",
    "\n",
    "# Keep the edges where the two flags disagree, then display the ones the re-check marks as errors.\n",
    "wrong_decisions = edges.loc[edges[\"deemed_error\"].ne(edges[\"is_error\"])]\n",
    "wrong_decisions.loc[wrong_decisions[\"is_error\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "fb2f28fa-bb84-43e6-af85-f0671c572d82",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>errors</th>\n",
       "      <th>deemed_error</th>\n",
       "      <th>is_error</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>leftovers error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>selection error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               errors  deemed_error  is_error  count\n",
       "0                 NaN          True     False    454\n",
       "1          math error         False      True     26\n",
       "2     leftovers error         False      True     10\n",
       "3  Failed to parse EQ         False      True      7\n",
       "4     selection error         False      True      2"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# mistral: tally the disagreements per error label and per flag combination\n",
    "error_counts = (\n",
    "    wrong_decisions[[\"errors\", \"deemed_error\", \"is_error\"]]\n",
    "    .value_counts(dropna=False)  # keep rows whose error label is missing\n",
    "    .to_frame()\n",
    "    .reset_index()\n",
    ")\n",
    "error_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "473d50e1-a047-49c1-a0ef-5088a5d81e0d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>errors</th>\n",
       "      <th>deemed_error</th>\n",
       "      <th>is_error</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Failed to parse EQ</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>244</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>selection error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               errors  deemed_error  is_error  count\n",
       "0  Failed to parse EQ         False      True    244\n",
       "1          math error         False      True    100\n",
       "2                 NaN          True     False     70\n",
       "3     selection error         False      True      1"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# phi: tally the disagreements per error label and per flag combination\n",
    "# NOTE(review): the saved output was executed before the cell that loads the logs\n",
    "# (execution count 72 vs 73), so it presumably reflects a run on phi logs - confirm.\n",
    "# Re-running this cell as-is tallies whatever `wrong_decisions` currently holds.\n",
    "error_counts = (\n",
    "    wrong_decisions[[\"errors\", \"deemed_error\", \"is_error\"]]\n",
    "    .value_counts(dropna=False)  # keep rows whose error label is missing\n",
    "    .to_frame()\n",
    "    .reset_index()\n",
    ")\n",
    "error_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "881e86e1-c9ac-427d-b02d-6173d3d68ead",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>errors</th>\n",
       "      <th>deemed_error</th>\n",
       "      <th>is_error</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>math error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>selection error</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            errors  deemed_error  is_error  count\n",
       "0              NaN          True     False    145\n",
       "1       math error         False      True    120\n",
       "2  selection error         False      True      4"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# llama: tally the disagreements per error label and per flag combination\n",
    "# NOTE(review): the saved output was executed before the cell that loads the logs\n",
    "# (execution count 65 vs 73), so it presumably reflects a run on llama logs - confirm.\n",
    "# Re-running this cell as-is tallies whatever `wrong_decisions` currently holds.\n",
    "error_counts = (\n",
    "    wrong_decisions[[\"errors\", \"deemed_error\", \"is_error\"]]\n",
    "    .value_counts(dropna=False)  # keep rows whose error label is missing\n",
    "    .to_frame()\n",
    "    .reset_index()\n",
    ")\n",
    "error_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebf8a1eb-ea1f-4551-8111-244b503b9218",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "hp_env2",
   "language": "python",
   "name": "hp_env2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
