{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "b6bc6c6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import json\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "ba803e3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the evaluation results, stored as one JSON object per line (JSONL).\n",
    "file_path = \"response_evaluation/Math/ConsistencyTest/resultsEvaluations_evaluatedbyo3-2025-04-16.jsonl\"\n",
    "# Explicit UTF-8 so decoding does not depend on the platform's default locale encoding.\n",
    "with open(file_path, 'r', encoding='utf-8') as file:\n",
    "    # Skip blank lines (e.g. an extra empty line at the end of the file): json.loads rejects them.\n",
    "    data = pd.DataFrame([json.loads(line) for line in file if line.strip()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a009085",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([135.,   3.,   4.,   1.,   1., 106.]),\n",
       " array([0.        , 0.83333333, 1.66666667, 2.5       , 3.33333333,\n",
       "        4.16666667, 5.        ]),\n",
       " <BarContainer object of 6 artists>)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGeCAYAAAC+dvpwAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3X1wVPX9t/H3msCS0GQloeyyYyKxTa2agBJsSnwgFIiTAspQiwql2DItDg8aAyIptY2MZhVbiGMGWhyHRJk0/lFBWp8ItSYyqW0STMXUQRkjRM2aatPdJMQNhnP/Ydn7twaV6An7zXq9Zs6M53vOnnyy45BrTnazDsuyLAEAABjknGgPAAAA8EkECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA48dEe4Is4efKk3n33XSUlJcnhcER7HAAAcAYsy1J3d7e8Xq/OOedz7pFYQ1RXV2fNmzfPmjhxoiXJ2r1796ee+/Of/9ySZG3dujVi/cMPP7RWr15tpaamWomJidb8+fOt9vb2M56hvb3dksTGxsbGxsY2Arcz+Zk/5Dsovb29mjJlin7yk5/oBz/4waeet2fPHv3973+X1+sddKyoqEh/+tOfVFNTo9TUVK1du1bz5s1Tc3Oz4uLiPneGpKQkSVJ7e7uSk5OH+i0AAIAoCAaDSktLC/8c/yxDDpTCwkIVFhZ+5jnvvPOOVq9ereeee05z586NOBYIBPTII4/oscce0+zZsyVJu3btUlpamvbv369rrrnmc2c49Wud5ORkAgUAgBHmTF6eYfuLZE+ePKmlS5fqjjvu0CWXXDLoeHNzs06cOKGCgoLwmtfrVVZWlhoaGk57zVAopGAwGLEBAIDYZXug3H///YqPj9ett9562uN+v1+jR4/WuHHjItbdbrf8fv9pH+Pz+eRyucJbWlqa3WMDAACD2Boozc3NevDBB1VZWTnkd9dYlvWpjykpKVEgEAhv7e3tdowLAAAMZWugvPjii+rs7FR6erri4+MVHx+vo0ePau3atZo0aZIkyePxqL+/X11dXRGP7ezslNvtPu11nU5n+PUmvO4EAIDYZ2ugLF26VK+88opaWlrCm9fr1R133KHnnntOkpSTk6NRo0aptrY2/LiOjg69+uqrysvLs3McAAAwQg35XTw9PT06cuRIeL+trU0tLS1KSUlRenq6UlNTI84fNWqUPB6PLrzwQkmSy+XS8uXLtXbtWqWmpiolJUXr1q1TdnZ2+F09AADgq23IgdLU1KSZM2eG94uLiyVJy5YtU2Vl5RldY+vWrYqPj9eiRYvU19enWbNmqbKy8oz+BgoAAIh9DsuyrGgPMVTBYFAul0uBQIDXowAAMEIM5ec3HxYIAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMM+e+gfBVM2vBUtEcYMd66b260RwAAxCDuoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4Qw6U+vp6zZ8/X16vVw6HQ3v27AkfO3HihO68805lZ2dr7Nix8nq9+vGPf6x333034hqhUEhr1qzR+PHjNXbsWF177bV6++23v/Q3AwAAYsOQA6W3t1dTpkxRRUXFoGPHjx/XwYMHddddd+ngwYN64okn9Prrr+vaa6+NOK+oqEi7d+9WTU2NDhw4oJ6eHs2bN08DAwNf/DsBAAAxI36oDygsLFRhYeFpj7lcLtXW1kasPfTQQ/rOd76j
Y8eOKT09XYFAQI888ogee+wxzZ49W5K0a9cupaWlaf/+/brmmmu+wLcBAABiybC/BiUQCMjhcOjcc8+VJDU3N+vEiRMqKCgIn+P1epWVlaWGhobTXiMUCikYDEZsAAAgdg1roHz44YfasGGDFi9erOTkZEmS3+/X6NGjNW7cuIhz3W63/H7/aa/j8/nkcrnCW1pa2nCODQAAomzYAuXEiRO68cYbdfLkSW3btu1zz7csSw6H47THSkpKFAgEwlt7e7vd4wIAAIMMS6CcOHFCixYtUltbm2pra8N3TyTJ4/Gov79fXV1dEY/p7OyU2+0+7fWcTqeSk5MjNgAAELtsD5RTcfLGG29o//79Sk1NjTiek5OjUaNGRbyYtqOjQ6+++qry8vLsHgcAAIxAQ34XT09Pj44cORLeb2trU0tLi1JSUuT1enX99dfr4MGD+vOf/6yBgYHw60pSUlI0evRouVwuLV++XGvXrlVqaqpSUlK0bt06ZWdnh9/VAwAAvtqGHChNTU2aOXNmeL+4uFiStGzZMpWWlmrv3r2SpEsvvTTicX/961+Vn58vSdq6davi4+O1aNEi9fX1adasWaqsrFRcXNwX/DYAAEAscViWZUV7iKEKBoNyuVwKBALD8nqUSRuesv2aseqt++ZGewQAwAgxlJ/ffBYPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIwTH+0BAAA4WyZteCraI4wYb903N6pfnzsoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMM6QA6W+vl7z58+X1+uVw+HQnj17Io5blqXS0lJ5vV4lJCQoPz9fra2tEeeEQiGtWbNG48eP19ixY3Xttdfq7bff/lLfCAAAiB1DDpTe3l5NmTJFFRUVpz2+efNmbdmyRRUVFWpsbJTH49GcOXPU3d0dPqeoqEi7d+9WTU2NDhw4oJ6eHs2bN08DAwNf/DsBAAAxY8gfFlhYWKjCwsLTHrMsS+Xl5dq4caMWLlwoSaqqqpLb7VZ1dbVWrFihQCCgRx55RI899phmz54tSdq1a5fS0tK0f/9+XXPNNV/i2wEAALHA1tegtLW1ye/3q6CgILzmdDo1Y8YMNTQ0SJKam5t14sSJiHO8Xq+ysrLC5wAAgK+2Id9B+Sx+v1+S5Ha7I9bdbreOHj0aPmf06NEaN27coHNOPf6TQqGQQqFQeD8YDNo5NgAAMMywvIvH4XBE7FuWNWjtkz7rHJ/PJ5fLFd7S0tJsmxUAAJjH1kDxeDySNOhOSGdnZ/iuisfjUX9/v7q6uj71nE8qKSlRIBAIb+3t7XaODQAADGNroGRkZMjj8ai2tja81t/fr7q6OuXl5UmScnJyNGrUqIhzOjo69Oqrr4bP+SSn06nk5OSIDQAAxK4hvwalp6dHR44cCe+3tbWppaVFKSkpSk9PV1FRkcrKypSZmanMzEyVlZUpMTFRixcvliS5XC4tX75ca9euVWpqqlJSUrRu3TplZ2eH39UDAAC+2oYcKE1NTZo5c2Z4v7i4WJK0bNkyVVZWav369err69PKlSvV1dWl3Nxc7du3T0lJSeHHbN26VfHx8Vq0aJH6+vo0a9YsVVZWKi4uzoZvCQAAjHQOy7KsaA8xVMFgUC6XS4FAYFh+3TNpw1O2XzNWvXXf3GiPAABnjH/fz9xw/Ps+lJ/ffBYPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAo
AADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADCO7YHy0Ucf6Ze//KUyMjKUkJCgCy64QJs2bdLJkyfD51iWpdLSUnm9XiUkJCg/P1+tra12jwIAAEYo2wPl/vvv1+9+9ztVVFTotdde0+bNm/XAAw/ooYceCp+zefNmbdmyRRUVFWpsbJTH49GcOXPU3d1t9zgAAGAEsj1Q/va3v+m6667T3LlzNWnSJF1//fUqKChQU1OTpI/vnpSXl2vjxo1auHChsrKyVFVVpePHj6u6utrucQAAwAhke6BceeWV+stf/qLXX39dkvTPf/5TBw4c0Pe//31JUltbm/x+vwoKCsKPcTqdmjFjhhoaGuweBwAAjEDxdl/wzjvvVCAQ0Le//W3FxcVpYGBA9957r2666SZJkt/vlyS53e6Ix7ndbh09evS01wyFQgqFQuH9YDBo99gAAMAgtt9Befzxx7Vr1y5VV1fr4MGDqqqq0m9+8xtVVVVFnOdwOCL2LcsatHaKz+eTy+UKb2lpaXaPDQAADGJ7oNxxxx3asGGDbrzxRmVnZ2vp0qW6/fbb5fP5JEkej0fS/7+TckpnZ+eguyqnlJSUKBAIhLf29na7xwYAAAaxPVCOHz+uc86JvGxcXFz4bcYZGRnyeDyqra0NH+/v71ddXZ3y8vJOe02n06nk5OSIDQAAxC7bX4Myf/583XvvvUpPT9cll1yil19+WVu2bNFPf/pTSR//aqeoqEhlZWXKzMxUZmamysrKlJiYqMWLF9s9DgAAGIFsD5SHHnpId911l1auXKnOzk55vV6tWLFCv/rVr8LnrF+/Xn19fVq5cqW6urqUm5urffv2KSkpye5xAADACOSwLMuK9hBDFQwG5XK5FAgEhuXXPZM2PGX7NWPVW/fNjfYIAHDG+Pf9zA3Hv+9D+fnNZ/EAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDjDEijvvPOOfvSjHyk1NVWJiYm69NJL1dzcHD5uWZZKS0vl9XqVkJCg/Px8tba2DscoAABgBLI9ULq6unTFFVdo1KhReuaZZ/Svf/1Lv/3tb3XuueeGz9m8ebO2bNmiiooKNTY2yuPxaM6cOeru7rZ7HAAAMALF233B+++/X2lpadq5c2d4bdKkSeH/tixL5eXl2rhxoxYuXChJqqqqktvtVnV1tVasWGH3SAAAYISx/Q7K3r17NW3aNP3whz/UhAkTdNlll+nhhx8OH29ra5Pf71dBQUF4zel0asaMGWpoaDjtNUOhkILBYMQGAABil+2B8uabb2r79u3KzMzUc889p1tuuUW33nqrHn30UUmS3++XJLnd7ojHud3u8LFP8vl8crlc4S0tLc3usQEAgEFsD5STJ09q6tSpKisr02WXXaYVK1boZz/7mbZv3x5xnsPhiNi3LGvQ2iklJSUKBALhrb293e6xAQCAQWwPlIkTJ+riiy+OWLvooot07NgxSZLH45GkQXdLOjs7B91VOcXpdCo5OTliAwAAscv2QLniiit0+PDh
iLXXX39d559/viQpIyNDHo9HtbW14eP9/f2qq6tTXl6e3eMAAIARyPZ38dx+++3Ky8tTWVmZFi1apH/84x/asWOHduzYIenjX+0UFRWprKxMmZmZyszMVFlZmRITE7V48WK7xwEAACOQ7YFy+eWXa/fu3SopKdGmTZuUkZGh8vJyLVmyJHzO+vXr1dfXp5UrV6qrq0u5ubnat2+fkpKS7B4HAACMQLYHiiTNmzdP8+bN+9TjDodDpaWlKi0tHY4vDwAARjg+iwcAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGGfYA8Xn88nhcKioqCi8ZlmWSktL5fV6lZCQoPz8fLW2tg73KAAAYIQY1kBpbGzUjh07NHny5Ij1zZs3a8uWLaqoqFBjY6M8Ho/mzJmj7u7u4RwHAACMEMMWKD09PVqyZIkefvhhjRs3LrxuWZbKy8u1ceNGLVy4UFlZWaqqqtLx48dVXV09XOMAAIARZNgCZdWqVZo7d65mz54dsd7W1ia/36+CgoLwmtPp1IwZM9TQ0HDaa4VCIQWDwYgNAADErvjhuGhNTY0OHjyoxsbGQcf8fr8kye12R6y73W4dPXr0tNfz+Xy6++677R8UAAAYyfY7KO3t7brtttu0a9cujRkz5lPPczgcEfuWZQ1aO6WkpESBQCC8tbe32zozAAAwi+13UJqbm9XZ2amcnJzw2sDAgOrr61VRUaHDhw9L+vhOysSJE8PndHZ2DrqrcorT6ZTT6bR7VAAAYCjb76DMmjVLhw4dUktLS3ibNm2alixZopaWFl1wwQXyeDyqra0NP6a/v191dXXKy8uzexwAADAC2X4HJSkpSVlZWRFrY8eOVWpqani9qKhIZWVlyszMVGZmpsrKypSYmKjFixfbPQ4AABiBhuVFsp9n/fr16uvr08qVK9XV1aXc3Fzt27dPSUlJ0RgHAAAY5qwEygsvvBCx73A4VFpaqtLS0rPx5QEAwAjDZ/EAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOPYHig+n0+XX365kpKSNGHCBC1YsECHDx+OOMeyLJWWlsrr9SohIUH5+flqbW21exQAADBC2R4odXV1WrVqlV566SXV1tbqo48+UkFBgXp7e8PnbN68WVu2bFFFRYUaGxvl8Xg0Z84cdXd32z0OAAAYgeLtvuCzzz4bsb9z505NmDBBzc3Nuvrqq2VZlsrLy7Vx40YtXLhQklRVVSW3263q6mqtWLHC7pEAAMAIM+yvQQkEApKklJQUSVJbW5v8fr8KCgrC5zidTs2YMUMNDQ2nvUYoFFIwGIzYAABA7BrWQLEsS8XFxbryyiuVlZUlSfL7/ZIkt9sdca7b7Q4f+ySfzyeXyxXe0tLShnNsAAAQZcMaKKtXr9Yrr7yiP/zhD4OOORyOiH3LsgatnVJSUqJAIBDe2tvbh2VeAABg
Bttfg3LKmjVrtHfvXtXX1+u8884Lr3s8Hkkf30mZOHFieL2zs3PQXZVTnE6nnE7ncI0KAAAMY/sdFMuytHr1aj3xxBN6/vnnlZGREXE8IyNDHo9HtbW14bX+/n7V1dUpLy/P7nEAAMAIZPsdlFWrVqm6ulpPPvmkkpKSwq8rcblcSkhIkMPhUFFRkcrKypSZmanMzEyVlZUpMTFRixcvtnscAAAwAtkeKNu3b5ck5efnR6zv3LlTN998syRp/fr16uvr08qVK9XV1aXc3Fzt27dPSUlJdo8DAABGINsDxbKszz3H4XCotLRUpaWldn95AAAQA/gsHgAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMaJj/YAwFfFpA1PRXuEEeOt++ZGewQAUcYdFAAAYBzuoOBL4a4AAGA4cAcFAAAYh0ABAADGIVAAAIBxohoo27ZtU0ZGhsaMGaOcnBy9+OKL0RwHAAAYImqB8vjjj6uoqEgbN27Uyy+/rKuuukqFhYU6duxYtEYCAACGcFiWZUXjC+fm5mrq1Knavn17eO2iiy7SggUL5PP5PvOxwWBQLpdLgUBAycnJts/GO1MAAF91w/H3iIby8zsqbzPu7+9Xc3OzNmzYELFeUFCghoaGQeeHQiGFQqHwfiAQkPTxNzocToaOD8t1AQAYKYbjZ+ypa57JvZGoBMr777+vgYEBud3uiHW32y2/3z/ofJ/Pp7vvvnvQelpa2rDNCADAV5mrfPiu3d3dLZfL9ZnnRPUPtTkcjoh9y7IGrUlSSUmJiouLw/snT57Uf/7zH6Wmpp72/C8jGAwqLS1N7e3tw/LrI3yM5/ns4Hk+O3iezx6e67NjuJ5ny7LU3d0tr9f7uedGJVDGjx+vuLi4QXdLOjs7B91VkSSn0ymn0xmxdu655w7niEpOTuZ//rOA5/ns4Hk+O3iezx6e67NjOJ7nz7tzckpU3sUzevRo5eTkqLa2NmK9trZWeXl50RgJAAAYJGq/4ikuLtbSpUs1bdo0TZ8+XTt27NCxY8d0yy23RGskAABgiKgFyg033KAPPvhAmzZtUkdHh7KysvT000/r/PPPj9ZIkj7+ddKvf/3rQb9Sgr14ns8Onuezg+f57OG5PjtMeJ6j9ndQAAAAPg2fxQMAAIxDoAAAAOMQKAAAwDgECgAAMA6B8n9s27ZNGRkZGjNmjHJycvTiiy9Ge6SYU19fr/nz58vr9crhcGjPnj3RHikm+Xw+XX755UpKStKECRO0YMECHT58ONpjxZzt27dr8uTJ4T9mNX36dD3zzDPRHivm+Xw+ORwOFRUVRXuUmFJaWiqHwxGxeTyeqM1DoPzP448/rqKiIm3cuFEvv/yyrrrqKhUWFurYsWPRHi2m9Pb2asqUKaqoqIj2KDGtrq5Oq1at0ksvvaTa2lp99NFHKigoUG9vb7RHiynnnXee7rvvPjU1NampqUnf+973dN1116m1tTXao8WsxsZG7dixQ5MnT472KDHpkksuUUdHR3g7dOhQ1Gbhbcb/k5ubq6lTp2r79u3htYsuukgLFiyQz+eL4mSxy+FwaPfu3VqwYEG0R4l5//73vzVhwgTV1dXp6quvjvY4MS0lJUUPPPCAli9fHu1RYk5PT4+mTp2qbdu26Z577tGll16q8vLyaI8VM0pLS7Vnzx61tLREexRJ3EGRJPX396u5uVkFBQUR6wUFBWpoaIjSVIB9AoGApI9/eGJ4DAwMqKamRr29vZo+fXq0x4lJq1at0ty5czV79uxo
jxKz3njjDXm9XmVkZOjGG2/Um2++GbVZovppxqZ4//33NTAwMOiDCt1u96APNARGGsuyVFxcrCuvvFJZWVnRHifmHDp0SNOnT9eHH36or33ta9q9e7cuvvjiaI8Vc2pqanTw4EE1NjZGe5SYlZubq0cffVTf+ta39N577+mee+5RXl6eWltblZqaetbnIVD+D4fDEbFvWdagNWCkWb16tV555RUdOHAg2qPEpAsvvFAtLS3673//qz/+8Y9atmyZ6urqiBQbtbe367bbbtO+ffs0ZsyYaI8TswoLC8P/nZ2drenTp+sb3/iGqqqqVFxcfNbnIVAkjR8/XnFxcYPulnR2dg66qwKMJGvWrNHevXtVX1+v8847L9rjxKTRo0frm9/8piRp2rRpamxs1IMPPqjf//73UZ4sdjQ3N6uzs1M5OTnhtYGBAdXX16uiokKhUEhxcXFRnDA2jR07VtnZ2XrjjTei8vV5DYo+/gcmJydHtbW1Eeu1tbXKy8uL0lTAF2dZllavXq0nnnhCzz//vDIyMqI90leGZVkKhULRHiOmzJo1S4cOHVJLS0t4mzZtmpYsWaKWlhbiZJiEQiG99tprmjhxYlS+PndQ/qe4uFhLly7VtGnTNH36dO3YsUPHjh3TLbfcEu3RYkpPT4+OHDkS3m9ra1NLS4tSUlKUnp4excliy6pVq1RdXa0nn3xSSUlJ4buDLpdLCQkJUZ4udvziF79QYWGh0tLS1N3drZqaGr3wwgt69tlnoz1aTElKShr0+qmxY8cqNTWV11XZaN26dZo/f77S09PV2dmpe+65R8FgUMuWLYvKPATK/9xwww364IMPtGnTJnV0dCgrK0tPP/20zj///GiPFlOampo0c+bM8P6p32suW7ZMlZWVUZoq9px6u3x+fn7E+s6dO3XzzTef/YFi1HvvvaelS5eqo6NDLpdLkydP1rPPPqs5c+ZEezRgyN5++23ddNNNev/99/X1r39d3/3ud/XSSy9F7ecgfwcFAAAYh9egAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjPP/AObC+vPiP6KiAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def _row_totals(frame, column):\n",
    "    \"\"\"Return, for every row of `frame`, the integer sum of the list stored in `column`.\"\"\"\n",
    "    return [np.sum(np.array(flags, dtype=int)) for flags in frame[column]]\n",
    "\n",
    "\n",
    "# The names c / h / m are kept so that any other cell reading them still works.\n",
    "c = _row_totals(data, 'correctness')\n",
    "h = _row_totals(data, 'human_bruteforce')\n",
    "m = _row_totals(data, 'model_bruteforce')\n",
    "\n",
    "# Unit-width bin edges on integer boundaries, shared by all three series, so every\n",
    "# total gets its own bar regardless of which totals actually occur. `bins=6` only\n",
    "# lined up with the integer totals when the observed values spanned exactly 0..5.\n",
    "all_totals = c + h + m\n",
    "lowest_total = int(min(all_totals, default=0))\n",
    "highest_total = int(max(all_totals, default=0))\n",
    "bin_edges = np.arange(lowest_total, highest_total + 2)\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.set(xlabel='per-row sum of correctness', ylabel='number of rows')\n",
    "# The trailing 'k / 1250' notes below are carried over unchanged from the original cell.\n",
    "ax.hist(c, bins=bin_edges) # 14 / 1250\n",
    "# ax.hist(h, bins=bin_edges) # 42 / 1250\n",
    "# ax.hist(m, bins=bin_edges) # 60 / 1250"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d60e3d42",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Question</th>\n",
       "      <th>Hint</th>\n",
       "      <th>Human Solution</th>\n",
       "      <th>Model</th>\n",
       "      <th>PromptType</th>\n",
       "      <th>Response</th>\n",
       "      <th>Status</th>\n",
       "      <th>Correct</th>\n",
       "      <th>correctness</th>\n",
       "      <th>model_bruteforce</th>\n",
       "      <th>human_bruteforce</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Question: There are six villages along the coa...</td>\n",
       "      <td>To find the number of routes, start from the s...</td>\n",
       "      <td>The wedding dates are April 6th, May 12th, and...</td>\n",
       "      <td>Qwen70</td>\n",
       "      <td>combinedhintPrompt</td>\n",
       "      <td>&lt;think&gt;\\nOkay, so I have this problem about si...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Question: What is the smallest integer such th...</td>\n",
       "      <td>The number has 16 digits.  I repeat, the numbe...</td>\n",
       "      <td>1,176,470,588,235,294\\nx 1.5 =\\n1,764,705,882,...</td>\n",
       "      <td>Qwen70</td>\n",
       "      <td>combinedhintPrompt</td>\n",
       "      <td>&lt;think&gt;\\nOkay, so I have this problem where I ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[1, 1, 1, 1, 1]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Question: If you use a certain formula on 13, ...</td>\n",
       "      <td>The binary representation of each number may b...</td>\n",
       "      <td>19. Convert the number to binary, add one for ...</td>\n",
       "      <td>Qwen70</td>\n",
       "      <td>combinedhintPrompt</td>\n",
       "      <td>&lt;think&gt;\\nOkay, so I've got this problem here w...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "      <td>[0, 0, 0, 0, 1]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Question: Two 2s can be combined in many ways ...</td>\n",
       "      <td>These operations or symbols are used, in some ...</td>\n",
       "      <td>The square root of point 2 to the power of min...</td>\n",
       "      <td>Qwen70</td>\n",
       "      <td>combinedhintPrompt</td>\n",
       "      <td>&lt;think&gt;\\nOkay, so I have this problem where I ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "      <td>[1, 1, 1, 1, 1]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Question: What characteristic do these three 1...</td>\n",
       "      <td>316713\\n334401\\n656635</td>\n",
       "      <td>They are all square numbers:\\n100307124369 = 3...</td>\n",
       "      <td>Qwen70</td>\n",
       "      <td>combinedhintPrompt</td>\n",
       "      <td>&lt;think&gt;\\nOkay, so I have this problem where I ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "      <td>[0, 0, 0, 0, 0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID                                           Question  \\\n",
       "0   0  Question: There are six villages along the coa...   \n",
       "1   1  Question: What is the smallest integer such th...   \n",
       "2   2  Question: If you use a certain formula on 13, ...   \n",
       "3   3  Question: Two 2s can be combined in many ways ...   \n",
       "4   4  Question: What characteristic do these three 1...   \n",
       "\n",
       "                                                Hint  \\\n",
       "0  To find the number of routes, start from the s...   \n",
       "1  The number has 16 digits.  I repeat, the numbe...   \n",
       "2  The binary representation of each number may b...   \n",
       "3  These operations or symbols are used, in some ...   \n",
       "4                             316713\\n334401\\n656635   \n",
       "\n",
       "                                      Human Solution   Model  \\\n",
       "0  The wedding dates are April 6th, May 12th, and...  Qwen70   \n",
       "1  1,176,470,588,235,294\\nx 1.5 =\\n1,764,705,882,...  Qwen70   \n",
       "2  19. Convert the number to binary, add one for ...  Qwen70   \n",
       "3  The square root of point 2 to the power of min...  Qwen70   \n",
       "4  They are all square numbers:\\n100307124369 = 3...  Qwen70   \n",
       "\n",
       "           PromptType                                           Response  \\\n",
       "0  combinedhintPrompt  <think>\\nOkay, so I have this problem about si...   \n",
       "1  combinedhintPrompt  <think>\\nOkay, so I have this problem where I ...   \n",
       "2  combinedhintPrompt  <think>\\nOkay, so I've got this problem here w...   \n",
       "3  combinedhintPrompt  <think>\\nOkay, so I have this problem where I ...   \n",
       "4  combinedhintPrompt  <think>\\nOkay, so I have this problem where I ...   \n",
       "\n",
       "   Status  Correct      correctness model_bruteforce human_bruteforce  \n",
       "0    True      NaN  [0, 0, 0, 0, 0]  [0, 0, 0, 0, 0]  [0, 0, 0, 0, 0]  \n",
       "1    True      NaN  [1, 1, 1, 1, 1]  [0, 0, 0, 0, 0]  [0, 0, 0, 0, 0]  \n",
       "2    True      NaN  [0, 0, 0, 0, 0]  [0, 0, 0, 0, 1]  [0, 0, 0, 0, 0]  \n",
       "3    True      NaN  [0, 0, 0, 0, 0]  [1, 1, 1, 1, 1]  [0, 0, 0, 0, 0]  \n",
       "4    True      NaN  [0, 0, 0, 0, 0]  [0, 0, 0, 0, 0]  [0, 0, 0, 0, 0]  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the loaded frame.\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "464c53fd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Brainteasers",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
