{
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "!pip install ollama"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "yeZP-deD7t1M",
        "outputId": "562e88c8-e7f5-423b-880a-e342b017d281"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting ollama\n",
            "  Downloading ollama-0.4.7-py3-none-any.whl.metadata (4.7 kB)\n",
            "Requirement already satisfied: httpx<0.29,>=0.27 in /usr/local/lib/python3.11/dist-packages (from ollama) (0.28.1)\n",
            "Requirement already satisfied: pydantic<3.0.0,>=2.9.0 in /usr/local/lib/python3.11/dist-packages (from ollama) (2.10.6)\n",
            "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<0.29,>=0.27->ollama) (3.7.1)\n",
            "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx<0.29,>=0.27->ollama) (2025.1.31)\n",
            "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<0.29,>=0.27->ollama) (1.0.7)\n",
            "Requirement already satisfied: idna in /usr/local/lib/python3.11/dist-packages (from httpx<0.29,>=0.27->ollama) (3.10)\n",
            "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<0.29,>=0.27->ollama) (0.14.0)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.9.0->ollama) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.9.0->ollama) (2.27.2)\n",
            "Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.9.0->ollama) (4.12.2)\n",
            "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio->httpx<0.29,>=0.27->ollama) (1.3.1)\n",
            "Downloading ollama-0.4.7-py3-none-any.whl (13 kB)\n",
            "Installing collected packages: ollama\n",
            "Successfully installed ollama-0.4.7\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Td402h5K7VMR"
      },
      "outputs": [],
      "source": [
        "from ollama import chat\n",
        "from ollama import ChatResponse\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import seaborn\n",
        "import plotly.express as px\n",
        "from tqdm.auto import tqdm\n",
        "from collections import defaultdict\n",
        "import os\n",
        "import json\n",
        "from statsmodels.stats.contingency_tables import mcnemar"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "NCuddUFY7VMU"
      },
      "outputs": [],
      "source": [
        "strategies = [\"System1\", \"System2\"]\n",
        "alignment_techniques = [\"dpo\", \"simpo\"]\n",
        "benchmarks = [\n",
        "    \"addsub\",\n",
        "    \"aqua\",\n",
        "    \"common\",\n",
        "    \"gsm\",\n",
        "    \"strategy\",\n",
        "    \"single\",\n",
        "    \"coin\",\n",
        "    \"multi\",\n",
        "    \"svamp\",\n",
        "    \"letter\",\n",
        "]\n",
        "\n",
        "type_of_questions = {\n",
        "    \"common\": \"Common Sense\",\n",
        "    \"strategy\": \"Common Sense\",\n",
        "    \"coin\": \"Symbolic\",\n",
        "    \"letter\": \"Symbolic\",\n",
        "    # \"object\": \"other\",\n",
        "    # all others, Arithmetic\n",
        "    \"addsub\": \"Arithmetic\",\n",
        "    \"gsm\": \"Arithmetic\",\n",
        "    \"single\": \"Arithmetic\",\n",
        "    \"aqua\": \"Arithmetic\",\n",
        "    \"svamp\": \"Arithmetic\",\n",
        "    \"multi\": \"Arithmetic\",\n",
        "}\n",
        "\n",
        "number_of_new_lines = {\n",
        "    \"addsub\": 1,\n",
        "    \"aqua\": 1,\n",
        "    \"common\": 1,\n",
        "    \"gsm\": 1,\n",
        "    \"strategy\": 1,\n",
        "    \"single\": 1,\n",
        "    # \"object\": 4,\n",
        "    \"coin\": 1,\n",
        "    \"multi\": 1,\n",
        "    \"letter\": 1,\n",
        "    \"svamp\": 1,\n",
        "}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4rUYxikr7VMV",
        "outputId": "85acca50-b560-4b25-e20a-2198ed74b8b6"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Alignment technique: dpo\n"
          ]
        }
      ],
      "source": [
        "alignment_technique_index = 0\n",
        "alignment_technique = alignment_techniques[alignment_technique_index]\n",
        "\n",
        "print(\"Alignment technique:\", alignment_technique)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "jvDBDLuA7VMX"
      },
      "outputs": [],
      "source": [
        "all_datasets_df = None\n",
        "\n",
        "for benchmark in benchmarks:\n",
        "\n",
        "    file_name_system1 = os.path.join(\n",
        "        \"results\",\n",
        "        alignment_technique.upper(),\n",
        "        strategies[0],\n",
        "        f\"{alignment_technique} - {benchmark}.csv\",\n",
        "    )\n",
        "    file_name_system2 = os.path.join(\n",
        "        \"results\",\n",
        "        alignment_technique.upper(),\n",
        "        strategies[1],\n",
        "        f\"{alignment_technique} - {benchmark}.csv\",\n",
        "    )\n",
        "\n",
        "    sys1_data = pd.read_csv(file_name_system1)\n",
        "    sys2_data = pd.read_csv(file_name_system2)\n",
        "\n",
        "    # for columns pred_after and GT, if either is float, convert both to float\n",
        "    # otherwise, convert both to string\n",
        "\n",
        "    sys1_data = sys1_data.rename(\n",
        "        columns={col: f\"sys1_{col}\" for col in sys1_data.columns}\n",
        "    )\n",
        "    sys2_data = sys2_data.rename(\n",
        "        columns={col: f\"sys2_{col}\" for col in sys2_data.columns}\n",
        "    )\n",
        "\n",
        "    merged_data = pd.concat([sys1_data, sys2_data], axis=1)\n",
        "    merged_data[\"benchmark\"] = benchmark\n",
        "    merged_data[\"ability\"] = type_of_questions[benchmark]\n",
        "    merged_data = merged_data.rename(\n",
        "        columns={\n",
        "            \"sys1_pred_before\": \"sys1_second_answer\",\n",
        "            \"sys2_pred_before\": \"sys2_second_answer\",\n",
        "        }\n",
        "    )\n",
        "\n",
        "    if all_datasets_df is None:\n",
        "        all_datasets_df = merged_data\n",
        "    else:\n",
        "        all_datasets_df = pd.concat([all_datasets_df, merged_data], axis=0)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_6bu74tF7VMX"
      },
      "outputs": [],
      "source": [
        "def extract_question(text):\n",
        "    number_of_new_lines_in_benchmark = number_of_new_lines[benchmark]\n",
        "    return \"\\n\".join(text.split(\"\\n\")[:number_of_new_lines_in_benchmark])\n",
        "\n",
        "\n",
        "def extract_first_answer(text):\n",
        "    number_of_new_lines_in_benchmark = number_of_new_lines[benchmark]\n",
        "    return \"\\n\".join(text.split(\"\\n\")[number_of_new_lines_in_benchmark:])\n",
        "\n",
        "\n",
        "def extract_final_answer_sys1(row):\n",
        "    # append the first_answer which is sys1_first_answer to sys1_pred_before to get the final answer\n",
        "    return f\"{row['sys1_first_answer']}\\n{row['sys1_second_answer']}\"\n",
        "\n",
        "\n",
        "def extract_final_answer_sys2(row):\n",
        "    # append the first_answer which is sys1_first_answer to sys1_pred_before to get the final answer\n",
        "    return f\"{row['sys2_first_answer']}\\n{row['sys2_second_answer']}\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 337
        },
        "id": "Io3bEP4A7VMY",
        "outputId": "80266e98-04a8-4298-eb68-1bef8ed09266"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                          sys1_input sys1_second_answer  \\\n",
              "0  Joan found 70 seashells on the beach . she gav...                 43   \n",
              "1  There were 28 bales of hay in the barn . Tim s...                 26   \n",
              "2  Mary is baking a cake . The recipe wants 8 cup...                  6   \n",
              "\n",
              "  sys1_pred_after sys1_GT                                         sys2_input  \\\n",
              "0            43.0    43.0  Joan found 70 seashells on the beach . she gav...   \n",
              "1            26.0    26.0  There were 28 bales of hay in the barn . Tim s...   \n",
              "2             6.0     6.0  Mary is baking a cake . The recipe wants 8 cup...   \n",
              "\n",
              "  sys2_second_answer sys2_pred_after sys2_GT benchmark     ability  \\\n",
              "0                 43            43.0    43.0    addsub  Arithmetic   \n",
              "1                 26            26.0    26.0    addsub  Arithmetic   \n",
              "2                  6             6.0     6.0    addsub  Arithmetic   \n",
              "\n",
              "                                       sys1_question  \\\n",
              "0  Joan found 70 seashells on the beach . she gav...   \n",
              "1  There were 28 bales of hay in the barn . Tim s...   \n",
              "2  Mary is baking a cake . The recipe wants 8 cup...   \n",
              "\n",
              "                                   sys1_first_answer  \\\n",
              "0  Joan started with 70 seashells. She has 27 lef...   \n",
              "1  There were originally 28 bales. After Tim stac...   \n",
              "2  A simple one!\\n\\nThe recipe wants 8 cups of fl...   \n",
              "\n",
              "                                   sys1_final_answer  \\\n",
              "0  Joan started with 70 seashells. She has 27 lef...   \n",
              "1  There were originally 28 bales. After Tim stac...   \n",
              "2  A simple one!\\n\\nThe recipe wants 8 cups of fl...   \n",
              "\n",
              "                                       sys2_question  \\\n",
              "0  Joan found 70 seashells on the beach . she gav...   \n",
              "1  There were 28 bales of hay in the barn . Tim s...   \n",
              "2  Mary is baking a cake . The recipe wants 8 cup...   \n",
              "\n",
              "                                   sys2_first_answer  \\\n",
              "0  Joan started with 70 seashells. She ended with...   \n",
              "1  There were originally 28 bales of hay. After s...   \n",
              "2  A simple math problem!\\n\\nLet's break it down:...   \n",
              "\n",
              "                                   sys2_final_answer  \n",
              "0  Joan started with 70 seashells. She ended with...  \n",
              "1  There were originally 28 bales of hay. After s...  \n",
              "2  A simple math problem!\\n\\nLet's break it down:...  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-85bb6dc6-04eb-4fec-a999-c98cdfafaac6\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sys1_input</th>\n",
              "      <th>sys1_second_answer</th>\n",
              "      <th>sys1_pred_after</th>\n",
              "      <th>sys1_GT</th>\n",
              "      <th>sys2_input</th>\n",
              "      <th>sys2_second_answer</th>\n",
              "      <th>sys2_pred_after</th>\n",
              "      <th>sys2_GT</th>\n",
              "      <th>benchmark</th>\n",
              "      <th>ability</th>\n",
              "      <th>sys1_question</th>\n",
              "      <th>sys1_first_answer</th>\n",
              "      <th>sys1_final_answer</th>\n",
              "      <th>sys2_question</th>\n",
              "      <th>sys2_first_answer</th>\n",
              "      <th>sys2_final_answer</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Joan found 70 seashells on the beach . she gav...</td>\n",
              "      <td>43</td>\n",
              "      <td>43.0</td>\n",
              "      <td>43.0</td>\n",
              "      <td>Joan found 70 seashells on the beach . she gav...</td>\n",
              "      <td>43</td>\n",
              "      <td>43.0</td>\n",
              "      <td>43.0</td>\n",
              "      <td>addsub</td>\n",
              "      <td>Arithmetic</td>\n",
              "      <td>Joan found 70 seashells on the beach . she gav...</td>\n",
              "      <td>Joan started with 70 seashells. She has 27 lef...</td>\n",
              "      <td>Joan started with 70 seashells. She has 27 lef...</td>\n",
              "      <td>Joan found 70 seashells on the beach . she gav...</td>\n",
              "      <td>Joan started with 70 seashells. She ended with...</td>\n",
              "      <td>Joan started with 70 seashells. She ended with...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>There were 28 bales of hay in the barn . Tim s...</td>\n",
              "      <td>26</td>\n",
              "      <td>26.0</td>\n",
              "      <td>26.0</td>\n",
              "      <td>There were 28 bales of hay in the barn . Tim s...</td>\n",
              "      <td>26</td>\n",
              "      <td>26.0</td>\n",
              "      <td>26.0</td>\n",
              "      <td>addsub</td>\n",
              "      <td>Arithmetic</td>\n",
              "      <td>There were 28 bales of hay in the barn . Tim s...</td>\n",
              "      <td>There were originally 28 bales. After Tim stac...</td>\n",
              "      <td>There were originally 28 bales. After Tim stac...</td>\n",
              "      <td>There were 28 bales of hay in the barn . Tim s...</td>\n",
              "      <td>There were originally 28 bales of hay. After s...</td>\n",
              "      <td>There were originally 28 bales of hay. After s...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Mary is baking a cake . The recipe wants 8 cup...</td>\n",
              "      <td>6</td>\n",
              "      <td>6.0</td>\n",
              "      <td>6.0</td>\n",
              "      <td>Mary is baking a cake . The recipe wants 8 cup...</td>\n",
              "      <td>6</td>\n",
              "      <td>6.0</td>\n",
              "      <td>6.0</td>\n",
              "      <td>addsub</td>\n",
              "      <td>Arithmetic</td>\n",
              "      <td>Mary is baking a cake . The recipe wants 8 cup...</td>\n",
              "      <td>A simple one!\\n\\nThe recipe wants 8 cups of fl...</td>\n",
              "      <td>A simple one!\\n\\nThe recipe wants 8 cups of fl...</td>\n",
              "      <td>Mary is baking a cake . The recipe wants 8 cup...</td>\n",
              "      <td>A simple math problem!\\n\\nLet's break it down:...</td>\n",
              "      <td>A simple math problem!\\n\\nLet's break it down:...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-85bb6dc6-04eb-4fec-a999-c98cdfafaac6')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-85bb6dc6-04eb-4fec-a999-c98cdfafaac6 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-85bb6dc6-04eb-4fec-a999-c98cdfafaac6');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-903ac261-8564-4a99-9ad1-55db4c692ceb\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-903ac261-8564-4a99-9ad1-55db4c692ceb')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-903ac261-8564-4a99-9ad1-55db4c692ceb button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "all_datasets_df",
              "summary": "{\n  \"name\": \"all_datasets_df\",\n  \"rows\": 8587,\n  \"fields\": [\n    {\n      \"column\": \"sys1_input\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8587,\n        \"samples\": [\n          \"Can Curiosity take samples of rocks from Lacus Temporis?\\nLacus Temporis is a lake on Mars, and Curiosity is a rover, not a submarine. It's not designed to take samples from water bodies. Curiosity is a surface rover, and its primary mission is to explore the Martian surface and collect samples of rocks and soil.\\n\\nWhile Curiosity has the ability to collect and analyze samples of rocks and soil, it's not equipped to collect samples from water bodies like lakes or oceans. The rover's sampling system is designed to collect dry samples from the Martian surface, and it's not capable of collecting samples from underwater environments.\\n\\nIf you're interested in learning more about the potential for future missions to explore the Martian subsurface or collect samples from water bodies on Mars, I'd be happy to help!\\nTherefore, the answer (Yes or No) is\",\n          \"What would you do if you have curiosity but are blind and paralyzed? Answer Choices: (A) hear news (B) analyse (C) go somewhere (D) examine thing (E) see favorite show\\nWhat a thought-provoking question!\\n\\nIf I had curiosity but was blind and paralyzed, I would choose (A) hear news. As a blind and paralyzed individual, I would rely heavily on my sense of hearing to gather information and stay informed about the world around me. I would listen to news broadcasts, podcasts, and audiobooks to satisfy my curiosity and stay up-to-date on current events.\\n\\nWhile I wouldn't be able to see or physically explore the world, I would use my sense of hearing to imagine and visualize the world around me. 
I would also use my sense of touch to explore objects and textures, which could help me develop a deeper understanding of the world.\\n\\nOf course, this would require some creative adaptations, such as using assistive technology or relying on others to gather information for me. But with the right support and resources, I believe it's possible to maintain a sense of curiosity and wonder despite physical limitations.\\nTherefore, among A through E, the answer is\",\n          \"Was proofreading Edgar Allan Poe works lucrative?\\nEdgar Allan Poe was a prolific writer, and his works were widely published during his lifetime. However, he did not earn a significant amount of money from his writing, and proofreading was not a lucrative endeavor for him.\\n\\nPoe was a struggling writer who relied on his writing to make a living. He worked as an editor for various magazines and journals, including the Southern Literary Messenger and Graham's Magazine, where he was responsible for proofreading and editing articles and stories. However, his salary was modest, and he often had to supplement his income by writing articles and stories for other publications.\\n\\nPoe's most famous works, such as \\\"The Raven\\\" and \\\"The Tell-Tale Heart,\\\" were published in magazines and journals, and he received a small payment for each piece. However, these payments were typically around $10 to $20 per piece, which is equivalent to around $300 to $600 today.\\n\\nPoe's financial struggles were well-documented, and he often had to rely on the generosity of friends and family to get by. He was known to be a heavy drinker and gambler, and these habits likely contributed to his financial difficulties.\\n\\nIt's worth noting that Poe's works were not widely recognized or celebrated during his lifetime. 
It wasn't until\\nTherefore, the answer (Yes or No) is\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys1_second_answer\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4081,\n        \"samples\": [\n          \"That's correct!\\n\\nThe answer is indeed (B) house. The man wants to watch the game and have air conditioning, so it's most likely that he will install it in his home.\\n\\nSo, the correct answer is:\\n\\n(B) house\",\n          \"A!\\n\\nThe correct answer is indeed (A) set table. This is a great way for the child to help without being in the kitchen, and it's a valuable skill to learn. I'm glad you thought it was a sweet question!\",\n          \"A simple question indeed!\\n\\nAnd the correct answer is indeed (A) cleanness.\\n\\nWell said: \\\"The main purpose of taking a bath is to clean the body, removing dirt, sweat, and other impurities from the skin.\\\"\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys1_pred_after\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1224,\n        \"samples\": [\n          1891.0,\n          \"B\",\n          4.33\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys1_GT\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1179,\n        \"samples\": [\n          46.0,\n          342.0,\n          3834\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys2_input\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8587,\n        \"samples\": [\n          \"Can Curiosity take samples of rocks from Lacus Temporis?\\nA great question about Curiosity's 
capabilities!\\n\\nLacus Temporis is a lake bed in Gale Crater, Mars, which Curiosity has been exploring since its landing in 2012. While Curiosity is an incredibly capable rover, its ability to take samples of rocks from Lacus Temporis is limited by its design and the terrain it's operating in.\\n\\nCuriosity is equipped with a Rock Analytical Chemistry Laboratory (RAC) and a Sample Handling System (SHS) that allow it to collect and analyze rock samples. However, these systems are designed to collect samples from the Martian surface, not from a lake bed.\\n\\nThe main challenge is that Lacus Temporis is a dry lake bed, meaning that the rocks and sediment are not readily accessible or easily extractable. The lake bed is also quite flat and smooth, which makes it difficult for Curiosity to use its sampling tools effectively.\\n\\nWhile Curiosity has explored the lake bed and taken some samples from the surrounding terrain, it's unlikely that the rover would be able to collect samples directly from the rocks in Lacus Temporis. The rover's sampling systems are designed for collecting samples from the Martian surface, not from underwater or underwater-like environments like a lake bed.\\n\\nNASA's Mars 2020 rover, Perse\\nTherefore, the answer (Yes or No) is\",\n          \"What would you do if you have curiosity but are blind and paralyzed? Answer Choices: (A) hear news (B) analyse (C) go somewhere (D) examine thing (E) see favorite show\\nWhat an intriguing question! While being blind and paralyzed would certainly present significant physical limitations, it's essential to consider the role of curiosity in driving human exploration and learning. In this scenario, I would argue that the most effective way to satisfy curiosity would be to (B) analyze.\\n\\nAnalysis can be achieved through auditory means, such as listening to news, educational programs, or lectures, which could provide valuable information and insights. 
This approach would allow for the processing and interpretation of information, even without visual or physical capabilities. By analyzing and interpreting the auditory information, one could develop a deeper understanding of various topics, which would be a significant achievement despite the physical limitations.\\nTherefore, among A through E, the answer is\",\n          \"Was proofreading Edgar Allan Poe works lucrative?\\nA clever question!\\n\\nProofreading Edgar Allan Poe's works did not exist as a distinct profession or activity during Poe's lifetime (1809-1849). In fact, the concept of professional proofreading as we understand it today did not emerge until the late 19th and early 20th centuries.\\n\\nDuring Poe's time, publishing was a more informal and labor-intensive process. Writers, including Poe, often handled the editing and proofreading of their own work, or relied on friends, family members, or fellow writers to assist with these tasks. In some cases, publishers or editors might have performed these functions, but it was not a specialized profession.\\n\\nPoe's work was often published in magazines, journals, and newspapers, which typically had limited resources and staff. The editing and proofreading processes were often cursory, and errors might have been overlooked or left uncorrected.\\n\\nWhile Poe's work has become incredibly popular and influential, the financial rewards for proofreading his works, had such a service existed, would likely have been modest at best. 
The literary and publishing industries were not yet as commercialized or lucrative as they are today, and the concept of proofreading as a distinct profession did not yet have a significant economic impact.\\n\\nIn summary, while proofreading is an\\nTherefore, the answer (Yes or No) is\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys2_second_answer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6218,\n        \"samples\": [\n          \"Yes. While the show doesn't explicitly state the location, hints and references throughout the series suggest that Family Guy might be set on the East Coast, rather than the West Coast, due to the characters' New England accents, references to East Coast landmarks, and proximity to New York City.\",\n          \"95 minutes\",\n          \"Based on the analysis, the answer would be \\\"Yes\\\", it is possible that characters in Harry Potter and the Philosopher's Stone could be perceived as pagans by the Muggle society, considering the magical practices and traditions unique to the wizarding world may be seen as unconventional or even threatening to dominant beliefs and values prevalent in the Muggle world.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys2_pred_after\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1231,\n        \"samples\": [\n          0.08333333333,\n          \"sdae\",\n          300.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys2_GT\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1179,\n        \"samples\": [\n          46.0,\n          342.0,\n          3834\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      
}\n    },\n    {\n      \"column\": \"benchmark\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 10,\n        \"samples\": [\n          \"svamp\",\n          \"aqua\",\n          \"single\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"ability\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Arithmetic\",\n          \"Common Sense\",\n          \"Symbolic\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys1_question\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8566,\n        \"samples\": [\n          \"A coin is heads up. Janice flips the coin. Shelly flips the coin. Arnulfo does not flip the coin. Nestor flips the coin. Is the coin still heads up? Note that \\\"flip\\\" here means \\\"reverse\\\".\",\n          \"Jessie currently weighs 27 kilograms. After she started to go jogging everyday she lost 101 kilograms. How much did she weigh before starting to jog?\",\n          \"Dan has $ 3. For a total of $ 4 he bought 2 candy bar each one costing the same amount of money. How much did each candy bar cost?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys1_first_answer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8573,\n        \"samples\": [\n          \"There were originally 31 people. 25 left, so that is 31 - 25 = 6. Then 25 more got in line. 6 + 25 = 31. The answer is 31.\\nTherefore, the answer (arabic numerals) is\",\n          \"Nancy grew 2 onions, Dan grew 9 onions, and Mike grew 4 onions. So they grew 2 + 9 + 4 = 15 onions in total. 
Since they worked for 6 days, they grew 15 onions in 6 days.\\nTherefore, the answer (arabic numerals) is\",\n          \"The correct answer is (C) fall out.\\n\\nAs men age, many experience hair loss, which can lead to a receding hairline. This is often caused by a combination of factors, including:\\n\\n* Genetics: Male pattern baldness, also known as androgenetic alopecia, is a common condition that runs in families.\\n* Hormonal changes: The conversion of testosterone to dihydrotestosterone (DHT) can contribute to hair loss.\\n* Aging: As men age, the growth cycle of hair slows down, and the hair follicles produce shorter, thinner hairs.\\n* Stress: Physical or emotional stress can cause hair loss.\\n* Poor diet: A diet lacking in essential nutrients, such as iron, zinc, and biotin, can contribute to hair loss.\\n\\nAs a result of these factors, hair can fall out, leading to a receding hairline, balding, or thinning of the hair.\\nTherefore, among A through E, the answer is\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys1_final_answer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8574,\n        \"samples\": [\n          \"The correct answer is (B) learning.\\n\\nPlaying games can have numerous benefits for children, including:\\n\\n* Developing social skills, such as communication, teamwork, and conflict resolution\\n* Improving problem-solving and critical thinking skills\\n* Enhancing cognitive skills, such as memory, attention, and spatial awareness\\n* Building emotional intelligence, including self-awareness, self-regulation, and empathy\\n* Fostering creativity, imagination, and innovation\\n* Promoting physical activity and coordination\\n* Encouraging healthy competition and sportsmanship\\n\\nGames can also help children learn specific skills, such as:\\n\\n* Math and literacy skills through games that involve numbers, patterns, 
and reading\\n* Science and technology skills through games that involve problem-solving and experimentation\\n* Language skills through games that involve communication and storytelling\\n\\nOverall, playing games is an important part of childhood development, and it can have a positive impact on a child's cognitive, social, and emotional development.\\nTherefore, among A through E, the answer is\\nThe correct answer is indeed (B) learning.\\n\\nPlaying games can have a significant impact on a child's learning and development, as it provides an engaging and interactive way to acquire new skills and knowledge. Games can help children develop important skills such as problem-solving, critical thinking, and creativity, while also promoting social and emotional development.\\n\\nThe other options are not correct because:\\n\\n* (A) Winning is not the primary purpose of playing games for children. While winning can be a fun and motivating experience, it's not the primary goal.\\n* (C) Losing is not a purpose of playing games for children. While losing can be a valuable learning experience, it's not a specific purpose of playing games.\\n* (D) Fatigue is not a purpose of playing games for children. While playing games can be physically and mentally demanding, it's not the primary purpose.\\n* (E) Skill is not a specific purpose of playing games for children. While games can help children develop specific skills, it's not the primary purpose.\\n\\nOverall, the correct answer is (B) learning, as playing games can have a significant impact on a child's cognitive, social, and emotional development.\",\n          \"Kaleen started with 0.75 gallon. Then she had 0.5 gallon. So she lost 0.75 - 0.5 = 0.25 gallons. 
The answer is 0.25.\\nTherefore, the answer (arabic numerals) is\\nThat's correct!\\n\\nKaleen started with 0.75 gallons of water and ended up with 0.5 gallons of water, so she lost 0.25 gallons of water.\\n\\nThe answer is indeed 0.25.\",\n          \"A classic lateral thinking puzzle!\\n\\nWhen Adolfo flips the coin, he reverses its side. Since the coin was initially heads up, Adolfo's flip will change it to tails up.\\n\\nSo, the answer is: No, the coin is not still heads up. It is now tails up.\\nTherefore, the answer (Yes or No) is\\nNo\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys2_question\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8566,\n        \"samples\": [\n          \"A coin is heads up. Janice flips the coin. Shelly flips the coin. Arnulfo does not flip the coin. Nestor flips the coin. Is the coin still heads up? Note that \\\"flip\\\" here means \\\"reverse\\\".\",\n          \"Jessie currently weighs 27 kilograms. After she started to go jogging everyday she lost 101 kilograms. How much did she weigh before starting to jog?\",\n          \"Dan has $ 3. For a total of $ 4 he bought 2 candy bar each one costing the same amount of money. How much did each candy bar cost?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys2_first_answer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8582,\n        \"samples\": [\n          \"A thought-provoking question! While it's interesting to consider the hypothetical scenario, it's important to note that the Los Angeles Memorial Sports Arena has not been a feasible option for hosting Coachella. Here's why:\\n\\n1. 
Capacity: The arena has a seating capacity of approximately 16,000, which is significantly lower than the 125,000 to 150,000 attendees that Coachella typically attracts. The festival requires a much larger space to accommodate its audience.\\n2. Layout: Coachella is designed as a multi-stage festival, with multiple performances happening simultaneously. The Sports Arena's layout, which is primarily designed for sports events, wouldn't be well-suited for the complex staging and logistics required for a festival of this scale.\\n3. Infrastructure: The Sports Arena lacks the necessary infrastructure to support the festival's needs, such as large-scale power supply, water and sanitation facilities, and adequate parking for attendees.\\n4. Location: While the Sports Arena is located in Los Angeles, it's not situated in a convenient area for attendees, who often travel from surrounding cities and regions to attend the festival. Coachella is typically held in Indio, California, which is more accessible for attendees and has better infrastructure to support the event.\\n5. Purpose-built venues: Coach\\nTherefore, the answer (Yes or No) is\",\n          \"A fascinating question! While blue is indeed a highly valued and sought-after color, particularly in the world of luxury and high-end design, it's not necessarily the most expensive color in the world.\\n\\nThe cost of a color is often determined by the rarity and difficulty of producing it, as well as its aesthetic appeal and demand. In the case of blue, its expense is largely due to the high demand for high-quality, vibrant blues in industries like fashion, design, and art, where it's often used to create striking visual effects.\\n\\nHowever, other colors like red, particularly the rare and vibrant \\\"Tyrian\\\" red, can also command a high price due to its limited availability and the difficulty of producing it. 
Tyrian red is a specific shade of red that's extracted from the secretions of the murex snail, which is found in the Mediterranean. The process of harvesting and processing these snails to produce this color is labor-intensive and time-consuming, making it extremely valuable and expensive.\\n\\nAdditionally, other colors like purple, especially the rare and highly prized \\\"Violet of Orl\\u00e9ans\\\" (also known as \\\"Orleans Violet\\\"), can also be quite expensive due to the difficulty of producing it and its limited availability.\\n\\nIn summary, while blue is certainly\\nTherefore, the answer (Yes or No) is\",\n          \"Rachel scores 9 points for each treasure. So for 5 treasures on the first level, she scored 5 x 9 = 45 points. For 2 treasures on the second level, she scored 2 x 9 = 18 points. So in total she scored 45 + 18 = 63 points. The answer is 63.\\nTherefore, the answer (arabic numerals) is\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sys2_final_answer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8585,\n        \"samples\": [\n          \"That's an interesting question! However, it's important to clarify that kayaks are not typically used at the summit of Mount Everest. In fact, kayaks are designed for paddling on calm waters, such as lakes, rivers, or oceans, and are not suitable for navigating the high-altitude, rocky, and icy terrain of Mount Everest.\\n\\nThe summit of Mount Everest is a challenging and extreme environment that requires specialized climbing gear, expertise, and physical endurance. 
Climbers typically use crampons, ropes, harnesses, and ice axes to navigate the steep and treacherous slopes, and kayaks would not provide the necessary stability, traction, or protection for such an endeavor.\\n\\nAdditionally, the extreme weather conditions, including high winds, low temperatures, and potential crevasses, make it impractical to use kayaks at the summit of Mount Everest. The focus of climbing expeditions on the mountain is on ensuring the safety and success of the climbers, rather than on using unconventional or untested equipment.\\n\\nSo, while kayaks can be a fantastic way to explore waterways, they're not the best choice for conquering the highest peak on Earth!\\nTherefore, the answer (Yes or No) is\\nNo.\",\n          \"Jon's total time is 40 + 80 + 50 = 170 minutes. James takes 10% less on the swim, which is 0.9 * 40 = 36 minutes. He takes 5 minutes longer on the bike, which is 80 + 5 = 85 minutes. So James' total time is 36 + 85 + x. We want this to be 10 minutes less than Jon's total time, so 36 + 85 + x = 170 - 10 = 160. So x = 160 - 36 - 85 = 39. The answer is 39.\\nTherefore, the answer (arabic numerals) is\\n39\",\n          \"9,300 pennies is 9,300 / 100 = 93 dollars. Two thirds of this is 93 * 2 / 3 = 62. The answer is 62.\\nTherefore, the answer (arabic numerals) is\\n62\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 7
        }
      ],
      "source": [
        "# Derive the question / first-answer / final-answer columns of each strategy.\n",
        "# Within a strategy the row-wise final-answer extractor runs last.\n",
        "for _strategy, _extract_final in (\n",
        "    (\"sys1\", extract_final_answer_sys1),\n",
        "    (\"sys2\", extract_final_answer_sys2),\n",
        "):\n",
        "    _raw_input = all_datasets_df[f\"{_strategy}_input\"]\n",
        "    all_datasets_df[f\"{_strategy}_question\"] = _raw_input.apply(extract_question)\n",
        "    all_datasets_df[f\"{_strategy}_first_answer\"] = _raw_input.apply(\n",
        "        extract_first_answer\n",
        "    )\n",
        "    all_datasets_df[f\"{_strategy}_final_answer\"] = all_datasets_df.apply(\n",
        "        _extract_final, axis=1\n",
        "    )\n",
        "\n",
        "# sys1 and sys2 must line up row for row: same questions, same ground truth (GT)\n",
        "for _column in (\"question\", \"GT\"):\n",
        "    assert (\n",
        "        all_datasets_df[f\"sys1_{_column}\"].values\n",
        "        == all_datasets_df[f\"sys2_{_column}\"].values\n",
        "    ).all()\n",
        "\n",
        "all_datasets_df.head(3)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "xPdnB1ok7VMY"
      },
      "outputs": [],
      "source": [
        "def generate_few_shot_prompt(new_question, new_answer):\n",
        "    \"\"\"Build the few-shot prompt asking whether an answer is definitive.\n",
        "\n",
        "    The prompt consists of the task instruction, six worked question/answer\n",
        "    examples each labelled YES or NO inside a textbf marker, and finally\n",
        "    ``new_question`` / ``new_answer`` followed by an unfinished textbf marker\n",
        "    that the model is expected to complete.\n",
        "\n",
        "    Args:\n",
        "        new_question: Question text interpolated into the last example.\n",
        "        new_answer: Answer text interpolated into the last example.\n",
        "\n",
        "    Returns:\n",
        "        The complete prompt string.\n",
        "    \"\"\"\n",
        "    prompt = f\"\"\"\n",
        "Does the given answer directly answer the given question in a definitive way? ONLY RETURN YES OR NO IN A \\\\textbf{{}}. Definitive answers are clear and do not leave room for interpretation or ambiguity. If the answer tries to explore multiple perspectives or factors involved, it is not definitive and YOU HAVE TO RETURN NO.\n",
        "\n",
        "Question: After hearing a company's first-quarter earnings, do you think it affects your outlook for the year?\n",
        "Answer: Yes, it’s challenging for me to reset my expectations, even if circumstances improve.\n",
        "\\\\textbf{{YES}}\n",
        "------------------\n",
        "Question: After hearing a company's first-quarter earnings, do you think it affects your outlook for the year?\n",
        "Answer: First-quarter earnings are informative but not sufficient for determining a company's annual outlook.\n",
        "\\\\textbf{{NO}}\n",
        "------------------\n",
        "Question: After hearing a product's starting price, does it change how much you are willing to pay?\n",
        "Answer: Yes, my idea of value hinges on the initial price I encounter.\n",
        "\\\\textbf{{YES}}\n",
        "------------------\n",
        "Question: After hearing a product's starting price, does it change how much you are willing to pay?\n",
        "Answer: In behavioral economics, the initial price of a product often acts as an \"anchor,\" influencing our perceived value.\n",
        "\\\\textbf{{NO}}\n",
        "------------------\n",
        "Question: Are accidents more likely during holiday travel?\n",
        "Answer: Holiday travel is often seen as more dangerous because we frequently hear about car crashes during this time.\n",
        "\\\\textbf{{YES}}\n",
        "------------------\n",
        "Question: Are accidents more likely during holiday travel?\n",
        "Answer: Several factors during holiday periods can elevate accident risks.\n",
        "\\\\textbf{{NO}}\n",
        "------------------\n",
        "Question: {new_question}\n",
        "Answer: {new_answer}\n",
        "\\\\textbf\n",
        "\"\"\"\n",
        "    return prompt"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "aeHjjomg7VMY"
      },
      "outputs": [],
      "source": [
        "def get_few_shot_prediction(\n",
        "    question, answer_first_sentence, model=\"phi4\", num_predict=5, temperature=0.0\n",
        "):\n",
        "    \"\"\"Ask the judge model whether an answer excerpt answers the question definitively.\n",
        "\n",
        "    The few-shot prompt is built with ``generate_few_shot_prompt`` and sent to\n",
        "    ``chat`` as a single user message.\n",
        "\n",
        "    Args:\n",
        "        question: Question text placed in the prompt.\n",
        "        answer_first_sentence: Answer excerpt to be judged.\n",
        "        model: Model name passed to ``chat``. Defaults to ``\"phi4\"``.\n",
        "        num_predict: Value of the ``num_predict`` generation option. Defaults\n",
        "            to 5, so only a very short reply is requested.\n",
        "        temperature: Value of the ``temperature`` generation option. Defaults\n",
        "            to 0.0.\n",
        "\n",
        "    Returns:\n",
        "        The ``content`` of the reply message, unparsed.\n",
        "    \"\"\"\n",
        "    content = generate_few_shot_prompt(question, answer_first_sentence)\n",
        "    response: ChatResponse = chat(\n",
        "        model=model,\n",
        "        messages=[{\"role\": \"user\", \"content\": content}],\n",
        "        options={\"num_predict\": num_predict, \"temperature\": temperature},\n",
        "    )\n",
        "    return response[\"message\"][\"content\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "SxPFRUf17VMZ"
      },
      "outputs": [],
      "source": [
        "from nltk.tokenize import sent_tokenize\n",
        "\n",
        "\n",
        "def extract_first_k_sentences(sent, k):\n",
        "    \"\"\"Return the first ``k`` sentences of ``sent`` joined by single spaces.\n",
        "\n",
        "    Sentences are split with NLTK's ``sent_tokenize``; texts with fewer than\n",
        "    ``k`` sentences are returned with all of their sentences.\n",
        "    \"\"\"\n",
        "    leading_sentences = sent_tokenize(sent)[:k]\n",
        "    return \" \".join(leading_sentences)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import nltk\n",
        "# Fetch the 'punkt_tab' NLTK resource; presumably the tokenizer data that\n",
        "# sent_tokenize needs at runtime -- TODO confirm against the installed NLTK version.\n",
        "nltk.download('punkt_tab')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sbPB9Mfx8TTu",
        "outputId": "f02d30cb-66d5-4d22-8bd1-cc628a93ac5a"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package punkt_tab to /root/nltk_data...\n",
            "[nltk_data]   Unzipping tokenizers/punkt_tab.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4WFe-6RU7VMZ",
        "outputId": "e3289329-fdeb-4c42-ebfe-c086e9d96253"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: A new program had 60 downloads in the first month. The number of downloads in the second month was three times as many as the downloads in the first month, but then reduced by 30% in the third month. How many downloads did the program have total over the three months?\n",
            "Strategy: sys2\n",
            "Answer: The first month had 60 downloads. The second month had 3 times that, so 60 * 3 = 180 downloads. Then the third month reduced this by 30%. 30% of 180 is 0.3 * 180 = 54. So the third month had 180 - 54 = 126 downloads. The total number of downloads over the three months is 60 + 180 + 126 = 366. The answer is 366.\n",
            "Therefore, the answer (arabic numerals) is\n",
            "366\n",
            "Answer first sentences: The first month had 60 downloads. The second month had 3 times that, so 60 * 3 = 180 downloads. Then the third month reduced this by 30%.\n"
          ]
        }
      ],
      "source": [
        "# Inspect one example: choose a benchmark, a row within it and a strategy.\n",
        "data_point_index = 10\n",
        "benchmark_index = 3\n",
        "strategy = \"sys2\"\n",
        "benchmark = benchmarks[benchmark_index]\n",
        "\n",
        "# Row number `data_point_index` among the rows of the chosen benchmark.\n",
        "_benchmark_rows = all_datasets_df.loc[all_datasets_df[\"benchmark\"] == benchmark]\n",
        "datapoint = _benchmark_rows.iloc[data_point_index]\n",
        "\n",
        "question = datapoint[f\"{strategy}_question\"]\n",
        "answer = datapoint[f\"{strategy}_final_answer\"]\n",
        "answer_first_three_sentences = extract_first_k_sentences(answer, 3)\n",
        "\n",
        "print(f\"Question: {question}\")\n",
        "print(f\"Strategy: {strategy}\")\n",
        "print(f\"Answer: {answer}\")\n",
        "print(f\"Answer first sentences: {answer_first_three_sentences}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "AJXHmBl_7VMZ"
      },
      "outputs": [],
      "source": [
        "# Number the rows 0..N-1 in an explicit \"index\" column; the annotation cell\n",
        "# uses it to write model replies back to the matching rows.\n",
        "all_datasets_df[\"index\"] = np.arange(all_datasets_df.shape[0])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fXB1zMAl7VMZ",
        "outputId": "702373f8-147a-4b73-9f4f-595a4c150008"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "((8587,), 8587)"
            ]
          },
          "metadata": {},
          "execution_count": 14
        }
      ],
      "source": [
        "# Shape of the \"index\" column next to its number of distinct values, for comparison.\n",
        "all_datasets_df[\"index\"].shape, all_datasets_df[\"index\"].nunique()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "qsb4xvvU7VMZ",
        "outputId": "f1dec149-9add-40cb-a2b6-784a430cbea5"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-15-82a378f2a370>:4: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
            "  .apply(lambda x: x.sample(n=200, random_state=42))\n"
          ]
        }
      ],
      "source": [
        "# random samples (200) from the dataset across benchmarks\n",
        "# Every benchmark group is sampled on its own with the same seed. Iterating\n",
        "# over the groups instead of calling GroupBy.apply keeps the \"benchmark\"\n",
        "# column in the result on every pandas version and avoids the\n",
        "# DeprecationWarning GroupBy.apply raises when it operates on the grouping column.\n",
        "sample_df = pd.concat(\n",
        "    [\n",
        "        benchmark_rows.sample(n=200, random_state=42)\n",
        "        for _, benchmark_rows in all_datasets_df.groupby(\"benchmark\")\n",
        "    ]\n",
        ").reset_index(drop=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "QXhX9n6_7VMa"
      },
      "outputs": [],
      "source": [
        "# Keep only the sampled rows that belong to the \"letter\" benchmark.\n",
        "sample_df = sample_df.loc[sample_df[\"benchmark\"] == \"letter\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "blVQNM_-7VMa"
      },
      "outputs": [],
      "source": [
        "num_sentences = 3\n",
        "annotations_path = f\"results_with_direct_answer_annotations_{num_sentences}.csv\"\n",
        "\n",
        "if os.path.exists(annotations_path):\n",
        "    # Cache hit: reuse the stored annotations instead of querying the model again.\n",
        "    all_datasets_df = pd.read_csv(annotations_path)\n",
        "else:\n",
        "    # (row \"index\" value, strategy) -> raw model reply, or \"error\" when the call failed\n",
        "    results = {}\n",
        "    # (row \"index\" value, strategy, repr of the exception) for every failed call\n",
        "    failures = []\n",
        "    for i in tqdm(range(sample_df.shape[0])):\n",
        "        data_point = sample_df.iloc[i]\n",
        "        data_point_index = data_point[\"index\"]\n",
        "        question = data_point[\"sys1_question\"]\n",
        "        for strategy in [\"sys1\", \"sys2\"]:\n",
        "            try:\n",
        "                answer = data_point[f\"{strategy}_final_answer\"]\n",
        "                first_k_sentences = extract_first_k_sentences(answer, num_sentences)\n",
        "                response = get_few_shot_prediction(question, first_k_sentences)\n",
        "            except Exception as exc:\n",
        "                # Best effort: keep annotating the remaining rows but remember what\n",
        "                # went wrong. Catching Exception rather than using a bare except\n",
        "                # lets KeyboardInterrupt / SystemExit stop a long run.\n",
        "                failures.append((data_point_index, strategy, repr(exc)))\n",
        "                response = \"error\"\n",
        "            results[(data_point_index, strategy)] = response\n",
        "\n",
        "    if failures:\n",
        "        # Surface failures once instead of silently storing \"error\" markers.\n",
        "        print(\n",
        "            f\"{len(failures)} of {len(results)} annotations failed; \"\n",
        "            f\"first failure: {failures[0]}\"\n",
        "        )\n",
        "\n",
        "    # Write every reply back to the row carrying the matching \"index\" value.\n",
        "    for (data_point_index, strategy), response in results.items():\n",
        "        all_datasets_df.loc[\n",
        "            all_datasets_df[\"index\"] == data_point_index,\n",
        "            f\"{strategy}_few_shot_response\",\n",
        "        ] = response\n",
        "\n",
        "    all_datasets_df.to_csv(annotations_path, index=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YsJdD_xK7VMa"
      },
      "outputs": [],
      "source": [
        "def extract_clean_direct_answer(direct_answer):\n",
        "    direct_answer = str(direct_answer).lower()\n",
        "    if \"yes\" in direct_answer:\n",
        "        return \"yes\"\n",
        "    elif \"no\" in direct_answer:\n",
        "        return \"no\"\n",
        "    else:\n",
        "        return \"error\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Pp9Y1DdD7VMa"
      },
      "outputs": [],
      "source": [
        "# Normalise each strategy's raw judge reply into yes / no / error.\n",
        "for _strategy in (\"sys1\", \"sys2\"):\n",
        "    all_datasets_df[f\"{_strategy}_direct_answer\"] = all_datasets_df[\n",
        "        f\"{_strategy}_few_shot_response\"\n",
        "    ].apply(extract_clean_direct_answer)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "443SOKIf7VMa",
        "outputId": "08cb0460-b16e-452a-a34b-cad2d5eb4180"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(2200, (2200, 21))"
            ]
          },
          "metadata": {},
          "execution_count": 20
        }
      ],
      "source": [
        "# Number of distinct \"index\" values next to the frame's (rows, columns) shape.\n",
        "all_datasets_df[\"index\"].nunique(), all_datasets_df.shape"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JLjnsbVv7VMa",
        "outputId": "0939ea3b-3fb7-4fdf-ffe7-f54bb95239bc"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-21-cbe2112bf4d4>:6: SettingWithCopyWarning: \n",
            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
            "Try using .loc[row_indexer,col_indexer] = value instead\n",
            "\n",
            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
            "  all_datasets_df[\"ability\"] = all_datasets_df[\"ability\"].replace(\n"
          ]
        }
      ],
      "source": [
        "# Keep only the three abilities analysed below.\n",
        "# .copy() makes the filtered frame independent of the original, so the column\n",
        "# assignment further down no longer triggers pandas' SettingWithCopyWarning.\n",
        "all_datasets_df = all_datasets_df[\n",
        "    all_datasets_df[\"ability\"].isin([\"math\", \"CS\", \"symbolic\"])\n",
        "].copy()\n",
        "\n",
        "# Rename the math ability to Arithmetic, CS to Common Sense, and symbolic to Symbolic.\n",
        "# NOTE: this cell overwrites all_datasets_df; running it a second time filters on\n",
        "# the raw labels again and leaves no rows, so reload the data before re-running.\n",
        "all_datasets_df[\"ability\"] = all_datasets_df[\"ability\"].replace(\n",
        "    {\"math\": \"Arithmetic\", \"CS\": \"Common Sense\", \"symbolic\": \"Symbolic\"}\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "V6buqZpY7VMa"
      },
      "outputs": [],
      "source": [
        "# Build a long-format table with one row per (sample, strategy) so the yes / no\n",
        "# split of the sys1 and sys2 direct answers can be shown in a sunburst chart.\n",
        "# Samples where either strategy's answer is `error` are left out entirely.\n",
        "\n",
        "strategies, indices, benchmarks, direct_answers, abilities = [], [], [], [], []\n",
        "\n",
        "answered_rows = all_datasets_df[all_datasets_df[\"sys1_direct_answer\"].notna()]\n",
        "for index, row in answered_rows.iterrows():\n",
        "    if \"error\" in (row[\"sys1_direct_answer\"], row[\"sys2_direct_answer\"]):\n",
        "        continue\n",
        "    # Emit the sys1 record first, then the sys2 record, for the same sample.\n",
        "    for strategy_name in (\"sys1\", \"sys2\"):\n",
        "        strategies.append(strategy_name)\n",
        "        direct_answers.append(row[f\"{strategy_name}_direct_answer\"])\n",
        "        benchmarks.append(row[\"benchmark\"])\n",
        "        abilities.append(row[\"ability\"])\n",
        "        indices.append(row[\"index\"])\n",
        "\n",
        "data = pd.DataFrame(\n",
        "    {\n",
        "        \"Strategy\": strategies,\n",
        "        \"direct_answer_clean\": direct_answers,\n",
        "        \"benchmark\": benchmarks,\n",
        "        \"ability\": abilities,\n",
        "        \"index\": indices,\n",
        "    }\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 542
        },
        "id": "u9zH1pG87VMb",
        "outputId": "a87d7abc-63fb-46e0-f9bc-6969c39ac34b"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "<html>\n",
              "<head><meta charset=\"utf-8\" /></head>\n",
              "<body>\n",
              "    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n",
              "        <script charset=\"utf-8\" src=\"https://cdn.plot.ly/plotly-2.35.2.min.js\"></script>                <div id=\"f7dcbacd-e52b-4b0a-bc3c-26d42b734c54\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"f7dcbacd-e52b-4b0a-bc3c-26d42b734c54\")) {                    Plotly.newPlot(                        \"f7dcbacd-e52b-4b0a-bc3c-26d42b734c54\",                        [{\"branchvalues\":\"total\",\"domain\":{\"x\":[0.0,1.0],\"y\":[0.0,1.0]},\"hovertemplate\":\"labels=%{label}\\u003cbr\\u003ecount=%{value}\\u003cbr\\u003eparent=%{parent}\\u003cbr\\u003eid=%{id}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\",\"ids\":[\"sys1\\u002fno\",\"sys2\\u002fno\",\"sys1\\u002fyes\",\"sys2\\u002fyes\",\"sys1\",\"sys2\"],\"labels\":[\"no\",\"no\",\"yes\",\"yes\",\"sys1\",\"sys2\"],\"name\":\"\",\"parents\":[\"sys1\",\"sys2\",\"sys1\",\"sys2\",\"\",\"\"],\"values\":[635,689,656,602,1291,1291],\"type\":\"sunburst\"}],                        
{\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\
"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"t
icks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,
\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"legend\":{\"tracegroupgap\":0},\"margin\":{\"t\":60}},                        
{\"responsive\": true}                    ).then(function(){\n",
              "                            \n",
              "var gd = document.getElementById('f7dcbacd-e52b-4b0a-bc3c-26d42b734c54');\n",
              "var x = new MutationObserver(function (mutations, observer) {{\n",
              "        var display = window.getComputedStyle(gd).display;\n",
              "        if (!display || display === 'none') {{\n",
              "            console.log([gd, 'removed!']);\n",
              "            Plotly.purge(gd);\n",
              "            observer.disconnect();\n",
              "        }}\n",
              "}});\n",
              "\n",
              "// Listen for the removal of the full notebook cells\n",
              "var notebookContainer = gd.closest('#notebook-container');\n",
              "if (notebookContainer) {{\n",
              "    x.observe(notebookContainer, {childList: true});\n",
              "}}\n",
              "\n",
              "// Listen for the clearing of the current output cell\n",
              "var outputEl = gd.closest('.output');\n",
              "if (outputEl) {{\n",
              "    x.observe(outputEl, {childList: true});\n",
              "}}\n",
              "\n",
              "                        })                };                            </script>        </div>\n",
              "</body>\n",
              "</html>"
            ]
          },
          "metadata": {}
        }
      ],
      "source": [
        "# Sunburst chart: the first level is the Strategy (sys1 / sys2) and the second level\n",
        "# splits each strategy by its cleaned direct answer.\n",
        "sunburst_levels = [\"Strategy\", \"direct_answer_clean\"]\n",
        "fig = px.sunburst(data, path=sunburst_levels)\n",
        "fig.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "txZdSJo97VMb",
        "outputId": "59d70d6b-e384-4f19-818e-017ad6605771"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "pvalue      0.0015531302152038348\n",
            "statistic   114.0\n"
          ]
        }
      ],
      "source": [
        "# Paired comparison of the sys1 and sys2 direct answers with McNemar's test.\n",
        "# Samples where either strategy's answer is labelled `error` are excluded.\n",
        "all_datasets_df_without_errors = all_datasets_df[\n",
        "    all_datasets_df[\"sys1_direct_answer\"].ne(\"error\")\n",
        "    & all_datasets_df[\"sys2_direct_answer\"].ne(\"error\")\n",
        "]\n",
        "\n",
        "sys1_says_yes = all_datasets_df_without_errors[\"sys1_direct_answer\"].eq(\"yes\")\n",
        "sys1_says_no = all_datasets_df_without_errors[\"sys1_direct_answer\"].eq(\"no\")\n",
        "sys2_says_yes = all_datasets_df_without_errors[\"sys2_direct_answer\"].eq(\"yes\")\n",
        "sys2_says_no = all_datasets_df_without_errors[\"sys2_direct_answer\"].eq(\"no\")\n",
        "\n",
        "# One sub-frame per cell of the 2x2 agreement table.\n",
        "sys1_yes_sys2_yes = all_datasets_df_without_errors[sys1_says_yes & sys2_says_yes]\n",
        "sys1_no_sys2_no = all_datasets_df_without_errors[sys1_says_no & sys2_says_no]\n",
        "sys1_yes_sys2_no = all_datasets_df_without_errors[sys1_says_yes & sys2_says_no]\n",
        "sys1_no_sys2_yes = all_datasets_df_without_errors[sys1_says_no & sys2_says_yes]\n",
        "\n",
        "# Rows are the sys1 answer (yes, no); columns are the sys2 answer (yes, no).\n",
        "table = np.array(\n",
        "    [\n",
        "        [sys1_yes_sys2_yes.shape[0], sys1_yes_sys2_no.shape[0]],\n",
        "        [sys1_no_sys2_yes.shape[0], sys1_no_sys2_no.shape[0]],\n",
        "    ]\n",
        ")\n",
        "\n",
        "# NOTE(review): statsmodels documents `correction` as applying to the chi-square\n",
        "# variant (exact=False); with exact=True it presumably has no effect - confirm.\n",
        "result = mcnemar(table, exact=True, correction=True)\n",
        "print(result)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 487
        },
        "id": "q7KR1tb_7VMb",
        "outputId": "1ab573bf-5984-4366-e016-04e5e75981ba"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 640x480 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAPTNJREFUeJzt3XtcFXXi//H3AblIyk0TzUhQ2kpTUFhNXC8laemu1/WSlUTFr5tpkW1aKqtW2kXDLpubJqZtRZnZfrcijSQzTTfvXcxLJl1EIBMEEvAwvz96eHZZsGZgDgdPr+fjwePB+cycmTcU83g7cz4zDsMwDAEAAOCs5+PpAAAAALAHxQ4AAMBLUOwAAAC8BMUOAADAS1DsAAAAvATFDgAAwEtQ7AAAALwExQ4AAMBL/OaKnWEYKikpEfdlBgAA3uY3V+xOnDihkJAQnThxwtNRAAAAbPWbK3YAAADeimIHAADgJSh2AAAAXoJiBwAA4CUodgAAAF6CYgcAAOAlKHYAAABegmIHAADgJSh2AAAAXoJiBwAA4CUodgAAAF6CYgcAAOAlKHYAAABegmIHAADgJSh2AAAAXoJiBwAA4CUodgAAAF6CYgcAAOAlKHYAAABeopmnA/wWxN+7wtMRgN+sbY9N9HQEAGg0nLEDAADwEk2i2D3zzDOKiopSYGCgevXqpa1bt55x3eXLl8vhcNT4CgwMbMS0AAAATZPHi11WVpbS0tKUnp6u7du3KzY2VoMHD1ZBQcEZ3xMcHKwjR464vg4fPtyIiQEAAJomjxe7hQsXKjU1VSkpKercubMWL16soKAgLVu27IzvcTgcatu2resrIiKiERMDAAA0TR4tdpWVldq2bZuSkpJcYz4+PkpKStLmzZvP+L7S0lJ16NBBkZGRGj58uD777LPGiAsAANCkeXRWbFFRkZxOZ60zbhEREdq7d2+d77nooou0bNkydevWTcXFxXr88ceVmJiozz77TOeff36t9SsqKlRRUeF6XVJSIkmqqqpSVVWVjT/Nmfn7NspuANShsf7OAcCd/Pz8TK131t3upHfv3urdu7frdWJioi655BL9/e9/19y5c2utP2/ePM2ePbvW+Nq1axUUFOTWrKdN6x3SKPsBUNvbb7/t6QgA0GDDhw83tZ5Hi13r1q3l6+uro0eP1hg/evSo2rZta2obfn5+6t69uw4cOFDn8unTpystLc31uqSkRJGRkRo0aJCCg4PrH96CfjNfbpT9AKhtw9xrPB0BABqNR4udv7+/4uPjlZOToxEjRkiSqqurlZOTo0mTJpnahtPp1J49ezRkyJA6lwcEBCggIKDWuJ+fn+nTmg1V6WyU3QCoQ2P9nQNAU+DxS7FpaWlKTk5WQkKCevbsqYyMDJWVlSklJUWSNHHiRLVv317z5s2TJM2ZM0eXXXaZYmJidPz4cT322GM6fPiwbr75Zk/+GAAAAB7n8WI3btw4FRYWatasWcrPz1dcXJyys7NdEyry8vLk4/Ofybs//vijUlNTlZ+fr7CwMMXHx2vTpk3q3Lmzp34EAACAJsFhGIbh6RCNqaSkRCEhISouLm60z9jxrFjAc3hWLIDfEo/foBgAAAD2oNgBAAB4CYodAACAl6DYAQAAeAmKHQAAgJeg2AEAAHgJih0AAICXsFzstm/frj179rhev/nmmxoxYoTuv/9+VVZW2hoOAAAA5lkudrfccov27dsnSfrqq680fvx4BQUF6bXXXtNf/vIX2wMCAADAHMvFbt++fYqLi5Mkvfbaa+rXr59eeuklLV++XK+//rrd+QAAAGCS5WJnGIaqq6slSe+9956GDBkiSYqMjFRRUZG96QAAAGCa5WKXkJCgBx98UCtXrtQHH3ygoUOHSpIOHTqkiIgI2wMCAADAHMvFLiMjQ9u3b9ekSZP0wAMPKCYmRpK0atUqJSYm2h4QAAAA5jSz+oZu3brVmBV72mOPPSZfX1
9bQgEAAMC6et3H7vjx41q6dKmmT5+uY8eOSZI+//xzFRQU2BoOAAAA5lk+Y7d7924NHDhQoaGh+vrrr5Wamqrw8HCtXr1aeXl5WrFihTtyAgAA4FdYPmOXlpamlJQU7d+/X4GBga7xIUOGaMOGDbaGAwAAgHmWi92///1v3XLLLbXG27dvr/z8fFtCAQAAwDrLxS4gIEAlJSW1xvft26dzzz3XllAAAACwznKxGzZsmObMmaOqqipJksPhUF5enu677z6NHj3a9oAAAAAwx3KxW7BggUpLS9WmTRv99NNP6t+/v2JiYtSyZUs99NBD7sgIAAAAEyzPig0JCdG6dev00UcfadeuXSotLVWPHj2UlJTkjnwAgF8Qfy93IgA8YdtjEz0doU6Wi91pffr0UZ8+fezMAgAAgAawfCl28uTJevLJJ2uNP/3007rrrrvsyAQAAIB6sFzsXn/99TrP1CUmJmrVqlW2hAIAAIB1lovdDz/8oJCQkFrjwcHBKioqsiUUAAAArLNc7GJiYpSdnV1r/J133lHHjh1tCQUAAADrLE+eSEtL06RJk1RYWKgrrrhCkpSTk6MFCxYoIyPD7nwAAAAwyXKxu/HGG1VRUaGHHnpIc+fOlSRFRUXp2Wef1cSJTXPqLwAAwG9BvW53ctttt+m2225TYWGhmjdvrhYtWtidCwAAABbV+z52kng2LAAAQBNiefLE0aNHdf311+u8885Ts2bN5OvrW+MLAAAAnmH5jN0NN9ygvLw8zZw5U+3atZPD4XBHLgAAAFhkudht3LhRH374oeLi4twQBwAAAPVl+VJsZGSkDMNwRxYAAAA0gOVil5GRoWnTpunrr792QxwAAADUl+VLsePGjVN5ebk6deqkoKAg+fn51Vh+7Ngx28IBAADAPMvFjqdLAAAANE2Wi11ycrI7cgAAAKCBLH/GTpIOHjyoGTNm6JprrlFBQYEk6Z133tFnn31mazgAAACYZ7nYffDBB+ratau2bNmi1atXq7S0VJK0a9cupaen2x4QAAAA5lgudtOmTdODDz6odevWyd/f3zV+xRVX6OOPP7Y1HAAAAMyzXOz27NmjkSNH1hpv06aNioqKbAkFAAAA6ywXu9DQUB05cqTW+I4dO9S+fXtbQgEAAMA6y8Vu/Pjxuu+++5Sfny+Hw6Hq6mp99NFHmjp1qiZOnOiOjAAAADDBcrF7+OGHdfHFFysyMlKlpaXq3Lmz+vXrp8TERM2YMcMdGQEAAGCC5fvY+fv7a8mSJZo1a5b27Nmj0tJSde/eXRdeeKE78gEAAMAky2fs5syZo/LyckVGRmrIkCEaO3asLrzwQv3000+aM2eOOzICAADABMvFbvbs2a571/238vJyzZ4925ZQAAAAsM5ysTMMQw6Ho9b4rl27FB4ebksoAAAAWGf6M3ZhYWFyOBxyOBz63e9+V6PcOZ1OlZaW6tZbb3VLSAAAAPw608UuIyNDhmHoxhtv1OzZsxUSEuJa5u/vr6ioKPXu3dstIQEAAPDrTBe75ORkSVJ0dLQSExPl5+fntlAAAACwzvLtTvr376/q6mrt27dPBQUFqq6urrG8X79+toUDAACAeZaL3ccff6wJEybo8OHDMgyjxjKHwyGn02lbOAAAAJhnudjdeuutSkhI0FtvvaV27drVOUMWAAAAjc9ysdu/f79WrVqlmJgYd+QBAABAPVm+j12vXr104MABd2QBAABAA1g+Y3fnnXfqnnvuUX5+vrp27Vprdmy3bt1sCwcAAADzLBe70aNHS5JuvPFG15jD4XA9kYLJEwAAAJ5hudgdOnTIHTkAAADQQJaLXYcOHdyRAwAAAA1kefKEJK1cuVJ9+vTReeedp8OHD0v6+ZFjb775pq3hAAAAYJ7lYvfss88qLS1NQ4YM0fHjx12fqQsNDVVGRobd+QAAAGCS5WL31FNPacmSJXrggQfk6+vrGk9ISNCePXtsDQcAAADzLBe7Q4cOqXv37rXGAwICVF
ZWZksoAAAAWGe52EVHR2vnzp21xrOzs3XJJZfYkQkAAAD1YHlWbFpamu644w6dPHlShmFo69atevnllzVv3jwtXbrUHRkBAABgguVid/PNN6t58+aaMWOGysvLNWHCBJ133nlatGiRxo8f746MAAAAMKFetzu59tprtX//fpWWlio/P1/ffvutbrrppnqHeOaZZxQVFaXAwED16tVLW7duNfW+V155RQ6HQyNGjKj3vgEAALyF5WL3008/qby8XJIUFBSkn376SRkZGVq7dm29AmRlZSktLU3p6enavn27YmNjNXjwYBUUFPzi+77++mtNnTpVffv2rdd+AQAAvI3lYjd8+HCtWLFCknT8+HH17NlTCxYs0PDhw/Xss89aDrBw4UKlpqYqJSVFnTt31uLFixUUFKRly5ad8T1Op1PXXnutZs+erY4dO1reJwAAgDeyXOy2b9/uOku2atUqtW3bVocPH9aKFSv05JNPWtpWZWWltm3bpqSkpP8E8vFRUlKSNm/efMb3zZkzR23atGnQ5V8AAABvY3nyRHl5uVq2bClJWrt2rUaNGiUfHx9ddtllrseLmVVUVCSn06mIiIga4xEREdq7d2+d79m4caOef/75Om+5UpeKigpVVFS4XpeUlEiSqqqqVFVVZSlvffn7/vo6ANyjsf7OPYXjC+AZjX1s8fPzM7We5WIXExOjNWvWaOTIkXr33Xd19913S5IKCgoUHBxsdXOWnDhxQtdff72WLFmi1q1bm3rPvHnzNHv27Frja9euVVBQkN0R6zStd0ij7AdAbW+//banI7gVxxfAMxr72DJ8+HBT6zkMwzCsbHjVqlWaMGGCnE6nBg4c6Jo0MW/ePG3YsEHvvPOO6W1VVlYqKChIq1atqjGzNTk5WcePH9ebb75ZY/2dO3eqe/fuNR5lVl1dLennS7hffvmlOnXqVOM9dZ2xi4yMVFFRkduL6Gn9Zr7cKPsBUNuGudd4OoJbcXwBPKOxjy1uO2P35z//WX/4wx905MgRxcbGusYHDhyokSNHWtqWv7+/4uPjlZOT4yp21dXVysnJ0aRJk2qtf/HFF9d6Hu2MGTN04sQJLVq0SJGRkbXeExAQoICAgFrjfn5+pn9JDVXpbJTdAKhDY/2dewrHF8AzmuqxxXKxk6S2bduqbdu2NcZ69uxZrwBpaWlKTk5WQkKCevbsqYyMDJWVlSklJUWSNHHiRLVv317z5s1TYGCgLr300hrvDw0NlaRa4wAAAL819Sp2dho3bpwKCws1a9Ys5efnKy4uTtnZ2a4JFXl5efLxqdd9lAEAAH5TPF7sJGnSpEl1XnqVpNzc3F987/Lly+0PBAAAcBbiVBgAAICXoNgBAAB4CUuXYn/44Qft3r1bsbGxCg8PV1FRkZ5//nlVVFRozJgxuuSSS9yVEwAAAL/CdLHbunWrBg0apJKSEoWGhmrdunUaM2aMmjVrpurqas2fP18bN25Ujx493JkXAAAAZ2D6UuwDDzygMWPGqLi4WPfff79GjBihgQMHat++fTpw4IDGjx+vuXPnujMrAAAAfoHpYrdt2zalpaWpZcuWmjJlir7//nulpqa6lk+aNEn//ve/3RISAAAAv850sausrFTz5s0l/Xy35aCgoBrPa23durV++OEH+xMCAADAFNPFLjIyUl999ZXr9SuvvKJ27dq5Xh85cqRG0QMAAEDjMj15Yvz48SooKHC9Hjp0aI3l//znP+v9WDEAAAA0nOlil56e/ovLH3jgAfn6+jY4EAAAAOrHtkeKBQUF2bUpAAAA1ANPngAAAPASFDsAAAAvQbEDAADwEpaLXV5engzDqDVuGIby8vJsCQUAAADrLBe76OhoFRYW1ho/duyYoqOjbQkFAAAA6ywXO8Mw5HA4ao2XlpYqMDDQllAAAACwzvTtTtLS0iRJDodDM2fOrHF7E6fTqS1btiguLs72gAAAADDHdLHbsWOHpJ/P2O3Zs0f+/v6uZf7+/oqNjd
XUqVPtTwgAAABTTBe79evXS5JSUlK0aNEiBQcHuy0UAAAArLP8GbvMzMwapa6kpERr1qzR3r17bQ0GAAAAaywXu7Fjx+rpp5+WJP30009KSEjQ2LFj1bVrV73++uu2BwQAAIA5lovdhg0b1LdvX0nSG2+8IcMwdPz4cT355JN68MEHbQ8IAAAAcywXu+LiYoWHh0uSsrOzNXr0aAUFBWno0KHav3+/7QEBAABgjuViFxkZqc2bN6usrEzZ2dkaNGiQJOnHH3/kPnYAAAAeZHpW7Gl33XWXrr32WrVo0UIdOnTQgAEDJP18ibZr16525wMAAIBJlovd7bffrl69eikvL09XXnmlfHx+PunXsWNHPmMHAADgQZYuxVZVValTp04KCgrSyJEj1aJFC9eyoUOHqk+fPrYHBAAAgDmWip2fn59OnjzpriwAAABoAMuTJ+644w498sgjOnXqlDvyAAAAoJ4sf8bu3//+t3JycrR27Vp17dpV55xzTo3lq1evti0cAAAAzLNc7EJDQzV69Gh3ZAEAAEADWC52mZmZ7sgBAACABrL8GTsAAAA0TabO2PXo0UM5OTkKCwtT9+7d5XA4zrju9u3bbQsHAAAA80wVu+HDhysgIECSNGLECHfmAQAAQD2ZKnZhYWGuJ0ykpKTo/PPPd70GAABA02CqnaWlpamkpESSFB0draKiIreGAgAAgHWmztidd955ev311zVkyBAZhqFvv/32jE+guOCCC2wNCAAAAHNMFbsZM2bozjvv1KRJk+RwOPT73/++1jqGYcjhcMjpdNoeEgAAAL/OVLH7f//v/+maa67R4cOH1a1bN7333ntq1aqVu7MBAADAAtM3KG7ZsqUuvfRSZWZmqk+fPq5ZsgAAAGgaLD95Ijk52R05AAAA0EDcswQAAMBLUOwAAAC8BMUOAADAS9S72FVWVurLL7/UqVOn7MwDAACAerJc7MrLy3XTTTcpKChIXbp0UV5eniTpzjvv1Pz5820PCAAAAHMsF7vp06dr165dys3NVWBgoGs8KSlJWVlZtoYDAACAeZZvd7JmzRplZWXpsssuk8PhcI136dJFBw8etDUcAAAAzLN8xq6wsFBt2rSpNV5WVlaj6AEAAKBxWS52CQkJeuutt1yvT5e5pUuXqnfv3vYlAwAAgCWWL8U+/PDDuvrqq/X555/r1KlTWrRokT7//HNt2rRJH3zwgTsyAgAAwATLZ+z+8Ic/aOfOnTp16pS6du2qtWvXqk2bNtq8ebPi4+PdkREAAAAmWD5jJ0mdOnXSkiVL7M4CAACABqhXsauurtaBAwdUUFCg6urqGsv69etnSzAAAABYY7nYffzxx5owYYIOHz4swzBqLHM4HHI6nbaFAwAAgHmWi92tt97qmhnbrl07bnECAADQRFgudvv379eqVasUExPjjjwAAACoJ8uzYnv16qUDBw64IwsAAAAawNQZu927d7u+v/POO3XPPfcoPz9fXbt2lZ+fX411u3XrZm9CAAAAmGKq2MXFxcnhcNSYLHHjjTe6vj+9jMkTAAAAnmOq2B06dMjdOQAAANBApopdhw4dXN9v2LBBiYmJatas5ltPnTqlTZs21VgXAAAAjcfy5InLL79cx44dqzVeXFysyy+/3JZQAAAAsM5ysTv9Wbr/9cMPP+icc86xJRQAAACsM30fu1GjRkn6eaLEDTfcoICAANcyp9Op3bt3KzEx0f6EAAAAMMV0sQsJCZH08xm7li1bqnnz5q5l/v7+uuyyy5Sammp/QgAAAJhiuthlZmZKkqKiojR16lQuuwIAADQxlj9jl56ebnupe+aZZxQVFaXAwED16tVLW7duPeO6q1evVkJCgkJDQ3XOOecoLi5OK1eutDUPAADA2chysbNbVlaW0tLSlJ6eru3btys2NlaDBw9WQUFBneuHh4frgQce0ObNm7V7926lpKQoJSVF7777biMnBwAAaFo8XuwWLlyo1NRUpaSkqHPnzlq8eL
GCgoK0bNmyOtcfMGCARo4cqUsuuUSdOnXSlClT1K1bN23cuLGRkwMAADQtHi12lZWV2rZtm5KSklxjPj4+SkpK0ubNm3/1/YZhKCcnR19++aX69evnzqgAAABNnunJE+5QVFQkp9OpiIiIGuMRERHau3fvGd9XXFys9u3bq6KiQr6+vvrb3/6mK6+8ss51KyoqVFFR4XpdUlIiSaqqqlJVVZUNP8Wv8/dtlN0AqENj/Z17CscXwDMa+9ji5+dnar16FbucnBzl5OSooKBA1dXVNZad6RKqnVq2bKmdO3eqtLRUOTk5SktLU8eOHTVgwIBa686bN0+zZ8+uNb527VoFBQW5PaskTesd0ij7AVDb22+/7ekIbsXxBfCMxj62DB8+3NR6DsMwDCsbnj17tubMmaOEhAS1a9eu1lMo3njjDdPbqqysVFBQkFatWqURI0a4xpOTk3X8+HG9+eabprZz880365tvvqlzAkVdZ+wiIyNVVFSk4OBg01kbot/MlxtlPwBq2zD3Gk9HcCuOL4BnNPaxxW1n7BYvXqzly5fr+uuvtxzqf/n7+ys+Pl45OTmuYlddXa2cnBxNmjTJ9Haqq6trlLf/FhAQUOMpGaf5+fmZ/iU1VKWzUXYDoA6N9XfuKRxfAM9oqscWy8WusrLS1keHpaWlKTk5WQkJCerZs6cyMjJUVlamlJQUSdLEiRPVvn17zZs3T9LPl1YTEhLUqVMnVVRU6O2339bKlSv17LPP2pYJAADgbGS52N1888166aWXNHPmTFsCjBs3ToWFhZo1a5by8/MVFxen7Oxs14SKvLw8+fj8Z/JuWVmZbr/9dn377bdq3ry5Lr74Yr344osaN26cLXkAAADOVpY/YzdlyhStWLFC3bp1U7du3Wqdily4cKGtAe1WUlKikJAQFRcXN9pn7OLvXdEo+wFQ27bHJno6gltxfAE8o6keWyyfsdu9e7fi4uIkSZ9++mmNZf87kQIAAACNx3KxW79+vTtyAAAAoIE8/kgxAAAA2MPUGbtRo0Zp+fLlCg4O1qhRo35x3dWrV9sSDAAAANaYKnYhISGuz8+FhHCXcwAAgKbIVLHLzMys83sAAAA0HXzGDgAAwEtQ7AAAALwExQ4AAMBLUOwAAAC8hC3F7vjx43ZsBgAAAA1gudg98sgjysrKcr0eO3asWrVqpfbt22vXrl22hgMAAIB5lovd4sWLFRkZKUlat26d1q1bp3feeUdXX3217r33XtsDAgAAwBzLz4rNz893Fbt//etfGjt2rAYNGqSoqCj16tXL9oAAAAAwx/IZu7CwMH3zzTeSpOzsbCUlJUmSDMOQ0+m0Nx0AAABMs3zGbtSoUZowYYIuvPBC/fDDD7r66qslSTt27FBMTIztAQEAAGCO5WL3xBNPKDo6Wnl5eXr00UfVokULSdKRI0d0++232x4QAAAA5lgqdlVVVbrllls0c+ZMRUdH11h299132xoMAAAA1lj6jJ2fn59ef/11d2UBAABAA1iePDFixAitWbPGDVEAAADQEJY/Y3fhhRdqzpw5+uijjxQfH69zzjmnxvLJkyfbFg4AAADmWS52zz//vEJDQ7Vt2zZt27atxjKHw0GxAwAA8BDLxe7QoUPuyAEAAIAGsvwZu9MqKyv15Zdf6tSpU3bmAQAAQD1ZLnbl5eW66aabFBQUpC5duigvL0+SdOedd2r+/Pm2BwQAAIA5lovd9OnTtWvXLuXm5iowMNA1npSUpKysLFvDAQAAwDzLn7Fbs2aNsrKydNlll8nhcLjGu3TpooMHD9oaDgAAAOZZPmNXWFioNm3a1BovKyurUfQAAADQuCwXu4SEBL311luu16fL3NKlS9W7d2/7kgEAAMASy5diH374YV199dX6/PPPderUKS1atEiff/65Nm3apA8++MAdGQEAAGCC5TN2f/jDH7Rz506dOnVKXbt21dq1a9WmTRtt3rxZ8fHx7sgIAAAAEyyfsZOkTp06acmSJX
ZnAQAAQANYPmM3ceJEZWZm6quvvnJHHgAAANST5WLn7++vefPmKSYmRpGRkbruuuu0dOlS7d+/3x35AAAAYJLlYrd06VLt27dP33zzjR599FG1aNFCCxYs0MUXX6zzzz/fHRkBAABgQr2fFRsWFqZWrVopLCxMoaGhatasmc4991w7swEAAMACy8Xu/vvvV2Jiolq1aqVp06bp5MmTmjZtmvLz87Vjxw53ZAQAAIAJlmfFzp8/X+eee67S09M1atQo/e53v3NHLgAAAFhkudjt2LFDH3zwgXJzc7VgwQL5+/urf//+GjBggAYMGEDRAwAA8BDLxS42NlaxsbGaPHmyJGnXrl164okndMcdd6i6ulpOp9P2kAAAAPh1loudYRjasWOHcnNzlZubq40bN6qkpETdunVT//793ZERAAAAJlguduHh4SotLVVsbKz69++v1NRU9e3bV6GhoW6IBwAAALMsF7sXX3xRffv2VXBwsDvyAAAAoJ4s3+5k6NChNUpdSUmJ1qxZoy+++MLWYAAAALDGcrEbO3asnn76aUnSTz/9pISEBI0dO1bdunXT66+/bntAAAAAmGO52G3YsEF9+/aVJL3xxhsyDEPHjx/Xk08+qQcffND2gAAAADDHcrErLi5WeHi4JCk7O1ujR49WUFCQhg4dqv3799seEAAAAOZYLnaRkZHavHmzysrKlJ2drUGDBkmSfvzxRwUGBtoeEAAAAOZYnhV711136dprr1WLFi3UoUMHDRgwQNLPl2i7du1qdz4AAACYZLnY3X777erZs6e++eYbXXnllfLx+fmkX8eOHfmMHQAAgAdZLnaSlJCQoISEhBpjQ4cOtSUQAAAA6sdysXM6nVq+fLlycnJUUFCg6urqGsvff/9928IBAADAPMvFbsqUKVq+fLmGDh2qSy+9VA6Hwx25AAAAYJHlYvfKK6/o1Vdf1ZAhQ9yRBwAAAPVk+XYn/v7+iomJcUcWAAAANIDlYnfPPfdo0aJFMgzDHXkAAABQT5YvxW7cuFHr16/XO++8oy5dusjPz6/G8tWrV9sWDgAAAOZZLnahoaEaOXKkO7IAAACgASwXu8zMTHfkAAAAQANZ/owdAAAAmqZ6PXli1apVevXVV5WXl6fKysoay7Zv325LMAAAAFhj+Yzdk08+qZSUFEVERGjHjh3q2bOnWrVqpa+++kpXX321OzICAADABMvF7m9/+5uee+45PfXUU/L399df/vIXrVu3TpMnT1ZxcbE7MgIAAMAEy8UuLy9PiYmJkqTmzZvrxIkTkqTrr79eL7/8sr3pAAAAYJrlYte2bVsdO3ZMknTBBRfo448/liQdOnSImxYDAAB4kOVid8UVV+if//ynJCklJUV33323rrzySo0bN4772wEAAHiQ5Vmxzz33nKqrqyVJd9xxh1q1aqVNmzZp2LBhuuWWW2wPCAAAAHMsFzsfHx/5+PznRN/48eM1fvx4W0MBAADAOm5QDAAA4CWaRLF75plnFBUVpcDAQPXq1Utbt24947pLlixR3759FRYWprCwMCUlJf3i+gAAAL8VHi92WVlZSktLU3p6urZv367Y2FgNHjxYBQUFda6fm5ura665RuvXr9fmzZsVGRmpQYMG6bvvvmvk5AAAAE2Lx4vdwoULlZqaqpSUFHXu3FmLFy9WUFCQli1bVuf6//jHP3T77bcrLi5OF198sZYuXarq6mrl5OQ0cnIAAICmpV7Pij116pRyc3N18OBBTZgwQS1bttT333+v4OBgtWjRwvR2KisrtW3bNk2fPt015uPjo6SkJG3evNnUNsrLy1VVVaXw8PA6l1dUVKiiosL1uqSkRJJUVVWlqqoq01kbwt+3UXYDoA6N9XfuKRxfAM9o7GOLn5+fqfUsF7vDhw/rqquuUl5enioqKnTllVeqZcuWeuSRR1RRUaHFixeb3lZRUZGcTqciIiJqjEdERGjv3r2mtnHffffpvPPOU1JSUp3L582bp9mzZ9
caX7t2rYKCgkxnbYhpvUMaZT8Aanv77bc9HcGtOL4AntHYx5bhw4ebWs9ysZsyZYoSEhK0a9cutWrVyjU+cuRIpaamWt1cg8yfP1+vvPKKcnNzFRgYWOc606dPV1pamut1SUmJ63N5wcHBjZKz30wetQZ4yoa513g6gltxfAE8o6keWywXuw8//FCbNm2Sv79/jfGoqCjLExhat24tX19fHT16tMb40aNH1bZt21987+OPP6758+frvffeU7du3c64XkBAgAICAmqN+/n5mT6t2VCVzkbZDYA6NNbfuadwfAE8o6keWyxPnqiurpbTWftI8u2336ply5aWtuXv76/4+PgaEx9OT4To3bv3Gd/36KOPau7cucrOzlZCQoKlfQIAAHgry8Vu0KBBysjIcL12OBwqLS1Venq6hgwZYjlAWlqalixZohdeeEFffPGFbrvtNpWVlSklJUWSNHHixBqTKx555BHNnDlTy5YtU1RUlPLz85Wfn6/S0lLL+wYAAPAmli/FLliwQIMHD1bnzp118uRJTZgwQfv371fr1q318svWP+sxbtw4FRYWatasWcrPz1dcXJyys7NdEyry8vJqPMLs2WefVWVlpf785z/X2E56err++te/Wt4/AACAt3AYhmFYfdOpU6f0yiuvaPfu3SotLVWPHj107bXXqnnz5u7IaKuSkhKFhISouLi40SZPxN+7olH2A6C2bY9N9HQEt+L4AnhGUz22WD5jd/LkSQUGBuq6665zRx4AAADUk+XP2LVp00bJyclat26dqqur3ZEJAAAA9WC52L3wwgsqLy/X8OHD1b59e91111365JNP3JENAAAAFlgudiNHjtRrr72mo0eP6uGHH9bnn3+uyy67TL/73e80Z84cd2QEAACACZaL3WktW7ZUSkqK1q5dq927d+ucc86p89FdAAAAaBz1LnYnT57Uq6++qhEjRqhHjx46duyY7r33XjuzAQAAwALLs2LfffddvfTSS1qzZo2aNWumP//5z1q7dq369evnjnwAAAAwyXKxGzlypP74xz9qxYoVGjJkSJN9VhoAAMBvjeVid/ToUcvPhAUAAID7mSp2JSUlrqc0GIahkpKSM67bWE9zAAAAQE2mil1YWJiOHDmiNm3aKDQ0VA6Ho9Y6hmHI4XDI6XTaHhIAAAC/zlSxe//99xUeHi5JWr9+vVsDAQAAoH5MFbv+/fu7vo+OjlZkZGSts3aGYeibb76xNx0AAABMs3wfu+joaBUWFtYaP3bsmKKjo20JBQAAAOssF7vTn6X7X6WlpQoMDLQlFAAAAKwzfbuTtLQ0SZLD4dDMmTMVFBTkWuZ0OrVlyxbFxcXZHhAAAADmmC52O3bskPTzGbs9e/bI39/ftczf31+xsbGaOnWq/QkBAABgiulid3o2bEpKihYtWsT96gAAAJoYy0+eyMzMdEcOAAAANJCpYjdq1CgtX75cwcHBGjVq1C+uu3r1aluCAQAAwBpTxS4kJMQ1EzYkJMStgQAAAFA/pordf19+5VIsAABA02T5PnYAAABomiwXu6NHj+r666/Xeeedp2bNmsnX17fGFwAAADzD8qzYG264QXl5eZo5c6batWtX51MoAAAA0PgsF7uNGzfqww8/5CkTAAAATYzlS7GRkZEyDMMdWQAAANAAlotdRkaGpk2bpq+//toNcQAAAFBfli/Fjhs3TuXl5erUqZOCgoLk5+dXY/mxY8dsCwcAAADzLBe7jIwMN8QAAABAQ1kudsnJye7IAQAAgAYyVexKSkoUHBzs+v6XnF4PAAAAjctUsQsLC9ORI0fUpk0bhYaG1nnvOsMw5HA45HQ6bQ8JAACAX2eq2L3//vsKDw+XJK1fv96tgQAAAFA/pordokWL1L17dwUHB+vw4cMaN26cAgIC3J0NAAAAFpi6j92//vUvlZWVSZJSUlJUXFzs1lAAAACwztQZu4svvljTp0/X5ZdfLsMw9Oqrr55xksTEiRNtDQgAAA
BzTBW7xYsXKy0tTW+99ZYcDodmzJhR5wQKh8NBsQMAAPAQU8UuMTFRH3/8sSTJx8dH+/btU5s2bdwaDAAAANZYflbsoUOHdO6557ojCwAAABrAcrHr0KGDNm7cqOuuu069e/fWd999J0lauXKlNm7caHtAAAAAmGO52L3++usaPHiwmjdvrh07dqiiokKSVFxcrIcfftj2gAAAADDHcrF78MEHtXjxYi1ZskR+fn6u8T59+mj79u22hgMAAIB5lovdl19+qX79+tUaDwkJ0fHjx+3IBAAAgHqwXOzatm2rAwcO1BrfuHGjOnbsaEsoAAAAWGe52KWmpmrKlCnasmWLHA6Hvv/+e/3jH//Q1KlTddttt7kjIwAAAEwwdR+7/zZt2jRVV1dr4MCBKi8vV79+/RQQEKCpU6fqzjvvdEdGAAAAmGC52DkcDj3wwAO69957deDAAZWWlqpz585q0aKFO/IBAADAJMvF7jR/f3917tzZziwAAABoAFPFbtSoUaY3uHr16nqHAQAAQP2ZmjwREhLi+goODlZOTo4++eQT1/Jt27YpJydHISEhbgsKAACAX2bqjF1mZqbr+/vuu09jx47V4sWL5evrK0lyOp26/fbbFRwc7J6UAAAA+FWWb3eybNkyTZ061VXqJMnX11dpaWlatmyZreEAAABgnuVid+rUKe3du7fW+N69e1VdXW1LKAAAAFhneVZsSkqKbrrpJh08eFA9e/aUJG3ZskXz589XSkqK7QEBAABgjuVi9/jjj6tt27ZasGCBjhw5Iklq166d7r33Xt1zzz22BwQAAIA5loudj4+P/vKXv+gvf/mLSkpKJIlJEwAAAE1AvW9QLFHoAAAAmhLLkycAAADQNFHsAAAAvATFDgAAwEuYKnbh4eEqKiqSJN144406ceKEW0MBAADAOlPFrrKy0jUD9oUXXtDJkyfdGgoAAADWmZoV27t3b40YMULx8fEyDEOTJ09W8+bN61yXx4oBAAB4hqli9+KLL+qJJ57QwYMH5XA4VFxczFk7AACAJsZUsYuIiND8+fMlSdHR0Vq5cqVatWrl1mAAAACwxvINig8dOuSOHAAAAGiget3u5IMPPtCf/vQnxcTEKCYmRsOGDdOHH35odzYAAABYYLnYvfjii0pKSlJQUJAmT57smkgxcOBAvfTSS+7ICAAAABMsF7uHHnpIjz76qLKyslzFLisrS/Pnz9fcuXMtB3jmmWcUFRWlwMBA9erVS1u3bj3jup999plGjx6tqKgoORwOZWRkWN4fAACAt7Jc7L766iv96U9/qjU+bNgwy5+/y8rKUlpamtLT07V9+3bFxsZq8ODBKigoqHP98vJydezYUfPnz1fbtm2tRgcAAPBqlotdZGSkcnJyao2/9957ioyMtLSthQsXKjU1VSkpKercubMWL16soKCgM94L7/e//70ee+wxjR8/XgEBAVajAwAAeDXLs2LvueceTZ48WTt37lRiYqIk6aOPPtLy5cu1aNEi09uprKzUtm3bNH36dNeYj4+PkpKStHnzZquxzqiiokIVFRWu16efoFFVVaWqqirb9vNL/H0bZTcA6tBYf+eewvEF8IzGPrb4+fmZWs9ysbvtttvUtm1bLViwQK+++qok6ZJLLlFWVpaGDx9uejtFRUVyOp2KiIioMR4REaG9e/dajXVG8+bN0+zZs2uNr127VkFBQbbt55dM6x3SKPsBUNvbb7/t6QhuxfEF8IzGPraY7ViWi50kjRw5UiNHjqzPWxvd9OnTlZaW5npdUlKiyMhIDRo0SMHBwY2Sod/MlxtlPwBq2zD3Gk9HcCuOL4BnNNVjS72KnR1at24tX19fHT16tMb40aNHbZ0YERAQUOfn8fz8/Eyf1myoSmej7AZAHRrr79xTOL4AntFUjy31ukGxHfz9/RUfH19jIkZ1dbVycnLUu3dvT8UCAAA4a3nsjJ0kpaWlKTk5WQkJCerZs6cyMjJUVlamlJQUSdLEiR
PVvn17zZs3T9LPEy4+//xz1/ffffeddu7cqRYtWigmJsZjPwcAAEBT4NFiN27cOBUWFmrWrFnKz89XXFycsrOzXRMq8vLy5OPzn5OK33//vbp37+56/fjjj+vxxx9X//79lZub29jxAQAAmpQGFTvDMCRJDoej3tuYNGmSJk2aVOey/y1rUVFRrn0CAACgpnp9xm7FihXq2rWrmjdvrubNm6tbt25auXKl3dkAAABggeUzdgsXLtTMmTM1adIk9enTR5K0ceNG3XrrrSoqKtLdd99te0gAAAD8OsvF7qmnntKzzz6riRMnusaGDRumLl266K9//SvFDgAAwEMsX4o9cuSI61Fi/y0xMVFHjhyxJRQAAACss1zsYmJiXI8S+29ZWVm68MILbQkFAAAA6yxfip09e7bGjRunDRs2uD5j99FHHyknJ6fOwgcAAIDGYfmM3ejRo7Vlyxa1bt1aa9as0Zo1a9S6dWtt3br1rHl+LAAAgDeq133s4uPj9eKLL9qdBQAAAA1gqtiVlJQoODjY9f0vOb0eAAAAGpepYhcWFqYjR46oTZs2Cg0NrfNJE4ZhyOFwyOl02h4SAAAAv85UsXv//fcVHh4uSVq/fr1bAwEAAKB+TBW7/v37u76Pjo5WZGRkrbN2hmHom2++sTcdAAAATLM8KzY6OlqFhYW1xo8dO6bo6GhbQgEAAMA6y8Xu9Gfp/ldpaakCAwNtCQUAAADrTN/uJC0tTZLkcDg0c+ZMBQUFuZY5nU5t2bJFcXFxtgcEAACAOaaL3Y4dOyT9fMZuz5498vf3dy3z9/dXbGyspk6dan9CAAAAmGK62J2eDZuSkqJFixZxvzoAAIAmxvKTJzIzM92RAwAAAA1Ur0eKffLJJ3r11VeVl5enysrKGstWr15tSzAAAABYY3lW7CuvvKLExER98cUXeuONN1RVVaXPPvtM77//vkJCQtyREQAAACZYLnYPP/ywnnjiCf3f//2f/P39tWjRIu3du1djx47VBRdc4I6MAAAAMMFysTt48KCGDh0q6efZsGVlZXI4HLr77rv13HPP2R4QAAAA5lgudmFhYTpx4oQkqX379vr0008lScePH1d5ebm96QAAAGCa5ckT/fr107p169S1a1eNGTNGU6ZM0fvvv69169Zp4MCB7sgIAAAAEywXu6efflonT56UJD3wwAPy8/PTpk2bNHr0aM2YMcP2gAAAADDHcrELDw93fe/j46Np06a5Xv/000/2pAIAAIBllj9jV5eKigotXLhQ0dHRdmwOAAAA9WC62FVUVGj69OlKSEhQYmKi1qxZI+nnJ1FER0friSee0N133+2unAAAAPgVpi/Fzpo1S3//+9+VlJSkTZs2acyYMUpJSdHHH3+shQsXasyYMfL19XVnVgAAAPwC08Xutdde04oVKzRs2DB9+umn6tatm06dOqVdu3bJ4XC4MyMAAABMMH0p9ttvv1V8fLwk6dJLL1VAQIDuvvtuSh0AAEATYbrYOZ1O+fv7u143a9ZMLVq0cEsoAAAAWGf6UqxhGLrhhhsUEBAgSTp58qRuvfVWnXPOOTXWW716tb0JAQAAYIrpYpecnFzj9XXXXWd7GAAAANSf6WKXmZnpzhwAAABoIFtuUAwAAADPo9gBAAB4CYodAACAl6DYAQAAeAmKHQAAgJeg2AEAAHgJih0AAICXoNgBAAB4CYodAACAl6DYAQAAeAmKHQAAgJeg2AEAAHgJih0AAICXoNgBAAB4CYodAACAl6DYAQAAeAmKHQAAgJeg2AEAAHgJih0AAICXoNgBAAB4CYodAACAl6DYAQAAeAmKHQAAgJeg2AEAAHgJih0AAICXoNgBAAB4CYodAACAl6DYAQAAeAmKHQAAgJeg2AEAAHgJih0AAICXoNgBAAB4CYodAACAl6DYAQAAeIkmUeyeeeYZRUVFKTAwUL169dLWrVt/cf3XXntNF198sQIDA9W1a1e9/fbbjZQUAACg6fJ4sc
vKylJaWprS09O1fft2xcbGavDgwSooKKhz/U2bNumaa67RTTfdpB07dmjEiBEaMWKEPv3000ZODgAA0LR4vNgtXLhQqampSklJUefOnbV48WIFBQVp2bJlda6/aNEiXXXVVbr33nt1ySWXaO7cuerRo4eefvrpRk4OAADQtDTz5M4rKyu1bds2TZ8+3TXm4+OjpKQkbd68uc73bN68WWlpaTXGBg8erDVr1tS5fkVFhSoqKlyvi4uLJUnHjh1TVVVVA38Cc3xO/dQo+wFQ2w8//ODpCG7F8QXwjMY+tvj5+ally5ZyOBy/uJ5Hi11RUZGcTqciIiJqjEdERGjv3r11vic/P7/O9fPz8+tcf968eZo9e3at8ejo6HqmBnA2aZ1xq6cjAPBCnji2FBcXKzg4+BfX8WixawzTp0+vcYavurpax44dU6tWrX619QIlJSWKjIzUN99886t/TABgFscW1EfLli1/dR2PFrvWrVvL19dXR48erTF+9OhRtW3bts73tG3b1tL6AQEBCggIqDEWGhpa/9D4TQoODubgC8B2HFtgN49OnvD391d8fLxycnJcY9XV1crJyVHv3r3rfE/v3r1rrC9J69atO+P6AAAAvxUevxSblpam5ORkJSQkqGfPnsrIyFBZWZlSUlIkSRMnTlT79u01b948SdKUKVPUv39/LViwQEOHDtUrr7yiTz75RM8995wnfwwAAACP83ixGzdunAoLCzVr1izl5+crLi5O2dnZrgkSeXl58vH5z4nFxMREvfTSS5oxY4buv/9+XXjhhVqzZo0uvfRST/0I8GIBAQFKT0+vdTkfABqCYwvcxWEYhuHpEAAAAGg4j9+gGAAAAPag2AEAAHgJih0AAICXoNgBAAB4CYodzjqFhYW67bbbdMEFFyggIEBt27bV4MGD9dFHHzV42zfccINGjBjR8JAmPfTQQ0pMTFRQUBA3zgY8zFuOLV9//bVuuukmRUdHq3nz5urUqZPS09NVWVnZKPuHZ3n8dieAVaNHj1ZlZaVeeOEFdezYUUePHlVOTs5Z+bD3yspKjRkzRr1799bzzz/v6TjAb5q3HFv27t2r6upq/f3vf1dMTIw+/fRTpaamqqysTI8//rin48HdDOAs8uOPPxqSjNzc3DqXp6SkGEOHDq0xVllZaZx77rnG0qVLDcMwjNdee8249NJLjcDAQCM8PNwYOHCgUVpaaqSnpxuSanytX7/eMAzDyMvLM8aMGWOEhIQYYWFhxrBhw4xDhw659pGcnGwMHz7ceOihh4w2bdoYISEhxuzZs42qqipj6tSpRlhYmNG+fXtj2bJldebOzMw0QkJCGvz7AVA/3npsOe3RRx81oqOj6/8LwlmDS7E4q7Ro0UItWrTQmjVrVFFRUWv5zTffrOzsbB05csQ19q9//Uvl5eUaN26cjhw5omuuuUY33nijvvjiC+Xm5mrUqFEyDENTp07V2LFjddVVV+nIkSM6cuSIEhMTVVVVpcGDB6tly5b68MMP9dFHH6lFixa66qqralzaeP/99/X9999rw4YNWrhwodLT0/XHP/5RYWFh2rJli2699Vbdcsst+vbbbxvldwXAPG8/thQXFys8PNzeXxqaJk83S8CqVatWGWFhYUZgYKCRmJhoTJ8+3di1a5dreefOnY1HHnnE9fpPf/qTccMNNxiGYRjbtm0zJBlff/11nds+/a/j/7Zy5UrjoosuMqqrq11jFRUVRvPmzY13333X9b4OHToYTqfTtc5FF11k9O3b1/X61KlTxjnnnGO8/PLLtfbLGTvA87zx2GIYhrF//34jODjYeO6550z+JnA244wdzjqjR4/W999/r3/+85+66qqrlJubqx49emj58uWSfv6XdWZmpiTp6NGjeuedd3TjjTdKkmJjYzVw4EB17dpVY8aM0ZIlS/Tjjz/+4v527dqlAwcOqGXLlq5/1YeHh+vkyZM6ePCga70uXbrUePxdRESEunbt6nrt6+urVq
1aqaCgwK5fBQAbeeOx5bvvvtNVV12lMWPGKDU1td6/G5w9KHY4KwUGBurKK6/UzJkztWnTJt1www1KT0+XJE2cOFFfffWVNm/erBdffFHR0dHq27evpJ8PgOvWrdM777yjzp0766mnntJFF12kQ4cOnXFfpaWlio+P186dO2t87du3TxMmTHCt5+fnV+N9DoejzrHq6mq7fg0AbOZNx5bvv/9el19+uRITE/Xcc8816PeCswfFDl6hc+fOKisrkyS1atVKI0aMUGZmppYvX66UlJQa6zocDvXp00ezZ8/Wjh075O/vrzfeeEOS5O/vL6fTWWP9Hj16aP/+/WrTpo1iYmJqfIWEhDTODwjAI87WY8t3332nAQMGKD4+XpmZmTXO+MG78V8aZ5UffvhBV1xxhV588UXt3r1bhw4d0muvvaZHH31Uw4cPd613880364UXXtAXX3yh5ORk1/iWLVv08MMP65NPPlFeXp5Wr16twsJCXXLJJZKkqKgo7d69W19++aWKiopUVVWla6+9Vq1bt9bw4cP14Ycf6tChQ8rNzdXkyZMbPBEiLy9PO3fuVF5enpxOp+tf7KWlpQ3aLgBrvOnYcrrUXXDBBXr88cdVWFio/Px85efn1/8XhLMG97HDWaVFixbq1auXnnjiCR08eFBVVVWKjIxUamqq7r//ftd6SUlJateunbp06aLzzjvPNR4cHKwNGzYoIyNDJSUl6tChgxYsWKCrr75akpSamqrc3FwlJCSotLRU69ev14ABA7Rhwwbdd999GjVqlE6cOKH27dtr4MCBCg4ObtDPM2vWLL3wwguu1927d5ck134BNA5vOrasW7dOBw4c0IEDB3T++efXWGYYRr23i7ODw+C/MrxQaWmp2rdvr8zMTI0aNcrTcQB4CY4taOo4YwevUl1draKiIi1YsEChoaEaNmyYpyMB8AIcW3C2oNjBq+Tl5Sk6Olrnn3++li9frmbN+F8cQMNxbMHZgkuxAAAAXoJZsQAAAF6CYgcAAOAlKHYAAABegmIHAADgJSh2AAAAXoJiBwAA4CUodgAAAF6CYgcAAOAlKHYAAABe4v8DmwEtCytbapMAAAAASUVORK5CYII=\n"
          },
          "metadata": {}
        }
      ],
      "source": [
        "# Bar plot with one bar per strategy: the share of that strategy's rows whose\n",
        "# direct_answer_clean is \"yes\".\n",
        "import seaborn as sns\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "\n",
        "def _yes_ratio(frame, strategy):\n",
        "    \"\"\"Return the fraction of `strategy` rows whose direct_answer_clean is \"yes\".\n",
        "\n",
        "    Args:\n",
        "        frame: DataFrame with \"Strategy\" and \"direct_answer_clean\" columns.\n",
        "        strategy: value of the \"Strategy\" column to select (e.g. \"sys1\").\n",
        "\n",
        "    Returns:\n",
        "        The ratio as a Python float between 0 and 1.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: if no row has the requested strategy (the ratio is undefined).\n",
        "    \"\"\"\n",
        "    answers = frame.loc[frame[\"Strategy\"] == strategy, \"direct_answer_clean\"]\n",
        "    if answers.empty:\n",
        "        # Fail with a clear message instead of an opaque ZeroDivisionError.\n",
        "        raise ValueError(f\"no rows with Strategy == {strategy!r}\")\n",
        "    # Mean of the boolean mask == (# yes rows) / (# rows for this strategy).\n",
        "    return float(answers.eq(\"yes\").mean())\n",
        "\n",
        "\n",
        "sys1_yes_ratio = _yes_ratio(data, \"sys1\")\n",
        "sys2_yes_ratio = _yes_ratio(data, \"sys2\")\n",
        "\n",
        "# Explicit figure/axes so every call below targets this plot, not global pyplot state.\n",
        "fig, ax = plt.subplots()\n",
        "sns.barplot(x=[\"System1\", \"System2\"], y=[sys1_yes_ratio, sys2_yes_ratio], ax=ax)\n",
        "# Drop the top/right spines and keep only horizontal grid lines for readability.\n",
        "sns.despine(ax=ax)\n",
        "ax.grid(axis=\"y\")\n",
        "ax.set_ylabel(\"Ratio of definitive answers in the first 3 sentences\")\n",
        "\n",
        "fig.tight_layout()\n",
        "\n",
        "# Save before show(): some backends clear the figure once it has been displayed.\n",
        "fig.savefig(\n",
        "    \"ratio_of_definitive_answers_in_first_3_sentences.png\", dpi=300, bbox_inches=\"tight\"\n",
        ")\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 542
        },
        "id": "TijN9UUM7VMb",
        "outputId": "af7ace8d-f832-496d-8d8a-cc05277d1ba1"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "<html>\n",
              "<head><meta charset=\"utf-8\" /></head>\n",
              "<body>\n",
              "    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n",
              "        <script charset=\"utf-8\" src=\"https://cdn.plot.ly/plotly-2.35.2.min.js\"></script>                <div id=\"e28c605f-1c23-4b65-8387-7f3ca74d0980\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"e28c605f-1c23-4b65-8387-7f3ca74d0980\")) {                    Plotly.newPlot(                        \"e28c605f-1c23-4b65-8387-7f3ca74d0980\",                        [{\"branchvalues\":\"total\",\"domain\":{\"x\":[0.0,1.0],\"y\":[0.0,1.0]},\"hovertemplate\":\"labels=%{label}\\u003cbr\\u003ecount=%{value}\\u003cbr\\u003eparent=%{parent}\\u003cbr\\u003eid=%{id}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\",\"ids\":[\"sys1\\u002fArithmetic\\u002fno\",\"sys2\\u002fArithmetic\\u002fno\",\"sys1\\u002fCommon Sense\\u002fno\",\"sys2\\u002fCommon Sense\\u002fno\",\"sys1\\u002fSymbolic\\u002fno\",\"sys2\\u002fSymbolic\\u002fno\",\"sys1\\u002fArithmetic\\u002fyes\",\"sys2\\u002fArithmetic\\u002fyes\",\"sys1\\u002fCommon Sense\\u002fyes\",\"sys2\\u002fCommon Sense\\u002fyes\",\"sys1\\u002fSymbolic\\u002fyes\",\"sys2\\u002fSymbolic\\u002fyes\",\"sys1\\u002fArithmetic\",\"sys2\\u002fArithmetic\",\"sys1\\u002fCommon Sense\",\"sys2\\u002fCommon Sense\",\"sys1\\u002fSymbolic\",\"sys2\\u002fSymbolic\",\"sys1\",\"sys2\"],\"labels\":[\"no\",\"no\",\"no\",\"no\",\"no\",\"no\",\"yes\",\"yes\",\"yes\",\"yes\",\"yes\",\"yes\",\"Arithmetic\",\"Arithmetic\",\"Common Sense\",\"Common Sense\",\"Symbolic\",\"Symbolic\",\"sys1\",\"sys2\"],\"name\":\"\",\"parents\":[\"sys1\\u002fArithmetic\",\"sys2\\u002fArithmetic\",\"sys1\\u002fCommon Sense\",\"sys2\\u002fCommon Sense\",\"sys1\\u002fSymbolic\",\"sys2\\u002fSymbolic\",\"sys1\\u002fArithmetic\",\"sys2\\u002fArithmetic\",\"sys1\\u002fCommon Sense\",\"sys2\\u002fCommon 
Sense\",\"sys1\\u002fSymbolic\",\"sys2\\u002fSymbolic\",\"sys1\",\"sys2\",\"sys1\",\"sys2\",\"sys1\",\"sys2\",\"\",\"\"],\"values\":[192,191,241,285,202,213,453,454,141,97,62,51,645,645,382,382,264,264,1291,1291],\"type\":\"sunburst\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\
"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}
}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":
\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"wh
ite\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"legend\":{\"tracegroupgap\":0},\"margin\":{\"t\":60}},                        {\"responsive\": true}                    ).then(function(){\n",
              "                            \n",
              "var gd = document.getElementById('e28c605f-1c23-4b65-8387-7f3ca74d0980');\n",
              "var x = new MutationObserver(function (mutations, observer) {{\n",
              "        var display = window.getComputedStyle(gd).display;\n",
              "        if (!display || display === 'none') {{\n",
              "            console.log([gd, 'removed!']);\n",
              "            Plotly.purge(gd);\n",
              "            observer.disconnect();\n",
              "        }}\n",
              "}});\n",
              "\n",
              "// Listen for the removal of the full notebook cells\n",
              "var notebookContainer = gd.closest('#notebook-container');\n",
              "if (notebookContainer) {{\n",
              "    x.observe(notebookContainer, {childList: true});\n",
              "}}\n",
              "\n",
              "// Listen for the clearing of the current output cell\n",
              "var outputEl = gd.closest('.output');\n",
              "if (outputEl) {{\n",
              "    x.observe(outputEl, {childList: true});\n",
              "}}\n",
              "\n",
              "                        })                };                            </script>        </div>\n",
              "</body>\n",
              "</html>"
            ]
          },
          "metadata": {}
        }
      ],
      "source": [
        "# Sunburst chart: the inner ring is the Strategy, the middle ring the ability, and\n",
        "# the outer ring how many direct_answer_clean values are yes or no in each group.\n",
        "fig = px.sunburst(\n",
        "    data,\n",
        "    path=[\"Strategy\", \"ability\", \"direct_answer_clean\"],\n",
        "    # The title makes the figure self-explanatory when viewed without the code.\n",
        "    title=\"Direct answers (yes/no) by strategy and ability\",\n",
        ")\n",
        "fig.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 564
        },
        "id": "lFzUDlCs7VMb",
        "outputId": "a45375a1-afd2-499d-8458-413f34aecc70"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "ability: Symbolic\n",
            "sys1 yes ratio: 0.23484848484848486\n",
            "sys2 yes ratio: 0.19318181818181818\n",
            "ability: Arithmetic\n",
            "sys1 yes ratio: 0.7023255813953488\n",
            "sys2 yes ratio: 0.703875968992248\n",
            "ability: Common Sense\n",
            "sys1 yes ratio: 0.36910994764397903\n",
            "sys2 yes ratio: 0.25392670157068065\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 600x404 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkEAAAGHCAYAAAC3XYaZAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAVnRJREFUeJzt3Xlczdn/B/DXrSRaUSpbSZZCmUF2yRbZiRpDsm8jO2OtGLuxjJ2MMhgxlsGgGkqyM2PGd5C0GkuEypZU5/eHR/fnTqV7P92E+3o+Hj0eOp9zz31nPl2vOZ/zOR+ZEEKAiIiISMNolXQBRERERCWBIYiIiIg0EkMQERERaSSGICIiItJIDEFERESkkRiCiIiISCMxBBEREZFGYgj6SAghkJ6eDm7bRERE9GEwBH0knj17BmNjYzx79qykSyEiItIIDEFERESkkRiCiIiISCMxBBEREZFGYggiIiIijcQQRERERBqJIYiIiIg0EkMQERERaSSGICIiItJIDEFERESkkRiCiIiISCMxBBEREZFGYggiIiIijcQQRERERBqJIYiIiIg0EkMQERERaSSdki6AiIioJHl6hZV0CZ+k3ds7lHQJRcaZICIiItJIDEFERESkkRiCiIiISCMxBBEREZFGYggiIiIijcQQRERERBrpsw9BO3bswMiRI9GoUSOULl0aMpkMgYGBRRozMzMTDRo0gEwmQ506ddRTKBEREX1Qn/0+QbNnz0ZiYiJMTU1haWmJxMTEIo/p7++P27dvq6E6IiIiKimf/UxQQEAAEhIS8OjRI4waNarI4128eBFLlizBkiVL1FAdERERlZTPPgS1b98eVlZWahkrIyMDgwYNQsuWLTFmzBi1jElEREQl47O/HKZOM2fORFJSEo4cOQKZTFbS5RAREVERMAQpKTIyEqtXr8aKFStQo0aNki6HiIiIiuizvxymDi9evMDgwYPRrFkzjBs3rqTLISIiIjXgTJASpkyZgnv37uHYsWPQ0mJuJCIi+hwwBBUiIiICGzduxLJly1CrVq2SLoeIiIjUhNMahbh69SoAYOrUqZDJZApfABAdHQ2ZTAYTE5OSK5KIiIhUxpmgQtSrVw9Dhw7N99jWrVthbGwMd3d3lC1b9gNXRkREREXBEPSOlJQUpKSkwNTUFKampgDe7jPUvn37fPtv3boVFhYWCAgI+JBlEhERkRp89iEoICAAUVFRAIBr167J2yIiIgAALVu2xLBhwwAAa9euhb+/P3x9feHn51cS5RIREdEH8tmHoKioKAQFBSm0nTlzBmfOnJF/nxuCiIiISHPIhBCipIsgID09HcbGxkhLS4ORkVFJl0NEpDE8vcJKuoRP0u7tHUq6hCLj3WFERESkkRiCiIiISCMxBBEREZFGYggiIiIijcQQRERERBqJIYiIiIg0EkMQERERaSSGICIiItJIDEFERESkkRiCiIiISCMxBBEREZFGYggiIiIijcQQRERERBqJIYiIiIg0EkMQERERaSSGICIiItJIDEFERESkkRiCiIiISCMxBBEREZFGYggiIiIijcQQRERERBqJIYiIiIg0EkMQERERaSSGICIiItJIDEFERESkkRiCiIiISCMxBBEREZFGYggiIiIijcQQRERERBrpsw9BO3bswMiRI9GoUSOULl0aMpkMgYGBKo0RFRWFyZMno2HDhqhQoQL09PRQp04dTJ8+HampqcVSNxERERUvHakvfP36NY4dO4YePXpAJpMhMzMTP//8M548eYI+ffqgWrVq6qxTstmzZyMxMRGmpqawtLREYmKiymO4u7sjJSUFLVu2hJeXF2QyGSIiIrB06VL88ssvOHv2LMzNzYuheiIiIioukkLQv//+C2dnZyQkJODVq1fQ1dVFt27dEBYWBgDw8/PD2bNnUbduXbUWK0VAQABq1qwJKysrLF68GDNmzFB5jIkTJ2LgwIGoVKmSvE0Igb
Fjx2LDhg2YN28e1q1bp86yiTSap1dYSZfwSdq9vUNJl0D0SZF0OWz69Om4f/8+NmzYAF1dXYSGhiIsLAyrV6/GhQsXYGxsDF9fX3XXKkn79u1hZWVVpDGmT5+uEIAAQCaTYc6cOQCAU6dOFWl8IiIi+vAkzQSFhoZi2rRpGDFiBABg3759aNCgAcaNGwcAGDlypEbMjJQqVQoAoKMj+aoiERERlRBJM0GvX79WmBkJCwtDx44d5d+bmJggLS2t6NV95H788UcAUPjZiYiI6NMgKQTVrVsXwcHByM7ORlhYGBITE+Hq6io/HhYWBltbW7UVCQAvX75EUlKSWscsiqtXr8Lf3x8VK1bEtGnTSrocIiIiUpGk6zjTpk1Dnz59UL58eWRkZOCLL76Ai4sLHj9+jG+++QaHDx/G2rVrlRorIyMDBw8exN27d1G3bl24urpCJpPl6bd3714MGTIE2dnZUkpWq7i4OHTp0gXZ2dnYvXs3TE1NS7okIiIiUpGkENSrVy8cPHgQAQEBqFy5Mvz8/AAAKSkpOHjwIL799luMHj260HHu3r0LFxcXxMbGQggBmUwGW1tbbN68Gc7OzlJKK3bx8fFwcXFBSkoK9u3bBxcXl5IuiYiIiCSQvKK3e/fu6N69u0Kbra0tnj59Cj09PaXGmDFjBhITE7Fy5Uq4urri9u3bWLx4Mdq1a4dly5Zh4sSJUssrFnFxcXBxccH9+/exd+9edO3ataRLIiIiIokkrQny9PSU7wn0Lm1tbaUDEPD2LjMfHx/4+Pigdu3a6NKlCyIjIzF16lRMnjwZkydPllJesXg3AAUHB6NHjx4lXRIREREVgaQQdOLECfz9999FfvO0tDTUqlVLoU0mk2HRokXYsGEDfvjhBwwaNOiDrQNKSUnBzZs3kZKSotCeewns3r172L17N3r16vVB6iEiIqLiI+lymI+PD7Zu3YoRI0bA0NBQ8ptXq1YNFy5cwPDhw/McGzlyJCwsLNC/f388efIEbm5ukt4jICAAUVFRAIBr167J2yIiIgAALVu2xLBhwwAAa9euhb+/P3x9feXrnADAxcUFSUlJaNq0Kf7+++98A+C7/YmIiOjjJykEnTx5EtHR0TAxMQEANG/eXL5xYC6ZTIYTJ068d5wBAwbA398flStXhoeHB+zt7RWO9+jRA2FhYejevTsiIyOllIqoqCgEBQUptJ05cwZnzpyRf58bggqS+7yx8+fP4/z58/n2YQgiIiL6tMiEEELVF1lbW+d7G/t/xcfHv/d4VlYWPD09sX//fujp6eHly5f59rtx4wY6deqEf//996O4Rb44pKenw9jYGGlpaTAyMirpcohKFJ8dJg2fHSYNzzdpPofzTdJMUEJCgnreXEcHv/zyC86fP49Lly4V2M/Ozg5XrlzBP//8o5b3JSIiIiryQ6/u3buHpKQk1KlTR355LCcnB1payq+5btq0KZo2bfrePqamph/t3kFERET06ZF0dxgArF+/HpUrV0bVqlXRokULXLx4EQCwZs0atG3bFq9evVJbkURERETqJikEbdmyBd988w2++OIL+Pr64t1lRa1bt8a1a9fg7++vtiKJiIiI1E1SCFq1ahU8PDxw5MgRjBw5UuGYo6MjRo8ejR07dqilQCIiIqLiICkExcXFvXd9TrVq1fJsOEhERET0MZEUgipVqvTeu7lCQ0NhZWUluSgiIiKi4ibp7rDhw4dj5syZqFSpEjw8PAC83Rzx6dOnmDdvHg4cOID58+erNKYQAj/88AN++eUXPHjwIN/9gGQyGWJjY6WUTERERKRAUgiaPn06YmNjsWDBAixcuBAA0KlTJwBvw0z37t0xffp0lcZctGgRZs+eDUtLS9ja2qp0iz0RERGRqiSFIJlMhi1btmDw4MEIDg7G7du3kZmZiWrVqqFHjx7o3r27ymP++OOPcHV1xZEjR6CtrS2lLCIiIiKlFWmzxObNm6N58+ZqKeTevXsYP348AxARER
F9EJKuOdnb22PSpEkICQlBRkaGWgqpX78+/v33X7WMRURERFQYSSGoQoUK2LBhAzp37ozy5cujQ4cOWL58Oa5duya5kEWLFmHDhg0IDAzEixcvJI9DREREpAxJIej06dNITU3FqVOnMHPmTGhra2PevHlo0KABKleujMGDB2P37t0qjdm8eXM0adIEQ4cOhZGREbS1tfN86egU+VFnRERERACKsCaodOnSaNWqFVq1agUAyM7ORkBAAKZPn46goCBs374dnp6eSo83bdo0nDhxAg0aNICtrS20tbWhp6cntTwiIiKi9yrS1MqrV68QFhaGI0eO4OjRo7h//z5q1KiBoUOHokePHiqN9csvv6BHjx44cOBAUUoiIiIiUoqkELRmzRocPXoUp06dwps3b+Dk5IRx48ahe/fusLOzk1TIs2fP0L59e0mvJSIiIlKVpBA0fvx4AIC+vj62bNkCDw+PIq/XcXZ25t1hRERE9MFISi5//fUXjhw5giNHjmDQoEEYM2YMOnTogK5du6JLly4wMzNTeczx48ejU6dO0NLSwsCBA1G2bNl8+1WrVk1KyUREREQKZEIIUZQBUlJScOzYMfz2228ICQnBs2fP0KhRI7Rs2RLLly9Xepx3H5Mhk8kK7JffM8U+B+np6TA2NkZaWhqMjIxKuhyiEuXpFVbSJXySdm/vUNIlfJJ4vknzOZxvRb7n3NTUFB4eHqhatSpsbGywePFiXLx4EZcuXVIpBM2dO/e94YeIiIhInSSFoMzMTJw/fx4REREIDw/HhQsX8Pr1axgZGaF3795wdXWFq6urSmP6+flJKYWIiIhIEkkhyNjYGJmZmZDJZGjcuDGmTZuGjh07okmTJkV69pcQAqGhobhy5QqSkpIwbtw41K1bF6mpqUhKSoKDg4PksYmIiIjeJSkE9e/fH506dUL79u1Rrlw5tRTy9OlTuLm54eLFixBCQCaToXfv3qhbty6+//57rFq1ChcuXIC9vb1a3o+IiIg0m6THZmzduhV9+/ZVWwACgG+//RY3btxAcHAwbty4gXfXa8+ZMwf29vaYMmWK2t6PiIiINJukEAQAmzdvRoMGDfDmzRsAwOHDh1G9enUYGxvDx8cHOTk5Ko138OBBTJw4Ee7u7jAxMVE4pquriwEDBuD06dNSyyUiIiJSIHkmaNSoUUhLS4OWlhaSk5PRv39/6OjooFWrVli3bh3WrFmj0pjPnj2DsbFxgcdzwxYRERGROkgKQT/88ANatWqFW7duQVtbGzt27IC2tjYuX76MI0eOwMPDAwEBASqN2bBhQ+zYsQOvX7/OcywtLQ0bN25E48aNpZRLRERElIekEBQTE4Ovv/4apUqVAgAcP34crq6u8pmcli1bIj4+XqUxFy1ahP/9739wdHTEsmXLIJPJcPz4caxcuRJffPEF4uPjMWfOHCnlEhEREeUhKQRVrFgRDx48APB2p+OoqCi0a9dOfvzRo0fQ19dXacyWLVvi2LFjAIAVK1ZACIFVq1Zh8uTJePXqFXbv3g0XFxcp5RIRERHlIekW+U6dOmHp0qUoXbo0zp07By0tLfTo0QMAEB0djfXr16Nt27Yqj+vi4oKbN2/i6tWriImJQWZmJqpVq4YmTZpAV1dXSqkAgB07duD06dO4cuUKrl27hszMTGzbtg3e3t4qjZOTk4N169Zh8+bNuH37NgwMDNC+fXssWLAANjY2kusjIiKiD09SCJo/fz7++OMPzJgxAzKZDKtXr4a5uTmio6NhZ2cHMzMzLFy4UKUxMzMzUapUKchkMjRo0AANGjRQOP7mzRtkZGTA0NBQ5Xpnz56NxMREmJqawtLSEomJiSqPAQAjR45EQEAA6tatCx8fH9y7dw979uxBaGgozp8/j5o1a0oal4iIiD48SZfDzMzMcP78eVy+fBl3797FN998AwAwMTHBnDlz8Mcff6B69eoqjVmmTBns2LGjwOPbtm2DlZWVlHIREBCAhIQEPHr0CK
NGjZI0Rnh4OAICAtC6dWv88ccfWLJkCX766SccPHgQT548kf8dEBER0adB8gNUtbS08OWXXyq0mZubw9/fX+kxkpKSkJCQAODtIzPCw8PzDTqvX7/G/v37kZWVJanW9u3bS3rdu7Zs2QLg7SzYu5fmOnfujDZt2iA0NBRJSUmoVq1akd+LiIiIil+RniJ/+/ZtJCcnIzs7O9/jrVu3fu/rt23bBn9/f8hkMshkMgQFBSEoKCjfvkKIEt0xOiIiAvr6+mjRokWeY66uroiIiMCpU6cwcODAEqiOiIiIVCUpBEVHR8Pd3R3Xr1/P93jus78KCke5evbsCWtrawghMGTIELRp0waDBg3K069UqVKoVasWGjVqJKXcInvx4gXu37+PevXq5fuA2Ny1QDExMR+6NCIiIpJIUggaPXo0YmNjMWXKFNSpUwdaWtKevuHo6AhHR0cAwJAhQ+Dt7Q0vLy9JYxWntLQ0AChwR2sjIyOFfp8jT6+wki7hk7R7e4eSLoGIiAogKQRduXIFY8eOxZIlS9RWyJ07d1C+fHm1jUdERET0PpKmcAwMDNS+L07lypURGRmJnj17yhdAX7x4ES4uLnB0dMSKFSvU+n6qyJ0BKmimJz09XaEfERERffwkhaAhQ4Zgx44dku/Wys+hQ4fg5uaGsLC3l13S0tLQtWtX/PXXX8jOzsbUqVOxfft2tb2fKvT19WFpaYn4+Ph81znlrgXiPkFERESfDkmXw6ysrHDhwgWULl0a7u7u+PLLL2FpaZmnnyrrexYuXIh69eohMjISOjo62LVrF54/f45bt26hSpUqcHNzww8//FBia4acnZ2xe/dunDlzJs9dbyEhIQAKvxuOiIiIPh6SQtCIESPkf967dy/27t2bp49MJlMpsFy7dg3Lli2TX1I6evQo2rdvjypVqgAA3NzcMHPmTCnlqiQlJQUpKSkwNTWFqampvH3EiBHYvXs35syZg7CwMPleQceOHUNERAQ6duwoeTNHIiIi+vAkhaDw8HB11wETExO8ePECwNvNEU+dOoX58+fLjz9//lz+1HpVBQQEICoqCsDbsJXbFhERAeDtw1uHDRsGAFi7di38/f3h6+sLPz8/+RguLi4YNmwYAgIC8OWXX6JLly64f/8+goODUb58eaxZs0ZSbURERFQyJIUgZ2dnddeB1q1bY9myZbCyskJ4eDgyMjLkD2VNTU3F5s2b0axZM0ljR0VF5dmE8cyZMzhz5oz8+9wQ9D6bNm1C/fr1sXnzZqxevRoGBgbo1asXFixYgBo1akiqjYiIiEqGTAghpL44IyMDjx49wn+HSE1NRVxcHHr27Kn0WAkJCXBxcZE/3HT69OlYtGgR4uLiUK9ePWhpaeH06dP44osvpJb7UUtPT4exsTHS0tLk+w59TLhPkDTcJ0ganm/S8HyThuebNJ/D+SZpJujly5cYPnw49uzZg5ycnHz71K1bV6UQZG1tjb///hthYWGoXLkymjRpAgDQ1dVF3759MWnSJPnGikRERERFJSkEzZo1Cz///DO6du0KKysrrFu3Dh07doSdnR1Wr14NmUyGuXPnqjTmq1evYGhoiN69eyu0V6lSpcDniRERERFJJWmfoL1796J///44dOgQZs2aBQCYNGkSVq5ciaysLDg5OeHChQsqjWlmZgYPDw/88ssvePXqlZSyiIiIiJQmKQQ9evRIvkj5v8/N0tLSQv/+/VXe2LB169Y4fPgw+vXrBzMzM/Tt2xd79uyR3zFGREREpE6SQlDVqlXx119/AQDKli0LMzMzXL16VX786dOnKoeXo0eP4vHjxzh48CD69++Pc+fOwdPTExUrVkSfPn3w888/4/nz51LKJSIiIspD0pqgUaNG4dtvv4WNjQ2+/fZb9OrVC2vWrIGNjQ20tbWxevVqSbezlylTBt27d0f37t0BAH/++ScOHz4MPz8/HDx4EADQvXt3eHp6ok+fPtDRkVQ+ERERkbSZoPHjx+Orr7
7Czz//DACYPXs2ypcvj+HDh2PIkCHQ0dHBqlWrJBeVnZ2N33//HVu2bMGmTZsAAEIICCFw9+5d9O/fH46OjoiNjZX8HkRERKTZJE2llCpVCj/99BNSUlIAvL2D68aNGzh58iTevHmDtm3bqrzXTUZGBo4fP44DBw7gyJEjSE1NhY6ODjp06IAlS5agZ8+eMDAwAABcv34d7dq1w9ChQ+W7PhMRERGpokjXk959tlaZMmXQpUsXyWNVqFABGRkZkMlkaNOmjfySV7ly5fL0tbe3h5eXF9atWyf5/YiIiEizfTSLar744gt4enqib9++MDc3L7T/119/jUaNGn2AyoiIiOhz9NGEoOPHj8svd+V6+vQpbt26BWtr6zzByMHBAQ4ODh+yRCIiIvqMSFoYrU5nz56Fo6Njnoeyrlq1CpaWlmjevDmqVKmCqVOnllCFRERE9Dkq0RB07do1tGvXDq9evcKAAQPk7b///jsmTZqEatWqYfLkyWjZsiVWrFiBAwcOlGC1RERE9Dkp0uWw6OhoXLlyBUlJSejXrx9sbGyQmZmJlJQUVKpUqdDXz58/HzY2Nrhy5Qr09PTk7b6+vjA3N8elS5dgbGwMIQRatWqFtWvXolevXkUpmYiIiAiAxJmgjIwM9OvXD/b29hgwYABmzZqF27dvA3gbbBo2bIikpKRCx4mMjISXl5dCAEpISMC5c+cwYsQIGBsbAwBkMhl69+6NGzduSCmXiIiIKA9JIWju3Lk4dOgQlixZgtDQUAgh5MemTJkCExMTTJs2rdBxnj59igoVKii07d+/HzKZDO7u7grt+vr6ePLkiZRyiYiIiPKQFIJ+/vln+Pj4YMqUKahfv77CMWNjYwwdOhShoaGFjlOlShXExMQotG3fvh0ODg55xo2JiYGVlZWUcomIiIjykBSCUlJSUK1atQKP6+np4fXr14WO079/f2zatAkRERF4+fIl5s6di2vXrmHSpEkK/ZKTk7Fjxw7069dPSrlEREREeUgKQXXr1sWBAwcULoPlev36NQIDA1GvXr1Cx5k2bRrMzc3Rrl07GBoa4rvvvkPXrl0xcOBAeZ9ly5ahadOmyMrKwvjx46WUS0RERJSHpLvD5syZg969e6Nt27bo06cPZDIZoqOj8eLFCyxduhR//vkngoODCx3H0NAQly5dwsaNGxEXF4cGDRpgxIgRCn1+/PFHGBoaYt++fQqP6SAiIiIqCkkhqEePHggKCsKECRNw6tQpAMCECRMghEDp0qWxfPnyPAubC2JkZPTeRdTnz5+X3yVGREREpC6S9wkaMGAAevXqhbCwMMTExCAzMxPVqlWDq6srKlasqLYCGYCIiIioOEgKQQ8ePICFhQX09fXRs2dPNZdEREREVPwkLYyuUqUK2rVrhy1btuDx48fqromIiIio2EkKQRMmTMCdO3cwcuRIVKpUCZ07d0ZQUBDS09PVXR8RERFRsZAUgpYvX45bt27hn3/+gZ+fH1JTUzFkyBCYm5ujZ8+e+Pnnn/Hy5Ut110pERESkNkV6gKqdnR3s7OwwY8YMPHjwAIcOHYKvry8OHz4MmUyGrKwslcf86aefsHfvXvmzyGxtbdG3b1+FvYOIiIiIiqpIIQgAhBCIjIzEgQMH8OuvvyI5ORmmpqb48ssvVRonIyMD3bp1w8mTJ6Gvr4+aNWtCS0sLkZGR+O233/DTTz/h8OHDKF26dFFLJiIiIpL+FPlDhw5hyJAhqFixItq2bYtdu3ahU6dOCA8PR3JyMo4fP67SmAsWLMCJEyfw3Xff4eHDh7hy5QouXbqEhw8fYsWKFfJjREREROogKQRVqFABvXr1woEDB9C1a1ccPXoU9+/fx4YNG+Ds7AyZTKbymDt27MDIkSMxY8YM6Onpydt1dXUxfvx4jBkzBjt27JBSLgDg0qVLcHNzg4mJCfT19dG0aVPs2bNHpTHu3buH8ePHw97eHvr6+jA3N0fLli3x008/ITs7W3JtRERE9O
FJuhzWrVs3fPXVV+jcuTN0dXXVUsj9+/ffewnNwcEBAQEBksYODw+Hq6sr9PT04OnpKX8Mh4eHB+7cuYPJkycXOkZcXByaNGmCx48fw9XVFd26dUN6ejoOHjwILy8vnDx5Etu2bZNUHxEREX14kmaCdu/ejR49eqgtAAGAlZUVTp48WeDxyMhIWFtbqzxuVlYWhg8fLl9ftHnzZnz//ff466+/UKtWLcycOROJiYmFjrN8+XKkpKRg5cqVOHbsGJYsWYINGzbgxo0bqFatGgIDA5Uah4iIiD4OSs8ELV++HAMGDICFhQXmzZtXaH+ZTIY5c+YoXciwYcMwffp0mJqaYvbs2TA3NwcAPH78GIsWLcKuXbuwfPlypcfLdfLkScTGxmLw4MFo0KCBvN3Y2BgzZ86Et7c3goKCMHfu3PeOExcXBwBwc3NTaDcxMUHLli2xa9cupKSkwMrKSuUaiYiI6MNTOgQtW7YM9vb2cHNzg5+fX6H9VQ1BkydPxj///IN169Zh/fr1qFChAmQyGVJSUiCEwKBBgzBx4kSlx8sVEREBAOjYsWOeY66urgAgfwjs+9SrVw8hISE4evQoxo8fL29PTU3FmTNnYGFhAXt7e5XrIyIiopKhdAi6fPkyKleuDACIj49XeyFaWloIDAzE4MGDsXfvXsTGxgIAatSogb59+8LZ2VnSuDExMQCAmjVr5jlmYWEBAwMDeZ/3mTp1Kg4fPoyJEyfi+PHjcHBwkK8JKlu2LA4cOIAyZcpIqpGIiIg+PKVDUNWqVeV/Ls5LPs7OzpIDT37S0tIAFPw0eiMjI3mf9zE3N8e5c+cwYMAAHDt2TL4FQJkyZTBq1Cg4OjqqrWYiIiIqfpIWRjdv3hzHjh0r8PjevXvh4uJS4mOq0+3bt9GiRQs8evQIp0+fxrNnz3Dnzh3MnTsX8+fPR7t27XibPBER0SdE0i3y58+fR0pKSr7H3rx5g9OnT+PcuXMlPibw/zNABc32pKeno1y5coWO4+3tjcTERMTFxcHCwgIAYGBggG+//RbJyclYtWoVdu/eja+//lrlGomIiOjDU3omyN/fH9ra2tDW1gbwNhTkfv/ul56eHtatW4e2bduWyJj/lbsWKL91Pw8ePMDz58/zXS/0rmfPnuHMmTOws7OTB6B35c5Q/fnnnyrXR0RERCVD6ZmgBg0awMvLCwAQFBQkv2Prv0qVKoVatWph1KhRJTLmfzk7O2PRokUIDQ2Fp6enwrGQkBB5n/fJzMwEgAJnqh49egQAfK4ZERHRJ0QmhBCqvij3Tq7cAKMOxTEm8HazxNq1a+Pu3bs4f/68fK+gtLQ0ODk5ISEhAdHR0fKNGO/fv4+0tDRYWloqLKauU6cOoqOjsWXLFgwbNkzenpqaimbNmuHmzZsICwtD+/btJdWZnp4OY2NjpKWlwcjISPLPW1w8vcJKuoRP0u7tHUq6hE8SzzdpeL5Jw/NNms/hfJO0MPqnn35C69at1VpIcYwJADo6OggICEBOTg5at26NESNGYPLkyXB0dMStW7ewcOFChZ2oZ8yYATs7Oxw4cEBhnJUrV0JHRwfDhw9H+/btMXXqVAwbNgy1atXCzZs30adPH8kBiIiIiD48pS+HxcXFoXr16pDJZGjVqhUAICkp6b2vqVatmtKFFOeCYhcXF0RFRcHX1xfBwcF48+YN6tevjyVLlsDDw0OpMTp37oyzZ89i2bJliIqKwqlTp6Cnpwc7OzvMnTsXo0ePLrb6iYiISP2UDkHNmjXDtm3b4ObmBmtra6WeFP8x3TLu5OT03lvwcwUGBiIwMDDfY40bN1b5yfNERET0cVI6BM2cOVP+lPe5c+cqFYKIiIiIPlZKh6B3n5elzLPDiIiIiD5mkhZGExEREX3qJO0YnZ2djZUrV+LgwYNITk7Od+2PTCaTPwSViIiI6GMjKQRNmjQJa9asgaWlJWxtbaGlxQklIiIi+rRICkEHDhxA586dcejQIfkjL6SIjI
xU+TXFsZcQERERaR5JIejJkydwc3MrUgACgIkTJ+Lq1atK9RVCQCaTfVS33RMREdGnS1IIcnZ2RnR0dJHffMiQIRg3bhwWLFiASpUqFXk8IiIiImVJCkE//PAD2rRpg9q1a2P06NGS1wR9/fXXmDp1KnR0dPJ9cCoRERFRcZGUXmrWrIm7d+/Cx8cHpUqVgra2dp4vHZ3C85WJiQl69uxZ4A7NRERERMVF0kyQl5eXwo7RWVlZSoWe/EycOBHr16/Ho0ePYGZmJmkMIiIiIlVJSi7qnLlp3Lgxtm3bprbxiIiIiJTBDX6IiIhIIyk9E9SjRw989913qF+/Ptq2bVtof5lMhhMnThSpOCIiIqLionQIunv3Ll68eAEAiIuL41PkiYiI6JOmdAi6fPmy/M8JCQnFUQsRERHRB1PomqCcnBy4u7vj0qVL8ra4uDj5rBARERHRp6jQEJSeno79+/cjJiZG3lazZk3s27evWAp6/fo17t69i6SkpHy/iIiIiNSh0MthJiYmqFixIpYsWQIrKytUrVoVQggkJCQUGkqqVaumdCFPnz7FkCFDcPToUWRlZRXYj88OIyIiInVQak3Q4sWLMXz4cPkT3GUyGfz9/eHv7//e16kSWCZPnoxDhw7B3d0dderUkfwoDiIiIiJlKBWCvL290bRpU4SFheHJkyfw9/eHlZUVvL291VbI8ePHMXz4cGzcuFFtYxIREREVROm7w+rUqYM6deoAgHwWyMvLS22FPHv2DA4ODmobj4iIiOh9JF1zysnJUWsAAoC2bdsiKipKrWMSERERFUTaU08B3Lt3D2vWrMGVK1eQlJSELVu2oFWrVvj7779x8eJFDBs2TKXxVq9ejdatW6Np06Zo06YNOnfunO+GjLnrkoiIiIiKQlII+ueff9CqVSu8ePECNWvWRExMDF69egUAiIiIwMSJE1GxYkV0795d6TG1tLRQrlw5XLx4ERcvXsSyZcsUjgshIJPJeHcYERERqYWkEDR16lRUrFgR4eHhkMlkqFSpkvyYj48Pzpw5Az8/P5VC0Lhx43Dz5k2MHz8e9evXh7a2tpTSiIiIiJQiKQRFRUVh/vz5sLS0RHJycp7jbdu2xaFDh1Qa89SpUxgxYgRWrlwppSQiIiIilUhaGK2lpYXnz58XePzevXswMDBQaUxdXV3Uq1dPSjlEREREKpMUgjp16oS1a9fi1q1b8rbcRcxXrlzBypUr0aFDB5XGdHd3R0REhJRyiIiIiFQm6XLYsmXL0Lx5c9SrVw8ODg6QyWSYPn061q5di6NHj8LExAQLFixQaUw7OzuMHz8ee/fuxYABA1CjRg1YWVnl6afuW/OJiIhIM0kKQVWrVsUff/yBWbNmYffu3RBC4OrVq7hx4wa6deuGZcuWoXr16iqNOX78eABv9yDavn17vn1kMpnkEHTp0iX4+vri7NmzePPmDerXr49JkyahX79+Ko3z8OFDLFq0CEeOHMGdO3egr6+PWrVqwcvLC6NHj5ZUGxEREX14kvcJMjMzw+bNm7Fp0yakpKQgMzMTFStWRKlSpSSNFx4eLrUUpcZ2dXWFnp4ePD09YWhoiH379sHDwwN37tzB5MmTlRrn6tWr6NixI54+fYouXbrA3d0dz58/x40bN3D48GGGICIiok+I5BCUSyaTwczMrMiFODs7F3mM/GRlZWH48OHQ0tJCZGQkGjRoAACYO3cunJycMHPmTLi7u+d76e1d6enp6NGjB4C3657++4iPrKysYqmfiIiIiodSIaht27YFHouLi8OdO3cAALVq1YKlpSWAt+HoxIkTKhWj7l2oAeDkyZOIjY3F4MGD5QEIAIyNjTFz5kx4e3sjKCgIc+fOfe8469evR1JSErZu3ZrvM850dIqcJ4mIiOgDUupf7ri4uDyPsEhMTMzTLzo6GtHR0ZDJZGjevLlKhfzzzz9o3bo1nj9/rrZdqHNfCwAdO3bMc8zV1RXA2z2KChMcHAyZTIY+ff
ogOjoaoaGhePXqFerUqYNOnTpBV1dXpbqIiIioZCkVghISEhS+37p1K2bPno0zZ87AxsZG4djNmzfRvn17DB06VKVCpk6dCjMzM/zvf/9T2y7UABATEwMAqFmzZp5jFhYWMDAwkPcpSGZmJq5duwYzMzOsWbMGvr6+yMnJkR+3sbHBwYMHUb9+fZVqIyIiopIjaZ+gpUuXYsyYMXkCEADUqVMH33zzDRYvXqzSmFFRURg9ejQsLS3zfXBq27ZtcePGDZVrTUtLA/D28ld+jIyM5H0K8uTJE2RnZ+Px48eYN28eli5diuTkZPz777+YM2cO4uPj0a1bN2RkZKhcHxEREZUMSSEoMTERJiYmBR43MDDI93LZewsphl2o1SV31ic7OxtjxozB5MmTUbFiRVSuXBnz5s1D3759kZiYiF9++aVE6iMiIiLVSQpBtra22LRpE548eZLnWGpqKjZt2pTv5af3KY5dqIH/nwEqaLYnPT29wFmi/44BIN/Lcbltly9fVrk+IiIiKhmSbmnKnf2ws7PD119/jZo1a0Imk+H27dvYtWsXHj58iAMHDqg0ZnHsQg38/1qgmJgYNGzYUOHYgwcP8Pz5czg5Ob13DH19fVSuXBl3797NdwYsty13ITcRERF9/CTNBPXu3RuHDx+Gubk5Vq1ahbFjx2LMmDFYsWIFbG1tceLECXTr1k2lMXN3ofb29satW7fku1CHhoaiW7duOH/+vMq7UAP/v/9QaGhonmMhISEKfd4nd5uA69ev5zmW22Ztba1yfURERFQyZEIIUZQB7t27h6SkJACAlZWVfJ+gohBCqGUXauDtJoa1a9fG3bt3cf78efleQWlpaXByckJCQgKio6PlAeb+/ftIS0uDpaWlwmWws2fPokWLFqhbty6ioqLksz8PHjxAo0aNcP/+fdy4cQO1atWSVGfuZbm0tDQYGRlJ/nmLi6dXWEmX8EnavV31S7jE800qnm/S8HyT5nM43yTNBL2rUqVKaNq0KZo2bVqkABQaGor79+8D+P9dqCtXriwPQLGxsQgLU/1E1dHRQUBAAHJyctC6dWuMGDECkydPhqOjI27duoWFCxcqzODMmDEDdnZ2eS7nNW/eHJMmTcI///wDBwcHjB07FiNGjICjoyPu3r2L7777TnIAIiIiog+vyCFIXTp37ozff/+9wOMnTpyQP7ZCVS4uLoiKikKLFi0QHByMDRs2wNzcHLt371b6uWEA8P3332Pbtm0wNzdHYGAgdu3ahVq1amH//v2YMWOGpNqIiIioZJTosx4iIyPlOzoLIeDr64v4+Pg8/V6/fo09e/a897b8wjg5OeHYsWOF9gsMDERgYGCBx729veHt7S25DiIiIvo4lGgICg8Ph7+/P4C3l8ASEhLg5+eXb19jY2Ns2rTpA1ZHREREnzOlQtCRI0fQsGFDtSx6fteECRPg7e0NIQRsbGzg5+eHQYMG5elXqlQpmJubQ1tbW63vT0RERJqr0DVB6enp6NGjh8JGgK1bt5bfXl4UxsbGsLKygrW1NWrUqAEHBwdYWVnl+apUqRIDEBEREalVoSGoTJky0NbWxpEjR+RtUVFRePjwoVoLiYmJQc+ePfO0Z2Rk4OnTp2p9LyIiIqJCL4eVKlUK/fr1w5YtW3DixAlUq1YNwNsFwtu2bSvwdTKZDCdOnFC6ECEEZs6ciQ0bNuDhw4fQ1dXFpk2bMGnSJGRkZKBLly4IDg5GmTJllB6TiIiIqCBKrQnasmULqlWrhpCQECQkJAB4G1ryu5NLquXLl2PJkiVo1qwZdHR0EB8fDx8fHzRu3BiOjo7YvHkzlixZUuDCaSIiIiJVKBWCypQpg4ULF2LhwoUA3j7xPTAwEF5eXmorJCAgAN27d8fBgwcBAD/99BNMTExw4sQJlC5dGtnZ2fj5558ZgoiIiEgtJG2WuG3bNrRs2VKthSQlJaFr167y70NDQ9GpUyeULl0aANCgQQPcuXNHre9JREREmk
vSPkHv3sb+xx9/4Pbt2wAAW1tbfPnll5IKqVKlCqKjowEAycnJuHjxIkaMGCE/Hh8fj3Llykkam4iIiOi/JG+WeOLECYwePRqxsbHIfQarTCZDjRo1sHHjRvlT15XVp08frFixAs+fP8fVq1dhYGCAHj16ICsrCydOnMDGjRvh4eEhtVwiIiIiBZJC0NmzZ+Hm5gYzMzPMmzcPDg4O0NLSwrVr17Bp0ya4ubkhIiICTZs2VXrMOXPm4NatW9i0aRMMDAywbds2GBsbIzo6Gp07d0bNmjWxYMECKeUSERER5SEpBPn6+qJ69eqIioqCqampvL1Lly4YPXo0WrRogblz5yI0NFTpMfX19bF//348efIExsbG8s0RzczMsHXrVvTr1w/6+vpSyiUiIiLKQ9LC6PPnz2PMmDEKASiXsbExRowYgXPnzqk05qlTpyCEQPny5RV2hy5fvjwGDx7MAERERERqJSkEaWtr4/Xr1wUez8zMhI6OapNMLi4uqFSpEr755htERETI1xkRERERFQdJIcjZ2RmrVq1CYmJinmP37t3DDz/8ABcXF5XGXLx4MWrVqoVNmzahXbt2sLS0xJgxYxAeHo6cnBwpZRIREREVSNKaoIULF6JZs2aoX78+Bg4ciLp160Imk+H69evYsWMHhBDyjRWVNW3aNEybNg2pqak4evQojhw5guDgYGzcuBFmZmbo1asX3N3d0bZtW2hpScpuRERERHKSQlDdunURFRWF8ePHY8OGDQrHnJ2d8cMPP6BOnTqSCjIxMUH//v3Rv39/5OTkICAgAKNGjcKWLVuwZcsWlCtXDn379sWkSZNQs2ZNSe9BREREJHmfIAcHB4SHhyMlJQWxsbEAABsbG5iZmRW5qCtXruDAgQM4cOAAbt68CZlMhmbNmsHR0RGGhoYIDg7G9u3bsX//fri6uhb5/YiIiEjzSA5BuUxNTfO9S0xVkZGR2L9/Pw4ePIg7d+5ACIEvv/wSS5YsgYeHB6pWrSrv6+fnh+bNm2PixIm4fv16kd+biIiINE+RQ5C6tGnTBgBgb28Pf39/eHp6wtbWNt++enp66NatG77//vsPWCERERF9Tj6aEDRr1ix4eHigXr16SvWfOHGiwrPFiIiIiFTxUYSg5ORktGvXLk8A+u233/C///0PNjY26N27t8ImiiYmJjAxMfnAlRIREdHnosRD0Pfff49Zs2bB3t4ef/zxBwBACIE+ffrg119/hRACMpkMTZo0wcmTJ6Gnp1fCFRMREdHnoEgb7pw+fRo+Pj5wc3ODm5sbfHx8cPr0aaVff+jQIUydOhV9+vTB6tWr5e0bN27EwYMH0bdvXxw5cgSzZs3ChQsXsHHjxqKUS0RERCQnExKeT5GTk4Nhw4YhKCgIQgiUK1cOWlpaePz4MWQyGby9vREQEACZTPbecdq0aYPSpUsjJCREob1GjRowNDTE1atX5W2enp64ffs2Ll++rGq5n4T09HQYGxsjLS0NRkZGJV1OHp5eYSVdwidp9/YOJV3CJ4nnmzQ836Th+SbN53C+SZoJWrlyJQIDAzFs2DDcuXMHjx8/xqNHj3D//n34+PggMDAQK1euLHScv/76C926dVNou3z5MuLj4zFs2DCFdmdnZ8TFxUkpl4iIiCgPSSEoICAA/fr1w6ZNm1C5cmV5u7m5OVauXImvvvoKW7ZsKXScjIwMlCpVSqFt37590NbWRp8+fRTas7OzkZmZKaVcIiIiojwkhaD4+Hi0a9euwOPOzs5ISEgodBw7OzucOXNG/v3Lly8RFBSE9u3bw9LSUqHv5cuXYW9vL6VcIiIiojwk3R1mYWGBP//8s8Djf//9d54Qk59x48Zh+PDhsLGxgZOTE9asWYOHDx9i7ty5Cv2ioqKwd+9epS6xERERESlD0kxQ//79sWnTJnz//fd48+aNvD0rKwtr167Fxo0bMWDAgELH8fb2Rp8+fTBv3jx069YNoaGhWLRoEZo1aybvk/vk+F
q1amHw4MFSygUAXLp0CW5ubjAxMYG+vj6aNm2KPXv2SB7v6dOnqFy5MmQyGTp16iR5HCIiIioZkmaC5syZgwsXLmDq1KlYsGABbG1tIZPJEBsbiydPnqBt27aYNWtWoePIZDIEBwdjypQpiI2NhYODQ55LXm/evMGoUaMwb968POuHlBUeHg5XV1fo6enB09MThoaG2LdvHzw8PHDnzh1MnjxZ5TG/+eYbpKWlSaqHiIiISp6kEFSmTBn8/vvv2L59O3755Rfcvn0bANC0aVP07dsXAwcOhJaW8pNMjRs3RuPGjfM99uuvv0opUS4rKwvDhw+HlpYWIiMj0aBBAwDA3Llz4eTkhJkzZ8Ld3R1WVlZKj7lv3z7s2rULa9euxTfffFOk+oiIiKhkSN4sUSaTYdCgQTh8+DBu3LiBGzdu4MiRIxg0aJBKAai4nTx5ErGxsejfv788AAGAsbExZs6ciczMTAQFBSk93qNHjzB69GgMHDgQXbp0KYaKiYiI6ENQOq28ePGiOOsoNhEREQCAjh075jnm6uoKADh16pTS440aNQra2toKO1wTERHRp0fpEGRvb4/ff//97Yu0tKCtrf3eLx2dEn8sGQAgJiYGAFCzZs08xywsLGBgYCDvU5gdO3Zg//792LhxI8qVK6fWOomIiOjDUjqpuLu7w8bGBgDg5eVV6CMxPha5i5eNjY3zPW5kZKTUAud79+7Bx8cHX331FXr06KHWGomIiOjDUzoEff/99/I/BwYGFkctH7Vhw4ahVKlS+OGHH0q6FCIiIlIDSdesQkNDUb9+/QI3RIyLi0NsbCw6dFDt4WqXLl3Cr7/+iuTkZGRnZ+c5LpPJsHXrVpXGzJ0BKmi2Jz09vdBLW0FBQTh27Bj27t0LU1NTld6fiIiIPk6SQlDnzp0RGBiIgQMH5nv8999/x4QJE/Dy5UulxwwICMCoUaOQk5NTYB8pISh3LVBMTAwaNmyocOzBgwd4/vw5nJyc3jtG7u7Yffv2zfd4SEgIZDIZHB0dcfWdJ98TERHRx0vpEBQZGSm/00oIAV9fX8THx+fp9/r1a+zZswcmJiYqFfL999/Dzs4Oe/bsQZ06ddS25sjZ2RmLFi1CaGgoPD09FY6FhITI+7xPs2bN8Pz58zztz58/R3BwMKpUqQJXV1dUq1ZNLTUTERFR8VM6BIWHh8Pf3x/A2xmZhIQE+Pn55dvX2NgYmzZtUqmQxMREzJs3D3Z2diq9rjDt2rWDjY0Ndu3aBR8fH/leQWlpaVi4cCF0dXXh5eUl73///n2kpaXB0tJSfinNw8MDHh4eecZOSEhAcHAw6tati4CAALXWTURERMVL6VvkJ0yYgPj4eMTFxUEIAT8/P8THx+f5+vfff5GSkoJ+/fqpVIidnR0yMzNV/gEKo6Ojg4CAAOTk5KB169YYMWIEJk+eDEdHR9y6dQsLFy6EtbW1vP+MGTNgZ2eHAwcOqL0WIiIi+ngoPRNkbGwsnxmpUaMGHBwcVHrURGEWLVok34XZ0dFRbeMCgIuLC6KiouDr64vg4GC8efMG9evXx5IlS/Kd4SEiIqLPn6SF0cpuLqiKRYsW4dGjR/jyyy/lbf9dqyOTyXDixAlJ4zs5OeHYsWOF9gsMDFR6CwBra2sIISTVQ0RERCVL8rbO//vf/7Br1y4kJyfnCQKpqal4/vw5QkNDlR4vPj5ePrOUmJgobyMiIiIqDpJC0MmTJ9GlSxe8fv0awNsZmneDkEwmQ7du3VQaMyEhQUopRERERJJIetz73LlzUbFiRVy+fBl3796FEALHjx9HTk4OIiMjoa+vjwkTJqi5VCIiIiL1kTQTdPXqVcyaNQtffvklUlJSAABZWVkAgJYtW2L48OH49ttvcf78+feOs3z5cgwYMAAWFhaYN29eoe8rk8kwZ84cKSUTERERKZAUgnKfFA8AFSpUgK6uLv7991/58Ro1amD9+vWFjrNs2TLY29vDzc2twD2H3sUQRERERO
oiKQQ5Oztj27ZtGDlyJIyNjeHg4ICdO3di+PDhkMlk+O2332Bubl7oOJcvX0blypUBcBE0ERERfViSQtCCBQvQvHlztGnTBn/++Sd8fHzg5eUFGxsbaGlpISEhAfPnzy90nKpVq8r/rM49h4iIiIgKIykE1a9fH2fOnMHBgwcBAAMGDEB6ejq2bduGN2/eYMGCBZg+fbo66yQiIiJSK8n7BDk4OMDBwUH+/ZgxYzBmzBi1FEVERERU3FS+Rf7p06e4dOkS7ty5U2Cf+/fvw8fHp0iFERERERUnlULQ9OnTYWlpiaZNm8La2hp9+vRBRkaG/Pjdu3cxbtw41KhRA+vWrVN7sURERETqonQI2rp1K5YtW4YOHTpg/fr1GDt2LA4dOoSJEyfi33//xdixY2Fra4t169ahffv2uHz5cnHWTURERFQkSq8J+vHHH+Hi4oLDhw/L20xNTeHn54fNmzdDCIHOnTvD398fjRo1KpZiiYiIiNRF6Zmg69ev53keWN++fQEAQgicO3cOv/32GwMQERERfRKUDkFpaWkoX768Qlvu98ePH0eTJk3UWxkRERFRMZL0AFUiIiKiT51K+wQ9fvwYSUlJ8u/v378PAEhMTFRoz1WtWrUilkdERERUPFQKQVOmTMGUKVPytI8aNSrf/tnZ2dKqIiIiIipmSocgX1/f4qyDiIiI6INiCCIiIiKNxIXRREREpJEYgoiIiEgjMQQRERGRRmIIIiIiIo3EEEREREQaiSGIiIiINBJDEBEREWkkhiAiIiLSSAxBREREpJEYgoiIiEgjaUwIunTpEtzc3GBiYgJ9fX00bdoUe/bsUeq1QggcO3YMo0ePhoODA4yNjVG2bFk4Ojpi4cKFyMjIKObqiYiISN1Ueor8pyo8PByurq7Q09ODp6cnDA0NsW/fPnh4eODOnTuYPHnye1//+vVruLm5oXTp0mjTpg1cXV2RkZGBkJAQzJo1CwcPHkRERATKli37gX4iIiIiKqrPPgRlZWVh+PDh0NLSQmRkJBo0aAAAmDt3LpycnDBz5ky4u7vDysqqwDG0tbXx3XffYcyYMShXrpy8/c2bN+jTpw8OHz6MdevWYerUqcX94xAREZGafPaXw06ePInY2Fj0799fHoAAwNjYGDNnzkRmZiaCgoLeO0apUqUwa9YshQCU2z5jxgwAwKlTp9ReOxERERWfzz4ERUREAAA6duyY55irqyuAogWYUqVKAQB0dD77STUiIqLPymcfgmJiYgAANWvWzHPMwsICBgYG8j5S/PjjjwDyD1lERET08frsQ1BaWhqAt5e/8mNkZCTvo6pjx45h06ZNsLOzw9ChQyXXSERERB/eZx+CisulS5fg4eEBY2Nj7N27F6VLly7pkoiIiEgFn30Iyp0BKmi2Jz09vcBZooJcvnwZHTt2hJaWFkJCQlC3bt0i10lEREQf1mcfgnLXAuW37ufBgwd4/vx5vuuFCnL58mV06NABOTk5CAkJQePGjdVWKxEREX04n30IcnZ2BgCEhobmORYSEqLQpzC5ASg7OxvHjx9HkyZN1FcoERERfVCffQhq164dbGxssGvXLly9elXenpaWhoULF0JXVxdeXl7y9vv37+PmzZt5Lp9duXIFHTp0QFZWFo4dO4ZmzZp9qB+BiIiIisFnv7mNjo4OAgIC4OrqitatWys8NiMxMRHLly+HtbW1vP+MGTMQFBSEbdu2wdvbGwDw5MkTdOjQAampqejUqRPCwsIQFham8D4mJiaYMGHCh/vBiIiIqEg++xAEAC4uLoiKioKvry+Cg4Px5s0b1K9fH0uWLIGHh0ehr09PT8fTp08BAMePH8fx48fz9LGysmIIIiIi+oRoRAgCACcnJxw7dqzQfoGBgQgMDFRos7a2hhCimCojIiKikvDZrwkiIiIiyg9DEBEREWkkhiAiIiLSSAxBREREpJEYgoiIiEgjMQQRERGRRmIIIiIiIo3EEEREREQaiSGIiIiINB
JDEBEREWkkhiAiIiLSSAxBREREpJEYgoiIiEgjMQQRERGRRmIIIiIiIo3EEEREREQaiSGIiIiINBJDEBEREWkkhiAiIiLSSAxBREREpJEYgoiIiEgjMQQRERGRRmIIIiIiIo3EEEREREQaiSGIiIiINBJDEBEREWkkhiAiIiLSSAxBREREpJEYgoiIiEgjaUwIunTpEtzc3GBiYgJ9fX00bdoUe/bsUWmM169fY968eahZsyb09PRQqVIljBgxAg8fPiymqomIiKi46JR0AR9CeHg4XF1doaenB09PTxgaGmLfvn3w8PDAnTt3MHny5ELHyMnJQY8ePRASEoKmTZuiT58+iImJQUBAAE6cOIHz58/DzMzsA/w0REREpA6f/UxQVlYWhg8fDi0tLURGRmLz5s34/vvv8ddff6FWrVqYOXMmEhMTCx0nKCgIISEh+Oqrr3D27FksXrwY+/btw/r16xEXF4fZs2d/gJ+GiIiI1OWzD0EnT55EbGws+vfvjwYNGsjbjY2NMXPmTGRmZiIoKKjQcbZs2QIAWLRoEWQymbx95MiRsLGxwc6dO/Hq1Su1109ERETF47MPQREREQCAjh075jnm6uoKADh16tR7x8jIyMCFCxdQu3ZtWFlZKRyTyWTo0KEDXrx4gcuXL6unaCIiIip2n/2aoJiYGABAzZo18xyzsLCAgYGBvE9BYmNjkZOTk+8Y744dExODVq1aSapTCAEASE9Pl/T64vYm80VJl/BJ+lj/e37seL5Jw/NNGp5v0nzs55uhoaHClZv8fPYhKC0tDcDby1/5MTIykvcpyhjv9pPi2bNnAICqVatKHoM+PvuDS7oC0iQ83+hD+tjPt7S0NPm/zwX57EPQp6JSpUq4c+eOUsmViIiI3s/Q0LDQPp99CMqdvSloliY9PR3lypUr8hjv9pNCS0sLVapUkfx6IiIiUs1nvzD63fU6//XgwQM8f/68wLU+uWxsbKClpVXg2qH3rTsiIiKij9NnH4KcnZ0BAKGhoXmOhYSEKPQpSJkyZeDk5ITo6Og8ewoJIRAWFgZ9fX00atRITVUTERFRcfvsQ1C7du1gY2ODXbt24erVq/L2tLQ0LFy4ELq6uvDy8pK3379/Hzdv3sxz6WvEiBEAgBkzZsjv5AKATZs2IS4uDl9//TXKlClTvD8MERERqY1MvPsv+meqoMdmJCYmYvny5QqPzfD29kZQUBC2bdsGb29veXtOTg7c3Nzkj81wdnbG7du3sX//flhbW+PChQt8bAYREdEn5LOfCQIAFxcXREVFoUWLFggODsaGDRtgbm6O3bt3K/XcMODtwuVff/0Vfn5+ePToEVauXIkzZ85g6NChOHfuHAMQERHRJ0YjZoKIiIiI/ksjZoKIiIiI/oshiIiIiDQSQxARERFpJIYgIiIi0kgMQURERKSRGIKIiIhIIzEEUbFKSEiATCZDYGCg0n2XL19e/IWpgZ+fH2QyWUmXQR+YtbU1unbtqrbx8vsd4blF9GEwBJFS1q9fD5lMhiZNmhR5rKNHj8LPz6/oRX0AL1++hJ+fHyIiIkq6FI127do1uLu7w8rKCnp6eqhcuTI6dOiANWvWlHRp9IHExsZi5MiRsLGxgZ6eHoyMjNCiRQusXr0ar169KunyPmr8/SkYQxApZefOnbC2tsbFixdx+/ZtpV9nZWWFV69eYeDAgfK2o0ePwt/fvzjKVLuXL1/C398/3xA0e/Zsfvh+AGfPnkWjRo3w119/Yfjw4Vi7di2GDRsGLS0trF69uqTLKxY8txT99ttvqF+/Pvbs2YNu3bphzZo1WLRoEapVq4apU6di/PjxJV3iR0sTf39UoVPSBdDHLz4+HmfPnsX+/fsxcuRI7Ny5E76+vu99TVZWFnJycqCrqws9Pb0PVOmHpaOjAx0d/goVtwULFsDY2BiXLl2CiYmJwrGHDx+WTFHFjOfW/4uPj4enpyesrKxw8uRJWFpayo+NHTsWt2/fxm+//V
aCFX7cNPH3RxWcCaJC7dy5E+XKlUOXLl3g7u6OnTt3Khx/dy3PqlWrUKNGDZQuXRrXr1/Ps97B29sb69atAwDIZDL5139t3rxZPk7jxo1x6dIlhePe3t4wMDBAUlISunbtCgMDA1SuXFk+9rVr19C2bVvo6+vDysoKu3btyvMeqampmDBhAqpWrYrSpUvD1tYWS5YsQU5Ojvznyn0mnL+/v7zW3Et5Ba3b2LFjB5ycnFC2bFmUK1cOrVu3RmhoqAp/4/Su2NhY1K1bN88HOABUrFgRAODs7AxHR8d8X1+7dm24uroCUDxX161bBxsbG5QtWxYdO3bEnTt3IITA/PnzUaVKFZQpUwY9evTAkydP8h03NDQUDRo0gJ6eHuzt7bF///48feLi4tC3b1+UL18eZcuWRdOmTZX6B5vn1v9bunQpnj9/jq1btyoEoFy2trYKM0FZWVmYP3++/PPD2toaM2fOxOvXrxVel7u2KyIiAo0aNUKZMmVQv359+azv/v37Ub9+fejp6aFhw4b4888/FV6vjs8gZc6PiIgIyGQy7NmzBwsWLECVKlWgp6eHdu3aKTUrr8zvz7t27NiBhg0bokyZMihfvjw8PT1x584dhT5t2rRBvXr1cP36dbi4uKBs2bKoXLkyli5dmme8NWvWoG7duvJztlGjRnn+Lu7evYshQ4bA3NwcpUuXRt26dfHjjz8W+rOphSAqRJ06dcTQoUOFEEJERkYKAOLixYvy4/Hx8QKAsLe3FzY2NmLx4sVi5cqVIjExUX5s27ZtQgghzp49Kzp06CAAiJ9++kn+9e44X3zxhbC1tRVLliwRS5cuFaampqJKlSoiMzNT/p6DBg0Senp6wt7eXowaNUqsW7dONG/eXP5elSpVElOnThVr1qwRdevWFdra2iIuLk7++hcvXggHBwdRoUIFMXPmTLFx40bh5eUlZDKZGD9+vBBCiOfPn4sNGzYIAKJXr17yWv/66y8hhBC+vr7iv79Cfn5+AoBo3ry5WLZsmVi9erXo37+/mD59utr/u2iKjh07CkNDQ3Ht2rUC+2zZskUAyNPn4sWLAoDYvn27EOL/z7EGDRoIe3t7sWLFCjF79myhq6srmjZtKmbOnCmaN28ufvjhB+Hj4yNkMpkYPHiwwphWVlaiVq1awsTERHz77bdixYoVon79+kJLS0uEhobK+z148ECYm5sLQ0NDMWvWLLFixQrh6OgotLS0xP79++X9/vs7IgTPrXdVrlxZ2NjYKN1/0KBBAoBwd3cX69atE15eXgKA6Nmzp0I/KysrUbt2bWFpaSn8/PzEypUrReXKlYWBgYHYsWOHqFatmli8eLFYvHixMDY2Fra2tiI7O1vhfYryGaTs+REeHi7/XGzYsKFYuXKl8PPzE2XLlhVOTk6F/n0o8/uT67vvvhMymUx4eHiI9evXC39/f2Fqaiqsra3F06dP5f2cnZ1FpUqVRNWqVcX48ePF+vXrRdu2bQUAcfToUXm/zZs3y/9bbNq0SaxevVoMHTpU+Pj4KPw9VKlSRVStWlXMmzdPbNiwQXTv3l0AECtXriy05qJiCKL3unz5sgAgwsLChBBC5OTkiCpVqsiDghD//yFuZGQkHj58qPD6/D7gx44dm+cD/t2+FSpUEE+ePJG3//rrrwKAOHz4sLwt94Nu4cKF8ranT5+KMmXKCJlMJnbv3i1vv3nzpgAgfH195W3z588X+vr64tatWwo1fPvtt0JbW1skJSUJIYR49OhRntfm+u8/VDExMUJLS0v06tVL4cMy9++NpAkNDRXa2tpCW1tbNGvWTEybNk2EhIQohOLU1FShp6eXJxD4+PgIfX198fz5cyHE/59jZmZmIjU1Vd5vxowZAoBwdHQUb968kbd/9dVXQldXV2RkZMjbrKysBACxb98+eVtaWpqwtLQUX3zxhbxtwoQJAoA4ffq0vO3Zs2eievXqwtraWn6OKBOCNPXcSktLEwBEjx49lOp/9epVAU
AMGzZMoX3KlCkCgDh58qS8Lfe/49mzZ+VtISEhAoAoU6aMSExMlLdv2rRJABDh4eHytqJ+Bil7fuSGIDs7O/H69Wt539WrV+cb/P9Lmd8fIYRISEgQ2traYsGCBQrt165dEzo6Ogrtzs7OCv9zIYQQr1+/FhYWFqJPnz7yth49eoi6deu+t76hQ4cKS0tLkZKSotDu6ekpjI2NxcuXL9/7+qLi5TB6r507d8Lc3BwuLi4A3l7C8vDwwO7du5Gdna3Qt0+fPvLLR0Xh4eGBcuXKyb9v1aoVgLdTx/81bNgw+Z9NTExQu3Zt6Ovro1+/fvL22rVrw8TEROH1e/fuRatWrVCuXDmkpKTIv9q3b4/s7GxERkaqXPfBgweRk5ODuXPnQktL8VeLtztL16FDB5w7dw7du3fHX3/9haVLl8LV1RWVK1fGoUOHAADGxsbo0aMHfv75ZwghAADZ2dkIDg5Gz549oa+vrzBm3759YWxsLP8+967HAQMGKKzFadKkCTIzM3H37l2F11eqVAm9evWSf29kZAQvLy/8+eefePDgAYC3NwA4OTmhZcuW8n4GBgYYMWIEEhIScP36daX/DjT13EpPTwcAGBoaKtX/6NGjAIBJkyYptE+ePBkA8lxqsre3R7NmzeTf554Hbdu2RbVq1fK0q/MzSNXzY/DgwdDV1ZV//77PxXcp8/sDvL38l5OTg379+il8JlpYWKBmzZoIDw9XGNfAwAADBgyQf6+rqwsnJyeFekxMTPDvv//mWc6QSwiBffv2oVu3bhBCKLyvq6sr0tLS8Mcff7z35ysqhiAqUHZ2Nnbv3g0XFxfEx8fj9u3buH37Npo0aYLk5GScOHFCoX/16tXV8r7vfvgAkAeip0+fKrTr6enlCV3GxsaoUqVKnn8YjI2NFV4fExOD48ePw8zMTOGrffv2AKQtGIyNjYWWlhbs7e1Vfi29X+PGjbF//348ffoUFy9exIwZM/Ds2TO4u7vL/7Hw8vJCUlISTp8+DQD4/fffkZycrHBnYq7/nmO5gahq1ar5tv/33LO1tc1zjtWqVQvA23VHAJCYmIjatWvneW87Ozv5cWVp6rllZGQEAHj27JlS/RMTE6GlpQVbW1uFdgsLC5iYmOT5Oy/qeVCUzyBVzw9lPxfzo8zvT0xMDIQQqFmzZp7PxRs3buT5TMzvZyxXrpxCPdOnT4eBgQGcnJxQs2ZNjB07FmfOnJEff/ToEVJTU7F58+Y87zl48GAAxb94m7cfUIFOnjyJ+/fvY/fu3di9e3ee4zt37kTHjh3l35cpU0Yt76utrZ1ve+7/4RfWT5nX5+TkoEOHDpg2bVq+fXP/QaOPi66uLho3bozGjRujVq1aGDx4MPbu3QtfX1+4urrC3NwcO3bsQOvWrbFjxw5YWFjIg+27inLu0IdjZGSESpUq4X//+59Kr1N2dqyo58GHPI/UMeb7fn9ycnIgk8lw7NixfN/LwMBA5Xrs7OwQHR2NI0eO4Pjx49i3bx/Wr1+PuXPnwt/fX34TyoABAzBo0KB8x3NwcFD655OCIYgKtHPnTlSsWFF+t8O79u/fjwMHDmDjxo0qj/sxTN/XqFEDz58/z/cfyHepUmuNGjWQk5OD69evo0GDBkWskArTqFEjAMD9+/cBvP1Q7t+/PwIDA7FkyRIcPHgQw4cPL/DDuihu374NIYTC+XHr1i0Ab+86At7ukRUdHZ3ntTdv3pQfV5Ymn1tdu3bF5s2bce7cOYVLV/mxsrJCTk4OYmJi5DMqAJCcnIzU1FSV/s6LmzrPDyn++/tTo0YNCCFQvXp1tf5PoL6+Pjw8PODh4YHMzEz07t0bCxYswIwZM2BmZgZDQ0NkZ2cX+llcXHg5jPL16tUr7N+/H127doW7u3uer2+++QbPnj1TuKasrNz1GampqWquWnn9+vXDuXPnEBISkudYamoqsrKyAABly5aVtxWmZ8+e0NLSwrx58+T/h5OLMwnShYeH5/v3l7v+49
1LCgMHDsTTp08xcuRIPH/+XGHNgjrdu3cPBw4ckH+fnp6O7du3o0GDBrCwsAAAuLm54eLFizh37py834sXL7B582ZYW1urdGlLk8+tadOmQV9fH8OGDUNycnKe47GxsfJN/9zc3AAAq1atUuizYsUKAECXLl2Kt1gVqPP8eB9lf3969+4NbW1t+Pv75+kvhMDjx49Vfu//vkZXVxf29vYQQuDNmzfQ1tZGnz59sG/fvnxn+x49eqTye6qKM0GUr0OHDuHZs2fo3r17vsebNm0KMzMz7Ny5U+VHaTRs2BAA4OPjA1dXV2hra8PT07PINati6tSpOHToELp27Qpvb280bNgQL168wLVr1/DLL78gISEBpqamKFOmDOzt7REcHIxatWqhfPnyqFevHurVq5dnTFtbW8yaNQvz589Hq1at0Lt3b5QuXRqXLl1CpUqVsGjRog/6M34uxo0bh5cvX6JXr16oU6cOMjMzcfbsWQQHB8Pa2lq+dgAAvvjiC9SrVw979+6FnZ0dvvzyy2KpqVatWhg6dCguXboEc3Nz/Pjjj0hOTsa2bdvkfb799lv8/PPP6Ny5M3x8fFC+fHkEBQUhPj4e+/bty7PA+X00+dyqUaMGdu3aBQ8PD9jZ2cHLywv16tWTnwd79+6Ft7c3AMDR0RGDBg3C5s2bkZqaCmdnZ1y8eBFBQUHo2bOn/AaPj4E6z4/3Ufb3p0aNGvjuu+8wY8YMJCQkoGfPnjA0NER8fDwOHDiAESNGYMqUKSq9d8eOHWFhYYEWLVrA3NwcN27cwNq1a9GlSxf5YvfFixcjPDwcTZo0wfDhw2Fvb48nT57gjz/+wO+//17gPl1qU6z3ntEnq1u3bkJPT0+8ePGiwD7e3t6iVKlS8tvoly1blqdPfrf/ZmVliXHjxgkzMzMhk8nktwLn9s1vHPzn9tJBgwYJfX39PP2cnZ3zvSXTyspKdOnSRaHt2bNnYsaMGcLW1lbo6uoKU1NT0bx5c7F8+XKF20fPnj0rGjZsKHR1dRXqyG8vFyGE+PHHH8UXX3whSpcuLcqVKyecnZ3lWwyQ6o4dOyaGDBki6tSpIwwMDISurq6wtbUV48aNE8nJyXn6L126NM+ty7kKOsdyb0Peu3evQvu2bdsEAHHp0iV5W+65FBISIhwcHETp0qVFnTp18rxWCCFiY2OFu7u7MDExEXp6esLJyUkcOXIk35oK2ydICM0+t27duiWGDx8urK2tha6urjA0NBQtWrQQa9asUdjC4M2bN8Lf319Ur15dlCpVSlStWlXMmDFDoY8Q+X8mCPH2s2bs2LEKbfmdN+r4DFLm/Cjo3MzvvMmPqr8/+/btEy1bthT6+vpCX19f1KlTR4wdO1ZER0cX+jMOGjRIWFlZyb/ftGmTaN26tahQoYIoXbq0qFGjhpg6dapIS0tTeF1ycrIYO3asqFq1qihVqpSwsLAQ7dq1E5s3b37vz6YOMiE+87lUItIoq1evxsSJE5GQkJDnjhoioncxBBHRZ0MIAUdHR1SoUCHPviZERP/FNUFE9Ml78eIFDh06hPDwcFy7dg2//vprSZdERJ8AzgQR0ScvISEB1atXh4mJCcaMGYMFCxaUdElE9AlgCCIiIiKNxH2CiIiISCMxBBEREZFGYggiIiIijcQQRERERBqJIYiIiIg0EkMQERERaSSGICIiItJIDEFERESkkf4PVYGp8oLzpuUAAAAASUVORK5CYII=\n"
          },
          "metadata": {}
        }
      ],
      "source": [
        "# in a bar plot, have two bars, one for sys1 which is the ratio of sys1's that are yes\n",
        "# and another one for sys2 which is the ratio of sys2's that are yes\n",
        "import seaborn as sns\n",
        "import matplotlib.pyplot as plt\n",
        "import matplotlib.font_manager as fm\n",
        "\n",
        "font_path = \"/content/Roboto-Regular.ttf\"\n",
        "roboto_font_big = fm.FontProperties(fname=font_path, size=13)\n",
        "roboto_font_med = fm.FontProperties(fname=font_path, size=13)\n",
        "roboto_font_small = fm.FontProperties(fname=font_path, size=13)\n",
        "colorbar = \"#474eca\"\n",
        "abilities = []\n",
        "ratios = []\n",
        "\n",
        "for ability in data[\"ability\"].unique():\n",
        "    ability_df = data[data[\"ability\"] == ability]\n",
        "\n",
        "    sys1_yes_ratio = (\n",
        "        ability_df[\n",
        "            (ability_df[\"Strategy\"] == \"sys1\")\n",
        "            & (ability_df[\"direct_answer_clean\"] == \"yes\")\n",
        "        ].shape[0]\n",
        "        / ability_df[ability_df[\"Strategy\"] == \"sys1\"].shape[0]\n",
        "    )\n",
        "\n",
        "    sys2_yes_ratio = (\n",
        "        ability_df[\n",
        "            (ability_df[\"Strategy\"] == \"sys2\")\n",
        "            & (ability_df[\"direct_answer_clean\"] == \"yes\")\n",
        "        ].shape[0]\n",
        "        / ability_df[ability_df[\"Strategy\"] == \"sys2\"].shape[0]\n",
        "    )\n",
        "    print(\"ability:\", ability)\n",
        "    print(\"sys1 yes ratio:\", sys1_yes_ratio)\n",
        "    print(\"sys2 yes ratio:\", sys2_yes_ratio)\n",
        "\n",
        "    sys1_ratio_to_sys2 = sys1_yes_ratio / sys2_yes_ratio\n",
        "    abilities.append(ability)\n",
        "    ratios.append(sys1_ratio_to_sys2)\n",
        "\n",
        "order = [\"Arithmetic\", \"Symbolic\", \"Common Sense\"]\n",
        "# plt.figure(figsize=(6, 4))\n",
        "fig, ax = plt.subplots(figsize=(6, 4.04))\n",
        "sns.barplot(x=abilities, y=ratios, width=0.5, color=colorbar, order=order, ax=ax)\n",
        "\n",
        "sns.despine()\n",
        "# ax.grid(axis=\"y\")\n",
        "ax.set_ylabel(\"Ratio of definitive answers \\n in System 1 to System 2\", fontproperties=roboto_font_small)\n",
        "\n",
        "ax.tick_params(axis=\"x\", length=0, pad=10, labelsize=12)\n",
        "ax.tick_params(axis=\"y\", length=0, pad=10, labelsize=14)\n",
        "plt.tight_layout()\n",
        "\n",
        "plt.savefig(\n",
        "    \"ratio_of_definitive_answers_in_first_3_sentences_per_ability.png\",\n",
        "    dpi=300,\n",
        "    bbox_inches=\"tight\",\n",
        ")\n",
        "plt.savefig(\"def_ratio.png\", dpi=900, bbox_inches=\"tight\")\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "plot_data = []\n",
        "import pandas as pd"
      ],
      "metadata": {
        "id": "HF4Foo8PZDwY"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "plot_data.append([\"Symbolic\", \"System 1\", 0.23484848484848486])\n",
        "plot_data.append([\"Symbolic\", \"System 2\", 0.19318181818181818])\n",
        "plot_data.append([\"Arithmetic\", \"System 1\", 0.7023255813953488])\n",
        "plot_data.append([\"Arithmetic\", \"System 2\", 0.703875968992248])\n",
        "plot_data.append([\"Common Sense\", \"System 1\", 0.36910994764397903])\n",
        "plot_data.append([\"Common Sense\", \"System 2\", 0.25392670157068065])"
      ],
      "metadata": {
        "id": "TgYcVS7MZJ63"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df_plot = pd.DataFrame(plot_data, columns=[\"Ability\", \"System\", \"ratio\"])"
      ],
      "metadata": {
        "id": "5feZ9PzuZB8F"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import seaborn as sns\n",
        "import matplotlib.pyplot as plt\n",
        "import matplotlib.font_manager as fm\n",
        "\n",
        "font_path = \"/content/Roboto-Regular.ttf\"\n",
        "roboto_font_big = fm.FontProperties(fname=font_path, size=13)\n",
        "roboto_font_med = fm.FontProperties(fname=font_path, size=13)\n",
        "roboto_font_small = fm.FontProperties(fname=font_path, size=13)\n",
        "sys1_color = \"#fdda7a\"\n",
        "sys2_color = \"#86d387\"\n",
        "\n",
        "order = [\"Arithmetic\", \"Symbolic\", \"Common Sense\"]\n",
        "# plt.figure(figsize=(6, 4))\n",
        "fig, ax = plt.subplots(figsize=(6, 4.02))\n",
        "sns.barplot(data=df_plot,x=\"Ability\", y=\"ratio\", hue=\"System\", palette=[sys1_color, sys2_color], width=0.5, order=order, ax=ax,     edgecolor=\"#665c53\")\n",
        "\n",
        "sns.despine()\n",
        "# ax.grid(axis=\"y\")\n",
        "ax.set_ylabel(\"Ratio of definitive answers\", fontproperties=roboto_font_small)\n",
        "ax.set_xlabel(\"\", fontproperties=roboto_font_small)\n",
        "\n",
        "ax.tick_params(axis=\"x\", length=0, labelsize=12)\n",
        "ax.tick_params(axis=\"y\", length=0, labelsize=14)\n",
        "plt.tight_layout()\n",
        "# plt.legend(loc=\"best\", frameon=False, fontsize=12)\n",
        "ax.get_legend().set_visible(False)\n",
        "plt.savefig(\"def_ratio.png\", dpi=900, bbox_inches=\"tight\")\n",
        "plt.show()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 408
        },
        "id": "wi3_0cWRY6Zo",
        "outputId": "f6e74006-0de8-4c01-d8ff-34efc8fcb73d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 600x402 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk4AAAGHCAYAAABGVt0UAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAASJhJREFUeJzt3Xtczvf/P/DH1Unp6FiE0oFCabO1HFsORUY2UbMtGZnTYmOjbIpNw5iZYWhTGxELX2yUQ05zWLYxnzESJZQxOjl1ev3+cOv9c+2K3r276KLH/Xa7brd6vV7Xq+fFu8vD+/26Xm+VEEKAiIiIiKqkV9sFEBERET0tGJyIiIiIZGJwIiIiIpKJwYmIiIhIJgYnIiIiIpkYnIiIiIhkYnAiIiIikonB6T+EECgoKAC3tyIiIqL/YnD6j8LCQlhaWqKwsLC2SyEiIiIdw+BEREREJBODExEREZFMDE5EREREMjE4EREREcnE4EREREQkE4MTERERkUwMTkREREQyMTgRERERycTgRERERCQTgxMRERGRTAxORERERDIxOBERERHJpJPBKS0tDf7+/rCysoKpqSm8vLywfv162c+3t7eHSqV65OPAgQOP8RUQERHRs8igtgv4r9TUVPj5+cHY2BjBwcEwNzdHUlISgoKCkJ2djcmTJ1c5x6RJk5CXl6fRfv36dSxZsgQNGjTAiy+++BiqJyIiomeZSggharuICqWlpXBxccGlS5dw5MgReHh4AADy8/Ph6emJzMxMnD17FnZ2dormX7BgAaZMmYJ3330XX331VaVjCgoKYGlpifz8fFhYWCh9KURERPQM0qkzTnv27EFGRgZGjBghhSYAsLS0RGRkJEJDQxEfH48ZM2Yomv/bb78FAIwcOVIb5eqEG9evoaiosLbLkKW0pAQGhoa1XUaVzMzM0bBxk9oug4iIdJBOBae9e/cCAHx9fTX6/Pz8AAD79u1TNPehQ4dw+vRpvPDCC+jYsaPiGnXJjevXEDV1IoqLi2u7FHlUAHTm/ObDGRoZYdbcRQxPRESkQaeCU3p6OgDA2dlZo8/GxgZmZmbSmOqqONs0atQo5QXqmKKiQhQXF2Nof0c0bWRS2+U80pnzedh58BJsfdvCqEH92i7noYpv3sbllDMoKipkcCIiIg06FZzy8/MB3L80VxkLCwtpTHUUFRVh/fr1qF+/Pl5//fUa1aiLmjYyga2NaW2X8Uj//HsHAGDUoD5MmprVcjVERETK6OR2BNqWmJiIoqIiDBkyhAu+iYiISDGdCk4VZ5oedlap4hNv1fUsXqYjIiKiJ0+nglPF2qbK1jHl5uaiqKio0vVPj3Lq1CkcPnwYLi4u6Natm1bqJCIiorpJp4KTt7c3ACAlJUWjLzk5WW2MXM/iFgRERERUO3QqOPXq1QsODg5ISEjA8ePHpfb8/HzExMTAyMgIISEhUntOTg7+/vvvh17aKykpwQ8//ABDQ0O15xEREREpoVPBycDAALGxsSgvL0ePHj0wevRoTJ48GR07dsTZs2cRExMDe3t7aXxERARcXV2xadOmSufbsmULrl27hgEDBqBp06ZP6FUQERHRs0qntiMAAB8fHxw8eBBRUVFITExESUkJ3NzcMHfuXAQFBVVrLi4KJyIiIm3SueAEAJ6enti+fXuV4+Li4hAXF/fQ/p9//lmLVREREVFdp1OX6oiIiIh0GYMTERERkUwMTkREREQyMTgRERERycTgRERERCQTgxMRERGRTAxORERERDIxOBERERHJxOBEREREJBODExEREZFMDE5EREREMjE4EREREcnE4EREREQkE4MTERERkUwMTkREREQyMTgRERERycTgRERERCQTgxMRERGRTAxORERERDIxOBERERHJxOBEREREJBODExEREZFMDE5EREREMjE4EREREcnE4EREREQkE4MTERERkUwMTkREREQyMTgRERERycTgRE
RERCQTgxMRERGRTAxORERERDIxOBERERHJpJPBKS0tDf7+/rCysoKpqSm8vLywfv36as/zzz//4L333oOzszOMjY3RqFEjdO7cGcuWLXsMVRMREdGzzqC2C/iv1NRU+Pn5wdjYGMHBwTA3N0dSUhKCgoKQnZ2NyZMny5rn+PHj8PX1xc2bN9G/f38EBgaiqKgIp0+fxtatWzF27NjH/EqIiIjoWaNTwam0tBRhYWHQ09PD/v374eHhAQCYMWMGPD09ERkZicDAQNjZ2T1ynoKCAgQEBAAAfvvtN7i7u2v8HCIiIqLq0qlLdXv27EFGRgaGDRsmhSYAsLS0RGRkJIqLixEfH1/lPEuXLsXFixcxZ84cjdAEAAYGOpUXiYiI6CmhUwli7969AABfX1+NPj8/PwDAvn37qpwnMTERKpUKgwcPxpkzZ5CSkoI7d+7AxcUFffv2hZGRkVbrJiIiorpBp4JTeno6AMDZ2Vmjz8bGBmZmZtKYhykuLsbJkyfRpEkTLF68GFFRUSgvL5f6HRwcsHnzZri5uWm3eCIiInrm6dSluvz8fAD3L81VxsLCQhrzMDdu3EBZWRn+/fdfzJo1C/PmzcPVq1dx6dIlfPzxx7hw4QIGDBiAu3fvar1+IiIierbpVHDShoqzS2VlZRg3bhwmT56Mpk2bwtbWFrNmzcKQIUOQlZWFH3/8sZYrJSIioqeNTgWnijNNDzurVFBQ8NCzUf+dAwAGDhyo0V/RduzYMaVlEhERUR2lU8GpYm1TZeuYcnNzUVRUVOn6pweZmprC1tYWAGBlZaXRX9F2586dmhVLREREdY5OBSdvb28AQEpKikZfcnKy2phH6dmzJwDg1KlTGn0Vbfb29krLJCIiojpKcXC6d+8eNm/eDCEEAEh7LC1cuBAXL15UNGevXr3g4OCAhIQEHD9+XGrPz89HTEwMjIyMEBISIrXn5OTg77//1ri0N2bMGADAnDlzkJeXJ7Xn5uZi0aJF0NPTw+DBgxXVSERERHWXouB06dIltGvXDoMHD0ZJSQkAYMCAARgxYgQmT54MNzc3/PXXX9We18DAALGxsSgvL0ePHj0wevRoTJ48GR07dsTZs2cRExOjdqYoIiICrq6u2LRpk9o8Xbp0wfvvv4+//voL7u7uGD9+PEaPHo2OHTvi8uXL+PTTT9GmTRslL52IiIjqMEXBaerUqcjJycGyZctgZGSElJQU7Ny5E4sWLcLRo0dhaWmJqKgoRQX5+Pjg4MGD6Nq1KxITE7Fs2TJYW1tj3bp1su9TBwALFizAqlWrYG1tjbi4OCQkJKBNmzbYuHEjIiIiFNVGREREdZuiDTBTUlLw4YcfYvTo0QCApKQkeHh44N133wUAvPPOO1iyZIniojw9PbF9+/Yqx8XFxSEuLu6h/aGhoQgNDVVcBxEREdGDFJ1xunfvHpo3by59v3PnTrXbpFhZWVW5USURERHR00ZRcGrfvj0SExNRVlaGnTt3IisrS7qXHHA/SDk5OWmtSCIiIiJdoOhS3YcffojBgwejYcOGuHv3Lp577jn4+Pjg33//xYQJE7B161Z8/fXX2q6ViIiIqFYpOuP06quvYvPmzfD29sbbb7+Nn376CQBw/fp1bN68GdOmTcPYsWO1WigRERFRbVN0xgm4f+uS/97SxMnJCTdv3oSxsXGNCyMiIiLSNYrOOAUHB2Pnzp0a7fr6+gxNRERE9MxSFJx2796NP//8U9u1EBEREek0RcEpPDwc3377LQoLC7VdDxEREZHOUrTGac+ePThz5gysrKwA3L/FiaGhodoYlUqF3bt317hAIiIiIl2hKDhduHABrVq1kr6/dOmS1goiIiIi0lWKglNmZqaWyyAiIiLSfYrWOD3oypUrOHLkCPLy8qS28vLymk5LREREpHMUB6elS5fC1tYWLVu2RNeuXfHrr78CABYvXoyePXvizp07WiuSiIiISBcoCk4rV67EhAkT8NxzzyEqKgpCCKmvR4
8eOHnyJGbOnKm1IomIiIh0gaLg9OWXXyIoKAjbtm3DO++8o9bXsWNHjB07FqtXr9ZKgURERES6QlFwOn/+PLy9vR/a36pVK1y/fl1xUURERES6SFFwat68OdLS0h7an5KSAjs7O8VFEREREekiRcEpLCwMq1atwscff4xr164BuL/h5c2bN/Hee+9h06ZNGD58uFYLJSIiIqptivZxmjp1KjIyMjB79mzExMQAAPr27QsAEEJg4MCBmDp1qvaqJCIiItIBioKTSqXCypUrMWLECCQmJuLcuXMoLi5Gq1atEBAQgIEDB2q7TiIiIqJapyg4VejSpQu6dOmirVqIiIiIdJqiNU7t2rXD+++/j+TkZNy9e1fbNRERERHpJEXBqVGjRli2bBn69euHhg0bok+fPpg/fz5Onjyp7fqIiIiIdIai4HTgwAHk5eVh3759iIyMhL6+PmbNmgUPDw/Y2tpixIgRWLdunbZrJSIiIqpViu9VV69ePXTv3h0fffQRduzYgZs3b2Lp0qW4desW4uPj8cYbb2izTiIiIqJaV6PF4Xfu3MHOnTuxbds2/Pzzz8jJyYGjoyNGjhyJgIAAbdVIREREpBMUBafFixfj559/xr59+1BSUgJPT0+8++67GDhwIFxdXbVdIxEREZFOUBScJk6cCAAwNTXFypUrERQUBAODGp28IiIiItJ5itY4nThxArNnz4a7uzuGDx+ORo0aITAwEHFxcdItWIiIiIieNYqCk5ubGyIiIvDLL78gNzcXX3/9NQwMDPDee++hWbNm8PLywpQpU7RdKxEREVGtUvypugqNGzdGUFAQxowZg7Fjx6K8vBy//vorFi5cqI36iIiIiHSGooVJxcXFOHLkCPbu3YvU1FQcPXoU9+7dg4WFBV577TX4+fnBz89P27USERER1SpFZ5wsLS3h4+ODWbNm4e7du/jwww9x4MABXL9+HT/++CPCwsLQqlUrxUWlpaXB398fVlZWMDU1hZeXF9avXy/7+XFxcVCpVA997N27V3FtREREVHcpOuM0bNgw9O3bF71790aDBg20WlBqair8/PxgbGyM4OBgmJubIykpCUFBQcjOzsbkyZNlzxUQEAAPDw+Ndnt7e+0VTERERHWGouD07bffarsOAEBpaSnCwsKgp6eH/fv3S6FnxowZ8PT0RGRkJAIDA2FnZydrvkGDBiE0NPSx1EpERER1j+LF4StWrICHhwdKSkoAAFu3bkXr1q1haWmJ8PBwlJeXV3vOPXv2ICMjA8OGDVM7U2RpaYnIyEgUFxcjPj5eaclERERENaL4jNOYMWNgZ2cHPT09XL16FcOGDYONjQ26d++OJUuWwNHRUdooU66KtUe+vr4afRWLzfft2yd7vj/++AP//vsvSktLYW9vj969e6NRo0bVqomIiIiogqLg9NVXX6F79+7YtWsX9PX1sXr1aujr6+PYsWOwtLTEsGHDEBsbW+3glJ6eDgBwdnbW6LOxsYGZmZk0Rm6dDzIxMUFUVBSmTp1arbqIiIiIAIWX6tLT0/HGG2/A0NAQALBjxw74+fnB0tISANCtWzdcuHCh2vPm5+cDgDTPf1lYWEhjHqV169ZYvHgxzp49i9u3b+PSpUv4/vvv0bBhQ0ybNg2LFy+udm1EREREioJT06ZNkZubCwAoKCjAwYMH0atXL6n/2rVrMDU11U6FCnh7e2PChAlwdnaGiYkJbG1t8dZbbyE5ORnGxsaIjo5GaWlprdVHRERETydFl+r69u2LefPmoV69ejh8+DD09PQQEBAAADhz5gyWLl2Knj17VnveijNNDzurVFBQUKPtD9q3b49u3bph165dOH36NNzc3BTPRURERHWPojNOn3zyCdq1a4eIiAhs3boVc+fOhbW1Nc6cOQNXV1cAQExMTLXnrVjbVNk6ptzcXBQVFVW6/qk6GjduDAC4detWjeYhIiKiukdRcGrSpAmOHDmCY8eO4fLly5gwYQIAwMrKCh9//D
F+//13tG7dutrzent7AwBSUlI0+pKTk9XGKFFWVoZjx44BgOy9oIiIiIgqKN7HSU9PD88//zxsbGykNmtra8ycORO2traK5uzVqxccHByQkJCA48ePS+35+fmIiYmBkZERQkJCpPacnBz8/fffGpf2fvvtN425y8rKMG3aNJw7dw4+Pj5o1qyZohqJiIio7lK0xqnCuXPncPXqVZSVlVXa36NHj+oVY2CA2NhY+Pn5oUePHmq3XMnKysL8+fPVbpcSERGB+Ph4rFq1Sm2H8BdeeAHu7u5wd3eHra0tbty4gX379uHs2bNo0aIFYmNjlbxcIiIiquMUBaczZ84gMDAQp06dqrRfCAGVSvXQQPUoPj4+OHjwIKKiopCYmIiSkhK4ublh7ty5CAoKkjXH5MmTceTIEezcuRM3btyAkZERnJyc8NFHH+H999/X+v31iIiIqG5QFJzGjh2LjIwMTJkyBS4uLtDTU3zFr1Kenp7Yvn17lePi4uIQFxen0T5//nyt1kNEREQEKAxOv/32G8aPH4+5c+dqux4iIiIinaXoVJGZmRkcHBy0XQsRERGRTlMUnN5++22sXr2au28TERFRnaLoUp2dnR2OHj2KevXqITAwEM8//3ylH+9/cOsAIiIioqedouA0evRo6esNGzZgw4YNGmNUKhWDExERET1TFAWn1NRUbddBREREpPMUBaea3PaEiIiI6GlVo53D7969i2vXrkEIodael5eH8+fPY9CgQTWZnoiIiEinKApOt2/fRlhYGNavX4/y8vJKx7Rv357BiYiIiJ4pirYjmD59OtauXYt+/fph3LhxEEKgT58+CA8Pl84+zZgxQ6uFEhEREdU2RcFpw4YNGDZsGLZs2YLp06cDAN5//30sXLgQpaWl8PT0xNGjR7VaKBEREVFtUxScrl27hs6dOwMALCwsAAD5+fn3J9TTw7Bhw/D9999rqUQiIiIi3aAoOLVs2RInTpwAANSvXx9NmjTB8ePHpf6bN2/i1q1bWimQiIiISFcoWhw+ZswYTJs2DQ4ODpg2bRpeffVVLF68GA4ODtDX18eiRYukM1JEREREzwpFZ5wmTpyI119/HWvXrgUAfPTRR2jYsCHCwsLw9ttvw8DAAF9++aU26yQiIiKqdYrOOBkaGuKHH37A9evXAQAtWrTA6dOnsWfPHpSUlKBnz57S2iciIiKiZ0WNNsBs3Lix9LWJiQn69+9f44KIiIiIdJWiS3VEREREdRGDExEREZFMDE5EREREMjE4EREREclUo8XhZ86cwW+//YaLFy9i6NChcHBwQHFxMa5fv47mzZtrq0YiIiIinaAoON29exchISFISkqCEAIqlQrPP/88HBwc8MknnyA2NhZHjx5Fq1attF0vERERUa1RdKluxowZ2LJlC+bOnYuUlBQIIaS+KVOmwMrKCh9++KHWiiQiIiLSBYqC09q1axEeHo4pU6bAzc1Nrc/S0hIjR45ESkqKVgokIiIi0hWKgtP169cfeRnO2NgY9+7dU1wUERERkS5SFJzat2+PTZs2qV2iq3Dv3j3ExcWhQ4cONS6OiIiISJcoWhz+8ccf47XXXkPPnj0xePBgqFQqnDlzBrdu3cK8efPwxx9/IDExUdu1EhEREdUqRcEpICAA8fHxmDRpEvbt2wcAmDRpEoQQqFevHubPn4/AwECtFkpERERU2xTv4/Tmm2/i1Vdfxc6dO5Geno7i4mK0atUKfn5+aNq0qTZrJCIiItIJioJTbm4ubGxsYGpqikGDBmm5JCIiIiLdpGhxeIsWLdCrVy+sXLkS//77r7ZrIiIiItJJioLTpEmTkJ2djXfeeQfNmzdHv379EB8fj4KCAm3XR0RERKQzFAWn+fPn4+zZs/jrr78QHR2NvLw8vP3227C2tsagQYOwdu1a3L59W3FRaWlp8Pf3h5WVFUxNTeHl5YX169crnu/mzZuwtbWFSqVC3759Fc9DREREdVuNbvLr6uoKV1dXRE
REIDc3F1u2bEFUVBS2bt0KlUqF0tLSas+ZmpoKPz8/GBsbIzg4GObm5khKSkJQUBCys7MxefLkas85YcIE5OfnV/t5RERERA9SdMbpQUII7Nu3D3PmzMFnn32Gq1evolGjRujdu3e15yotLUVYWBj09PSwf/9+rFixAgsWLMCJEyfQpk0bREZGIisrq1pzJiUlISEhAXPnzq12PUREREQPUhSc7t69iy1btuDtt99G06ZN0bNnTyQkJKBv375ITU3F1atXsWPHjmrPu2fPHmRkZGDYsGHw8PCQ2i0tLREZGYni4mLEx8fLnu/atWsYO3Ys3nrrLfTv37/a9RARERE9SNGlukaNGuHu3buwsLDAoEGDEBwcjN69e0NfX79GxezduxcA4Ovrq9Hn5+cHANKGm3KMGTMG+vr6WLRoES/VERERUY0pCk4DBgzA66+/jn79+sHIyEhrxaSnpwMAnJ2dNfpsbGxgZmYmjanK6tWrsXHjRmzevBkNGjRgcCIiIqIaUxSc1q1bp+06AEAKN5aWlpX2W1hYyApAV65cQXh4OF5//XUEBARotUYiIiKqu2QHp/nz5+PNN9+EjY0NZs2aVeV4lUqFjz/+uEbFKTVq1CgYGhriq6++qpWfT0RERM8m2cHp888/R7t27eDv74/o6OgqxysJThVnmh52VqmgoAANGjR45Bzx8fHYvn07NmzYgMaNG1fr5xMRERE9iuzgdOzYMdja2gIALly48FiKqVjblJ6ejk6dOqn15ebmoqioCJ6eno+c448//gAADBkypNL+5ORkqFQqdOzYEcePH6950URERFRnyA5OLVu2lL62s7N7LMV4e3vjs88+Q0pKCoKDg9X6kpOTpTGP0rlzZxQVFWm0FxUVITExES1atICfnx9atWqlvcKJiIioTlC0OLxLly74+OOP0a9fv0r7N2zYgKVLlyI1NbVa8/bq1QsODg5ISEhAeHi4tJdTfn4+YmJiYGRkhJCQEGl8Tk4O8vPz0axZM+kyX1BQEIKCgjTmzszMRGJiItq3b4/Y2Nhq1UVEREQEKNwA88iRI7h+/XqlfSUlJThw4AAOHz5c7XkNDAwQGxuL8vJy9OjRA6NHj8bkyZPRsWNHnD17FjExMbC3t5fGR0REwNXVFZs2bVLyMoiIiIiqRXZwmjlzJvT19aVNLkNDQ6XvH3wYGxtjyZIl6Nmzp6KCfHx8cPDgQXTt2hWJiYlYtmwZrK2tsW7dOkX3qSMiIiLSFtmX6jw8PKTLZPHx8RBCYPjw4RrjDA0N0aZNG4wZM0ZxUZ6enti+fXuV4+Li4hAXFydrTnt7ewghFNdEREREJDs4BQQESJtJxsfHIy4uTm29EREREdGzTtHi8B9++AFdu3bVdi1EREREOk12cDp//jxat24NlUqF7t27AwAuXrz4yOfwI/9ERET0LJEdnDp37oxVq1bB398f9vb2UKlUVT6nrKysRsURERER6RLZwSkyMhLPP/88AGDGjBmyghMREdHT4sb1aygqKqztMmQxMzNHw8ZNaruMOkl2cJo4caL0tZx71RERET0tbly/hqipE1FcXFzbpchiZGSEmXMXMTzVAkWLw4mIiJ4lRUWFKC4uxtD+jmjayKS2y3mkf/69g/U/ZaCoqJDBqRYoCk5lZWVYuHAhNm/ejKtXr1a6lkmlUiEjI6PGBRIRET0pTRuZwNbGtLbLIB2mKDi9//77WLx4MZo1awYnJyfo6Sm6cwsRERHRU0VRcNq0aRP69euHLVu2SLdgISIiInrWKTpVdOPGDfj7+zM0ERERUZ2iKDh5e3vjzJkz2q6FiIiISKcpCk5fffUVNm3ahCVLlqC8vFzbNRERERHpJEXBydnZGZcvX0Z4eDgMDQ2hr6+v8TAw4E4HRERE9GxRlG5CQkLUdg4vLS1lUCIiIqJnnqK0ExcXp+UyiIiIiHQfN2AiIiIikkn2GaeAgAB8+umncHNzQ8+ePascr1KpsHv37hoVR0RERK
RLZAeny5cv49atWwCA8+fPq61xIiIiIqoLZAenY8eOSV9nZmY+jlqIiIiIdFqVa5zKy8sRGBiItLQ0qe38+fPS2SciIiKiuqLK4FRQUICNGzciPT1danN2dkZSUtJjLYyIiIhI11R5qc7KygpNmzbF3LlzYWdnh5YtW0IIgczMTFy8ePGRz23VqpXWCiUiIiKqbbLWOM2ZMwdhYWHo0aMHgPufmJs5cyZmzpz5yOeVlZXVvEIiIiIiHSErOIWGhsLLyws7d+7EjRs3MHPmTNjZ2SE0NPQxl0dERESkO2R/qs7FxQUuLi4AIJ1tCgkJeWyFEREREekaRbdcKS8v13YdRERERDpP8S1Xrly5goiICPj6+sLFxQUHDhwAAPz555+IjY3VWoFEREREukLRGae//voL3bt3x61bt+Ds7Iz09HTcuXMHALB371689957aNq0KQYOHKjVYomIiIhqk6IzTh988AGaNm2KzMxM7Nq1C0IIqS88PByBgYGIjo7WVo1EREREOkFRcDp48CDGjh2LZs2aVXrPup49e+L06dM1Lo6IiIhIlygKTnp6eigqKnpo/5UrV2BmZqa4KCIiIiJdpCg49e3bF19//TXOnj0rtVWcefrtt9+wcOFC9OnTR3FRaWlp8Pf3h5WVFUxNTeHl5YX169fLfv727dsRHBwMFxcXWFlZoX79+nBxccHIkSPVaiYiIiKqDkXB6fPPP4eBgQE6dOiA/v37Q6VSYerUqQgICICXlxfq1auH2bNnKyooNTUVXbt2xcGDBzF06FCMGTMGubm5CAoKwoIFC2TN8fPPP+PIkSPo2LEjRowYgQkTJsDZ2Rnx8fFwd3fHnj17FNVGREREdZuiT9W1bNkSv//+O6ZPn45169ZBCIHjx4/j9OnTGDBgAD7//HO0bt262vOWlpYiLCwMenp62L9/Pzw8PAAAM2bMgKenJyIjIxEYGAg7O7tHzvP5559j8eLFGu27d+9G7969MXXqVKSlpVW7PiIiIqrbFO/j1KRJE6xYsQL5+fm4evUqsrOzUVBQgI0bN8LR0VHRnHv27EFGRgaGDRsmhSYAsLS0RGRkJIqLixEfH1/lPMbGxpW29+rVCw0aNMC5c+cU1UdERER1m6IzTg9SqVRo0qSJNmrB3r17AQC+vr4afX5+fgCAffv2KZ7/8OHDuHnzJrp166Z4DiIiIqq7ZAWnnj17PrTv/PnzyM7OBgC0adMGzZo1A3A/UO3evbtaxaSnpwMAnJ2dNfpsbGxgZmYmjZEjJSUFhw4dwr1795Ceno5t27ahcePGWLhwYbXqIiIiIgJkBqfz589r7NeUlZWlMe7MmTM4c+YMVCoVunTpUu1i8vPzAdy/NFcZCwsLaYwcKSkpagvKnZycsG7dOnTq1KnatRERERHJWuOUmZmJCxcuSI+PPvoI1tbWOHfuHMrLy9Uep06dQrNmzTBy5MjHXXuV5s+fDyEECgsLcfToUbRt2xZdu3ZFQkJCbZdGRERETyFFi8PnzZuHcePGwcHBQaPPxcUFEyZMwJw5c6o9b8WZpoedVSooKHjo2ahHMTMzg6enJzZv3gwXFxeMHj0a165dq/Y8REREVLcpCk5ZWVmwsrJ6aL+ZmVmll/KqUrG2qbJ1TLm5uSgqKqp0/ZNcBgYG8PHxwa1bt3Ds2DHF8xAREVHdpCg4OTk5Yfny5bhx44ZGX15eHpYvX64o4Hh7ewO4vzbpv5KTk9XGKHXlyhUAgKGhYY3mISIiorpH0XYEs2bNwpAhQ+Dq6oo33ngDzs7OUKlUOHfuHBISEvDPP/9g06ZN1Z63V69ecHBwQEJCAsLDw6W9nPLz8xETEwMjIyOEhIRI43NycpCfn49mzZqpXcI7duwYXnjhBY35k5OTsWnTJlhZWaFz587Vf+FERERUpykKTq+99hq2bt2KadOm4csvv1Tr69atG9auXavozJCBgQFiY2Ph5+eHHj16IDg4GObm5khKSkJWVhbmz58Pe3
t7aXxERATi4+OxatUqhIaGSu0vvvgiOnToAHd3d7Ro0QK3bt3Cn3/+iQMHDsDQ0BDfffcdTE1Nlbx0IiIiqsMUb4Dp7+8Pf39/XLlyBRcvXgQA2NnZSfs4KeXj44ODBw8iKioKiYmJKCkpgZubG+bOnYugoCBZc8TExCA1NRX79u3DtWvXoKenh1atWmH06NGYNGkSXF1da1QjERER1U013jm8efPmaN68uTZqkXh6emL79u1VjouLi0NcXJxGe0REBCIiIrRaExEREZHie9URERER1TUMTkREREQyMTgRERERySQrOG3btg05OTmPuxYiIiIinVZlcCooKEBAQIDaTts9evSQNqQkIiIiqiuqDE4mJibQ19fHtm3bpLaDBw/in3/+eayFEREREemaKrcjMDQ0xNChQ7Fy5Urs3r0brVq1AgCEhoZi1apVD32eSqXC7t27tVcpERERUS2TtY/TypUr0apVKyQnJyMzMxMAIITAhQsXHmdtRERERDpFVnAyMTFBTEwMYmJiAAB6enqIi4tTu28cERER0bNO0XYEq1atQrdu3bRdCxEREZFOU3TLleHDh0tf//777zh37hwAwMnJCc8//7x2KiMiIiLSMYrvVbd7926MHTsWGRkZEEIAuL8g3NHREd988w169uyptSKJiIhIXc6VS7VdgixmZuZo2LhJbZehNYqC06FDh+Dv748mTZpg1qxZcHd3h56eHk6ePInly5fD398fe/fuhZeXl7brJSIiqtMKi4oBFfDdsq9quxRZDI2MMGvuomcmPCkKTlFRUWjdujUOHjyIxo0bS+39+/fH2LFj0bVrV8yYMQMpKSlaK5SIiIiAO/fKAAHY+raFUYP6tV3OIxXfvI3LKWdQVFRYt4PTkSNHMHv2bLXQVMHS0hKjR4/G9OnTa1wcERERVc6oQX2YNDWr7TLqHEWfqtPX18e9e/ce2l9cXAwDA8XLp4iIiIh0kqLg5O3tjS+//BJZWVkafVeuXMFXX30FHx+fGhdHREREpEsUnRaKiYlB586d4ebmhrfeegvt27eHSqXCqVOnsHr1agghpM0yiYiIiJ4VioJT+/btcfDgQUycOBHLli1T6/P29sZXX30FFxcXrRRIREREpCsUL0Ryd3dHamoqrl+/joyMDACAg4MDmjR5NlbNExEREf1XjVdwN27cuNJP1xERVbhx/RqKigpruwxZnrXN+ohIu/jRNyJ6rG5cv4aoqRNRXFxc26XIYmRkhJnP0GZ9RKRdDE5E9FgVFRWiuLgYQ/s7omkjk9ou55H++fcO1v+U8Uxt1kdE2sXgRERPRNNGJrC1Ma3tMoiIakTRPk5EREREdVGNzjgdOHAAGzZswLlz5wAATk5OGDJkCLp3766V4oiIiIh0iaLgVF5ejlGjRiE+Ph5CCDRo0AB6enrYsWMHlixZgtDQUMTGxkKlUmm7XiIiIqJao+hS3cKFCxEXF4dRo0YhOzsb//77L65du4acnByEh4cjLi4OCxcu1HatRERERLVKUXCKjY3F0KFDsXz5ctja2krt1tbWWLhwIV5//XWsXLlSa0USERER6QJFwenChQvo1avXQ/u9vb2RmZmptCYiIiIinaQoONnY2OCPP/54aP+ff/6JZs2aKS6KiIiISBcpCk7Dhg3D8uXLsWDBApSUlEjtpaWl+Prrr/HNN9/gzTff1FqRRERERLpAUXD6+OOP8fLLL+ODDz6AtbU1PD098dJLL8HGxgbh4eHw9vbG9OnTFReVlpYGf39/WFlZwdTUFF5eXli/fr2s5wohsH37dowdOxbu7u6wtLRE/fr10bFjR8TExODu3buK6yIiIqK6TdF2BCYmJti1axe+//57/Pjjj9I+Tl5eXhgyZAjeeust6Okp21szNTUVfn5+MDY2RnBwMMzNzZGUlISgoCBkZ2dj8uTJj3z+vXv34O/vj3r16uHll1+Gn58f7t69i+TkZEyfPh2bN2/G3r17Ub9+fUX1ERERUd
2leANMlUqF4cOHY/jw4VorprS0FGFhYdDT08P+/fvh4eEBAJgxYwY8PT0RGRmJwMBA2NnZPXQOfX19fPrppxg3bhwaNGggtZeUlGDw4MHYunUrlixZgg8++EBrdRMREVHdIPu00K1btx5nHQCAPXv2ICMjA8OGDZNCEwBYWloiMjISxcXFiI+Pf+QchoaGmD59ulpoqmiPiIgAAOzbt0/rtRMREdGzT3ZwateuHXbt2nX/SXp60NfXf+TDwKD6J7P27t0LAPD19dXo8/PzA1Cz0GNoaAgAimojIiIikp0gAgMD4eDgAAAICQl5LLdTSU9PBwA4Oztr9NnY2MDMzEwao8R3330HoPJgRkRERFQV2cFpwYIF0tdxcXGPoxbk5+cDuH9prjIWFhbSmOravn07li9fDldXV4wcOVJxjURERFR3KfroW0pKCnJych7af/78eezcuVNxUdqWlpaGoKAgWFpaYsOGDahXr15tl0RERERPIUWLffr164e4uDi89dZblfbv2rULkyZNwu3bt6s1b8WZpoedVSooKNBY9F2VY8eOwdfXF3p6ekhOTkb79u2r9Xwiqntyrlyq7RKqZGZmjoaNm9R2GUR1juzgtH//fmnxthACUVFRuHDhgsa4e/fuYf369bCysqp2MRVrm9LT09GpUye1vtzcXBQVFcHT01P2fMeOHUOfPn1QXl6OlJQUvPjii9WuiYjqjsKiYkAFfLfsq9oupUqGRkaYNXcRwxPREyY7OKWmpmLmzJkA7u/hlJmZiejo6ErHWlpaYvny5dUuxtvbG5999hlSUlIQHBys1pecnCyNkaMiNJWVlSE5ORkvvfRSteshorrlzr0yQAC2vm1h1EB3N8ktvnkbl1POoKiokMGJ6AmTHZwmTZqE0NBQCCHg4OCA6OjoSje/NDQ0hLW1NfT19atdTK9eveDg4ICEhASEh4dLeznl5+cjJiYGRkZGCAkJkcbn5OQgPz8fzZo1U1tQ/ttvv6FPnz4oLS3Fjh070Llz52rXQkR1l1GD+jBpalbbZRCRDpIdnCwtLaVw4ujoCHd390fu4K2oGAMDxMbGws/PDz169FC75UpWVhbmz58Pe3t7aXxERATi4+OxatUqhIaGAgBu3LiBPn36IC8vD3379sXOnTs1FqpbWVlh0qRJWq2diIiInn2KFofXZC+lqvj4+ODgwYOIiopCYmIiSkpK4Obmhrlz5yIoKKjK5xcUFODmzZsAgB07dmDHjh0aY+zs7BiciIiIqNoUb6H9v//9DwkJCbh69SqEEGp9eXl5KCoqQkpKiqK5PT09sX379irHxcXFaewpZW9vr1EPERERkTYoCk579uxB//79ce/ePQD3F4s/GFZUKhUGDBignQqJiIiIdISiDTBnzJiBpk2b4tixY7h8+TKEENixYwfKy8uxf/9+mJqa8lIYERERPXMUBafjx49jzJgxeP7556Ub55aWlgIAunXrhrCwMEybNk17VRIRERHpAEXBSV9fX9puoFGjRjAyMsKlS/9/p11HR0ecOHFCOxUSERER6QhFwcnb2xurVq1Cfn4+VCoV3N3dsWbNGmmd008//QRra2utFkpERERU2xQFp9mzZ+PSpUt4+eWXAQDh4eE4cOAAHBwc4OjoiB07dmD06NHarJOIiIio1in6VJ2bmxt++eUXbN68GQDw5ptvoqCgAKtWrUJJSQlmz56NqVOnarNOIiIiolqneB8nd3d3uLu7S9+PGzcO48aN00pRRERERLqo2pfqbt68ibS0NGRnZz90TE5ODsLDw2tUGBEREZGuqVZwmjp1Kpo1awYvLy/Y29tj8ODBuHv3rtR/+fJlvPvuu3B0dMSSJUu0XiwRERFRbZIdnL799lt8/vnn6NOnD5YuXYrx48djy5YteO+993Dp0iWMHz8eTk5OWLJkCXr37o1jx449zrqJiIiInjjZa5y+++47+Pj4YOvWrVJb48aNER0djRUrVkAIgX79+mHmzJl44Y
UXHkuxRERERLVJ9hmnU6dOadx/bsiQIQAAIQQOHz6Mn376iaGJiIiInlmyg1N+fj4aNmyo1lbx/Y4dO/DSSy9ptzIiIiIiHaNoA0wiIiKiuqha+zj9+++/uHjxovR9Tk4OACArK0utvUKrVq1qWB4RERGR7qhWcJoyZQqmTJmi0T5mzJhKx5eVlSmrioiIiEgHyQ5OUVFRj7MOIiIiIp3H4EREREQkExeHExEREcnE4EREREQkE4MTERERkUwMTkREREQyMTgRERERycTgRERERCQTgxMRERGRTAxORERERDIxOBERERHJxOBEREREJBODExEREZFMDE5EREREMjE4EREREcmkk8EpLS0N/v7+sLKygqmpKby8vLB+/XrZz8/IyEB0dDQGDhwIW1tbqFQq2NvbP76CiYiIqE4wqO0C/is1NRV+fn4wNjZGcHAwzM3NkZSUhKCgIGRnZ2Py5MlVznHgwAHMnDkT+vr6cHV1RW5u7hOonIiIiJ51OnXGqbS0FGFhYdDT08P+/fuxYsUKLFiwACdOnECbNm0QGRmJrKysKufp0aMHDh8+jMLCQpw8eRKGhoZPoHoiIiJ61ulUcNqzZw8yMjIwbNgweHh4SO2WlpaIjIxEcXEx4uPjq5zHwcEBXl5eMDExeYzVEhERUV2jU8Fp7969AABfX1+NPj8/PwDAvn37nmRJRERERBKdCk7p6ekAAGdnZ40+GxsbmJmZSWOIiIiInjSdCk75+fkA7l+aq4yFhYU0hoiIiOhJ06ngRERERKTLdCo4VZxpethZpYKCgoeejSIiIiJ63HQqOFWsbapsHVNubi6KiooqXf9ERERE9CToVHDy9vYGAKSkpGj0JScnq40hIiIietJ0Kjj16tULDg4OSEhIwPHjx6X2/Px8xMTEwMjICCEhIVJ7Tk4O/v77by4YJyIioidCp265YmBggNjYWPj5+aFHjx5qt1zJysrC/Pnz1e45FxERgfj4eKxatQqhoaFS+/Xr1zFlyhTp+5KSEly/fl1tzPz589G4ceMn8KqIiIjoWaFTwQkAfHx8cPDgQURFRSExMRElJSVwc3PD3LlzERQUJGuOoqIijR3Gb926pdYWHR3N4ERERETVonPBCQA8PT2xffv2KsfFxcUhLi5Oo93e3h5CiMdQGREREdVlOrXGiYiIiEiXMTgRERERycTgRERERCQTgxMRERGRTAxORERERDIxOBERERHJxOBEREREJBODExEREZFMDE5EREREMjE4EREREcnE4EREREQkE4MTERERkUwMTkREREQyMTgRERERycTgRERERCQTgxMRERGRTAxORERERDIxOBERERHJxOBEREREJBODExEREZFMDE5EREREMjE4EREREcnE4EREREQkE4MTERERkUwMTkREREQyMTgRERERycTgRERERCQTgxMRERGRTAxORERERDIxOBERERHJxOBEREREJBODExEREZFMDE5EREREMulkcEpLS4O/vz+srKxgamoKLy8vrF+/vlpz3Lt3D7NmzYKzszOMjY3RvHlzjB49Gv/8889jqpqIiIiedQa1XcB/paamws/PD8bGxggODoa5uTmSkpIQFBSE7OxsTJ48uco5ysvLERAQgOTkZHh5eWHw4MFIT09HbGwsdu/ejSNHjqBJkyZP4NUQERHRs0SnzjiVlpYiLCwMenp62L9/P1asWIEFCxbgxIkTaNOmDSIjI5GVlVXlPPHx8UhOTsbrr7+OQ4cOYc6cOUhKSsLSpUtx/vx5fPTRR0/g1RAREdGzRqeC0549e5CRkYFhw4bBw8NDare0tERkZCSKi4sRHx9f5TwrV64EAHz22WdQqVRS+zvvvAMHBwesWbMGd+7c0Xr9RERE9GzTqUt1e/fuBQD4+vpq9Pn5+QEA9u3b98g57t69i6NHj6Jt27aws7NT61OpVOjTpw+WL1+OY8eOoXv37hrPF0IAAAoKCpS8hCeqsLAQxcUluJCdh6Jb92
q7nEe6nFuA4uISFFy5ibu379Z2OQ9VnHcHxcUlKCwsfCqOgacBj1Pt43GqfTxOH4+n7Vg1NzdXO+FSKaFDAgMDBQBx7NixSvvNzMxEy5YtHznH//73PwFAvPLKK5X2z58/XwAQ3377baX92dnZAgAffPDBBx988FHHHvn5+VVmFZ0645Sfnw/g/qW5ylhYWEhjajLHg+P+q3nz5sjOzpaXOomIiOiZYW5uXuUYnQpOukBPTw8tWrSo7TKIiIhIB+nU4vCKs0QPOxtUUFDw0DNJ1ZnjwXFEREREculUcHJ2dgYApKena/Tl5uaiqKhIGvMwDg4O0NPTq3SOB+euah4iIiKi/9Kp4OTt7Q0ASElJ0ehLTk5WG/MwJiYm8PT0xJkzZzT2fBJCYOfOnTA1NcULL7ygpaqJiIiortCp4NSrVy84ODggISEBx48fl9rz8/MRExMDIyMjhISESO05OTn4+++/NS7LjR49GgAQEREhbS8AAMuXL8f58+fxxhtvwMTE5PG+mKdIZmYmVCoV4uLiZI+dP3/+4y9MC6Kjo7nIn2Szt7fHK6+8orX5Kvvd4jFJ9HTTqeBkYGCA2NhYlJeXo0ePHhg9ejQmT56Mjh074uzZs4iJiYG9vb00PiIiAq6urti0aZPaPMOHD4efnx/Wrl2LLl26YNq0aQgMDMS4cePQunVrfPrpp0/4lT0ZS5cuhUqlwksvvVTjuX7++WdER0fXvKgn4Pbt24iOjpb2AaOny8mTJxEYGAg7OzsYGxvD1tYWffr0weLFi2u7NHoKZGRkSJsbGxsbw8LCAl27dsWiRYu40XEV+LunUJUbFtSCo0ePir59+woLCwthYmIiPD09xbp16zTGDR8+XAAQq1at0ui7e/euiI6OFo6OjsLIyEjY2NiIUaNGidzc3CfwCmpHly5dhL29vQAg0tPTZT+vvLxc3LlzR5SWlkpt48ePF5UdHhcuXBAAxOeff66VmrXh2rVrAoCIiorS6CspKRF37tx58kWRLL/88oswMjISTk5O4pNPPhErV64UM2bMEL6+vsLR0fGJ12NnZyf69++vtfkqfl8efI/iMak927ZtEyYmJsLKykqEh4eLFStWiK+//loEBwcLQ0NDERYWVtsl6ixd+917mujkdgSenp7Yvn17lePi4uIeenmpXr16iIqKQlRUlJar000XLlzAoUOHsHHjRrzzzjtYs2ZNla+9tLQU5eXlMDIygrGx8ROq9MkyMDCAgYFOHuYEYPbs2bC0tERaWhqsrKzU+v7555/aKeox4zGpHRcuXEBwcDDs7OywZ88eNGvWTOobP348zp07h59++qkWK9RtdfF3T1t06lIdKbdmzRo0aNAA/fv3R2BgINasWaPW/+DapC+//BKOjo6oV68eTp06pbEOIzQ0FEuWLAFw/zY1FY//WrFihTTPiy++iLS0NLX+0NBQmJmZ4eLFi3jllVdgZmYGW1tbae6TJ0+iZ8+eMDU1hZ2dHRISEjR+Rl5eHiZNmoSWLVuiXr16cHJywty5c1FeXi69riZNmgAAZs6cKdVacZnxYetJVq9eDU9PT9SvXx8NGjRAjx49Kv1QAj1eGRkZaN++vcYbNwA0bdoUwP0PhHTs2LHS57dt21a6HdODx/iSJUvg4OCA+vXrw9fXF9nZ2RBC4JNPPkGLFi1gYmKCgIAA3Lhxo9J5U1JS4OHhAWNjY7Rr1w4bN27UGHP+/HkMGTIEDRs2RP369eHl5SXrH2oek9oxb948FBUV4dtvv1ULTRWcnJwwceJE6fvS0lJ88skn0nuWvb09IiMjce+e+u1VKta57d27Fy+88AJMTEzg5uYmLQXYuHEj3NzcYGxsjE6dOuGPP/5Qe7423vfkHFt79+6FSqXC+vXrMXv2bLRo0QLGxsbo1asXzp07V+Wfn5zfvQetXr0anTp1gomJCRo2bIjg4GBkZ2erjXn55ZfRoUMHnDp1Cj4+Pqhfvz5sbW0xb948jfkWL16M9u
3bS8f7Cy+8oPFncfnyZbz99tuwtrZGvXr10L59e3z33XdVvrbHrrZPeZF2uLi4iJEjRwohhNi/f78AIH799Vepv+KSQbt27YSDg4OYM2eOWLhwocjKytK4nHDo0CHRp08fAUD88MMP0uPBeZ577jnh5OQk5s6dK+bNmycaN24sWrRoIYqLi6WfOXz4cGFsbCzatWsnxowZI5YsWSK6dOki/azmzZuLDz74QCxevFi0b99e6Ovri/Pnz0vPv3XrlnB3dxeNGjUSkZGR4ptvvhEhISFCpVKJiRMnCiGEKCoqEsuWLRMAxKuvvirVeuLECSGEEFFRURqXHKOjowUA0aVLF/H555+LRYsWiWHDhompU6dq/e+FHs3X11eYm5uLkydPPnTMypUrBQCNMb/++qsAIL7//nshxP8/Nj08PES7du3EF198IT766CNhZGQkvLy8RGRkpOjSpYv46quvRHh4uFCpVGLEiBFqc9rZ2Yk2bdoIKysrMW3aNPHFF18INzc3oaenJ1JSUqRxubm5wtraWpibm4vp06eLL774QnTs2FHo6emJjRs3SuMqu1THY1I7bG1thYODg+zxFUs7AgMDxZIlS0RISIgAIAYNGqQ2zs7OTrRt21Y0a9ZMREdHi4ULFwpbW1thZmYmVq9eLVq1aiXmzJkj5syZIywtLYWTk5MoKytT+zk1ed+Te2ylpqZK78WdOnUSCxcuFNHR0aJ+/frC09Ozyj8POb97FT799FOhUqlEUFCQWLp0qZg5c6Zo3LixsLe3Fzdv3pTGeXt7i+bNm4uWLVuKiRMniqVLl4qePXsKAOLnn3+Wxq1YsUL6u1i+fLlYtGiRGDlypAgPD1f7c2jRooVo2bKlmDVrlli2bJkYOHCgACAWLlxYZc2PE4PTM+DYsWMCgNi5c6cQ4v6apRYtWkjhQoj//wZuYWEh/vnnH7XnV/bmXtUap0aNGokbN25I7f/3f/8nAIitW7dKbRVvVDExMVLbzZs3hYmJiVCpVGrr1v7++2+NdUqffPKJMDU1FWfPnlWrYdq0aUJfX19cvHhRCPHoNU7//UcqPT1d6OnpiVdffVXtza7iz42erJSUFKGvry/09fVF586dxYcffiiSk5PVAnheXp4wNjbWCBHh4eHC1NRUFBUVCSH+/7HZpEkTkZeXJ42LiIgQAETHjh1FSUmJ1P76668LIyMjcffuXanNzs5OABBJSUlSW35+vmjWrJl47rnnpLZJkyYJAOLAgQNSW2FhoWjdurWwt7eXji05wYnHZPXl5+cLACIgIEDW+OPHjwsAYtSoUWrtU6ZMEQDEnj17pLaKY+DQoUNSW3JysgAgTExMRFZWltS+fPlyAUCkpqZKbTV935N7bFUEJ1dXV3Hv3j1p7KJFiyr9j8Z/yfndE0KIzMxMoa+vL2bPnq3WfvLkSWFgYKDW7u3trfafGSGEuHfvnrCxsRGDBw+W2gICAkT79u0fWd/IkSNFs2bNxPXr19Xag4ODhaWlpbh9+/Yjn/848VLdM2DNmjWwtraGj48PgPuX14KCgrBu3TqUlZWpjR08eLB0aasmgoKC0KBBA+n77t27A7h/ivm/Ro0aJX1tZWWFtm3bwtTUFEOHDpXa27ZtCysrK7Xnb9iwAd27d0eDBg1w/fp16dG7d2+UlZVh//791a578+bNKC8vx4wZM6Cnp3748yPiT16fPn1w+PBhDBw4ECdOnMC8efPg5+cHW1tbbNmyBcD9Xf4DAgKwdu1aaXuRsrIyJCYmYtCgQTA1NVWbc8iQIWp3Bqj4lOmbb76ptrbopZdeQnFxMS5fvqz2/ObNm+PVV1+VvrewsEBISAj++OMP5ObmArj/qVNPT09069ZNGmdmZobRo0cjMzMTp06dkv1nwGOy+iruACHnvmLA/b8vAHj//ffV2idPngwAGpfB2rVrh86dO0vfVxxDPXv2RKtWrTTatfm+V91ja8SIETAyMpK+f9R78YPk/O4B9y9NlpeXY+
jQoWrvwzY2NnB2dkZqaqravGZmZnjzzTel742MjODp6alWj5WVFS5duqSxvKOCEAJJSUkYMGAAhBBqP9fPzw/5+fn4/fffH/n6HicGp6dcWVkZ1q1bBx8fH1y4cAHnzp3DuXPn8NJLL+Hq1avYvXu32vjWrVtr5ec++OYBQApRN2/eVGs3NjbWCGqWlpZo0aKFxj8KlpaWas9PT0/Hjh070KRJE7VH7969AShbwJiRkQE9PT20a9eu2s+lx+PFF1/Exo0bcfPmTfz666+IiIhAYWEhAgMDpX8kQkJCcPHiRRw4cAAAsGvXLly9ehVvvfWWxnz/PTYrQlTLli0rbf/vMevk5KRxbLZp0wbA/XVUAJCVlYW2bdtq/GxXV1epXy4ek9VXcbP2wsJCWeOzsrKgp6cHJycntXYbGxtYWVlp/H3V9BiqyftedY8tue/FlZHzu5eeng4hBJydnTXei0+fPq3xPlzZa2zQoIFaPVOnToWZmRk8PT3h7OyM8ePH45dffpH6r127hry8PKxYsULjZ44YMQJA7S5g50c7nnJ79uxBTk4O1q1bh3Xr1mn0r1mzBr6+vtL32tr4U19fv9J28cCGo48aJ+f55eXl6NOnDz788MNKx1b8Y0bPBiMjI7z44ot48cUX0aZNG4wYMQIbNmxAVFQU/Pz8YG1tjdWrV6NHjx5YvXo1bGxspBD9oJocc/R0sLCwQPPmzfG///2vWs+TewavpsfQkzwGtTHno373ysvLoVKpsH379kp/lpmZWbXrcXV1xZkzZ7Bt2zbs2LEDSUlJWLp0KWbMmIGZM2dKH/558803MXz48Ernc3d3l/36tI3B6Sm3Zs0aNG3aVPrExoM2btyITZs24Ztvvqn2vLpwicDR0RFFRUWV/uP4oOrU6ujoiPLycpw6dQoeHh41rJAel4pbIuXk5AC4/2Y8bNgwxMXFYe7cudi8eTPCwsIe+iZdE+fOnYMQQu24Onv2LABIG/Da2dnhzJkzGs/9+++/pX65eEwq88orr2DFihU4fPiw2mW1ytjZ2aG8vBzp6enSmRsAuHr1KvLy8qr19/W4afPYUuK/v3uOjo4QQqB169Za/c+qqakpgoKCEBQUhOLiYrz22muYPXs2IiIi0KRJE5ibm6OsrKzK9//awEt1T7E7d+5g48aNeOWVVxAYGKjxmDBhAgoLC9WuV8tVsW4kLy9Py1XLN3ToUBw+fFi6T+GD8vLyUFpaCgCoX7++1FaVQYMGQU9PD7NmzZL+V1OBZx6evNTU1Er/3CvWpDx4yeKtt97CzZs38c4776CoqEhtHYU2XblyRe1uBAUFBfj+++/h4eEBGxsbAIC/vz9+/fVXHD58WBp369YtrFixAvb29tW67MZjUpkPP/wQpqamGDVqFK5evarRn5GRgUWLFgG4//cFAF9++aXamC+++AIA0L9//8dbbDVo89h6FLm/e6+99hr09fUxc+ZMjfFCCPz777/V/tn/fY6RkRHatWsHIQRKSkqgr6+PwYMHIykpqdKziteuXav2z9QmnnF6im3ZsgWFhYUYOHBgpf1eXl5o0qQJ1qxZU+3bsHTq1AkAEB4eDj8/P+jr6yM4OLjGNVfHBx98gC1btuCVV15BaGgoOnXqhFu3buHkyZP48ccfkZmZicaNG8PExATt2rVDYmIi2rRpg4YNG6JDhw7o0KGDxpxOTk6YPn06PvnkE3Tv3h2vvfYa6tWrh7S0NDRv3hyfffbZE32Ndd27776L27dv49VXX4WLiwuKi4tx6NAhJCYmwt7eXlrPAADPPfccOnTogA0bNsDV1RXPP//8Y6mpTZs2GDlyJNLS0mBtbY3vvvsOV69exapVq6Qx06ZNw9q1a9GvXz+Eh4ejYcOGiI+Px4ULF5CUlKSxyPtReEwq4+joiISEBAQFBcHV1RUhISHo0KGDdAxt2LABoaGhAICOHTti+PDhWLFiBfLy8uDt7Y1ff/0V8fHxGDRokPTBGl2gzWPrUe
T+7jk6OuLTTz9FREQEMjMzMWjQIJibm+PChQvYtGkTRo8ejSlTplTrZ/v6+sLGxgZdu3aFtbU1Tp8+ja+//hr9+/eXFvzPmTMHqampeOmllxAWFoZ27drhxo0b+P3337Fr166H7sH2RDzZD/GRNg0YMEAYGxuLW7duPXRMaGioMDQ0lLYsqOxWKZV9ZLq0tFS8++67okmTJkKlUkkfn37ULVfwn4/VDh8+XJiammqM8/b2rvSjqJXd7qKwsFBEREQIJycnYWRkJBo3biy6dOki5s+fr/ax2UOHDolOnToJIyMjtToq2zNHCCG+++478dxzz4l69eqJBg0aCG9vb2k7B3pytm/fLt5++23h4uIizMzMpFtAvPvuu+Lq1asa4+fNm6fxUe8KDzs2Kz62vWHDBrX2VatWCQAiLS1Naqs4BpOTk4W7u7uoV6+ecHFx0XiuEEJkZGSIwMBAYWVlJYyNjYWnp6fYtm1bpTVVtY+TEDwmlTp79qwICwsT9vb2wsjISJibm4uuXbuKxYsXq201UVJSImbOnClat24tDA0NRcuWLUVERITaGCEeftsdAGL8+PFqbZUdc9p435NzbD3suK7smKtMdX/3kpKSRLdu3YSpqakwNTUVLi4uYvz48eLMmTNVvsbhw4cLOzs76fvly5eLHj16iEaNGol69eoJR0dH8cEHH4j8/Hy15129elWMHz9etGzZUhgaGgobGxvRq1cvsWLFike+tsdNJQTPBRPR02HRokV47733kJmZqfFpIiKiJ4HBiYieCkIIdOzYEY0aNdLYO4aI6EnhGici0mm3bt3Cli1bkJqaipMnT+L//u//arskIqrDeMaJiHRaZmYmWrduDSsrK4wbNw6zZ8+u7ZKIqA5jcCIiIiKSifs4EREREcnE4EREREQkE4MTERERkUwMTkREREQyMTgRERERycTgRERERCQTgxMRERGRTAxORERERDL9P8yxIUBUJQO5AAAAAElFTkSuQmCC\n"
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Eqqnu8Ae7VMb",
        "outputId": "ef094b69-d023-4efe-cbcb-5017249677e2"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "Analyzing ability: Symbolic\n",
            "pvalue      0.7428293587290682\n",
            "statistic   17.0\n"
          ]
        }
      ],
      "source": [
        "all_datasets_df_without_errors = all_datasets_df[\n",
        "    (all_datasets_df[\"sys1_direct_answer\"] != \"error\")\n",
        "    & (all_datasets_df[\"sys2_direct_answer\"] != \"error\")\n",
        "]\n",
        "\n",
        "\n",
        "for ability in all_datasets_df_without_errors[\"ability\"].unique():\n",
        "    print(f\"\\nAnalyzing ability: {ability}\")\n",
        "\n",
        "    ability_df = all_datasets_df_without_errors[\n",
        "        all_datasets_df_without_errors[\"ability\"] == ability\n",
        "    ]\n",
        "\n",
        "    sys1_yes_sys2_yes = ability_df[\n",
        "        (ability_df[\"sys1_direct_answer\"] == \"yes\")\n",
        "        & (ability_df[\"sys2_direct_answer\"] == \"yes\")\n",
        "    ]\n",
        "\n",
        "    sys1_no_sys2_no = ability_df[\n",
        "        (ability_df[\"sys1_direct_answer\"] == \"no\")\n",
        "        & (ability_df[\"sys2_direct_answer\"] == \"no\")\n",
        "    ]\n",
        "\n",
        "    sys1_yes_sys2_no = ability_df[\n",
        "        (ability_df[\"sys1_direct_answer\"] == \"yes\")\n",
        "        & (ability_df[\"sys2_direct_answer\"] == \"no\")\n",
        "    ]\n",
        "\n",
        "    sys1_no_sys2_yes = ability_df[\n",
        "        (ability_df[\"sys1_direct_answer\"] == \"no\")\n",
        "        & (ability_df[\"sys2_direct_answer\"] == \"yes\")\n",
        "    ]\n",
        "\n",
        "    table = np.array(\n",
        "        [\n",
        "            [len(sys1_yes_sys2_yes), len(sys1_yes_sys2_no)],\n",
        "            [len(sys1_no_sys2_yes), len(sys1_no_sys2_no)],\n",
        "        ]\n",
        "    )\n",
        "\n",
        "    result = mcnemar(table, exact=True, correction=True)\n",
        "    print(result)"
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "rnQDdmAD8nA4"
      },
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "py310",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.7"
    },
    "colab": {
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}