{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "R514MKNqG27Y"
   },
   "source": [
    "# WorkArena Knowledge Base\n",
    "Author: Alexandre Drouin (alexandre.drouin@servicenow.com)\n",
    "\n",
    "\n",
    "This notebook contains code to generate:\n",
    "* Knowledge base articles given a list of facts\n",
    "* Questions to query those articles for given facts\n",
    "* Multiple alternative wordings of the expected answers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "R3kofpiH7Bl2",
    "outputId": "d3d48e02-2ded-441f-b632-9fb43976b64d"
   },
   "outputs": [],
   "source": [
    "# %pip installs into the environment of the running kernel.\n",
    "# openai>=1.0 is required: the notebook relies on the `openai.OpenAI` client class.\n",
    "%pip install \"openai>=1.0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PAK3oqCG61PZ"
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import openai\n",
    "import os\n",
    "\n",
    "\n",
    "# Client shared by every chat() call; the API key is read from the environment.\n",
    "client = openai.OpenAI(api_key=os.environ[\"OPENAI_API_KEY\"])\n",
    "\n",
    "\n",
    "def chat(messages, model=\"gpt-4-1106-preview\"):\n",
    "    \"\"\"Request a chat completion and return the message of its first choice.\n",
    "\n",
    "    `messages` and `model` are forwarded unchanged to\n",
    "    `client.chat.completions.create`.\n",
    "    \"\"\"\n",
    "    completion = client.chat.completions.create(model=model, messages=messages)\n",
    "    first_choice = completion.choices[0]\n",
    "    return first_choice.message"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "OGT5ZjrK7HVs"
   },
   "outputs": [],
   "source": [
    "def generate_article(item, value, all_facts, n_retries=5):\n",
    "    \"\"\"Ask the chat model for an HTML knowledge-base article that embeds one fact.\n",
    "\n",
    "    Args:\n",
    "        item: Subject of the fact (e.g. \"CEO's name\").\n",
    "        value: Value of the fact (e.g. \"Alex Johnson\").\n",
    "        all_facts: Every fact of the knowledge base, given to the model so that it\n",
    "            does not contradict them. Dicts holding \"item\" and \"value\" keys are\n",
    "            rendered as \"The <item> is <value>.\" lines, other entries with str().\n",
    "            A plain string is inserted as-is.\n",
    "        n_retries: Maximum number of generation attempts.\n",
    "\n",
    "    Returns:\n",
    "        The article text with <body>/<strong> tags and Markdown fences removed.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If no attempt produced an article containing\n",
    "            \"The <item> is <value>\" (compared case-insensitively).\n",
    "    \"\"\"\n",
    "    # Only the item/value pair of each fact goes into the prompt. Interpolating the\n",
    "    # raw list would also dump any other key stored on the fact dicts (such as an\n",
    "    # already generated \"article\") into every prompt.\n",
    "    if isinstance(all_facts, str):\n",
    "        facts_text = all_facts\n",
    "    else:\n",
    "        facts_text = \"\\n\".join(\n",
    "            f\"The {fact['item']} is {fact['value']}.\"\n",
    "            if isinstance(fact, dict) and \"item\" in fact and \"value\" in fact\n",
    "            else str(fact)\n",
    "            for fact in all_facts\n",
    "        )\n",
    "\n",
    "    prompt = f\"\"\"\n",
    "<task>\n",
    "You are in charge of writing knowledge-base articles to document important company and workplace information.\n",
    "Your articles will be used as reference by the employees of the company.\n",
    "</task>\n",
    "\n",
    "Here is a list of facts in the knowledge base:\n",
    "<all_facts>\n",
    "{facts_text}\n",
    "</all_facts>\n",
    "\n",
    "You need to write an article about this specific fact:\n",
    "<article_fact>\n",
    "The {item} is {value}.\n",
    "</article_fact>\n",
    "\n",
    "<instructions>\n",
    "* Generate a knowledge-base article that contains the <article_fact> as-is. Do not modify or add to its text.\n",
    "* Hide the fact inside a bunch of other related information, but do not source the related information from <all_facts>. Make stuff up.\n",
    "* Make sure that nothing you write contradicts <all_facts>\n",
    "* You must use HTML format. Generate only the <body> tag.\n",
    "* Don't include information about the knowledge base itself or headers like \\\"welcome to our knowledge base\\\".\n",
    "</instructions>\n",
    "\n",
    "\"\"\"\n",
    "    messages = [\n",
    "        {\n",
    "            \"role\": \"system\",\n",
    "            \"content\": \"You are a ServiceNow system administrator in charge of writing knowledge base articles to help employees in your organization.\",\n",
    "        },\n",
    "        {\"role\": \"user\", \"content\": prompt},\n",
    "    ]\n",
    "\n",
    "    # Markup removed from the reply before validation. Both fence spellings are\n",
    "    # listed because str.replace is case-sensitive, and they must come before the\n",
    "    # bare fence so that no stray \"html\" is left behind.\n",
    "    markup_tokens = (\n",
    "        \"<body>\",\n",
    "        \"</body>\",\n",
    "        \"```HTML\",\n",
    "        \"```html\",\n",
    "        \"```\",\n",
    "        \"<strong>\",\n",
    "        \"</strong>\",\n",
    "    )\n",
    "    expected_sentence = f\"the {item} is {value}\".lower()\n",
    "\n",
    "    article = None\n",
    "    print(\"... attempting to produce article\")\n",
    "    for retry in range(n_retries):\n",
    "        print(f\"... try {retry}\")\n",
    "        reply = chat(messages).content\n",
    "        messages.append({\"role\": \"assistant\", \"content\": reply})\n",
    "\n",
    "        candidate = reply\n",
    "        for token in markup_tokens:\n",
    "            candidate = candidate.replace(token, \"\")\n",
    "        candidate = candidate.strip()\n",
    "\n",
    "        # Validate that the fact is included without modification. This is an\n",
    "        # explicit check rather than an assert so that it still runs under -O.\n",
    "        if expected_sentence in candidate.lower():\n",
    "            print(\"... valid article found.\")\n",
    "            article = candidate\n",
    "            break\n",
    "\n",
    "        # Tell the model what went wrong and let it try again\n",
    "        error = f'Error: Could not find the string \"The {item} is {value}\" in this article.'\n",
    "        messages.append(\n",
    "            {\"role\": \"user\", \"content\": error + \"\\nDon't apologize and try again.\"}\n",
    "        )\n",
    "\n",
    "    if article is None:\n",
    "        raise RuntimeError(\n",
    "            f\"Failed to produce a valid article after {n_retries} retries.\"\n",
    "        )\n",
    "\n",
    "    return article"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "BuAPHUc18EDy",
    "outputId": "f58c7b3b-cdef-4c04-a946-be6f817cca80"
   },
   "outputs": [],
   "source": [
    "facts = [\n",
    "    {\"item\": \"password to conference room A-561\", \"value\": \"roo918k\"},\n",
    "    {\"item\": \"address of office #456\", \"value\": \"42, Pizza street, New York, USA\"},\n",
    "    {\"item\": \"number of employees in department X\", \"value\": \"75\"},\n",
    "    {\"item\": \"CEO's name\", \"value\": \"Alex Johnson\"},\n",
    "    {\"item\": \"year of company establishment\", \"value\": \"1998\"},\n",
    "    {\"item\": \"Wi-Fi network name in office #456\", \"value\": \"Office456_WiFi\"},\n",
    "    {\"item\": \"Wi-Fi password for office #456\", \"value\": \"456SecureNet!\"},\n",
    "    {\"item\": \"average annual revenue\", \"value\": \"$50 million\"},\n",
    "    {\"item\": \"number of branches worldwide\", \"value\": \"18\"},\n",
    "    {\"item\": \"name of the head of HR department\", \"value\": \"Samantha Green\"},\n",
    "    {\"item\": \"brand of coffee machine in kitchen #3\", \"value\": \"Delonghi Magnifica\"},\n",
    "    {\"item\": \"company's stock ticker symbol\", \"value\": \"COMPX\"},\n",
    "    {\"item\": \"company's main product\", \"value\": \"Advanced Analytics Software\"},\n",
    "    {\"item\": \"color of the carpet in conference room A-561\", \"value\": \"Navy Blue\"},\n",
    "    {\"item\": \"annual budget for marketing department\", \"value\": \"$2 million\"},\n",
    "    {\"item\": \"capacity of conference room B-762\", \"value\": \"30 people\"},\n",
    "    {\"item\": \"number of floors in the main office building\", \"value\": \"10\"},\n",
    "    {\"item\": \"type of plants in the lobby of office #456\", \"value\": \"Ficus Lyrata\"},\n",
    "    {\n",
    "        \"item\": \"catering service provider for office events\",\n",
    "        \"value\": \"Gourmet Caterers\",\n",
    "    },\n",
    "    {\"item\": \"software used for payroll management\", \"value\": \"QuickBooks Payroll\"},\n",
    "    {\"item\": \"number of parking spaces in office #456 parking lot\", \"value\": \"150\"},\n",
    "    {\"item\": \"company's first product\", \"value\": \"Data Analysis Toolkit\"},\n",
    "    {\"item\": \"brand of computers used in IT department\", \"value\": \"Lenovo ThinkPad\"},\n",
    "    {\"item\": \"annual CSR budget\", \"value\": \"$500,000\"},\n",
    "    {\"item\": \"make of company's official car\", \"value\": \"Tesla Model X\"},\n",
    "    {\"item\": \"name of the company's auditor\", \"value\": \"PriceWaterhouseCoopers\"},\n",
    "    {\"item\": \"number of customer service representatives\", \"value\": \"40\"},\n",
    "    {\n",
    "        \"item\": \"type of air conditioning system in office #456\",\n",
    "        \"value\": \"Centralized HVAC\",\n",
    "    },\n",
    "    {\"item\": \"name of the cafeteria manager in office #456\", \"value\": \"Miguel Torres\"},\n",
    "    {\"item\": \"duration of lunch break in office #456\", \"value\": \"1 hour\"},\n",
    "    {\"item\": \"official language for company communication\", \"value\": \"English\"},\n",
    "    {\"item\": \"number of conference rooms in office #456\", \"value\": \"8\"},\n",
    "    {\"item\": \"average time for IT support response\", \"value\": \"15 minutes\"},\n",
    "    {\"item\": \"brand of printers in office #456\", \"value\": \"HP LaserJet Pro\"},\n",
    "    {\"item\": \"capacity of the largest meeting room\", \"value\": \"50 people\"},\n",
    "    {\"item\": \"number of patents held by the company\", \"value\": \"35\"},\n",
    "    {\"item\": \"company's main competitor\", \"value\": \"TechRivals Inc.\"},\n",
    "    {\"item\": \"company's slogan\", \"value\": \"Innovating the Future\"},\n",
    "    {\"item\": \"number of countries the company operates in\", \"value\": \"12\"},\n",
    "    {\n",
    "        \"item\": \"type of security system at office #456\",\n",
    "        \"value\": \"Biometric Access Control\",\n",
    "    },\n",
    "    {\"item\": \"name of the employee of the month\", \"value\": \"Elena Rodriguez\"},\n",
    "    {\"item\": \"brand of the photocopier in office #456\", \"value\": \"Canon ImageRunner\"},\n",
    "    {\"item\": \"type of coffee provided in the kitchen\", \"value\": \"Arabica Beans\"},\n",
    "    {\"item\": \"annual IT budget\", \"value\": \"$1.5 million\"},\n",
    "    {\"item\": \"name of the legal counsel firm\", \"value\": \"Baker & McKenzie\"},\n",
    "    {\"item\": \"average employee satisfaction score\", \"value\": \"8.5/10\"},\n",
    "    {\"item\": \"number of departments in the company\", \"value\": \"12\"},\n",
    "    {\"item\": \"company's largest client\", \"value\": \"GlobalTech Industries\"},\n",
    "    {\n",
    "        \"item\": \"type of gym equipment in fitness center\",\n",
    "        \"value\": \"Life Fitness machines\",\n",
    "    },\n",
    "    {\"item\": \"average number of yearly hires\", \"value\": \"100\"},\n",
    "    {\"item\": \"brand of air purifiers used in the office\", \"value\": \"Dyson Pure Cool\"},\n",
    "    {\n",
    "        \"item\": \"name of the employee health insurance provider\",\n",
    "        \"value\": \"BlueCross BlueShield\",\n",
    "    },\n",
    "    {\"item\": \"number of projects completed last year\", \"value\": \"22\"},\n",
    "    {\"item\": \"company's main export market\", \"value\": \"European Union\"},\n",
    "    {\n",
    "        \"item\": \"name of the office cleaning service provider\",\n",
    "        \"value\": \"CleanSweep Inc.\",\n",
    "    },\n",
    "    {\"item\": \"type of video conferencing software used\", \"value\": \"Zoom\"},\n",
    "    {\"item\": \"color of the company logo\", \"value\": \"Royal Blue and Silver\"},\n",
    "    {\"item\": \"average client retention rate\", \"value\": \"85%\"},\n",
    "    {\"item\": \"name of the company's largest shareholder\", \"value\": \"David Thompson\"},\n",
    "    {\"item\": \"total square footage of office #456\", \"value\": \"25,000 square feet\"},\n",
    "    {\n",
    "        \"item\": \"type of backup power system in office #456\",\n",
    "        \"value\": \"Diesel Generators\",\n",
    "    },\n",
    "    {\"item\": \"average yearly expenditure on office supplies\", \"value\": \"$80,000\"},\n",
    "    {\"item\": \"name of the cafeteria food supplier\", \"value\": \"FreshFoods Ltd.\"},\n",
    "    {\n",
    "        \"item\": \"type of fire safety system in office #456\",\n",
    "        \"value\": \"Automatic Sprinkler System\",\n",
    "    },\n",
    "    {\"item\": \"company's primary industry\", \"value\": \"Technology\"},\n",
    "    {\"item\": \"number of annual company-wide meetings\", \"value\": \"4\"},\n",
    "    {\"item\": \"brand of smartphones provided to employees\", \"value\": \"Samsung Galaxy\"},\n",
    "    {\"item\": \"name of the software used for project management\", \"value\": \"Trello\"},\n",
    "    {\"item\": \"average number of business trips per employee annually\", \"value\": \"3\"},\n",
    "    {\"item\": \"number of IT support staff\", \"value\": \"15\"},\n",
    "    {\n",
    "        \"item\": \"company's primary social media platform for marketing\",\n",
    "        \"value\": \"LinkedIn\",\n",
    "    },\n",
    "    {\"item\": \"number of active contracts\", \"value\": \"40\"},\n",
    "    {\"item\": \"annual expenditure on R&D\", \"value\": \"$3 million\"},\n",
    "    {\n",
    "        \"item\": \"type of chairs used in conference room A-561\",\n",
    "        \"value\": \"Ergonomic Office Chairs\",\n",
    "    },\n",
    "    {\n",
    "        \"item\": \"name of the most used software by the design team\",\n",
    "        \"value\": \"Adobe Creative Suite\",\n",
    "    },\n",
    "    {\"item\": \"average delivery time for company products\", \"value\": \"5 business days\"},\n",
    "    {\"item\": \"type of lighting in office #456\", \"value\": \"LED Lights\"},\n",
    "    {\"item\": \"average monthly electricity bill for office #456\", \"value\": \"$10,000\"},\n",
    "    {\"item\": \"company's largest expense category\", \"value\": \"Employee Salaries\"},\n",
    "    {\"item\": \"brand of the refrigerator in the kitchen\", \"value\": \"LG InstaView\"},\n",
    "    {\n",
    "        \"item\": \"type of health and safety training provided\",\n",
    "        \"value\": \"First Aid and Fire Safety\",\n",
    "    },\n",
    "    {\n",
    "        \"item\": \"name of the corporate social responsibility program\",\n",
    "        \"value\": \"TechForGood\",\n",
    "    },\n",
    "    {\"item\": \"annual water consumption in office #456\", \"value\": \"50,000 gallons\"},\n",
    "    {\"item\": \"company's policy on remote work\", \"value\": \"Hybrid Model\"},\n",
    "    {\"item\": \"number of customer complaints last year\", \"value\": \"120\"},\n",
    "    {\"item\": \"name of the most popular product\", \"value\": \"Smart Analytics Pro\"},\n",
    "    {\n",
    "        \"item\": \"type of snacks available in the kitchen\",\n",
    "        \"value\": \"Healthy and Organic Snacks\",\n",
    "    },\n",
    "    {\"item\": \"average duration of employee tenure\", \"value\": \"5 years\"},\n",
    "    {\n",
    "        \"item\": \"brand of the security cameras in office #456\",\n",
    "        \"value\": \"Axis Communications\",\n",
    "    },\n",
    "    {\"item\": \"type of heating system in office #456\", \"value\": \"Forced Air Heating\"},\n",
    "    {\n",
    "        \"item\": \"name of the internet service provider for office #456\",\n",
    "        \"value\": \"Comcast Xfinity\",\n",
    "    },\n",
    "    {\n",
    "        \"item\": \"company's policy on environmental sustainability\",\n",
    "        \"value\": \"Zero-Waste Initiatives\",\n",
    "    },\n",
    "    {\n",
    "        \"item\": \"name of the annual team-building retreat location\",\n",
    "        \"value\": \"Lakeview Resort\",\n",
    "    },\n",
    "    {\n",
    "        \"item\": \"type of vending machines in office #456\",\n",
    "        \"value\": \"Cashless Payment Vending\",\n",
    "    },\n",
    "    {\"item\": \"number of team leaders in the company\", \"value\": \"35\"},\n",
    "    {\"item\": \"annual spending on employee training programs\", \"value\": \"$250,000\"},\n",
    "    {\"item\": \"CEO's direct phone line\", \"value\": \"+1-555-1010-2020\"},\n",
    "    {\"item\": \"Wi-Fi network name in the lobby\", \"value\": \"CorporateGuest123\"},\n",
    "    {\"item\": \"last fire drill date at main building\", \"value\": \"2023-06-15\"},\n",
    "    {\"item\": \"number of employees in marketing department\", \"value\": \"35\"},\n",
    "]\n",
    "\n",
    "print(\"Number of facts:\", len(facts))\n",
    "\n",
    "# Snapshot of the bare facts. generate_article receives this instead of `facts`\n",
    "# itself: the loop below adds an \"article\" key to every entry of `facts`, and\n",
    "# passing that list would paste all earlier articles into each later prompt.\n",
    "fact_statements = [{\"item\": f[\"item\"], \"value\": f[\"value\"]} for f in facts]\n",
    "\n",
    "for i, f in enumerate(facts):\n",
    "    f[\"article\"] = generate_article(f[\"item\"], f[\"value\"], all_facts=fact_statements)\n",
    "    print(\"... Article\", i + 1, f[\"item\"], f[\"value\"])\n",
    "    print(f[\"article\"])\n",
    "    print(\"\\n\" * 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 262
    },
    "id": "tDTelvGXWp-P",
    "outputId": "ddb6f999-f127-4e25-bb9f-56037a4fb7ed"
   },
   "outputs": [],
   "source": [
    "# Strip leftover Markdown fences and <strong> tags from every article\n",
    "for f in facts:\n",
    "    f[\"article\"] = (\n",
    "        f[\"article\"]\n",
    "        .replace(\"```HTML\", \"\")\n",
    "        .replace(\"```\", \"\")\n",
    "        .replace(\"<strong>\", \"\")\n",
    "        .replace(\"</strong>\", \"\")\n",
    "        .strip()\n",
    "    )\n",
    "\n",
    "# The context manager closes (and therefore flushes) the file once it is written\n",
    "with open(\"knowledge_base.json\", \"w\") as kb_file:\n",
    "    json.dump(facts, kb_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ctj7CPtAon3w"
   },
   "source": [
    "# Generate questions and answers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "QrPIz4CZ3ier",
    "outputId": "ceb47583-81ea-4f21-9fa5-dff4045d16c9"
   },
   "outputs": [],
   "source": [
    "import concurrent.futures\n",
    "import re\n",
    "\n",
    "from collections import Counter\n",
    "from tqdm.notebook import tqdm\n",
    "\n",
    "\n",
    "# Reload the generated KB (the context manager closes the file after reading)\n",
    "with open(\"knowledge_base.json\", \"r\") as kb_file:\n",
    "    kb = json.load(kb_file)\n",
    "print(\"Loaded\", len(kb), \"articles\")\n",
    "\n",
    "\n",
    "# Clear all questions, and the answers derived from them, on entries that\n",
    "# already carry a \"questions\" key\n",
    "for kb_i in kb:\n",
    "    if \"questions\" in kb_i:\n",
    "        kb_i[\"questions\"] = []\n",
    "        kb_i[\"alternative_answers\"] = []"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Ee_KMC2o3gDD"
   },
   "source": [
    "## Generate several variants of the question"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000,
     "referenced_widgets": [
      "e77eceb6a8624f66ac12cc3df788b37e",
      "ddb341b6b65a4ab19da7c1811a706be7",
      "b2de9413f61b4be7947f9bc0d3f12f41",
      "dd4959d2ace04d5eb2a8c3274a069647",
      "be1c8e816d0743eaa5c49d26e0050e94",
      "6bbe2aa489d6427daca0d0ac04a51f6e",
      "1cfe99f3f2af48e1800ad53f911de327",
      "d7c95fe313c546a6af713d3cb0e004d6",
      "a1221b65b0644cd794dc477d0492979f",
      "511c96890e1a4989bae8390ca627ac37",
      "0e044725d805479b9038d0552f277a7e"
     ]
    },
    "id": "NLERyQk-Xge5",
    "outputId": "0fb2d008-ac09-4cdf-f63d-cc623df09b63"
   },
   "outputs": [],
   "source": [
    "def get_most_frequent_items(my_list):\n",
    "    \"\"\"Return every item that occurs the maximum number of times in `my_list`.\n",
    "\n",
    "    Ties are all returned, in order of first appearance. An empty input\n",
    "    yields an empty list.\n",
    "    \"\"\"\n",
    "    # Count the frequency of each item in the list\n",
    "    frequency = Counter(my_list)\n",
    "\n",
    "    # max() raises ValueError on an empty sequence, so handle that case first\n",
    "    if not frequency:\n",
    "        return []\n",
    "\n",
    "    # Keep all items that reach the maximum frequency\n",
    "    max_frequency = max(frequency.values())\n",
    "    return [item for item, count in frequency.items() if count == max_frequency]\n",
    "\n",
    "\n",
    "def generate_question(article, item, value, n_questions=10, n_retries=5):\n",
    "    \"\"\"Generate rephrasings of \"What is the {item}?\" whose answer is exactly `value`.\n",
    "\n",
    "    The chat model is asked for `n_questions` rephrasings. Its output is checked\n",
    "    for format (no mention of the article, one question per line, no numbering,\n",
    "    formatting instructions after the question mark, answer not leaked), then\n",
    "    each question is tested with `is_question_answerable`. Questions that pass\n",
    "    are kept; the first one that fails stops the round and is sent back to the\n",
    "    model as feedback. Rounds repeat until enough questions were gathered.\n",
    "\n",
    "    Args:\n",
    "        article: Text of the knowledge-base article.\n",
    "        item: Subject of the fact.\n",
    "        value: Expected answer.\n",
    "        n_questions: Number of questions to return.\n",
    "        n_retries: NOTE(review): accepted but not used; the loop below has no\n",
    "            upper bound on the number of rounds.\n",
    "\n",
    "    Returns:\n",
    "        A list of at most `n_questions` validated questions (unordered, since\n",
    "        they are accumulated in a set).\n",
    "    \"\"\"\n",
    "    prompt = f\"\"\"\n",
    "    Here is an article taken from a company knowledge base.\n",
    "\n",
    "    <article>\n",
    "    {article}\n",
    "    </article>\n",
    "\n",
    "    This article contains the following fact:\n",
    "    <fact>\n",
    "    The {item} is {value}.\n",
    "    </fact>\n",
    "\n",
    "    Here is a question about this fact to which the answer is \\\"{value}\\\":\n",
    "    <question>\n",
    "    What is the {item}?\n",
    "    </question>\n",
    "\n",
    "    Produce {n_questions} rephrasings of this question.\n",
    "\n",
    "    <instructions>\n",
    "    * Make sure that your questions are precise and unambiguous.\n",
    "    * It must be clear that the questions are asking about \\\"{item}\\\" and\n",
    "      their answer must still be exactly \\\"{value}\\\".\n",
    "    * Make sure that you provide clear and specific instructions on the expected\n",
    "      format for the answer (e.g., Day, Month, Year).\n",
    "    * You cannot, in any circumstances, reveal information from the answer in the question or instructions.\n",
    "    * Make sure they are questions that end with a question mark.\n",
    "    * Make sure that your questions do not mention the article (e.g., \\\"in the article\\\").\n",
    "    * Answer with one per line and do not number them.\n",
    "    </instructions>\n",
    "\n",
    "    <example>\n",
    "    Suppose the question is \\\"On which day was the company founded?\\\" and the answer is \\\"January 26, 2024\\\",\n",
    "    then a good rephrasing would be \\\"When was the company founded? Answer with Month Day, Year\\\".\n",
    "    </example>\n",
    "\n",
    "    <example>\n",
    "    Suppose the question is \\\"What kind of fridge do we have in the cafeteria?\\\" and the answer is \\\"LG X456\\\",\n",
    "    then a good rephrasing would be \\\"Which type of fridge is in the cafeteria? Answer with Brand followed by Model name\\\".\n",
    "    </example>\n",
    "\n",
    "    <example>\n",
    "    Suppose the question is \\\"Where is the company headquarter located?\\\" and the answer is \\\"123, Banana Street, Montreal, Canada\\\",\n",
    "    then a good rephrasing would be \\\"What's the address of the company headquarter? Answer with Number, Street, City, Country\\\".\n",
    "    </example>\n",
    "\n",
    "    <example>\n",
    "    Suppose the question is \\\"What were the total sales of the company in 2023?\\\" and the answer is \\\"150B$\\\",\n",
    "    then a good rephrasing would be \\\"What do the company sales for 2023 sum up to? Answer with NumberB$, where B is billions\\\".\n",
    "    </example>\n",
    "    \"\"\"\n",
    "    good_questions = set()\n",
    "    messages = [{\"role\": \"user\", \"content\": prompt}]\n",
    "    while len(good_questions) < n_questions:\n",
    "        try:\n",
    "            # Generate questions\n",
    "            questions = chat(messages).content\n",
    "\n",
    "            # Parse output\n",
    "            # ... check the article is not mentioned\n",
    "            assert (\n",
    "                \"article\" not in questions.lower()\n",
    "            ), \"You are not allowed to mention the article in your rephrasing of the questions.\"\n",
    "            # ... check the number of actual questions included\n",
    "            assert (\n",
    "                questions.count(\"?\") == n_questions\n",
    "            ), f\"I couldn't find {n_questions} question marks in the output. Make sure all questions end with ?\"\n",
    "            # ... detect numbered questions: a line opening with \"<digits>.\" or\n",
    "            #     \"<digits>)\" followed by text, whatever the value of n_questions\n",
    "            assert not re.search(\n",
    "                r\"^\\s*\\d+[.)][ \\t]+\\S\", questions, flags=re.MULTILINE\n",
    "            ), \"The questions appear to be numbered, but they should not.\"\n",
    "            # ... drop blank and whitespace-only lines so they do not count as questions\n",
    "            questions = [q.strip() for q in questions.split(\"\\n\") if q.strip()]\n",
    "            # ... check one question per line\n",
    "            assert (\n",
    "                len(questions) == n_questions\n",
    "            ), f\"Your answer is not {n_questions} lines long. Make sure it contains {n_questions} questions and one per line.\"\n",
    "            # ... check each question has instructions\n",
    "            assert all(\n",
    "                len(q.split(\"?\")) > 1 and len(\"\".join(q.split(\"?\")[1:]).strip()) > 5\n",
    "                for q in questions\n",
    "            ), f'Make sure you provide clear formatting instructions for each question (e.g., \"Question? Answer with\").'\n",
    "            # ... check that answer is not mentioned\n",
    "            for q in questions:\n",
    "                error = \"Do not include the answer in the questions/instructions!\"\n",
    "                # ... exact value is not in the question\n",
    "                assert value.lower() not in q.lower(), error\n",
    "\n",
    "            # Validate questions\n",
    "            bad_questions = []\n",
    "            bad_answers = []\n",
    "            for q in questions:\n",
    "                print(\"... testing:\", q, end=\" \")\n",
    "                success, answers = is_question_answerable(article, q, value)\n",
    "                if not success:\n",
    "                    bad_questions.append(q)\n",
    "                    bad_answers.append(answers)\n",
    "                    print(\"FAIL\")\n",
    "                    print(\"... preemptively stopping to give feedback\")\n",
    "                    break\n",
    "                else:\n",
    "                    good_questions.add(q)\n",
    "                    print(\"PASS\")\n",
    "\n",
    "            # Give feedback and retry\n",
    "            if len(bad_questions) > 0:\n",
    "                feedback = \"\"\n",
    "                for q, a in zip(bad_questions, bad_answers):\n",
    "                    feedback += \"<ambiguous_question>\\n\"\n",
    "                    feedback += \"    <statement>\\n\"\n",
    "                    feedback += \"        \" + q + \"\\n\"\n",
    "                    feedback += \"    </statement>\\n\"\n",
    "                    feedback += \"    <incorrect_answers>\\n\"\n",
    "                    feedback += \"\\n\".join(\"        \" + x for x in a) + \"\\n\"\n",
    "                    feedback += \"    </incorrect_answers>\\n\"\n",
    "                    feedback += \"</ambiguous_question>\\n\"\n",
    "                print(feedback)\n",
    "\n",
    "                bad_questions = \"\\n\".join(bad_questions)\n",
    "                messages.append(\n",
    "                    {\n",
    "                        \"role\": \"user\",\n",
    "                        \"content\": f\"\"\"\n",
    "                The following questions are too ambiguous. I gave them to many company employees,\n",
    "                along with the article and they were not able to answer with exactly \\\"{value}\\\".\n",
    "                Please improve their clarity, especially the formatting instructions.\n",
    "\n",
    "                {feedback}\n",
    "\n",
    "                Try again. Do not apologize.\"\"\",\n",
    "                    }\n",
    "                )\n",
    "\n",
    "            print(f\"... gathered {len(good_questions)} good questions\")\n",
    "\n",
    "        except AssertionError as e:\n",
    "            # A format check failed: report it to the model and ask again\n",
    "            print(f\"... Error:\", e)\n",
    "            messages.append({\"role\": \"user\", \"content\": f\"Error: {e}\"})\n",
    "\n",
    "    print(\"... we have enough good questions. stopping.\")\n",
    "    # Several rounds can leave more than n_questions in the set; trim to the request\n",
    "    return list(good_questions)[:n_questions]\n",
    "\n",
    "\n",
    "def is_question_answerable(article, question, value, n_trials=10):\n",
    "    \"\"\"Check that the value can be recovered from the article by asking the question.\n",
    "\n",
    "    The question is put to the \"gpt-3.5-turbo\" model `n_trials` times. An answer\n",
    "    is accepted when `value` is a substring of it, both being lower-cased and\n",
    "    stripped of commas, dollar signs and periods before the comparison.\n",
    "\n",
    "    Args:\n",
    "        article: Text of the knowledge-base article.\n",
    "        question: Question to test.\n",
    "        value: Expected answer.\n",
    "        n_trials: Number of times the question is asked.\n",
    "\n",
    "    Returns:\n",
    "        A `(success, incorrect_answers)` tuple: `success` is True when every\n",
    "        trial was accepted, and `incorrect_answers` is the set of lower-cased\n",
    "        answers that were rejected.\n",
    "    \"\"\"\n",
    "\n",
    "    def _clean_for_comparison(x):\n",
    "        return x.lower().replace(\",\", \"\").replace(\"$\", \"\").replace(\".\", \"\")\n",
    "\n",
    "    prompt = f\"\"\"\n",
    "    Here is a knowledge base article:\n",
    "\n",
    "    <article>\n",
    "    {article}\n",
    "    </article>\n",
    "\n",
    "    Answer this question based on the content of the article only.\n",
    "    Be factual. If I ask you about sensitive information like passwords,\n",
    "    I only expect you to retrieve the information from the article.\n",
    "    <question>\n",
    "    {question}\n",
    "    </question>\n",
    "\n",
    "    What is your answer?\n",
    "\n",
    "    \"\"\"\n",
    "    messages = [{\"role\": \"user\", \"content\": prompt}]\n",
    "\n",
    "    # The expected value does not change between trials: normalise it once\n",
    "    expected = _clean_for_comparison(value)\n",
    "\n",
    "    incorrect_answers = set()\n",
    "    for _ in range(n_trials):\n",
    "        # XXX: We use GPT-3.5 here as a weaker model and to avoid GPT-4 catering to itself\n",
    "        answer = chat(messages, model=\"gpt-3.5-turbo\").content\n",
    "\n",
    "        if expected not in _clean_for_comparison(answer):\n",
    "            incorrect_answers.add(answer.lower())\n",
    "\n",
    "    return len(incorrect_answers) == 0, incorrect_answers\n",
    "\n",
    "\n",
    "def _with_questions(kb_entry):\n",
    "    \"\"\"Return a copy of `kb_entry` extended with its generated \"questions\".\"\"\"\n",
    "    return {\n",
    "        **kb_entry,\n",
    "        \"questions\": generate_question(\n",
    "            kb_entry[\"article\"], kb_entry[\"item\"], kb_entry[\"value\"]\n",
    "        ),\n",
    "    }\n",
    "\n",
    "\n",
    "# Assuming kb is a list of dictionaries\n",
    "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
    "    # Prepare the futures, one per KB entry and in KB order\n",
    "    futures = [executor.submit(_with_questions, kb_i) for kb_i in kb]\n",
    "\n",
    "    # Use tqdm to create a progress bar that advances as tasks finish\n",
    "    for finished_future in tqdm(\n",
    "        concurrent.futures.as_completed(futures), total=len(futures)\n",
    "    ):\n",
    "        pass\n",
    "\n",
    "    # Collect the results in submission order. as_completed yields futures in\n",
    "    # completion order, which would shuffle the KB entries from run to run.\n",
    "    kb = [future.result() for future in futures]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "oemy0eUjYGuj",
    "outputId": "cafab8cd-e822-48db-938a-2ab8fa3452b6"
   },
   "outputs": [],
   "source": [
    "# Show the questions gathered for every KB entry, labelled by position in `kb`\n",
    "for i, entry in enumerate(kb):\n",
    "    entry_questions = entry[\"questions\"]\n",
    "    print(f\"Article {i}:\", entry_questions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Re4HGFh6ow7-"
   },
   "source": [
    "## Generate a few alternative answers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 66,
     "referenced_widgets": [
      "5f147ed1bb86428397e25e5f4bbf14f4",
      "ca30f6701b8c44409a5c7356c264e3e6",
      "d304373e7bef447f919b083a64547a58",
      "4c8d2b15f0d84fe3b7adc5feeae9b6db",
      "f82189b7bd4944c0999ac01600efa4ae",
      "e69716214e49423898fdc173108cadbe",
      "483996fae22d4d2e89f83c05c7db6e76",
      "fdd79cc467e44d849d31b43b416bcb20",
      "eba164765ad544c38a7679d494343551",
      "571e7187a94d4860915f70f85654573b",
      "16997d3f7a414ec79bcca88298fac743"
     ]
    },
    "id": "Dxjc0HQOo25w",
    "outputId": "fdbd7723-1dda-4eda-f0ee-281fc051d04c"
   },
   "outputs": [],
   "source": [
    "def alternative_answers(article, item, value, questions, n_answers=10):\n",
    "    \"\"\"Ask the chat model for `n_answers` distinct rewordings of `value`.\n",
    "\n",
    "    The model's reply is rejected, and the reason sent back to it, until it holds\n",
    "    exactly `n_answers` non-blank lines that are all different and do not look\n",
    "    numbered. There is no upper bound on the number of attempts.\n",
    "\n",
    "    Args:\n",
    "        article: Not used by this function.\n",
    "        item: Not used by this function.\n",
    "        value: The exact answer to reformulate.\n",
    "        questions: Questions whose answer is `value`; joined with newlines and\n",
    "            included in the prompt.\n",
    "        n_answers: Number of rewordings requested.\n",
    "\n",
    "    Returns:\n",
    "        A list of `n_answers` unique strings.\n",
    "    \"\"\"\n",
    "    questions = \"\\n\".join(questions)\n",
    "\n",
    "    prompt = f\"\"\"\n",
    "    Here is a set of questions:\n",
    "    <questions>\n",
    "    {questions}\n",
    "    </questions>\n",
    "\n",
    "    The exact answer to all of these questions is:\n",
    "    <answer>\n",
    "    {value}\n",
    "    </answer>\n",
    "\n",
    "    Give me {n_answers} other ways to spell out the answer.\n",
    "    Don't add context words around it or anything, just reformulate it.\n",
    "\n",
    "    <example>\n",
    "    Initial value: 5.5/10\n",
    "    Reformulated: 5.5 out of 10\n",
    "    Reformulated: 55%\n",
    "    Reformulated: fifty-five percent\n",
    "    </example>\n",
    "\n",
    "    <example>\n",
    "    Initial value: Tesla Model X\n",
    "    Reformulated: Model X by Tesla\n",
    "    Reformulated: Tesla's Model X\n",
    "    </example>\n",
    "\n",
    "    <example>\n",
    "    Initial value: 150$\n",
    "    Reformulated: one hundred fifty dollars\n",
    "    Reformulated: 150.00$\n",
    "    Reformulated: $150\n",
    "    </example>\n",
    "\n",
    "    Answer with one per line. Don't number them.\n",
    "\n",
    "    \"\"\"\n",
    "    messages = [{\"role\": \"user\", \"content\": prompt}]\n",
    "\n",
    "    while True:\n",
    "        try:\n",
    "            answers = chat(messages).content\n",
    "\n",
    "            # Validation\n",
    "            # ... heuristic to detect numbered answers: \"0.\", \"1.\", \"2.\" and \"3.\"\n",
    "            #     all occur somewhere in the reply\n",
    "            assert not all(\n",
    "                f\"{x}.\" in answers for x in range(4)\n",
    "            ), \"The answers appear to be numbered, but they should not.\"\n",
    "            # ... drop blank and whitespace-only lines so they do not count as answers\n",
    "            answers = [a.strip() for a in answers.split(\"\\n\") if a.strip()]\n",
    "            # ... check one answer per line (the message reports the expected\n",
    "            #     count, not the parsed list)\n",
    "            assert (\n",
    "                len(answers) == n_answers\n",
    "            ), f\"Your response is not {n_answers} lines long. Make sure it contains {n_answers} answers and one per line.\"\n",
    "            # ... check that no answer is repeated\n",
    "            assert (\n",
    "                len(set(answers)) == n_answers\n",
    "            ), f\"You provided duplicate values. There were only {len(set(answers))} unique values.\"\n",
    "            break\n",
    "        except AssertionError as e:\n",
    "            # A check failed: report it to the model and ask again\n",
    "            print(f\"... Error:\", e)\n",
    "            messages.append({\"role\": \"user\", \"content\": f\"Error: {e}\"})\n",
    "\n",
    "    return answers\n",
    "\n",
    "\n",
    "# Generate the alternative answer wordings for every knowledge-base entry\n",
    "for kb_i in tqdm(kb, total=len(kb)):\n",
    "    kb_i[\"alternative_answers\"] = alternative_answers(\n",
    "        article=kb_i[\"article\"],\n",
    "        item=kb_i[\"item\"],\n",
    "        value=kb_i[\"value\"],\n",
    "        questions=kb_i[\"questions\"],\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "CFi8jwUhFUDP",
    "outputId": "08a3c5c4-5da9-4fc4-a443-0e2ac1891f7e"
   },
   "outputs": [],
   "source": [
    "# Print each entry's alternative answers for a quick visual check\n",
    "for kb_entry in kb:\n",
    "    print(kb_entry[\"alternative_answers\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "i-CbZL-do1fX"
   },
   "source": [
    "## Save it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Uww5Ut2rfR4f"
   },
   "outputs": [],
   "source": [
    "# Use a context manager so the file is flushed and closed deterministically\n",
    "with open(\"knowledge_base.json\", \"w\") as kb_file:\n",
    "    json.dump(kb, kb_file)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "0e044725d805479b9038d0552f277a7e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "16997d3f7a414ec79bcca88298fac743": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "1cfe99f3f2af48e1800ad53f911de327": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "483996fae22d4d2e89f83c05c7db6e76": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "4c8d2b15f0d84fe3b7adc5feeae9b6db": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_571e7187a94d4860915f70f85654573b",
      "placeholder": "​",
      "style": "IPY_MODEL_16997d3f7a414ec79bcca88298fac743",
      "value": " 100/100 [15:04&lt;00:00,  4.18s/it]"
     }
    },
    "511c96890e1a4989bae8390ca627ac37": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "571e7187a94d4860915f70f85654573b": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "5f147ed1bb86428397e25e5f4bbf14f4": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_ca30f6701b8c44409a5c7356c264e3e6",
       "IPY_MODEL_d304373e7bef447f919b083a64547a58",
       "IPY_MODEL_4c8d2b15f0d84fe3b7adc5feeae9b6db"
      ],
      "layout": "IPY_MODEL_f82189b7bd4944c0999ac01600efa4ae"
     }
    },
    "6bbe2aa489d6427daca0d0ac04a51f6e": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "a1221b65b0644cd794dc477d0492979f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "b2de9413f61b4be7947f9bc0d3f12f41": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_d7c95fe313c546a6af713d3cb0e004d6",
      "max": 100,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_a1221b65b0644cd794dc477d0492979f",
      "value": 100
     }
    },
    "be1c8e816d0743eaa5c49d26e0050e94": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "ca30f6701b8c44409a5c7356c264e3e6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_e69716214e49423898fdc173108cadbe",
      "placeholder": "​",
      "style": "IPY_MODEL_483996fae22d4d2e89f83c05c7db6e76",
      "value": "100%"
     }
    },
    "d304373e7bef447f919b083a64547a58": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_fdd79cc467e44d849d31b43b416bcb20",
      "max": 100,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_eba164765ad544c38a7679d494343551",
      "value": 100
     }
    },
    "d7c95fe313c546a6af713d3cb0e004d6": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "dd4959d2ace04d5eb2a8c3274a069647": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_511c96890e1a4989bae8390ca627ac37",
      "placeholder": "​",
      "style": "IPY_MODEL_0e044725d805479b9038d0552f277a7e",
      "value": " 100/100 [23:30&lt;00:00, 59.58s/it]"
     }
    },
    "ddb341b6b65a4ab19da7c1811a706be7": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_6bbe2aa489d6427daca0d0ac04a51f6e",
      "placeholder": "​",
      "style": "IPY_MODEL_1cfe99f3f2af48e1800ad53f911de327",
      "value": "100%"
     }
    },
    "e69716214e49423898fdc173108cadbe": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "e77eceb6a8624f66ac12cc3df788b37e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_ddb341b6b65a4ab19da7c1811a706be7",
       "IPY_MODEL_b2de9413f61b4be7947f9bc0d3f12f41",
       "IPY_MODEL_dd4959d2ace04d5eb2a8c3274a069647"
      ],
      "layout": "IPY_MODEL_be1c8e816d0743eaa5c49d26e0050e94"
     }
    },
    "eba164765ad544c38a7679d494343551": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "f82189b7bd4944c0999ac01600efa4ae": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "fdd79cc467e44d849d31b43b416bcb20": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
