{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Automatically generate Q & A pairs from the WikiData graph\n",
    "\n",
    "See README.md for more information."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/notebooks/data-augmentation/wikidata-qa/wikidata.ipynb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# uncomment and run below lines to set up if running in colab\n",
    "# !git clone https://github.com/LAION-AI/Open-Assistant.git\n",
    "# %cd Open-Assistant/notebooks/data-augmentation/wikidata-qa\n",
    "# !pip install -r requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "import datetime\n",
    "import time\n",
    "from copy import deepcopy\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "from typing import Optional, Any"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class WikiGraph:\n",
    "    HEADER = {\n",
    "        \"User-Agent\": \"Mozilla/5.0 (compatible; WikiDataGraphCrawler/0.1)\",\n",
    "    }\n",
    "    TIMER = 200  # wait ms between calls\n",
    "\n",
    "    def __init__(self, file: Optional[str] = None, language: str = \"en\", seed: int = 12345678) -> None:\n",
    "        self.file = file\n",
    "        assert language in (\"en\",), f\"This language is not yet supported: {language}\"\n",
    "        self.language = language\n",
    "        self.cache = {self.language: {}}\n",
    "        np.random.seed(seed)\n",
    "        self.calls = 0\n",
    "        self.last_call = 0\n",
    "        if self.file:\n",
    "            self._load()\n",
    "\n",
    "    def _save(self) -> None:\n",
    "        if not self.file:\n",
    "            return\n",
    "        df = {\"language\": [], \"qid\": [], \"depth\": [], \"desc\": [], \"graph\": []}\n",
    "        for lang in self.cache:\n",
    "            for qid in self.cache[lang]:\n",
    "                for depth in self.cache[lang][qid]:\n",
    "                    df[\"language\"].append(lang)\n",
    "                    df[\"qid\"].append(qid)\n",
    "                    df[\"depth\"].append(depth)\n",
    "                    df[\"desc\"].append(json.dumps(self.cache[lang][qid][depth][\"desc\"]))\n",
    "                    df[\"graph\"].append(json.dumps(self.cache[lang][qid][depth][\"graph\"]))\n",
    "        df = pd.DataFrame(df)\n",
    "        df.to_csv(self.file, index=False)\n",
    "\n",
    "    def _load(self) -> None:\n",
    "        assert self.file\n",
    "        try:\n",
    "            df = pd.read_csv(self.file)\n",
    "        except FileNotFoundError:\n",
    "            return\n",
    "        self.cache = {}\n",
    "        for index, row in df.iterrows():\n",
    "            if row[\"language\"] not in self.cache:\n",
    "                self.cache[row[\"language\"]] = {}\n",
    "            if row[\"qid\"] not in self.cache[row[\"language\"]]:\n",
    "                self.cache[row[\"language\"]][row[\"qid\"]] = {}\n",
    "            self.cache[row[\"language\"]][row[\"qid\"]][row[\"depth\"]] = {\n",
    "                \"desc\": row[\"desc\"] if isinstance(row[\"desc\"], dict) else json.loads(row[\"desc\"]),\n",
    "                \"graph\": row[\"graph\"] if isinstance(row[\"graph\"], dict) else json.loads(row[\"graph\"]),\n",
    "            }\n",
    "\n",
    "    def _get(self, params: dict) -> dict:\n",
    "        self.calls += 1\n",
    "        diff = max(0.0, self.TIMER - (time.time() - self.last_call))\n",
    "        if diff:\n",
    "            time.sleep(diff / 1000.0)\n",
    "        data = requests.get(\"https://www.wikidata.org/w/api.php\", headers=self.HEADER, params=params)\n",
    "        self.last_call = time.time()\n",
    "        result = data.json()\n",
    "        if \"error\" in result:\n",
    "            raise Exception(result[\"error\"][\"code\"], result[\"error\"][\"info\"])\n",
    "        return result\n",
    "\n",
    "    def search(self, query: str) -> list:\n",
    "        params = {\"action\": \"wbsearchentities\", \"search\": query.strip(), \"language\": self.language, \"format\": \"json\"}\n",
    "        result = self._get(params)\n",
    "\n",
    "        if \"search\" not in result or not result[\"search\"]:\n",
    "            return []\n",
    "        output = []\n",
    "        for item in result[\"search\"]:\n",
    "            allow = False\n",
    "            if \"display\" in item:\n",
    "                if \"label\" in item[\"display\"]:\n",
    "                    if \"language\" in item[\"display\"][\"label\"] and item[\"display\"][\"label\"][\"language\"] == self.language:\n",
    "                        allow = True\n",
    "                if not allow and \"description\" in item[\"display\"]:\n",
    "                    if (\n",
    "                        \"language\" in item[\"display\"][\"description\"]\n",
    "                        and item[\"display\"][\"description\"][\"language\"] == self.language\n",
    "                    ):\n",
    "                        allow = True\n",
    "            if not allow and \"match\" in item:\n",
    "                if \"language\" in item[\"match\"] and item[\"match\"][\"language\"] == self.language:\n",
    "                    allow = True\n",
    "            if allow:\n",
    "                output.append({key: item[key] if key in item else \"\" for key in [\"id\", \"label\", \"description\"]})\n",
    "        return output\n",
    "\n",
    "    def _fetch(self, qid: str, depth: int = 1) -> str:\n",
    "        qid = qid.upper() if isinstance(qid, str) else f\"Q{qid}\"\n",
    "        if qid in self.cache[self.language] and self.cache[self.language][qid]:\n",
    "            largest = int(sorted(self.cache[self.language][qid].keys())[-1])\n",
    "            if largest >= depth:\n",
    "                return self.cache[self.language][qid][largest][\"desc\"][\"label\"]\n",
    "        else:\n",
    "            self.cache[self.language][qid] = {}\n",
    "\n",
    "        params = {\"action\": \"wbgetentities\", \"ids\": qid, \"language\": self.language, \"format\": \"json\"}\n",
    "        result = self._get(params)\n",
    "\n",
    "        if \"entities\" not in result or qid not in result[\"entities\"] or not result[\"entities\"][qid]:\n",
    "            raise ValueError(f\"No entities found for {qid}\")\n",
    "\n",
    "        hit = result[\"entities\"][qid]\n",
    "        desc = {\"qid\": qid, \"language\": self.language, \"label\": \"\", \"aliases\": [], \"description\": \"\"}\n",
    "        if \"labels\" in hit and self.language in hit[\"labels\"] and \"value\" in hit[\"labels\"][self.language]:\n",
    "            desc[\"label\"] = hit[\"labels\"][self.language][\"value\"]\n",
    "        # elif self.language != \"en\" and \"en\" in hit[\"labels\"] and \"value\" in hit[\"labels\"][\"en\"]:\n",
    "        #    desc[\"label\"] = hit[\"labels\"][\"en\"][\"value\"]\n",
    "\n",
    "        if \"aliases\" in hit and self.language in hit[\"aliases\"]:\n",
    "            desc[\"aliases\"] = [item[\"value\"] for item in hit[\"aliases\"][self.language] if \"value\" in item]\n",
    "        if (\n",
    "            \"descriptions\" in hit\n",
    "            and self.language in hit[\"descriptions\"]\n",
    "            and \"value\" in hit[\"descriptions\"][self.language]\n",
    "        ):\n",
    "            desc[\"description\"] = hit[\"descriptions\"][self.language][\"value\"]\n",
    "\n",
    "        graph = {}\n",
    "        if \"claims\" in hit and depth > 0:\n",
    "            for key in tqdm(hit[\"claims\"]):\n",
    "                if \"datavalue\" not in hit[\"claims\"][key][0][\"mainsnak\"]:\n",
    "                    continue\n",
    "                results = []\n",
    "                for elem in hit[\"claims\"][key]:\n",
    "                    item = elem[\"mainsnak\"][\"datavalue\"][\"value\"]\n",
    "                    if isinstance(item, dict) and \"id\" in item and item[\"id\"] == qid:\n",
    "                        results.append(desc[\"label\"])\n",
    "                    else:\n",
    "                        results.append(self._parse(item, qid, depth))\n",
    "                graph[key] = results\n",
    "\n",
    "        self.cache[self.language][qid][depth] = {\"desc\": deepcopy(desc), \"graph\": deepcopy(graph)}\n",
    "        self._save()\n",
    "\n",
    "        return desc[\"label\"]\n",
    "\n",
    "    def _parse(self, item: Any, qid: str, depth: int) -> str:\n",
    "        result = \"\"\n",
    "        if isinstance(item, dict):\n",
    "            if \"amount\" in item:\n",
    "                unit = item[\"unit\"].split(\"/Q\")[-1] if \"unit\" in item else \"\"\n",
    "                result = item[\"amount\"][(1 if item[\"amount\"][0] == \"+\" else 0) :]\n",
    "                if unit and unit != \"1\":\n",
    "                    unit = f\"Q{unit}\"\n",
    "                    if unit == qid:\n",
    "                        unit = desc[\"label\"]\n",
    "                    else:\n",
    "                        unit = self._fetch(unit, 0)\n",
    "                    if unit:\n",
    "                        result = f\"{result} {unit}\"\n",
    "            if \"latitude\" in item and \"longitude\" in item:\n",
    "                result = f'{item[\"latitude\"]} {item[\"longitude\"]}'\n",
    "            elif \"time\" in item:\n",
    "                result = str(item[\"time\"])\n",
    "                if \"T00:00:00Z\" in result:\n",
    "                    result = result.split(\"T00:00:00Z\")[0]\n",
    "                    if \"-00-00\" in result:\n",
    "                        result = result.split(\"-00-00\")[0]\n",
    "                if result[0] == \"+\":\n",
    "                    result = result[1:]\n",
    "                elif result[0] == \"-\":\n",
    "                    if self.language == \"en\":\n",
    "                        result = f\"{result} BC\"\n",
    "            elif \"id\" in item:\n",
    "                result = self._fetch(item[\"id\"], depth - 1)\n",
    "        elif isinstance(item, (str, int, float, bool)):\n",
    "            result = str(item)\n",
    "        return result\n",
    "\n",
    "    def _zalgo(self, question: str) -> str:\n",
    "        if len(question) > 2 and np.random.choice([True, False]):\n",
    "            if np.random.choice([True, False]):\n",
    "                # make it lowercase or all caps\n",
    "                if np.random.choice([True, False]):\n",
    "                    question = question.upper()\n",
    "                else:\n",
    "                    question = question.lower()\n",
    "            if np.random.choice([True, False]):\n",
    "                # add typo: remove characters at random\n",
    "                question = \"\".join([c for c, v in zip(question, np.random.normal(0, 1, len(question))) if v < 3.0])\n",
    "            if np.random.choice([True, False]):\n",
    "                # add typo: swap characters\n",
    "                n = np.random.randint(len(question) - 1)\n",
    "                question = question[:n] + question[n + 1] + question[n] + question[n + 2 :]\n",
    "\n",
    "            # question marks\n",
    "            if np.random.choice([True, False]):\n",
    "                if question[-1] == \"?\":\n",
    "                    if np.random.choice([True, False]):\n",
    "                        question = question[:-1]\n",
    "                    else:\n",
    "                        for i in range(np.random.randint(5)):\n",
    "                            question += \"?\"\n",
    "                elif np.random.choice([True, False]):\n",
    "                    question = question[:-1]\n",
    "        return question\n",
    "\n",
    "    def generate(self, qid: str, zalgo: bool = False, **kwargs):\n",
    "        self._fetch(qid)\n",
    "        if self.language == \"en\":\n",
    "            return self._generate_en(qid=qid, zalgo=zalgo, **kwargs)\n",
    "        else:\n",
    "            raise NotImplementedError(f\"Unknown language: {self.language}\")\n",
    "\n",
    "    def _generate_en(\n",
    "        self, qid: str, zalgo: bool = False, pronoun: str = \"it\", proper: bool = True\n",
    "    ) -> str:  # it is a proper noun\n",
    "        def _pronoun(which: str) -> tuple:\n",
    "            if which in (\"he\", \"him\", \"his\"):\n",
    "                return \"he\", \"him\", \"his\"\n",
    "            elif which in (\"she\", \"her\"):\n",
    "                return \"she\", \"her\", \"her\"\n",
    "            elif which in (\"it\", \"its\"):\n",
    "                return \"it\", \"it\", \"its\"\n",
    "            else:\n",
    "                return \"they\", \"them\", \"their\"\n",
    "\n",
    "        def _add_a(name: str) -> str:\n",
    "            if np.random.choice([True, False]):\n",
    "                return f\"the {name}\"\n",
    "            elif name[0].lower() in (\"a\", \"e\", \"i\", \"o\", \"u\"):\n",
    "                return f\"an {name}\"\n",
    "            else:\n",
    "                return f\"a {name}\"\n",
    "\n",
    "        sub, obj, pos = _pronoun(pronoun)\n",
    "\n",
    "        # question\n",
    "        Q = {\n",
    "            \"P6\": [\n",
    "                \"Do you know who the prime minister of {name} is?\",\n",
    "                \"Who is the president of {name}?\",\n",
    "                \"Who is the governor of {name}?\",\n",
    "            ],\n",
    "            \"P17\": [\n",
    "                \"Can you tell me Which country {name} is in?\",\n",
    "                \"Which country is {name} located in?\",\n",
    "                \"Where is {name} located in the world?\",\n",
    "            ],\n",
    "            \"P19\": [\"Do you know Where {name} was born at?\", \"What is {name}'s place of birth?\"],\n",
    "            \"P20\": [\"Can you tell me where {name} died?\", \"Where has {name} died?\"],\n",
    "            \"P22\": [\"Do you know who {name}'s father is?\", \"What is {name}'s father called?\", \"Who is {name}'s dad?\"],\n",
    "            \"P25\": [\n",
    "                \"Tell me who {name}'s mother is.\",\n",
    "                \"What is {name}'s mother called?\",\n",
    "                \"Who is {name}'s mom?\",\n",
    "                \"Who's {name}'s mum?\",\n",
    "            ],\n",
    "            \"P27\": [\n",
    "                \"Do you have any information on what country {name} is from?\",\n",
    "                \"Where is {name} from?\",\n",
    "                \"Where does {name} originate from?\",\n",
    "                \"What is {name}'s country of origin?\",\n",
    "            ],\n",
    "            \"P30\": [\n",
    "                \"Do you happen to know what continent {name} is under?\",\n",
    "                \"Which continent is {name} in?\",\n",
    "                \"Which continent does {name} belong to?\",\n",
    "            ],\n",
    "            \"P36\": [\n",
    "                \"Please tell me, what the capital of {name} is?\",\n",
    "                \"What's {name}'s capital city? Thank you in advance!\",\n",
    "            ],\n",
    "            \"P37\": [\n",
    "                \"Tell me what the official language of {name} is?\",\n",
    "                \"What language do they speak in {name}?\",\n",
    "                \"How do they speak in {name}?\",\n",
    "                \"What languages they understand in {name}?\",\n",
    "            ],\n",
    "            \"P38\": [\"Do you know what {name}'s currency is?\", \"What currency do they use in {name}?\"],\n",
    "            \"P40\": [\n",
    "                \"List {name}'s children.\",\n",
    "                \"Who are {name}'s children?\",\n",
    "                \"What are the names of {name}'s children?\",\n",
    "                \"Does {name} have children?\",\n",
    "                \"How many children does {name} have?\",\n",
    "                \"Does {name} have any kids?\",\n",
    "                \"How many children does {name} have?\",\n",
    "            ],\n",
    "            \"P50\": [\"Give me the name of the author for {name}.\", \"Who wrote {name}?\", \"Who's the author for {name}?\"],\n",
    "            \"P57\": [\n",
    "                \"Do you know who directed {name}?\",\n",
    "                \"Who directed {name}?\",\n",
    "                \"Who is the director of {name}?\",\n",
    "                \"{name} is directed by whom?\",\n",
    "            ],\n",
    "            \"P61\": [\"Do you know who invented {name}?\", \"Who discovered {name}?\", \"{name} was invented by whom?\"],\n",
    "            \"P106\": [\n",
    "                \"List the places {name} works at.\",\n",
    "                \"Where does {name} work at?\",\n",
    "                \"What is {name}'s occupation?\",\n",
    "                \"What does {name} do?\",\n",
    "                \"What does {name} work?\",\n",
    "                \"Where does {name} work at?\",\n",
    "                \"What does {name} work in?\",\n",
    "            ],\n",
    "            \"P138\": [\n",
    "                \"Describe what {name} was named after.\",\n",
    "                \"Do you know what {name} was named after?\",\n",
    "                \"What was {name} named after?\",\n",
    "                \"Who was {name} named after?\",\n",
    "                \"Why is {name} called {name}?\",\n",
    "                \"Why is {name} named like that?\",\n",
    "            ],\n",
    "            \"P169\": [\n",
    "                \"Tell me who is {name} the CEO of.\",\n",
    "                \"Who's {name} the CEO of?\",\n",
    "                \"Which company is {name} the CEO of?\",\n",
    "            ],\n",
    "            \"P170\": [\n",
    "                \"Tell me more about the creator of {name}.\",\n",
    "                \"Who crated {name}?\",\n",
    "                \"Who is {name}'s creator?\",\n",
    "                \"Who made {name}?\",\n",
    "                \"Who is responsible for {name}?\",\n",
    "            ],\n",
    "            \"P225\": [\n",
    "                \"Describe {name} to me in latin.\",\n",
    "                \"What is {name}'s scientific name?\",\n",
    "                \"What is {name}'s taxon name?\",\n",
    "                \"How do you say {name} in latin?\",\n",
    "                \"What is {name} in latin?\",\n",
    "            ],\n",
    "            \"P246\": [\n",
    "                \"Tell me {name}'s formula.\",\n",
    "                \"What is the formula for {name}?\",\n",
    "                \"What is the chemical formula of {name}?\",\n",
    "                \"What is the molecular formula of {name}?\",\n",
    "                \"Which chemical element is {name}?\",\n",
    "                \"Describe the chemical compound for {name}.\",\n",
    "                \"What is the chemical symbol for {name}?\",\n",
    "            ],\n",
    "            \"P274\": [\n",
    "                \"Tell me the chemical formula for {name}.\",\n",
    "                \"What is the formula for {name}?\",\n",
    "                \"What is the chemical formula of {name}?\",\n",
    "                \"What is the molecular formula of {name}?\",\n",
    "                \"Which chemical element is {name}?\",\n",
    "                \"Describe the chemical compound for {name}.\",\n",
    "                \"What is the chemical symbol for {name}?\",\n",
    "            ],\n",
    "            \"P275\": [\n",
    "                \"Describe {name}'s license.\",\n",
    "                \"What's {name}'s license?\",\n",
    "                \"Is {name} copyrighted?\",\n",
    "                \"Does {name} have a copyright license?\",\n",
    "                \"What license is associated with {name}?\",\n",
    "            ],\n",
    "            \"P366\": [\n",
    "                \"Give me use cases for {name}.\",\n",
    "                \"What's a use-case for {name}?\",\n",
    "                \"What is {name}'s main use case?\",\n",
    "                \"How is {name} used?\",\n",
    "                \"What is {name} good for?\",\n",
    "            ],\n",
    "            \"P487\": [\n",
    "                \"Say in emoji: {name}.\",\n",
    "                \"Is there an emoji for {name}?\",\n",
    "                \"Which unicode character does represent {name}?\",\n",
    "            ],\n",
    "            \"P509\": [\"Can you tell me how {name} died?\", \"What did {name} die of?\", \"What caused {name}'s death?\"],\n",
    "            \"P527\": [\n",
    "                \"What are the ingredients of {name}?\",\n",
    "                \"What are {name}s made of?\",\n",
    "                \"What are {name}s created from?\",\n",
    "                \"What are the parts of {name}?\",\n",
    "            ],\n",
    "            \"P569\": [\n",
    "                \"Do you know when {name} was born?\",\n",
    "                \"When did {name} born?\",\n",
    "                \"When was {name} born?\",\n",
    "                \"When is {name}'s birthday?\",\n",
    "            ],\n",
    "            \"P570\": [\n",
    "                \"Do you have information on the date of {name}'s death?\",\n",
    "                \"When did {name} die?\",\n",
    "                \"Is {name} dead?\",\n",
    "                \"Is {name} still alive?\",\n",
    "            ],\n",
    "            \"P571\": [\n",
    "                \"Do you have information on when {name} was first created? Thanks!\",\n",
    "                \"When was {name} created?\",\n",
    "                \"When was {name} first released?\",\n",
    "            ],\n",
    "            \"P575\": [\n",
    "                \"Please tell me when {name} was first discovered. Thank you!\",\n",
    "                \"When was {name} invented?\",\n",
    "                \"What was the date when {name} was finally discovered?\",\n",
    "            ],\n",
    "            \"P576\": [\n",
    "                \"Do you know when {name} was discontinued?\",\n",
    "                \"When was {name} demolished?\",\n",
    "                \"At what time was {name} dissolved?\",\n",
    "            ],\n",
    "            \"P580\": [\n",
    "                \"Can you recall when {name} started?\",\n",
    "                \"When did {name} start?\",\n",
    "                \"What was the starting date for {name}?\",\n",
    "                \"When did {name} break out?\",\n",
    "            ],\n",
    "            \"P582\": [\n",
    "                \"Do you have information on the date when {name} ended?\",\n",
    "                \"When did {name} end?\",\n",
    "                \"What was the ending date of {name}?\",\n",
    "                \"When was {name} finally over?\",\n",
    "            ],\n",
    "            \"P625\": [\n",
    "                \"Give me the coordinates for {name}!\",\n",
    "                \"Locate {name}.\",\n",
    "                \"What is {name}'s location?\",\n",
    "                \"Where can I find {name}?\",\n",
    "                \"What are the GPS coordinates for {name}?\",\n",
    "            ],\n",
    "            \"P837\": [\n",
    "                \"Tell me when {name} is!\",\n",
    "                \"When is {name} celebrated?\",\n",
    "                \"On which day is {name}?\",\n",
    "                \"When is {name} day?\",\n",
    "            ],\n",
    "            \"P856\": [\n",
    "                \"Give me the URL for {name}.\",\n",
    "                \"What's the URL for {name}? Thanks!\",\n",
    "                \"What's {name}'s website?\",\n",
    "                \"What is the official website for {name}?\",\n",
    "                \"Can you tell me the link to {name}?\",\n",
    "            ],\n",
    "            \"P973\": [\n",
    "                \"Return the URL for {name}!\",\n",
    "                \"Where can I find more information on {name}?\",\n",
    "                \"Where can I read more abou {name} online?\",\n",
    "                \"Is there a site that explains {name} in detail?\",\n",
    "            ],\n",
    "            \"P1082\": [\n",
    "                \"Count the number of people who live in {name}!\",\n",
    "                \"What is {name}'s population?\",\n",
    "                \"What is the population of {name}?\",\n",
    "                \"How many people live in {name}?\",\n",
    "            ],\n",
    "            \"P1120\": [\n",
    "                \"Do you know the number of people who died in {name}?\",\n",
    "                \"How many people have died due to {name}?\",\n",
    "                \"How many people have lost their lives in {name}?\",\n",
    "                \"What is the number of fatalities after {name}?\",\n",
    "                \"How many people have lost their lives in {name}?\",\n",
    "            ],\n",
    "            \"P2043\": [\"Calculate the length of {name}!\", \"How long is {name}?\", \"What is {name}'s length?\"],\n",
    "            \"P2044\": [\n",
    "                \"Do you know how tall {name} is?\",\n",
    "                \"How tall is {name}?\",\n",
    "                \"How high is {name}?\",\n",
    "                \"How many meters is {name} above sea level?\",\n",
    "                \"What is {name}'s elevation?\",\n",
    "            ],\n",
    "            \"P2046\": [\"Is {name} big?\", \"How big is {name}?\", \"What is the area of {name}?\", \"How big is {name}?\"],\n",
    "            \"P2049\": [\n",
    "                \"Describe the width of {name}.\",\n",
    "                \"What's {name}'s width?\",\n",
    "                \"How wide is {name}?\",\n",
    "                \"What's the width of {name}?\",\n",
    "            ],\n",
    "            \"P2250\": [\n",
    "                \"Do you know how long {name} lives?\",\n",
    "                \"What is the life expectancy of {name}?\",\n",
    "                \"How long do {name}s live?\",\n",
    "            ],\n",
    "            \"P2283\": [\n",
    "                \"Describe {name} in detail.\",\n",
    "                \"How does {name} work?\",\n",
    "                \"What makes {name} work in theory? Thanks for the answer!\",\n",
    "            ],\n",
    "            \"P3063\": [\"I need information on the gestation period of {name}s.\", \"How long are {name}s pregnant?\"],\n",
    "            \"P3373\": [\n",
    "                \"List {name}'s siblings.\",\n",
    "                \"Who are {name}'s siblings?\",\n",
    "                \"What are the names of {name}'s brothers and sisters?\" \"Does {name} have any siblings?\",\n",
    "                \"Does {name} have a brother or sister?\",\n",
    "                \"How many siblings does {name} have?\",\n",
    "                \"How many brothers and sisters does {name} have?\",\n",
    "            ],\n",
    "            \"P4511\": [\"Calculate the depth of {name}!\", \"How deep is {name}?\", \"What is {name}'s vertical depth?\"],\n",
    "            \"P4733\": [\n",
    "                \"Do you know the noise {name} makes?\",\n",
    "                \"What does {name} say?\",\n",
    "                \"What sound does {name} make?\",\n",
    "                \"How does {name} sound like?\",\n",
    "            ],\n",
    "            \"P7767\": [\"How would you serve {name} for me?\", \"How are {name}s served?\"],\n",
    "        }\n",
    "        # reference to name in question\n",
    "        Qp = {\n",
    "            \"P6\": [\"Tell me who {pos} governor is!\", \"Who's the governor?\", \"Who is {pos} president?\"],\n",
    "            \"P17\": [\n",
    "                \"Do you know which country is that in?\",\n",
    "                \"Which country is {sub} in?\",\n",
    "                \"Under which country is {sub} located?\",\n",
    "            ],\n",
    "            \"P19\": [\n",
    "                \"Can you tell me the place {sub} was born at? Thanks!\",\n",
    "                \"Where was {sub} born?\",\n",
    "                \"What is {pos} place of birth?\",\n",
    "            ],\n",
    "            \"P20\": [\n",
    "                \"Where di {sub} die, can you tell me that?\",\n",
    "                \"Where did {sub} die?\",\n",
    "                \"What is the place of {pos} death?\",\n",
    "            ],\n",
    "            \"P22\": [\n",
    "                \"Who is {pos} father, respond with his name. Thank you.\",\n",
    "                \"What is {pos} father called?\",\n",
    "                \"Who is {pos} dad?\",\n",
    "                \"What's {pos} father's name?\",\n",
    "            ],\n",
    "            \"P25\": [\n",
    "                \"Who is {pos} mother, respond with her name.\",\n",
    "                \"What is {pos} mother called?\",\n",
    "                \"Who is {pos} mom?\",\n",
    "                \"Who's {pos} mum?\",\n",
    "                \"What's {pos} mother's name?\",\n",
    "            ],\n",
    "            \"P27\": [\n",
    "                \"Can you tell me where {sub} came from?\",\n",
    "                \"Where is {sub} from?\",\n",
    "                \"Where does {sub} come from?\",\n",
    "                \"Where does {sub} originate from?\",\n",
    "            ],\n",
    "            \"P30\": [\n",
    "                \"Tell me the name of the continent {sub} is in.\",\n",
    "                \"Which continent is {sub} in?\",\n",
    "                \"Which continent does {sub} belong to?\",\n",
    "            ],\n",
    "            \"P36\": [\"Do you know {pos} capital?\", \"What is {pos} capital called?\", \"What's the name of {pos} capital?\"],\n",
    "            \"P37\": [\n",
    "                \"Describe {pos} official language.\",\n",
    "                \"What is {pos} official language?\",\n",
    "                \"What language do they speak there?\",\n",
    "            ],\n",
    "            \"P38\": [\n",
    "                \"Tell me more about {pos} currency.\",\n",
    "                \"What is {pos} currency?\",\n",
    "                \"Which currencies are used there?\",\n",
    "            ],\n",
    "            \"P40\": [\n",
    "                \"I need more information on {pos} children.\",\n",
    "                \"Who are {pos} children?\",\n",
    "                \"What are the names of {pos} kids?\",\n",
    "                \"How many children does {sub} have?\",\n",
    "                \"Does {sub} have kids?\",\n",
    "                \"Does {sub} have any children?\",\n",
    "                \"How many kids {sub} got?\",\n",
    "            ],\n",
    "            \"P50\": [\n",
    "                \"Please, describe {pos} author.\",\n",
    "                \"Who wrote {obj}?\",\n",
    "                \"Who's {pos} author?\",\n",
    "                \"Who {pos} author is?\",\n",
    "            ],\n",
    "            \"P57\": [\n",
    "                \"Who is {obj} director, do you have information on that in your database?\",\n",
    "                \"Who directed {obj}?\",\n",
    "                \"Who is {pos} director?\",\n",
    "            ],\n",
    "            \"P61\": [\"Tell me who invented {obj}!\", \"Do you know who discovered {obj} first?\"],\n",
    "            \"P106\": [\n",
    "                \"Do you have data on {pos} jobs?\",\n",
    "                \"Where does {sub} work at?\",\n",
    "                \"What does {sub} do for a living?\",\n",
    "                \"What's {pos} job?\",\n",
    "                \"What is {pos} occupation?\",\n",
    "            ],\n",
    "            \"P138\": [\n",
    "                \"Explain how {sub} got {pos} name!\",\n",
    "                \"How did {sub} get {pos} name?\",\n",
    "                \"Where did {sub} get {pos} name from?\",\n",
    "                \"Why is {sub} called {name}?\",\n",
    "            ],\n",
    "            \"P169\": [\n",
    "                \"Give me information on the companies {sub} is the CEO at.\",\n",
    "                \"Is {sub} the CEO of a company?\",\n",
    "                \"Which company is {sub} the CEO of?\",\n",
    "            ],\n",
    "            \"P170\": [\n",
    "                \"Who made {obj}, can you tell me?\",\n",
    "                \"Who crated {obj}?\",\n",
    "                \"Who is {pos} creator?\",\n",
    "                \"Who made {obj}?\",\n",
    "            ],\n",
    "            \"P225\": [\n",
    "                \"Translate {pos} name to latin.\",\n",
    "                \"What is {pos} scientific name?\",\n",
    "                \"How do you call {obj} in latin?\",\n",
    "                \"How to say {obj} in latin?\",\n",
    "            ],\n",
    "            \"P246\": [\n",
    "                \"Give me {pos} formula.\",\n",
    "                \"What is {pos} formula?\",\n",
    "                \"What is {pos} chemical formula?\",\n",
    "                \"What is {pos} molecular formula?\",\n",
    "                \"Which chemical element is {sub}?\",\n",
    "                \"Describe the chemical compound for {obj}.\",\n",
    "                \"What is the chemical symbol for {obj}?\",\n",
    "            ],\n",
    "            \"P274\": [\n",
    "                \"Describe {pos} chemical formula!\",\n",
    "                \"What is {pos} formula?\",\n",
    "                \"What is {pos} chemical formula?\",\n",
    "                \"What is {pos} molecular formula?\",\n",
    "                \"Which chemical element is {sub}?\",\n",
    "                \"Describe the chemical compound for {obj}.\",\n",
    "                \"What is the chemical symbol for {obj}?\",\n",
    "            ],\n",
    "            \"P275\": [\n",
    "                \"Do you know which license {pos} is under?\",\n",
    "                \"What's {pos} license?\",\n",
    "                \"Is {sub} copyrighted?\",\n",
    "                \"What license was {sub} released under?\",\n",
    "            ],\n",
    "            \"P366\": [\n",
    "                \"And {pos} use cases are?\",\n",
    "                \"What is {pos} main use case?\",\n",
    "                \"How is {sub} used?\",\n",
    "                \"What is {sub} good for?\",\n",
    "                \"What does {sub} do?\",\n",
    "            ],\n",
    "            \"P487\": [\n",
    "                \"Write {obj} down using emojis only.\",\n",
    "                \"Does {sub} have an emoji?\",\n",
    "                \"Is there a unicode character for {sub}?\",\n",
    "            ],\n",
    "            \"P509\": [\n",
    "                \"Can you tell me hat {sub} died of?\",\n",
    "                \"What did {sub} die of?\",\n",
    "                \"What was the cause of {pos} death?\",\n",
    "            ],\n",
    "            \"P527\": [\n",
    "                \"List {pos} parts.\",\n",
    "                \"What are {pos} ingredients?\",\n",
    "                \"What are they made of?\",\n",
    "                \"What are their parts?\",\n",
    "            ],\n",
    "            \"P569\": [\"{pos} birthday is?\", \"When did {sub} born?\", \"When was {sub} born?\", \"When is {pos} birthday?\"],\n",
    "            \"P570\": [\"Is {sub} dead?\", \"When did {sub} die?\", \"Is {sub} dead?\", \"Did {sub} die?\"],\n",
    "            \"P571\": [\n",
    "                \"Do you know the date of {pos} inception?\",\n",
    "                \"When was {sub} first released?\",\n",
    "                \"And when was {sub} actually created?\",\n",
    "            ],\n",
    "            \"P575\": [\"Tell me the date of {pos} discovery!\", \"When was {sub} invented then?\"],\n",
    "            \"P576\": [\n",
    "                \"Can you tell me the date {sub} was dinally discontinued?\",\n",
    "                \"When was {sub} demolished?\",\n",
    "                \"At what time was {sub} dissolved?\",\n",
    "            ],\n",
    "            \"P580\": [\"Write down the exact date {sub} started!\", \"When did {sub} start?\", \"When did {sub} break out?\"],\n",
    "            \"P582\": [\"Write down the exact date {sub} ended.\", \"When was {sub} finally over?\", \"When did {sub} end?\"],\n",
    "            \"P625\": [\"I need {pos} GPS location!\", \"What is {pos} GPS location?\", \"What are {pos} coordinates?\"],\n",
    "            \"P837\": [\"When would you celebrate {obj}?\", \"When is {sub} celebrated?\", \"On which day is {sub}?\"],\n",
    "            \"P856\": [\n",
    "                \"Send me {pos} web address.\",\n",
    "                \"What's the address of {pos} website?\",\n",
    "                \"What is {pos} official website?\",\n",
    "                \"Can you tell me the link to {obj}?\",\n",
    "            ],\n",
    "            \"P973\": [\n",
    "                \"Can you give me more information on {obj}?\",\n",
    "                \"Where can I find more info on {obj}?\",\n",
    "                \"Where can I read more about {obj} online?\",\n",
    "                \"Is there a site that explains {obj} in detail?\",\n",
    "            ],\n",
    "            \"P1082\": [\"Estimate {pos} population.\", \"How many people live there?\", \"How large is {pos} population?\"],\n",
    "            \"P1120\": [\n",
    "                \"Estimate the number of people who died in the event!\",\n",
    "                \"How many people died?\",\n",
    "                \"How deadly was {sub}?\",\n",
    "                \"How many fatalities were there?\",\n",
    "            ],\n",
    "            \"P2043\": [\"Do you know {pos} length?\", \"How long is {sub}?\", \"What'S the length of {obj}?\"],\n",
    "            \"P2044\": [\n",
    "                \"Is {sub} tall?\",\n",
    "                \"How tall is {sub}?\",\n",
    "                \"How high is {sub}?\",\n",
    "                \"Is {name} above sea level?\",\n",
    "                \"What is {pos} elevation?\",\n",
    "            ],\n",
    "            \"P2046\": [\"Provide information on {pos} area.\", \"How big is {sub}?\", \"How big is {pos} area?\"],\n",
    "            \"P2049\": [\"Calculate {pos} width!\", \"How wide is {sub}?\", \"What is {pos} width?\"],\n",
    "            \"P2250\": [\"Can you tell me how long {sub} live?\", \"What is {pos} life expectancy?\", \"How long they live?\"],\n",
    "            \"P2283\": [\n",
    "                \"Tell me how {sub} works under the hood.\",\n",
    "                \"Do you know how {sub} works?\",\n",
    "                \"How does {sub} work under the hood?\",\n",
    "                \"How does {obj} run?\",\n",
    "            ],\n",
    "            \"P3063\": [\"So how long is their gestation period?\", \"How long do they stay pregnant?\"],\n",
    "            \"P3373\": [\n",
    "                \"List {pos} siblings please.\",\n",
    "                \"Who are {pos} brothers and sisters?\",\n",
    "                \"What are the names of {pos} siblings?\",\n",
    "                \"Does {sub} have any siblings?\",\n",
    "                \"Does {sub} have a brother or sister?\",\n",
    "                \"How many siblings does {sub} have?\",\n",
    "                \"How many brothers and sisters does {sub} have?\",\n",
    "            ],\n",
    "            \"P4511\": [\"Do you know if {sub} is really deep?\", \"Is {sub} deep?\", \"How deep is {sub} really?\"],\n",
    "            \"P4733\": [\"Mimic {pos} sound!\", \"What sound does {sub} make?\", \"How does {sub} sound like?\"],\n",
    "            \"P7767\": [\"Would you serve {obj} hot or cold?\", \"How are they usually served?\"],\n",
    "        }\n",
    "\n",
    "        # single answer\n",
    "        A = {\n",
    "            \"P6\": [\"{name}'s president is {a}.\", \"{name}'s prime minister is {a}.\"],\n",
    "            \"P17\": [\"{name} is located in {a}.\", \"{name} is found in the country of {a}.\"],\n",
    "            \"P19\": [\"{name} was born in {a}.\", \"{sub} was born in {a}.\", \"In {a}.\"],\n",
    "            \"P20\": [\"{name} died in {a}.\", \"{sub} died in {a}.\"],\n",
    "            \"P22\": [\"{name}'s father is {a}.\", \"{pos} father is called {a}.\", \"His name is {a}.\", \"It's {a}.\"],\n",
    "            \"P25\": [\"{name}'s mother is {a}.\", \"{pos} mother is called {a}.\", \"Her name is {a}.\", \"It's {a}.\"],\n",
    "            \"P27\": [\"{name} originates from {a}.\", \"{sub} is from {a}.\", \"{name} comes from {a}.\"],\n",
    "            \"P30\": [\"{name} is part of {a}.\", \"{name} is part of the continent of {a}.\"],\n",
    "            \"P36\": [\"{name}'s capital city is {a}.\", \"{pos} capital is called {a}.\", \"The capital of {name} is {a}.\"],\n",
    "            \"P37\": [\"The official language of {name} is {a}.\", \"The people in {name} speak {a}.\"],\n",
    "            \"P38\": [\n",
    "                \"{name}'s currency is the {a}.\",\n",
    "                \"{name} uses {a} as their currency.\",\n",
    "                \"The currency of {name} is the {a}.\",\n",
    "            ],\n",
    "            \"P40\": [\n",
    "                \"{name} has one child named {a}.\",\n",
    "                \"{name} has a single child named {a}.\",\n",
    "                \"{sub} has a child named {a}.\",\n",
    "            ],\n",
    "            \"P50\": [\"{name} was written by {a}.\", \"The author of {name} is {a}.\", \"{a} is {pos} author.\"],\n",
    "            \"P57\": [\"{name} was directed by {a}.\", \"{sub} was directed by {a}.\"],\n",
    "            \"P61\": [\"{name} was discovered by {a}.\", \"{sub} was discovered by {a}.\"],\n",
    "            \"P106\": [\"{name} works at {a}.\", \"{sub} works at {a}.\", \"{name} job title is {a}.\"],\n",
    "            \"P138\": [\"{name} was named after {a}.\", \"{name} got {pos} name from {a}.\", \"{pos} name comes from {a}.\"],\n",
    "            \"P169\": [\"{name} is the CEO of {a}.\", \"{sub} is the CEO of {a}.\"],\n",
    "            \"P170\": [\n",
    "                \"{sub} was created by {a}.\",\n",
    "                \"{a} created {name}.\",\n",
    "                \"The creator of {name} is {a}.\",\n",
    "                \"{a} made {obj}.\",\n",
    "                \"{sub} was created by {a}.\",\n",
    "            ],\n",
    "            \"P225\": [\"{name} is called {a} in latin.\", \"The scientific term for {name} is {a}.\"],\n",
    "            \"P246\": [\"The element of {name} is {a}.\", \"The symbol for {name} is {a}.\"],\n",
    "            \"P274\": [\"The formula for {name} is {a}.\", \"The chemical formula of {name} is {a}.\"],\n",
    "            \"P275\": [\n",
    "                \"{name} has the following license: {a}.\",\n",
    "                \"{name} has a {a} license associated with {obj}.\",\n",
    "                \"{name} was released under {a}.\",\n",
    "                \"{sub} is licensed under {a}.\",\n",
    "            ],\n",
    "            \"P366\": [\n",
    "                \"{name} is most commonly used for {a}.\",\n",
    "                \"{sub} is used mostly for {a}.\",\n",
    "                \"{name} is mostly known for {a}.\",\n",
    "            ],\n",
    "            \"P487\": [\"{a}\", \"The {name} emoji is {a}.\", \"The {a} character repesents {name}.\"],\n",
    "            \"P509\": [\"{name} died of {a}.\", \"The cause of {pos} death was {a}.\"],\n",
    "            \"P527\": [\"{name} are made of {a}.\", \"They are made of {a}.\"],\n",
    "            \"P569\": [\"{name} was born on {a}.\", \"{pos} birthday is on the {a}.\"],\n",
    "            \"P570\": [\"{name} died at {a}\", \"{sub} died in {a}.\"],\n",
    "            \"P571\": [\n",
    "                \"{name} was created in {a}.\",\n",
    "                \"The date of {pos} inception is {a}.\",\n",
    "                \"{name} was first released in {a}.\",\n",
    "            ],\n",
    "            \"P575\": [\"{name} was invented at {a}.\", \"{name} was discovered in {a}.\"],\n",
    "            \"P576\": [\n",
    "                \"{name} was discontinued after {a}.\",\n",
    "                \"{name} was demolished by {a}.\",\n",
    "                \"{sub} got dissolved at {a}.\",\n",
    "            ],\n",
    "            \"P580\": [\"{name} started in {a}.\", \"{name} first started at {a}.\"],\n",
    "            \"P582\": [\"{name} ended in {a}.\", \"{name} lasted until {a}.\"],\n",
    "            \"P625\": [\"{name} is lcoated at {a}.\", \"The coordinates for {name} are {a}.\", \"{pos} GPS location is {a}.\"],\n",
    "            \"P837\": [\"{name} is celebrated on {a}.\", \"{name} is on {a}.\"],\n",
    "            \"P856\": [\n",
    "                \"The URL for {name} is: {a}\",\n",
    "                \"See {a}\",\n",
    "                \"The URL of {pos} webiste is {a}\",\n",
    "                \"{pos} web address is: {a}\",\n",
    "            ],\n",
    "            \"P973\": [\n",
    "                \"You can find out more at {a}\",\n",
    "                \"Here's a link on {name}: {a}\",\n",
    "                \"You can find out more about {obj} on {a}\",\n",
    "            ],\n",
    "            \"P1082\": [\n",
    "                \"{name}'s population is {a}.\",\n",
    "                \"Around {a} people live in {name}.\",\n",
    "                \"{pos} population is estimated to be around {a}.\",\n",
    "            ],\n",
    "            \"P1120\": [\n",
    "                \"The number of deaths was {a}.\",\n",
    "                \"The number of fatalities was {a}.\",\n",
    "                \"{a} died due to {name}.\",\n",
    "                \"{name} has taken the lives of {a}.\",\n",
    "            ],\n",
    "            \"P2043\": [\"{name} is {a} long.\", \"{sub} has a length of {a}.\"],\n",
    "            \"P2044\": [\"{name} is {a} tall.\", \"{name} is {a} above sea level.\", \"{pos} elevation is {a}.\"],\n",
    "            \"P2046\": [\"{name}'s area is {a}\", \"{pos} area is {a}.\"],\n",
    "            \"P2049\": [\"{name}'s widht is {a}.\", \"{name} is {a} wide.\"],\n",
    "            \"P2250\": [\"{name} have a life expectancy of {a}.\", \"{pos} life expectancy is about {a}.\"],\n",
    "            \"P2283\": [\n",
    "                \"{name} uses {a} to work.\",\n",
    "                \"{sub} works via {a}.\",\n",
    "                \"{name} works through {a}.\",\n",
    "                \"{sub} makes use of {a}.\",\n",
    "            ],\n",
    "            \"P3063\": [\n",
    "                \"The gestation period for {name}s is {a}.\",\n",
    "                \"The amount of time needed for their gestation period is known to be {a}.\",\n",
    "            ],\n",
    "            \"P3373\": [\"{name} has a siblings called {a}.\", \"{sub} has a sibling named {a}.\"],\n",
    "            \"P4511\": [\"{name} has a depth of {a}.\", \"{name} can be as deep as {a}.\", \"{pos} vertical depth is {a}.\"],\n",
    "            \"P4733\": [\"{name} makes the following sound: {a}\", \"{name} makes a {a} sound.\", \"The {name} says {a}.\"],\n",
    "            \"P7767\": [\"{name}s are served {a}.\", \"{name} is usually served {a}.\"],\n",
    "        }\n",
    "        # plural / multiple answers\n",
    "        Ap = {\n",
    "            \"P6\": [\"The governors of {name} are {a}.\", \"The ministers of {name} are {a}.\"],\n",
    "            \"P37\": [\"The official languages of {name} are {a}.\", \"They speak {a}.\"],\n",
    "            \"P38\": [\n",
    "                \"{name} accepts {a}.\",\n",
    "                \"{name} uses {a} as their countriy's currencies.\",\n",
    "                \"The currencies of {name} are {a}.\",\n",
    "            ],\n",
    "            \"P40\": [\n",
    "                \"{name} has {l} children: {a}.\",\n",
    "                \"The number of children {name} has is {l}. Their names are {a}.\",\n",
    "                \"{pos} {l} children are {a}.\",\n",
    "            ],\n",
    "            \"P50\": [\"{name} was co-written by {a}.\", \"The authors of {name} are {a}.\"],\n",
    "            \"P57\": [\"{name} was direcrted by the following people: {a}.\", \"{a} were the directors of {name}.\"],\n",
    "            \"P61\": [\"{pos} inventors are {a}.\", \"{name} was discovered by {a}.\"],\n",
    "            \"P106\": [\"{name} has multiple occupations: {a}.\", \"{name}'s job titles are: {a}.\"],\n",
    "            \"P169\": [\"{name} is the CEO of multiple companies, such as {a}.\", \"{sub} is the CEO at {a}.\"],\n",
    "            \"P225\": [\"The taxon names for {name} are {a}.\", \"The proper scientific terms for {name} are {a}.\"],\n",
    "            \"P246\": [\"The elements of {name} are {a}.\", \"The symbols for {name} are {a}.\"],\n",
    "            \"P274\": [\"The formulas for {name} are {a}.\", \"The chemical formulas of the compound {name} are {a}.\"],\n",
    "            \"P487\": [\"The {name} emojis are {a}.\", \"The characters {a} repesent {name}.\"],\n",
    "            \"P527\": [\"The ingredients of {name} are {a}.\", \"{a} are all parts needed for {name}.\"],\n",
    "            \"P575\": [\n",
    "                \"Sources disagree on the exact date, it is said that {name} was invented in {a}.\",\n",
    "                \"{name} was discovered multiple times at {a}.\",\n",
    "            ],\n",
    "            \"P856\": [\"The URLs for {name} are: {a}\", \"See {a}\", \"The URLs of {pos} webiste are {a}\"],\n",
    "            \"P625\": [\n",
    "                \"{name} can be found under the following GPS locations: {a}.\",\n",
    "                \"The coordinates for {name} are {a}.\",\n",
    "            ],\n",
    "            \"P973\": [\"You can find out more at {a}\", \"You can find out more about {obj} at {a}\"],\n",
    "            \"P1120\": [\n",
    "                \"There are multiple sources on the number of fatalities: {a}\",\n",
    "                \"{name} is know to take the lives of somewhere between {a}.\",\n",
    "            ],\n",
    "            \"P1082\": [\n",
    "                \"There are multiple sources on {pos} population: {a}.\",\n",
    "                \"There are different sources on {name}'s population: {a}.\",\n",
    "            ],\n",
    "            \"P2046\": [\"{name}'s area has changed over time: {a}\", \"{pos} area has altered over the ages to {a}.\"],\n",
    "            \"P3373\": [\n",
    "                \"{name} has {l} siblings: {a}.\",\n",
    "                \"The number of brothers and sisters {name} has is {l}. Their names are {a}.\",\n",
    "            ],\n",
    "            \"P4733\": [\"{name} makes sounds like {a}.\", \"The sounds {sub} often makes are {a}.\"],\n",
    "        }\n",
    "\n",
    "        assert len(Q.keys()) == len(A.keys())\n",
    "\n",
    "        largest = int(sorted(self.cache[self.language][qid].keys())[-1])\n",
    "        qs = [key for key in Q.keys() if key in self.cache[self.language][qid][largest][\"graph\"]]\n",
    "        if not qs:\n",
    "            return \"\"\n",
    "        np.random.shuffle(qs)\n",
    "\n",
    "        if np.random.choice([True, False]):\n",
    "            if np.random.choice([True, False]):\n",
    "                results = [f'Questions about {self.cache[self.language][qid][largest][\"desc\"][\"label\"]}:']\n",
    "            else:\n",
    "                results = [\n",
    "                    f'Questions and Answers on {self.cache[self.language][qid][largest][\"desc\"][\"label\"]}, {self.cache[self.language][qid][largest][\"desc\"][\"description\"]}:'\n",
    "                ]\n",
    "        else:\n",
    "            if np.random.choice([True, False]):\n",
    "                results = [\n",
    "                    f'Questions about {self.cache[self.language][qid][largest][\"desc\"][\"label\"]} ({self.cache[self.language][qid][largest][\"desc\"][\"description\"]}):'\n",
    "                ]\n",
    "            else:\n",
    "                results = [\n",
    "                    f'Questions and Answers on {self.cache[self.language][qid][largest][\"desc\"][\"label\"]} (also known as {\", \".join(self.cache[self.language][qid][largest][\"desc\"][\"aliases\"])}):'\n",
    "                ]\n",
    "\n",
    "        for i, key in enumerate(qs):\n",
    "            if np.random.choice([True, False]):\n",
    "                name = self.cache[self.language][qid][largest][\"desc\"][\"label\"]\n",
    "            else:\n",
    "                name = np.random.choice(self.cache[self.language][qid][largest][\"desc\"][\"aliases\"])\n",
    "            if not proper:\n",
    "                name = _add_a(name)\n",
    "            if i == 0 or np.random.choice([True, False]):\n",
    "                question = np.random.choice(Q[key]).format(name=name, sub=sub, obj=obj, pos=pos)\n",
    "            else:\n",
    "                question = np.random.choice(Qp[key] if key in Qp else Q[key]).format(\n",
    "                    name=name, sub=sub, obj=obj, pos=pos\n",
    "                )\n",
    "            if zalgo:\n",
    "                question = self._zalgo(question)\n",
    "\n",
    "            a = self.cache[self.language][qid][largest][\"graph\"][key]\n",
    "            l = len(a)\n",
    "            if key not in Ap or l <= 1:\n",
    "                if l <= 1:\n",
    "                    a = a[0]\n",
    "                else:\n",
    "                    a = \", \".join(a[:-1]) + f\" and {a[-1]}\"\n",
    "                answer = np.random.choice(A[key]).format(q=question, name=name, sub=sub, obj=obj, pos=pos, a=a, l=l)\n",
    "            else:\n",
    "                a = \", \".join(a[:-1]) + f\" and {a[-1]}\"\n",
    "                answer = np.random.choice(Ap[key]).format(q=question, name=name, sub=sub, obj=obj, pos=pos, a=a, l=l)\n",
    "\n",
    "            results.append(f\"Q: {question}\\r\\nA: {answer[0].upper()}{answer[1:]}\")\n",
    "        return \"\\n\\n\".join(results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Search for a concept and use its QID to generate question and answer pairs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "wg = WikiGraph(file=\"cache.tmp\")  # will save cached graph to cache.tmp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': 'Q115564437',\n",
       "  'label': 'ChatGPT',\n",
       "  'description': 'pre-trained language model developed by OpenAI'},\n",
       " {'id': 'Q116786574',\n",
       "  'label': 'ChatGPT: Na volta às aulas, experimentação precisa ser o caminho, defendem especialistas',\n",
       "  'description': 'webpage'},\n",
       " {'id': 'Q116488506',\n",
       "  'label': 'ChatGPT is fun, but not an author',\n",
       "  'description': 'scientific article'},\n",
       " {'id': 'Q116294278',\n",
       "  'label': 'ChatGPT listed as author on research papers: many scientists disapprove',\n",
       "  'description': 'scientific article published on 18 January 2023'}]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# search by keyword; each hit lists its QID ('id'), label and description\n",
    "wg.search(\"chatgpt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:10<00:00,  3.44it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Questions about ChatGPT (pre-trained language model developed by OpenAI):\n",
      "\n",
      "Q: Where can I read more abou GPT-3.5 online?\r\n",
      "A: Here's a link on GPT-3.5: https://openai.com/blog/chatgpt/\n",
      "\n",
      "Q: What's the URL for ChatGPT? Thanks!\r\n",
      "A: The URL for ChatGPT is: https://chat.openai.com/chat\n",
      "\n",
      "Q: When was Generative Pre-trained Transformer created?\r\n",
      "A: Generative Pre-trained Transformer was created in 2022-11-30.\n",
      "\n",
      "Q: What makes GPT-3.5 work in theory? Thanks for the answer!\r\n",
      "A: It works via Reinforcement Learning from Human Feedback and Proximal Policy Optimization.\n",
      "\n",
      "Q: Tell me who invented it!\r\n",
      "A: ChatGPT was discovered by OpenAI.\n",
      "\n",
      "Q: Why is GPT-3.5 named like that?\r\n",
      "A: GPT-3.5 got its name from online chat and Generative Pre-trained Transformer.\n",
      "\n",
      "Q: And its use cases are?\r\n",
      "A: ChatGPT is most commonly used for natural language generation and machine translation.\n",
      "\n",
      "Q: Who is reponsible for Generative Pre-trained Transformer?\r\n",
      "A: OpenAI made it.\n",
      "\n",
      "Q: What license is associated with Generative Pre-trained Transformer?\r\n",
      "A: Generative Pre-trained Transformer has the following license: proprietary license and proprietary software.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# ChatGPT (QID taken from the search result above)\n",
    "print(wg.generate(qid=\"Q115564437\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:55<00:00,  3.63it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Questions about Bill Gates (American businessman and philanthropist (born 1955)):\n",
      "\n",
      "Q: How many children does Bill Gates have?\r\n",
      "A: His 3 children are Jennifer Katherine Gates, Phoebe Adele Gates and Rory John Gates.\n",
      "\n",
      "Q: Who is his dad?\r\n",
      "A: His name is Bill Gates Sr..\n",
      "\n",
      "Q: Who is his mother, respond with her name.\r\n",
      "A: It's Mary Maxwell Gates.\n",
      "\n",
      "Q: Where is William Henry Gates III from?\r\n",
      "A: He is from United States of America.\n",
      "\n",
      "Q: What does William Henry, III Gates work in?\r\n",
      "A: William Henry, III Gates has multiple occupations: entrepreneur, programmer, computer scientist, inventor, financier, bridge player, investor, actor, philanthropist, writer, international forum participant and business magnate.\n",
      "\n",
      "Q: What's the URL for William Henry \"Bill\" Gates III? Thanks!\r\n",
      "A: His web address is: http://www.thegatesnotes.com/\n",
      "\n",
      "Q: his birthday is?\r\n",
      "A: His birthday is on the 1955-10-28.\n",
      "\n",
      "Q: List his siblings please.\r\n",
      "A: The number of brothers and sisters William Gates has is 2. Their names are Kristianne Gates and Libby Gates MacPhee.\n",
      "\n",
      "Q: Where was he born?\r\n",
      "A: William H. Gates III was born in Seattle.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Bill Gates (the generated text refers to him as he/his)\n",
    "print(wg.generate(\"Q5284\", pronoun=\"he\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████| 152/152 [01:41<00:00,  1.50it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Questions and Answers on Budapest (also known as Buda Pest, Buda-Pest, Budapešť, Budapesta, Budapeszt, Buda, Ofen, Budín, Budim, Budon, Pest, Pešť, Pešta, Alt-Ofen, Budapest, Hungary):\n",
      "\n",
      "Q: What is Budapest's location?\r\n",
      "A: The coordinates for Budapest are 47.498333333333 19.040833333333.\n",
      "\n",
      "Q: Do you know the date of its inception?\r\n",
      "A: Budon was first released in 1873-11-17.\n",
      "\n",
      "Q: WHERE IS BUDAPEST LOCATED IN THE WORLD\r\n",
      "A: Budapest is located in Hungary, Kingdom of Hungary, Hungarian Republic, Hungarian Soviet Republic, First Hungarian Republic, Austria-Hungary, First Hungarian Republic, Republic of Hungary and People's Republic of Hungary.\n",
      "\n",
      "Q: How did it get its name?\r\n",
      "A: Its name comes from Buda and Pest.\n",
      "\n",
      "Q: How big is Budim?\r\n",
      "A: Budim's area is 52514 hectare\n",
      "\n",
      "Q: What is its official website?\r\n",
      "A: The URLs for Budapest are: https://budapest.hu and https://budapest.hu/sites/english/\n",
      "\n",
      "Q: Tell me who its governor is!\r\n",
      "A: The governors of Budapest are István Tarlós, Gábor Demszky and Gergely Karácsony.\n",
      "\n",
      "Q: Which continent is it ni\r\n",
      "A: Budapešť is part of the continent of Europe.\n",
      "\n",
      "Q: describe its official language.\r\n",
      "A: The people in Budapest speak Hungarian.\n",
      "\n",
      "Q: Count the number of people who live in Budapešť!\r\n",
      "A: There are multiple sources on its population: 1744665, 1735711, 1740041, 1733685, 1757618, 370767, 506384, 880371, 733358, 296867 and 1706851.\n",
      "\n",
      "Q: Is Budim above sea level?\r\n",
      "A: Budim is 102 metre above sea level.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Budapest; zalgo=True runs each generated question through _zalgo\n",
    "print(wg.generate(\"Q1781\", zalgo=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:13<00:00,  3.53it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Questions and Answers on hamburger, American sandwich of ground beef patty:\n",
      "\n",
      "Q: Describe what a Hamburgh ſauſage was named after.\r\n",
      "A: A Hamburgh ſauſage got its name from sandwich and Hamburg.\n",
      "\n",
      "Q: How is a hamburger used?\r\n",
      "A: A hamburger is most commonly used for eating and burger eating contest.\n",
      "\n",
      "Q: Say in emoji: the hamburger.\r\n",
      "A: The 🍔 character repesents the hamburger.\n",
      "\n",
      "Q: What are the ingredients of the Hamburgh ſauſage?\r\n",
      "A: Patty, cheese, bread, lettuce, tomato and onion are all parts needed for the Hamburgh ſauſage.\n",
      "\n",
      "Q: When did it start?\r\n",
      "A: A hamburger started in 1758.\n",
      "\n",
      "Q: How are the hamburgers served?\r\n",
      "A: The hamburger is usually served hot.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# hamburger; proper=False passes each name through _add_a (see the articles in the output)\n",
    "print(wg.generate(\"Q6663\", proper=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
