{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import load_dataset, DatasetDict, load_from_disk, Dataset\n",
    "import json\n",
    "from pathlib import Path\n",
    "from pprint import pprint\n",
    "import pandas as pd\n",
    "from itertools import groupby\n",
    "from funcy import flatten\n",
    "from tqdm.autonotebook import tqdm\n",
    "import re\n",
    "import unicodedata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "63e59f8a6dd24cc1bfdaf85b32e8d708",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/5696 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['label', 'claim', 'unique_evidence', 'evidence_sentences', 'evidence_sentence_count', 'id', 'traj_keys', 'traj_values', 'rewoo_traj_keys', 'rewoo_traj_values', 'all_wiki_success', 'wiki_worked', 'articles', 'statuses', 'cot'],\n",
       "        num_rows: 5676\n",
       "    })\n",
       "    test: Dataset({\n",
       "        features: ['label', 'claim', 'unique_evidence', 'evidence_sentences', 'evidence_sentence_count', 'id'],\n",
       "        num_rows: 6612\n",
       "    })\n",
       "    validation: Dataset({\n",
       "        features: ['label', 'claim', 'unique_evidence', 'evidence_sentences', 'evidence_sentence_count', 'id', 'traj_keys', 'traj_values', 'rewoo_traj_keys', 'rewoo_traj_values', 'all_wiki_success', 'wiki_worked', 'articles', 'statuses', 'cot'],\n",
       "        num_rows: 1024\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reload the previously augmented FEVER DatasetDict from disk and keep only the\n",
    "# train rows whose `wiki_worked` flag is truthy.\n",
    "# NOTE: the name `fever` is rebound by the later `load_dataset(\"fever/fever\", ...)` cell.\n",
    "fever = load_from_disk(\"var/fever_augmented_nowikipages_json_val\")\n",
    "fever[\"train\"] = fever[\"train\"].filter(lambda x: x[\"wiki_worked\"])\n",
    "fever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings, wikipedia\n",
    "warnings.simplefilter(\"ignore\")\n",
    "\n",
    "from functools import cache\n",
    "\n",
    "@cache\n",
    "def search_new(subject: str, auto_suggest=False, redirect=False):\n",
    "    \"\"\"Look up the Wikipedia summary for ``subject`` and tag the outcome.\n",
    "\n",
    "    Returns a ``(text, status)`` tuple where ``status`` is one of:\n",
    "\n",
    "    * ``\"success\"``   - ``text`` is the stripped page summary;\n",
    "    * ``\"disambg\"``   - ``text`` lists the candidate subjects and asks for a retry;\n",
    "    * ``\"pageerror\"`` - ``text`` is the error message plus a retry hint;\n",
    "    * ``\"other:<exception type>\"`` - ``text`` is the stringified exception.\n",
    "\n",
    "    Results, failures included, are memoised by ``functools.cache`` per\n",
    "    argument combination.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        summary = wikipedia.summary(subject, auto_suggest=auto_suggest, redirect=redirect)\n",
    "        return summary.strip(), \"success\"\n",
    "    except wikipedia.DisambiguationError as ambiguous:\n",
    "        hint = f\"\\\"{subject}\\\" may refer to one of {ambiguous.args[1]}. Please retry the search with one of the subjects using Search[<subject>].\"\n",
    "        return hint, \"disambg\"\n",
    "    except wikipedia.PageError as missing:\n",
    "        return f\"{missing} Please retry the search using Search[<subject>].\", \"pageerror\"\n",
    "    except Exception as err:\n",
    "        # Catch-all: remaining wikipedia errors and anything unexpected are\n",
    "        # logged and reported the same way.\n",
    "        print(err, type(err))\n",
    "        return str(err), f\"other:{type(err)}\"\n",
    "\n",
    "def remove_accents(x):\n",
    "    \"\"\"Return ``x`` in Unicode NFD (canonical decomposition) form.\n",
    "\n",
    "    Despite the name, nothing is stripped: the ASCII encode/decode step that\n",
    "    would drop combining marks is disabled, so accented characters are only\n",
    "    decomposed into base character plus combining mark.\n",
    "    \"\"\"\n",
    "    decomposed = unicodedata.normalize('NFD', x)\n",
    "    return decomposed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Psych is an American detective comedy-drama television series created by Steve Franks for USA Network. The series stars James Roday as Shawn Spencer, a young crime consultant for the Santa Barbara Police Department whose \"heightened observational skills\" and impressive eidetic memory allow him to convince people that he solves cases with his psychic abilities. The program also stars Dulé Hill as Shawn\\'s intelligent best friend and reluctant partner Burton \"Gus\" Guster, as well as Corbin Bernsen as Shawn\\'s father Henry, a former detective with the Santa Barbara Police Department.\\nPsych premiered on July 7, 2006, following the fifth-season premiere of Monk, and continued to be paired with the series until Monk\\'s conclusion on December 4, 2009. During the second season, an animated segment titled \"The Big Adventures of Little Shawn and Gus\" was added to the series. Psych was the highest-rated American basic cable television premiere of 2006. USA Network renewed the series for an eighth season on December 19, 2012, to include eight episodes, and ordered two more episodes on June 25, 2013, bringing the episode order to ten. On February 5, 2014, USA Network confirmed that the eighth season of Psych would be its last, with the series finale airing on March 26, 2014.\\nPsych: The Movie, a two-hour television film, aired on USA Network on December 7, 2017, launching the Psych film series, with Franks\\'s hope being to make five more Psych movies following Psych: The Movie. On February 14, 2019, it was announced Psych: The Movie 2 was greenlit and set to premiere in late 2019, for which the main cast would return, but the premiere thereof was subsequently delayed to 2020, with the film renamed Psych 2: Lassie Come Home, and released on NBCUniversal\\'s streaming service, Peacock, July 15, 2020, the day the service officially launched. On May 13, 2021, Peacock announced a third film, Psych 3: This Is Gus, which premiered on November 18, 2021. 
Three further Psych films are in development.'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot check: summary of the \"Psych\" page fetched via wikipedia.page with redirect=True.\n",
    "wikipedia.page(\"Psych\", auto_suggest=False, redirect=True).summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Psych is an American detective comedy-drama television series created by Steve Franks for USA Network. The series stars James Roday as Shawn Spencer, a young crime consultant for the Santa Barbara Police Department whose \"heightened observational skills\" and impressive eidetic memory allow him to convince people that he solves cases with his psychic abilities. The program also stars Dulé Hill as Shawn\\'s intelligent best friend and reluctant partner Burton \"Gus\" Guster, as well as Corbin Bernsen as Shawn\\'s father Henry, a former detective with the Santa Barbara Police Department.\\nPsych premiered on July 7, 2006, following the fifth-season premiere of Monk, and continued to be paired with the series until Monk\\'s conclusion on December 4, 2009. During the second season, an animated segment titled \"The Big Adventures of Little Shawn and Gus\" was added to the series. Psych was the highest-rated American basic cable television premiere of 2006. USA Network renewed the series for an eighth season on December 19, 2012, to include eight episodes, and ordered two more episodes on June 25, 2013, bringing the episode order to ten. On February 5, 2014, USA Network confirmed that the eighth season of Psych would be its last, with the series finale airing on March 26, 2014.\\nPsych: The Movie, a two-hour television film, aired on USA Network on December 7, 2017, launching the Psych film series, with Franks\\'s hope being to make five more Psych movies following Psych: The Movie. On February 14, 2019, it was announced Psych: The Movie 2 was greenlit and set to premiere in late 2019, for which the main cast would return, but the premiere thereof was subsequently delayed to 2020, with the film renamed Psych 2: Lassie Come Home, and released on NBCUniversal\\'s streaming service, Peacock, July 15, 2020, the day the service officially launched. On May 13, 2021, Peacock announced a third film, Psych 3: This Is Gus, which premiered on November 18, 2021. 
Three further Psych films are in development.'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same subject through wikipedia.summary with redirect=False; the recorded output\n",
    "# is the same text as the wikipedia.page(...) call in the previous cell.\n",
    "wikipedia.summary(\"Psych\", auto_suggest=False, redirect=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ad-hoc lookup through the cached `search_new` helper defined above.\n",
    "search_new(\"Kentucky inmate executions\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ad-hoc lookup through the cached `search_new` helper defined above.\n",
    "search_new('what \"Gray Matters\" means')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f0bb476b09aa4df2b69f0c9bdfb55940",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "README.md:   0%|          | 0.00/11.9k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ab6c898e16c643079bbbdcbf648431c2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "fever.py:   0%|          | 0.00/10.2k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "09eace494ed14982947050d5441529a9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/33.0M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5dc2618f306c487c951d36617aed6895",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/4.35M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "80a0af89724c494e8a3b40f2151dfec6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/1.53M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3c55e652131b478bad986e728ff54630",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/1.60M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "18f32b08406d4f9381950022e0b3f2da",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/2.17M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "718537fcdbb84c19b77de634acfd5fe1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/2.18M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "35a3568970e0423884c1cefc4449c93c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split:   0%|          | 0/311431 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8c27c6a38bbc4a72a4f7fec88bc4fee5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating labelled_dev split:   0%|          | 0/37566 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0dd37fc436fb48eeaf7e6b9f0625b7d6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating unlabelled_dev split:   0%|          | 0/19998 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "94ab8e056c30461fb41f5ebefdae6fc7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating unlabelled_test split:   0%|          | 0/19998 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "28f5f8b1013d416da38b0fb7f4de7619",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating paper_dev split:   0%|          | 0/18999 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1b1d7cfc8efa48d39f49b95f90c3d38c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating paper_test split:   0%|          | 0/18567 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the original FEVER v1.0 splits and the local task JSON whose examples\n",
    "# are later used to flag claims that also occur in `bigbench_fever`.\n",
    "fever = load_dataset(\"fever/fever\", \"v1.0\")\n",
    "# Explicit encoding so the file is decoded the same way on every platform,\n",
    "# matching the utf-8 used for the other FEVER files in this notebook.\n",
    "bigbench_fever = json.loads(Path(\"var/fever/task.json\").read_text(encoding=\"utf-8\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0507776c3ab248c9a9cb505558b074e8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/311431 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a69f1a1539224494b0700db998a7fda2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/37566 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0b4c0dc27d3f4154ba76cca36c2786f1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/19998 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6f452ac210954c46bc4a1e0e2df6e9d4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/19998 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "079b06ff7e024264b02e9f8823677058",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/18999 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8294fd10ea2c4553bf5ad9dd7bdea067",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/18567 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Persist the freshly loaded FEVER splits locally.\n",
    "fever.save_to_disk(\"var/fever/fever_original\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6192c2b0ba814f42a4e127265166f459",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/311431 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2688a4b829e64398888764d95b1068af",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/37566 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9d3c1df9887e4f719cc8d56140dc03c9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/19998 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "db39154a947949c0bdcc7dc47057276e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/19998 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fa56e99e1bee4a949c9cf6acb0269da0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/18999 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5ff2d65e35d2440d923b418d986e1ec3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/18567 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "35d58d4ef313498eb50678df818b55b3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=16):   0%|          | 0/263822 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aea4def634a24a4a97fd54e8673cf5f1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=16):   0%|          | 0/28625 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c01a641899db4091b901304a4951d797",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=16):   0%|          | 0/14475 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b10299353507400c892e4d644f113c55",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=16):   0%|          | 0/14150 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def mapper(row):\n",
    "    \"\"\"Derive the boolean label, readable page title and overlap flag for a row.\n",
    "\n",
    "    Args:\n",
    "        row: mapping providing ``label``, ``claim`` and ``evidence_wiki_url``.\n",
    "\n",
    "    Returns:\n",
    "        dict with ``bool_label`` (True for SUPPORTS, False for REFUTES),\n",
    "        ``wikipedia_title`` (the evidence page id with underscores and FEVER\n",
    "        escape tokens decoded) and ``in_bigbench`` (whether the claim text\n",
    "        occurs inside any ``bigbench_fever`` example input).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the label is neither SUPPORTS nor REFUTES.\n",
    "    \"\"\"\n",
    "    label_to_bool = {\"SUPPORTS\": True, \"REFUTES\": False}\n",
    "    if row[\"label\"] not in label_to_bool:\n",
    "        raise ValueError(f\"WOW! That's unexpected. Got label {row['label']!r}\")\n",
    "    bool_label = label_to_bool[row[\"label\"]]\n",
    "\n",
    "    # FEVER page ids use underscores for spaces and escape brackets and colons\n",
    "    # with tokens; decode all of them, not only the round brackets.\n",
    "    wikipedia_title = row[\"evidence_wiki_url\"].replace(\"_\", \" \")\n",
    "    escape_tokens = (\n",
    "        (\"-LRB-\", \"(\"),\n",
    "        (\"-RRB-\", \")\"),\n",
    "        (\"-LSB-\", \"[\"),\n",
    "        (\"-RSB-\", \"]\"),\n",
    "        (\"-COLON-\", \":\"),\n",
    "    )\n",
    "    for token, char in escape_tokens:\n",
    "        wikipedia_title = wikipedia_title.replace(token, char)\n",
    "\n",
    "    claim = row[\"claim\"]\n",
    "    in_bigbench = any(claim in example[\"input\"] for example in bigbench_fever[\"examples\"])\n",
    "    return {\"bool_label\": bool_label, \"wikipedia_title\": wikipedia_title, \"in_bigbench\": in_bigbench}\n",
    "# Keep only SUPPORTS/REFUTES rows in every split, then add the columns produced by `mapper`.\n",
    "ds = fever.filter(lambda x: x[\"label\"] in [\"SUPPORTS\", \"REFUTES\"]).map(mapper, num_proc=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['id', 'label', 'claim', 'evidence_annotation_id', 'evidence_id', 'evidence_wiki_url', 'evidence_sentence_id', 'bool_label', 'wikipedia_title', 'in_bigbench'],\n",
       "        num_rows: 263822\n",
       "    })\n",
       "    labelled_dev: Dataset({\n",
       "        features: ['id', 'label', 'claim', 'evidence_annotation_id', 'evidence_id', 'evidence_wiki_url', 'evidence_sentence_id', 'bool_label', 'wikipedia_title', 'in_bigbench'],\n",
       "        num_rows: 28625\n",
       "    })\n",
       "    unlabelled_dev: Dataset({\n",
       "        features: ['id', 'label', 'claim', 'evidence_annotation_id', 'evidence_id', 'evidence_wiki_url', 'evidence_sentence_id'],\n",
       "        num_rows: 0\n",
       "    })\n",
       "    unlabelled_test: Dataset({\n",
       "        features: ['id', 'label', 'claim', 'evidence_annotation_id', 'evidence_id', 'evidence_wiki_url', 'evidence_sentence_id'],\n",
       "        num_rows: 0\n",
       "    })\n",
       "    paper_dev: Dataset({\n",
       "        features: ['id', 'label', 'claim', 'evidence_annotation_id', 'evidence_id', 'evidence_wiki_url', 'evidence_sentence_id', 'bool_label', 'wikipedia_title', 'in_bigbench'],\n",
       "        num_rows: 14475\n",
       "    })\n",
       "    paper_test: Dataset({\n",
       "        features: ['id', 'label', 'claim', 'evidence_annotation_id', 'evidence_id', 'evidence_wiki_url', 'evidence_sentence_id', 'bool_label', 'wikipedia_title', 'in_bigbench'],\n",
       "        num_rows: 14150\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the filtered and mapped splits.\n",
    "ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def trajectorize(row):\n",
    "    \"\"\"Build the full and the shorter ``rewoo`` trajectory for one FEVER row.\n",
    "\n",
    "    The page named by ``row[\"wikipedia_title\"]`` is fetched with ``search_new``;\n",
    "    ``wiki_worked`` is True only when that lookup reported ``\"success\"``.\n",
    "\n",
    "    Args:\n",
    "        row: mapping providing ``claim``, ``bool_label`` and ``wikipedia_title``.\n",
    "\n",
    "    Returns:\n",
    "        dict with the parallel lists ``traj_keys``/``traj_values`` (task,\n",
    "        thought, action, observation, thought, action), the parallel lists\n",
    "        ``rewoo_traj_keys``/``rewoo_traj_values`` (task, thought, action,\n",
    "        observation), the boolean ``wiki_worked`` and the stripped ``wiki`` text.\n",
    "    \"\"\"\n",
    "    claim = row[\"claim\"].strip()\n",
    "    title = row[\"wikipedia_title\"]\n",
    "    task = f\"On June 2017, the following claim was made: {claim}\\nQ: Was this claim true or false?\"\n",
    "    answer = row[\"bool_label\"]\n",
    "\n",
    "    # search_new returns (text, status); downstream filters expect a boolean flag.\n",
    "    wiki, status = search_new(title)\n",
    "    worked = status == \"success\"\n",
    "    wiki = wiki.strip()\n",
    "\n",
    "    # Steps shared by both trajectory flavours.\n",
    "    search_step = (\"action\", f\"Search[{title}]\")\n",
    "    observation_step = (\"observation\", f\"[Document]\\n{wiki}\\n[End]\")\n",
    "\n",
    "    trajectory = [\n",
    "        (\"task\", task),\n",
    "        (\"thought\", f\"I need to search {title}.\"),\n",
    "        search_step,\n",
    "        observation_step,\n",
    "        (\"thought\", f\"The claim is {answer}.\"),\n",
    "        (\"action\", f\"Finish[{answer}]\"),\n",
    "    ]\n",
    "    rewoo_trajectory = [\n",
    "        (\"task\", task),\n",
    "        (\"thought\", f\"Search for more information about {title}.\"),\n",
    "        search_step,\n",
    "        observation_step,\n",
    "    ]\n",
    "\n",
    "    return {\n",
    "        \"traj_keys\": [key for key, _ in trajectory],\n",
    "        \"traj_values\": [value for _, value in trajectory],\n",
    "        \"rewoo_traj_keys\": [key for key, _ in rewoo_trajectory],\n",
    "        \"rewoo_traj_values\": [value for _, value in rewoo_trajectory],\n",
    "        \"wiki_worked\": worked,\n",
    "        \"wiki\": wiki,\n",
    "    }\n",
    "\n",
    "# Draw a reproducible 5000-row sample of the train split, drop rows flagged\n",
    "# `in_bigbench`, and attach trajectories to the remaining rows.\n",
    "ds[\"train_react\"] = (\n",
    "    ds[\"train\"]\n",
    "    .shuffle(seed=42)  # fixed seed so a fresh Run All selects the same rows\n",
    "    .select(range(5000))\n",
    "    .filter(lambda x: not x[\"in_bigbench\"])\n",
    "    .map(trajectorize, num_proc=32)\n",
    ")\n",
    "# Subset whose `wiki_worked` flag equals True (explicit comparison kept on\n",
    "# purpose: a non-boolean status value must not pass).\n",
    "ds[\"train_react_wiki_positive\"] = ds[\"train_react\"].filter(\n",
    "    lambda x: x[\"wiki_worked\"] == True\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Start to end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5abaa5fe70c04e62acc001486e981d17",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Resolving data files:   0%|          | 0/109 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e7ee291c213a455a9b845f33e3ab564f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0/109 [00:00<?, ?files/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5731b5f997254c70bbc0acfc291f3c18",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load every wiki-*.jsonl shard of the FEVER wiki-pages dump; the rows land in the `train` split.\n",
    "wikipages = load_dataset(\"json\", data_files=\"var/fever/wiki-pages/wiki-pages/wiki-*.jsonl\", encoding=\"utf-8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a84b59050f1c41cd9af10fb3d136b66a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=32):   0%|          | 0/5416537 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def _split_wiki_lines(lines):\n",
    "    \"\"\"Split a page's ``lines`` field into one chunk per numbered record.\n",
    "\n",
    "    Chunks are cut at every ``<digits><TAB>`` marker that starts a line. The\n",
    "    marker is anchored to the line start so that a numeric field followed by a\n",
    "    tab inside a record is not mistaken for a marker, which would shift the\n",
    "    positions later used as sentence indices. Empty chunks are dropped.\n",
    "    NOTE(review): assumes each numbered record sits on its own line - confirm\n",
    "    against the wiki-pages dump.\n",
    "    \"\"\"\n",
    "    chunks = re.split(r\"^\\d+\\t\", lines, flags=re.MULTILINE)\n",
    "    return [chunk for chunk in chunks if chunk != '']\n",
    "\n",
    "wikipages[\"train\"] = wikipages[\"train\"].map(\n",
    "    lambda page: {\"lines_split\": _split_wiki_lines(page[\"lines\"])},\n",
    "    num_proc=32,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the pages to a pandas DataFrame indexed by page id, so evidence titles can be looked up.\n",
    "wiki_pages_df = wikipages[\"train\"].to_pandas().set_index(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalise the index with the same helper that is applied to evidence titles\n",
    "# before lookup, so both sides are compared in the same Unicode form.\n",
    "wiki_pages_df.index = wiki_pages_df.index.map(remove_accents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the indexed pages to a parquet file.\n",
    "wiki_pages_df.to_parquet(\"var/fever/wiki_pages.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# shared_task_dev.jsonl is the FEVER dev set; it is read as JSON lines (one record per line).\n",
    "df = pd.read_json(\"var/fever/shared_task_dev.jsonl\", lines=True, encoding=\"utf-8\")#.set_index(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evidence_mapper(evidence):\n",
    "    \"\"\"Return the distinct ``(page, line)`` pairs of the first evidence set.\n",
    "\n",
    "    Args:\n",
    "        evidence: list of evidence sets; only the first set is used. Each entry\n",
    "            of a set is a sequence whose item 2 is the page id and item 3 the\n",
    "            line number.\n",
    "\n",
    "    Returns:\n",
    "        list of ``(page, line)`` tuples in first-seen order, skipping entries\n",
    "        whose page is None. Empty input yields an empty list.\n",
    "    \"\"\"\n",
    "    if len(evidence) == 0:\n",
    "        return []\n",
    "    pairs = ((entry[2], entry[3]) for entry in evidence[0] if entry[2] is not None)\n",
    "    # dict.fromkeys de-duplicates while keeping first-seen order, so the result\n",
    "    # is identical on every run (the iteration order of a set of strings is not).\n",
    "    return list(dict.fromkeys(pairs))\n",
    "# One list of distinct (page, line) pairs per claim.\n",
    "df[\"unique_evidence\"] = df[\"evidence\"].apply(evidence_mapper)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evidence_mapper_sentence(evidences):\n",
    "    \"\"\"Resolve ``(title, line)`` evidence pairs to their sentence text.\n",
    "\n",
    "    Each title is normalised with ``remove_accents`` and looked up in\n",
    "    ``wiki_pages_df``. Pairs with a missing title or line are skipped silently;\n",
    "    pairs whose page or line cannot be found are printed and skipped.\n",
    "\n",
    "    Args:\n",
    "        evidences: iterable of ``(title, line)`` pairs.\n",
    "\n",
    "    Returns:\n",
    "        list of ``(normalised_title, line, sentence)`` tuples.\n",
    "    \"\"\"\n",
    "    resolved = []\n",
    "    for title, line in evidences:\n",
    "        if title is None or line is None:\n",
    "            continue\n",
    "        title = remove_accents(title)\n",
    "\n",
    "        if title not in wiki_pages_df.index:\n",
    "            print(title)\n",
    "            continue\n",
    "\n",
    "        page = wiki_pages_df.loc[title]\n",
    "        if isinstance(page, pd.DataFrame):\n",
    "            # Duplicate index labels make .loc return several rows; indexing that\n",
    "            # result by line number would select a row rather than a sentence,\n",
    "            # so fall back to the first matching page.\n",
    "            page = page.iloc[0]\n",
    "\n",
    "        page_lines = page[\"lines_split\"]\n",
    "        if page_lines is not None and len(page_lines) > line:\n",
    "            resolved.append((title, line, page_lines[line]))\n",
    "        else:\n",
    "            print(page)\n",
    "    return resolved\n",
    "# Attach the resolved (title, line, sentence) tuples for each claim.\n",
    "df[\"evidence_sentences\"] = df[\"unique_evidence\"].apply(evidence_mapper_sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabulate the `bigbench_fever` examples so their `input` column can be searched.\n",
    "bigbench = pd.DataFrame.from_records(bigbench_fever[\"examples\"])#.set_index(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Literal substring match: claims can contain regex metacharacters such as\n",
    "# '(' or '?', which `str.contains` would otherwise interpret as a pattern.\n",
    "df[\"claim_in_bigbench\"] = df[\"claim\"].apply(\n",
    "    lambda claim: bigbench[\"input\"].str.contains(claim, regex=False).any()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of resolved evidence sentences per claim.\n",
    "df[\"evidence_sentence_count\"] = df[\"evidence_sentences\"].map(len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>verifiable</th>\n",
       "      <th>label</th>\n",
       "      <th>claim</th>\n",
       "      <th>evidence</th>\n",
       "      <th>unique_evidence</th>\n",
       "      <th>evidence_sentences</th>\n",
       "      <th>claim_in_bigbench</th>\n",
       "      <th>evidence_sentence_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6612</th>\n",
       "      <td>183614</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Finding Dory was directed by someone who is ba...</td>\n",
       "      <td>[[[213723, 220883, Finding_Dory, 1], [213723, ...</td>\n",
       "      <td>[(Andrew_Stanton, 0), (Finding_Dory, 1)]</td>\n",
       "      <td>[(Andrew_Stanton, 0, Andrew Stanton -LRB- born...</td>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6613</th>\n",
       "      <td>186920</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>SUPPORTS</td>\n",
       "      <td>Cher was released by Geffen Records.</td>\n",
       "      <td>[[[218086, 224474, Cher_-LRB-1987_album-RRB-, ...</td>\n",
       "      <td>[(Cher_-LRB-1987_album-RRB-, 0)]</td>\n",
       "      <td>[(Cher_-LRB-1987_album-RRB-, 0, Cher is the se...</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6614</th>\n",
       "      <td>76611</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Harvard University is barely residential.</td>\n",
       "      <td>[[[93481, 106435, Harvard_University, 15]]]</td>\n",
       "      <td>[(Harvard_University, 15)]</td>\n",
       "      <td>[(Harvard_University, 15, Harvard is a large ,...</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6615</th>\n",
       "      <td>121723</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Qui-Gon Jinn is a character in the Avengers fr...</td>\n",
       "      <td>[[[142921, 158037, Qui-Gon_Jinn, 0]]]</td>\n",
       "      <td>[(Qui-Gon_Jinn, 0)]</td>\n",
       "      <td>[(Qui-Gon_Jinn, 0, Qui-Gon Jinn is a fictional...</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6616</th>\n",
       "      <td>78846</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Daggering originated in Australia.</td>\n",
       "      <td>[[[95808, 108866, Daggering, 0]]]</td>\n",
       "      <td>[(Daggering, 0)]</td>\n",
       "      <td>[(Daggering, 0, Daggering is a form of dance o...</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19992</th>\n",
       "      <td>33339</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Firefox is the second most popular dog breed.</td>\n",
       "      <td>[[[49438, 58766, Firefox, 13]]]</td>\n",
       "      <td>[(Firefox, 13)]</td>\n",
       "      <td>[(Firefox, 13, , Firefox has between 9 % and 1...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19993</th>\n",
       "      <td>8538</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Hermit crabs are arachnids.</td>\n",
       "      <td>[[[15450, 19262, Hermit_crab, 0], [15450, 1926...</td>\n",
       "      <td>[(Hermit_crab, 0), (Decapoda, 0)]</td>\n",
       "      <td>[(Hermit_crab, 0, Hermit crabs are decapod cru...</td>\n",
       "      <td>True</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19994</th>\n",
       "      <td>145641</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Michael Hutchence died on a boat.</td>\n",
       "      <td>[[[168967, 182663, Michael_Hutchence, 15]]]</td>\n",
       "      <td>[(Michael_Hutchence, 15)]</td>\n",
       "      <td>[(Michael_Hutchence, 15, On the morning of 22 ...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19995</th>\n",
       "      <td>87517</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>SUPPORTS</td>\n",
       "      <td>The Cyclades are located to the southeast of G...</td>\n",
       "      <td>[[[104709, 118125, Cyclades, 0]]]</td>\n",
       "      <td>[(Cyclades, 0)]</td>\n",
       "      <td>[(Cyclades, 0, The Cyclades -LRB- -LSB- ˈsɪklə...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19997</th>\n",
       "      <td>81957</td>\n",
       "      <td>VERIFIABLE</td>\n",
       "      <td>REFUTES</td>\n",
       "      <td>Trouble with the Curve is a television show.</td>\n",
       "      <td>[[[99015, 112132, Trouble_with_the_Curve, 0]],...</td>\n",
       "      <td>[(Trouble_with_the_Curve, 0)]</td>\n",
       "      <td>[(Trouble_with_the_Curve, 0, Trouble with the ...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8905 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           id  verifiable     label  \\\n",
       "6612   183614  VERIFIABLE   REFUTES   \n",
       "6613   186920  VERIFIABLE  SUPPORTS   \n",
       "6614    76611  VERIFIABLE   REFUTES   \n",
       "6615   121723  VERIFIABLE   REFUTES   \n",
       "6616    78846  VERIFIABLE   REFUTES   \n",
       "...       ...         ...       ...   \n",
       "19992   33339  VERIFIABLE   REFUTES   \n",
       "19993    8538  VERIFIABLE   REFUTES   \n",
       "19994  145641  VERIFIABLE   REFUTES   \n",
       "19995   87517  VERIFIABLE  SUPPORTS   \n",
       "19997   81957  VERIFIABLE   REFUTES   \n",
       "\n",
       "                                                   claim  \\\n",
       "6612   Finding Dory was directed by someone who is ba...   \n",
       "6613                Cher was released by Geffen Records.   \n",
       "6614           Harvard University is barely residential.   \n",
       "6615   Qui-Gon Jinn is a character in the Avengers fr...   \n",
       "6616                  Daggering originated in Australia.   \n",
       "...                                                  ...   \n",
       "19992      Firefox is the second most popular dog breed.   \n",
       "19993                        Hermit crabs are arachnids.   \n",
       "19994                  Michael Hutchence died on a boat.   \n",
       "19995  The Cyclades are located to the southeast of G...   \n",
       "19997       Trouble with the Curve is a television show.   \n",
       "\n",
       "                                                evidence  \\\n",
       "6612   [[[213723, 220883, Finding_Dory, 1], [213723, ...   \n",
       "6613   [[[218086, 224474, Cher_-LRB-1987_album-RRB-, ...   \n",
       "6614         [[[93481, 106435, Harvard_University, 15]]]   \n",
       "6615               [[[142921, 158037, Qui-Gon_Jinn, 0]]]   \n",
       "6616                   [[[95808, 108866, Daggering, 0]]]   \n",
       "...                                                  ...   \n",
       "19992                    [[[49438, 58766, Firefox, 13]]]   \n",
       "19993  [[[15450, 19262, Hermit_crab, 0], [15450, 1926...   \n",
       "19994        [[[168967, 182663, Michael_Hutchence, 15]]]   \n",
       "19995                  [[[104709, 118125, Cyclades, 0]]]   \n",
       "19997  [[[99015, 112132, Trouble_with_the_Curve, 0]],...   \n",
       "\n",
       "                                unique_evidence  \\\n",
       "6612   [(Andrew_Stanton, 0), (Finding_Dory, 1)]   \n",
       "6613           [(Cher_-LRB-1987_album-RRB-, 0)]   \n",
       "6614                 [(Harvard_University, 15)]   \n",
       "6615                        [(Qui-Gon_Jinn, 0)]   \n",
       "6616                           [(Daggering, 0)]   \n",
       "...                                         ...   \n",
       "19992                           [(Firefox, 13)]   \n",
       "19993         [(Hermit_crab, 0), (Decapoda, 0)]   \n",
       "19994                 [(Michael_Hutchence, 15)]   \n",
       "19995                           [(Cyclades, 0)]   \n",
       "19997             [(Trouble_with_the_Curve, 0)]   \n",
       "\n",
       "                                      evidence_sentences  claim_in_bigbench  \\\n",
       "6612   [(Andrew_Stanton, 0, Andrew Stanton -LRB- born...              False   \n",
       "6613   [(Cher_-LRB-1987_album-RRB-, 0, Cher is the se...              False   \n",
       "6614   [(Harvard_University, 15, Harvard is a large ,...              False   \n",
       "6615   [(Qui-Gon_Jinn, 0, Qui-Gon Jinn is a fictional...              False   \n",
       "6616   [(Daggering, 0, Daggering is a form of dance o...              False   \n",
       "...                                                  ...                ...   \n",
       "19992  [(Firefox, 13, , Firefox has between 9 % and 1...               True   \n",
       "19993  [(Hermit_crab, 0, Hermit crabs are decapod cru...               True   \n",
       "19994  [(Michael_Hutchence, 15, On the morning of 22 ...               True   \n",
       "19995  [(Cyclades, 0, The Cyclades -LRB- -LSB- ˈsɪklə...               True   \n",
       "19997  [(Trouble_with_the_Curve, 0, Trouble with the ...               True   \n",
       "\n",
       "       evidence_sentence_count  \n",
       "6612                         2  \n",
       "6613                         1  \n",
       "6614                         1  \n",
       "6615                         1  \n",
       "6616                         1  \n",
       "...                        ...  \n",
       "19992                        1  \n",
       "19993                        2  \n",
       "19994                        1  \n",
       "19995                        1  \n",
       "19997                        1  \n",
       "\n",
       "[8905 rows x 9 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "train_df = df[(~df.index.isin(bigbench.index)) & (df[\"evidence_sentence_count\"] > 0)]\n",
    "train_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bigbench = bigbench.set_index(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.set_index(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_df = df[\n",
    "    (df.index.isin(bigbench.index)) & (df[\"evidence_sentence_count\"] > 0)\n",
    "].drop(columns=[\"verifiable\", \"claim_in_bigbench\", \"evidence\"])\n",
    "test_df[\"unique_evidence\"] = test_df[\"unique_evidence\"].map(\n",
    "    lambda x: [[str(title), str(sent_id)] for title, sent_id in x]\n",
    ")\n",
    "test_df[\"evidence_sentences\"] = test_df[\"evidence_sentences\"].map(\n",
    "    lambda x: [[str(title), str(sent_id), str(sent)] for title, sent_id, sent in x]\n",
    ")\n",
    "test_df[\"label\"] = test_df[\"label\"] == \"SUPPORTS\"\n",
    "test_df.index = test_df.index.astype(pd.StringDtype())\n",
    "test_df.claim = test_df.claim.astype(pd.StringDtype())\n",
    "test_df[\"id\"] = test_df.index\n",
    "test_df.to_json(\"fever_test_df.json\", orient=\"records\", lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df = (\n",
    "    df[(~df.index.isin(bigbench.index)) & (df[\"evidence_sentence_count\"] > 0)]\n",
    "    .drop(columns=[\"verifiable\", \"claim_in_bigbench\", \"evidence\"])\n",
    ")\n",
    "train_df[\"unique_evidence\"] = train_df[\"unique_evidence\"].map(lambda x: [ [str(title), str(sent_id)] for title,sent_id in x ])\n",
    "train_df[\"evidence_sentences\"] = train_df[\"evidence_sentences\"].map(lambda x: [ [str(title), str(sent_id), str(sent)] for title,sent_id,sent in x ])\n",
    "train_df[\"label\"] = train_df[\"label\"] == \"SUPPORTS\"\n",
    "train_df.index = train_df.index.astype(pd.StringDtype())\n",
    "train_df.claim = train_df.claim.astype(pd.StringDtype())\n",
    "train_df[\"id\"] = train_df.index\n",
    "train_df.to_json(\"fever_train_df.json\", orient='records', lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5b2d3dac30234790867be48d3e4e0cb2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "841e389c45f24649b5e01a5a91ecf643",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating test split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "df3068d7dcac401ebd76abddb6fd4e00",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/6720 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "014c42504c4e41ce8e4a1ad0af70b505",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/6612 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['label', 'claim', 'unique_evidence', 'evidence_sentences', 'evidence_sentence_count', 'id'],\n",
       "        num_rows: 6720\n",
       "    })\n",
       "    test: Dataset({\n",
       "        features: ['label', 'claim', 'unique_evidence', 'evidence_sentences', 'evidence_sentence_count', 'id'],\n",
       "        num_rows: 6612\n",
       "    })\n",
       "})"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "fever_train_ds = load_dataset(\"json\", data_files={\"train\": \"fever_train_df.json\", \"test\": \"fever_test_df.json\"})\n",
    "fever_train_ds.save_to_disk(\"var/fever_reprocessed\")\n",
    "fever_train_ds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# End"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Redo based off original FEVER"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def clean_fever(text) -> str:\n",
    "    mapping = {\"_\": \" \",\n",
    "               \"-LRB- \": \"(\",\n",
    "               \" -RRB-\": \")\",\n",
    "               \"-LSB- \": \"[\",\n",
    "               \" -RSB-\": \"]\",\n",
    "               \"-LRB-\": \"(\",\n",
    "               \"-RRB-\": \")\",\n",
    "               \"-LSB-\": \"[\",\n",
    "               \"-RSB-\": \"]\",\n",
    "               \"-COLON-\": \":\"\n",
    "               }\n",
    "\n",
    "    for k,v in mapping.items():\n",
    "        text = text.replace(k, v)\n",
    "\n",
    "    return text.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['label', 'claim', 'unique_evidence', 'evidence_sentences', 'evidence_sentence_count', 'id'],\n",
       "        num_rows: 6720\n",
       "    })\n",
       "    test: Dataset({\n",
       "        features: ['label', 'claim', 'unique_evidence', 'evidence_sentences', 'evidence_sentence_count', 'id'],\n",
       "        num_rows: 6612\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fever = load_from_disk(\"var/fever_reprocessed\")\n",
    "fever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7608"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(list(flatten([ [y[0] for y in x] for x in fever[\"train\"][\"unique_evidence\"] ])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1043"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "articles = list(set(flatten([ [y[0] for y in x] for x in fever[\"train\"][\"unique_evidence\"] ])))\n",
    "len(articles)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['article'],\n",
       "    num_rows: 1043\n",
       "})"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "article_ds = Dataset.from_dict({\"article\": articles})\n",
    "article_ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6f14fe64eb684be5920e03d49e9c7497",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/1043 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>article</th>\n",
       "      <th>wiki</th>\n",
       "      <th>msg</th>\n",
       "      <th>cleaned</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1031</th>\n",
       "      <td>Trollhunters</td>\n",
       "      <td>\"Trollhunters\" may refer to one of ['Tales of ...</td>\n",
       "      <td>disambg</td>\n",
       "      <td>Trollhunters</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           article                                               wiki  \\\n",
       "1031  Trollhunters  \"Trollhunters\" may refer to one of ['Tales of ...   \n",
       "\n",
       "          msg       cleaned  \n",
       "1031  disambg  Trollhunters  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def searcher(row, auto_suggest: bool):\n",
    "    cleaned = clean_fever(row[\"article\"])\n",
    "    if \"msg\" in row:\n",
    "        if row[\"msg\"] != \"success\":\n",
    "            wiki, msg = search_new(cleaned, auto_suggest=auto_suggest, redirect=True)\n",
    "        else:\n",
    "            wiki = row[\"wiki\"]\n",
    "            msg = row[\"msg\"]\n",
    "    else:\n",
    "        wiki, msg = search_new(cleaned, auto_suggest=auto_suggest, redirect=True)\n",
    "    return {\"wiki\": wiki, \"msg\": msg, \"cleaned\": cleaned}\n",
    "article_ds = article_ds.map(lambda x: searcher(x, True), num_proc=4)\n",
    "article_ds = article_ds.map(lambda x: searcher(x, False), num_proc=1)\n",
    "article_df = article_ds.to_pandas()\n",
    "article_df[article_df.msg != \"success\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c660eeff42454ffd9e77849d9a45bbba",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/1043 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "article_ds.save_to_disk(\"var/fever_articles_1_fail\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>wiki</th>\n",
       "      <th>msg</th>\n",
       "      <th>cleaned</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>article</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Trollhunters</th>\n",
       "      <td>\"Trollhunters\" may refer to one of ['Tales of ...</td>\n",
       "      <td>disambg</td>\n",
       "      <td>Trollhunters</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                           wiki      msg  \\\n",
       "article                                                                    \n",
       "Trollhunters  \"Trollhunters\" may refer to one of ['Tales of ...  disambg   \n",
       "\n",
       "                   cleaned  \n",
       "article                     \n",
       "Trollhunters  Trollhunters  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "article_ds = load_from_disk(\"var/fever_articles_1_fail\")\n",
    "article_df = article_ds.to_pandas().set_index(\"article\")\n",
    "article_df[article_df.msg != \"success\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "wiki       Psych is an American detective comedy-drama te...\n",
       "msg                                                  success\n",
       "cleaned                                                Psych\n",
       "Name: Psych, dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "article_df.loc[\"Psych\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def search(query):\n",
    "    row = article_df.loc[query]\n",
    "    return row[\"wiki\"], row[\"msg\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "ename": "PageError",
     "evalue": "Page id \"troll hunters\" does not match any pages. Try another id!",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mPageError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[23], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m article_df[article_df\u001b[38;5;241m.\u001b[39mmsg \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcleaned\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[0;32m----> 2\u001b[0m     \u001b[38;5;28mprint\u001b[39m(i, \u001b[43mwikipedia\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msummary\u001b[49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m)\n",
      "File \u001b[0;32m~/.conda/envs/pdlnew/lib/python3.12/site-packages/wikipedia/util.py:28\u001b[0m, in \u001b[0;36mcache.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     26\u001b[0m   ret \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cache[key]\n\u001b[1;32m     27\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 28\u001b[0m   ret \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cache[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     30\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ret\n",
      "File \u001b[0;32m~/.conda/envs/pdlnew/lib/python3.12/site-packages/wikipedia/wikipedia.py:231\u001b[0m, in \u001b[0;36msummary\u001b[0;34m(title, sentences, chars, auto_suggest, redirect)\u001b[0m\n\u001b[1;32m    216\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m    217\u001b[0m \u001b[38;5;124;03mPlain text summary of the page.\u001b[39;00m\n\u001b[1;32m    218\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    226\u001b[0m \u001b[38;5;124;03m* redirect - allow redirection without raising RedirectError\u001b[39;00m\n\u001b[1;32m    227\u001b[0m \u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m    229\u001b[0m \u001b[38;5;66;03m# use auto_suggest and redirect to get the correct article\u001b[39;00m\n\u001b[1;32m    230\u001b[0m \u001b[38;5;66;03m# also, use page's error checking to raise DisambiguationError if necessary\u001b[39;00m\n\u001b[0;32m--> 231\u001b[0m page_info \u001b[38;5;241m=\u001b[39m \u001b[43mpage\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtitle\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_suggest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauto_suggest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mredirect\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    232\u001b[0m title \u001b[38;5;241m=\u001b[39m page_info\u001b[38;5;241m.\u001b[39mtitle\n\u001b[1;32m    233\u001b[0m pageid \u001b[38;5;241m=\u001b[39m page_info\u001b[38;5;241m.\u001b[39mpageid\n",
      "File \u001b[0;32m~/.conda/envs/pdlnew/lib/python3.12/site-packages/wikipedia/wikipedia.py:276\u001b[0m, in \u001b[0;36mpage\u001b[0;34m(title, pageid, auto_suggest, redirect, preload)\u001b[0m\n\u001b[1;32m    273\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m:\n\u001b[1;32m    274\u001b[0m       \u001b[38;5;66;03m# if there is no suggestion or search results, the page doesn't exist\u001b[39;00m\n\u001b[1;32m    275\u001b[0m       \u001b[38;5;28;01mraise\u001b[39;00m PageError(title)\n\u001b[0;32m--> 276\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mWikipediaPage\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtitle\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpreload\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    277\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m pageid \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    278\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m WikipediaPage(pageid\u001b[38;5;241m=\u001b[39mpageid, preload\u001b[38;5;241m=\u001b[39mpreload)\n",
      "File \u001b[0;32m~/.conda/envs/pdlnew/lib/python3.12/site-packages/wikipedia/wikipedia.py:299\u001b[0m, in \u001b[0;36mWikipediaPage.__init__\u001b[0;34m(self, title, pageid, redirect, preload, original_title)\u001b[0m\n\u001b[1;32m    296\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    297\u001b[0m   \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEither a title or a pageid must be specified\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 299\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__load\u001b[49m\u001b[43m(\u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpreload\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    301\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m preload:\n\u001b[1;32m    302\u001b[0m   \u001b[38;5;28;01mfor\u001b[39;00m prop \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mimages\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreferences\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlinks\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msections\u001b[39m\u001b[38;5;124m'\u001b[39m):\n",
      "File \u001b[0;32m~/.conda/envs/pdlnew/lib/python3.12/site-packages/wikipedia/wikipedia.py:345\u001b[0m, in \u001b[0;36mWikipediaPage.__load\u001b[0;34m(self, redirect, preload)\u001b[0m\n\u001b[1;32m    343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmissing\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m page:\n\u001b[1;32m    344\u001b[0m   \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtitle\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m--> 345\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m PageError(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtitle)\n\u001b[1;32m    346\u001b[0m   \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    347\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m PageError(pageid\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpageid)\n",
      "\u001b[0;31mPageError\u001b[0m: Page id \"troll hunters\" does not match any pages. Try another id!"
     ]
    }
   ],
   "source": [
    "for i in article_df[article_df.msg != \"success\"][\"cleaned\"]:\n",
    "    print(i, wikipedia.summary(i, redirect=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def trajectorize(row):\n",
    "    \"\"\"Build the tool-call trajectories and Wikipedia lookup metadata for one row.\n",
    "\n",
    "    Args:\n",
    "        row: Mapping with \"claim\", \"label\" and \"evidence_sentences\". Each\n",
    "            evidence entry is unpacked as (article title, line, sentence).\n",
    "\n",
    "    Returns:\n",
    "        dict with parallel key/value lists for the main trajectory\n",
    "        (\"traj_keys\", \"traj_values\") and for the rewoo variant, which omits\n",
    "        the closing thought and Finish action (\"rewoo_traj_keys\",\n",
    "        \"rewoo_traj_values\"), plus \"all_wiki_success\", \"wiki_worked\",\n",
    "        \"articles\" (the looked-up texts) and \"statuses\" (one per article).\n",
    "    \"\"\"\n",
    "    claim = row[\"claim\"].strip()\n",
    "    task = f\"On June 2017, the following claim was made: {claim}\\nQ: Was this claim true or false?\"\n",
    "    answer = str(row[\"label\"]).lower()\n",
    "\n",
    "    # itertools.groupby only merges adjacent items, so building a dict from it\n",
    "    # dropped earlier evidence whenever an article's sentences were not\n",
    "    # contiguous. Accumulate per title instead (first-seen order is kept).\n",
    "    article_sentence_group = {}\n",
    "    for evidence in row[\"evidence_sentences\"]:\n",
    "        article_sentence_group.setdefault(evidence[0], []).append(evidence)\n",
    "\n",
    "    # Clean each title once; it is reused for the lookups and both trajectories.\n",
    "    cleaned_titles = {article: clean_fever(article) for article in article_sentence_group}\n",
    "\n",
    "    articles = {}\n",
    "    statuses = []\n",
    "    wiki_worked = True\n",
    "    for article, cleaned_article in cleaned_titles.items():\n",
    "        wiki, worked = search(article)\n",
    "        if worked != \"success\":\n",
    "            # The local wiki-pages fallback is disabled, so the text returned\n",
    "            # by search() is kept as-is even when the lookup did not succeed.\n",
    "            wiki_worked = False\n",
    "        articles[cleaned_article] = wiki.strip()\n",
    "        statuses.append(worked)\n",
    "    # \"fallback\" is still accepted here although this function never sets it.\n",
    "    all_wiki_success = all(status in (\"success\", \"fallback\") for status in statuses)\n",
    "\n",
    "    def tool_call(name, topic):\n",
    "        # json.dumps escapes quotes, backslashes and control characters that\n",
    "        # plain string concatenation would emit as invalid JSON;\n",
    "        # ensure_ascii=False leaves non-ASCII titles unescaped.\n",
    "        return json.dumps({\"name\": name, \"arguments\": {\"topic\": topic}}, ensure_ascii=False)\n",
    "\n",
    "    def search_steps(thought_template):\n",
    "        # Search thought/action/document for every article, each followed by\n",
    "        # one observation per evidence sentence.\n",
    "        steps = []\n",
    "        for article, evidences in article_sentence_group.items():\n",
    "            cleaned_article = cleaned_titles[article]\n",
    "            steps.append({\"thought\": thought_template.format(cleaned_article)})\n",
    "            steps.append({\"action\": tool_call(\"Search\", cleaned_article)})\n",
    "            steps.append({\"observation\": f\"[Document]\\n{articles[cleaned_article]}\\n[End]\"})\n",
    "            for _title, _line, sent in evidences:\n",
    "                # Only the text before the first tab is kept.\n",
    "                steps.append({\"observation\": clean_fever(sent.split('\\t')[0])})\n",
    "        return steps\n",
    "\n",
    "    trajectory = [{\"task\": task}]\n",
    "    trajectory.extend(search_steps(\"I need to search {}.\"))\n",
    "    trajectory.extend([\n",
    "        {\"thought\": f\"The claim is {answer}.\"},\n",
    "        {\"action\": tool_call(\"Finish\", answer)},\n",
    "    ])\n",
    "\n",
    "    # The rewoo variant stops after the evidence: no final thought/Finish step.\n",
    "    rewoo_trajectory = [{\"task\": task}]\n",
    "    rewoo_trajectory.extend(search_steps(\"Search for more information about {}.\"))\n",
    "\n",
    "    # Every step is a single-entry dict; split it into parallel key/value lists.\n",
    "    return {\n",
    "        \"traj_keys\": [next(iter(step)) for step in trajectory],\n",
    "        \"traj_values\": [next(iter(step.values())) for step in trajectory],\n",
    "        \"rewoo_traj_keys\": [next(iter(step)) for step in rewoo_trajectory],\n",
    "        \"rewoo_traj_values\": [next(iter(step.values())) for step in rewoo_trajectory],\n",
    "        \"all_wiki_success\": all_wiki_success,\n",
    "        \"wiki_worked\": wiki_worked,\n",
    "        \"articles\": list(articles.values()),\n",
    "        \"statuses\": statuses\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sentencify(row):\n",
    "    \"\"\"Join the row's cleaned evidence sentences into a single \"cot\" string.\n",
    "\n",
    "    Args:\n",
    "        row: Mapping with \"evidence_sentences\"; each entry is unpacked as\n",
    "            (article title, line, sentence).\n",
    "\n",
    "    Returns:\n",
    "        dict with one key, \"cot\": the cleaned sentences joined by spaces,\n",
    "        grouped by cleaned article title in first-seen order, with\n",
    "        surrounding whitespace stripped and newlines replaced by spaces.\n",
    "    \"\"\"\n",
    "    # itertools.groupby only merges adjacent items, so a dict built from it\n",
    "    # dropped earlier sentences when an article's evidence was not contiguous\n",
    "    # (or when two titles cleaned to the same key). Accumulate instead.\n",
    "    article_sentence_group = {}\n",
    "    for title, _line, sent in row[\"evidence_sentences\"]:\n",
    "        article_sentence_group.setdefault(clean_fever(title), []).append(sent)\n",
    "\n",
    "    sentences = [\n",
    "        clean_fever(sent.split('\\t')[0])  # keep only the text before the first tab\n",
    "        for sents in article_sentence_group.values()\n",
    "        for sent in sents\n",
    "    ]\n",
    "\n",
    "    return {\n",
    "        \"cot\": \" \".join(sentences).strip().replace(\"\\n\", \" \")\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4cc1981e073c4361a859ab69122fda7f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=4):   0%|          | 0/6720 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "afec2d36d85544d394cc0fa0d75830db",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=4):   0%|          | 0/6720 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Augment the train split in two passes, each with 4 worker processes:\n",
    "# trajectorize adds the trajectory and lookup columns, then sentencify adds \"cot\".\n",
    "# For a quick dry run, insert .select(range(500)) before the first map.\n",
    "fever[\"train\"] = fever[\"train\"].map(trajectorize, num_proc=4).map(sentencify, num_proc=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "612b8bb361844b74bc2dde0171455a1d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/6720 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f0e7ed962ad64d3296a4aa6eb2e4493e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/6612 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Write `fever` (whose train split now carries the trajectorize/sentencify columns) to disk.\n",
    "fever.save_to_disk(\"var/fever_augmented_nowikipages_json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cba79e673ee740c38e449db1374d500c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/5696 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c48f17084af64ebeb1d0332f0cdfa581",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/6612 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7a8934d960864c039a408da5ab7e83aa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/1024 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Carve a 1024-row validation split out of the saved train split and write\n",
    "# the result to a new directory. The fixed seed makes the shuffle\n",
    "# reproducible, so re-running this cell yields the same partition.\n",
    "fever_augmented_nowikipages_json = load_from_disk(\"var/fever_augmented_nowikipages_json\")\n",
    "new_split = fever_augmented_nowikipages_json[\"train\"].train_test_split(test_size=1024, seed=42)\n",
    "fever_augmented_nowikipages_json[\"train\"] = new_split[\"train\"]\n",
    "# train_test_split names the held-out part \"test\"; it is stored as \"validation\".\n",
    "fever_augmented_nowikipages_json[\"validation\"] = new_split[\"test\"]\n",
    "fever_augmented_nowikipages_json.save_to_disk(\"var/fever_augmented_nowikipages_json_val\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-run trajectorize (8 worker processes) on only the train rows whose\n",
    "# wiki_worked flag is False.\n",
    "wiki_reworked = (\n",
    "    fever[\"train\"]\n",
    "    .filter(lambda row: row[\"wiki_worked\"] == False)\n",
    "    .map(trajectorize, num_proc=8)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f0262a3d010b4fd68b7f9cb3fdc570f2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Filter:   0%|          | 0/6720 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Collect the train rows whose wiki_worked flag is False into a pandas\n",
    "# DataFrame for inspection.\n",
    "failed = (\n",
    "    fever[\"train\"]\n",
    "    .filter(lambda row: row[\"wiki_worked\"] == False)\n",
    "    .to_pandas()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>claim</th>\n",
       "      <th>unique_evidence</th>\n",
       "      <th>evidence_sentences</th>\n",
       "      <th>evidence_sentence_count</th>\n",
       "      <th>id</th>\n",
       "      <th>traj_keys</th>\n",
       "      <th>traj_values</th>\n",
       "      <th>rewoo_traj_keys</th>\n",
       "      <th>rewoo_traj_values</th>\n",
       "      <th>all_wiki_success</th>\n",
       "      <th>wiki_worked</th>\n",
       "      <th>articles</th>\n",
       "      <th>statuses</th>\n",
       "      <th>cot</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>True</td>\n",
       "      <td>Trollhunters was produced by an animation comp...</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>60685</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>True</td>\n",
       "      <td>DreamWorks Animation produced Trollhunters.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>46475</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: D...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: D...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>True</td>\n",
       "      <td>Trollhunters is computer-animated.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>53330</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>True</td>\n",
       "      <td>Trollhunters is a television series.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>18523</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was only produced by Donald Trump.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>28716</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>True</td>\n",
       "      <td>Netflix is who Trollhunters was created for.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>82973</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: N...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: N...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was only created for Hulu.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>32923</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was created by a horse.</td>\n",
       "      <td>[[Trollhunters, 0], [Guillermo_del_Toro, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>2</td>\n",
       "      <td>101434</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg, success]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters is only hand-drawn.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>115863</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was produced by Adam Sandler.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>68828</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>True</td>\n",
       "      <td>Trollhunters was created by a person.</td>\n",
       "      <td>[[Trollhunters, 0], [Guillermo_del_Toro, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>2</td>\n",
       "      <td>10041</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg, success]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters is 3 years old.</td>\n",
       "      <td>[[Trollhunters, 3]]</td>\n",
       "      <td>[[Trollhunters, 3, The first two episodes of t...</td>\n",
       "      <td>1</td>\n",
       "      <td>53331</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>The first two episodes of the series premiered...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters is a squid.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>150096</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters is a podcast series.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>18524</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters is only a film.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>124363</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was created by Ron Howard.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>41935</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>True</td>\n",
       "      <td>Trollhunters is animated.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>126466</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was created for HBO.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>141476</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was created for cats.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>43378</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>True</td>\n",
       "      <td>Trollhunters was created by Guillermo del Toro.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>10040</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>False</td>\n",
       "      <td>Trollhunters was only created by Adam Sandler.</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>58707</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>True</td>\n",
       "      <td>Trollhunters is computer-animated and of the f...</td>\n",
       "      <td>[[Trollhunters, 0]]</td>\n",
       "      <td>[[Trollhunters, 0, Trollhunters is an American...</td>\n",
       "      <td>1</td>\n",
       "      <td>116890</td>\n",
       "      <td>[task, thought, action, observation, observati...</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>[task, thought, action, observation, observation]</td>\n",
       "      <td>[On June 2017, the following claim was made: T...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[\"Trollhunters\" may refer to one of ['Tales of...</td>\n",
       "      <td>[disambg]</td>\n",
       "      <td>Trollhunters is an American computer-animated ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    label                                              claim  \\\n",
       "0    True  Trollhunters was produced by an animation comp...   \n",
       "1    True        DreamWorks Animation produced Trollhunters.   \n",
       "2    True                 Trollhunters is computer-animated.   \n",
       "3    True               Trollhunters is a television series.   \n",
       "4   False    Trollhunters was only produced by Donald Trump.   \n",
       "5    True       Netflix is who Trollhunters was created for.   \n",
       "6   False            Trollhunters was only created for Hulu.   \n",
       "7   False               Trollhunters was created by a horse.   \n",
       "8   False                   Trollhunters is only hand-drawn.   \n",
       "9   False         Trollhunters was produced by Adam Sandler.   \n",
       "10   True              Trollhunters was created by a person.   \n",
       "11  False                       Trollhunters is 3 years old.   \n",
       "12  False                           Trollhunters is a squid.   \n",
       "13  False                  Trollhunters is a podcast series.   \n",
       "14  False                       Trollhunters is only a film.   \n",
       "15  False            Trollhunters was created by Ron Howard.   \n",
       "16   True                          Trollhunters is animated.   \n",
       "17  False                  Trollhunters was created for HBO.   \n",
       "18  False                 Trollhunters was created for cats.   \n",
       "19   True    Trollhunters was created by Guillermo del Toro.   \n",
       "20  False     Trollhunters was only created by Adam Sandler.   \n",
       "21   True  Trollhunters is computer-animated and of the f...   \n",
       "\n",
       "                                 unique_evidence  \\\n",
       "0                            [[Trollhunters, 0]]   \n",
       "1                            [[Trollhunters, 0]]   \n",
       "2                            [[Trollhunters, 0]]   \n",
       "3                            [[Trollhunters, 0]]   \n",
       "4                            [[Trollhunters, 0]]   \n",
       "5                            [[Trollhunters, 0]]   \n",
       "6                            [[Trollhunters, 0]]   \n",
       "7   [[Trollhunters, 0], [Guillermo_del_Toro, 0]]   \n",
       "8                            [[Trollhunters, 0]]   \n",
       "9                            [[Trollhunters, 0]]   \n",
       "10  [[Trollhunters, 0], [Guillermo_del_Toro, 0]]   \n",
       "11                           [[Trollhunters, 3]]   \n",
       "12                           [[Trollhunters, 0]]   \n",
       "13                           [[Trollhunters, 0]]   \n",
       "14                           [[Trollhunters, 0]]   \n",
       "15                           [[Trollhunters, 0]]   \n",
       "16                           [[Trollhunters, 0]]   \n",
       "17                           [[Trollhunters, 0]]   \n",
       "18                           [[Trollhunters, 0]]   \n",
       "19                           [[Trollhunters, 0]]   \n",
       "20                           [[Trollhunters, 0]]   \n",
       "21                           [[Trollhunters, 0]]   \n",
       "\n",
       "                                   evidence_sentences  \\\n",
       "0   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "1   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "2   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "3   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "4   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "5   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "6   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "7   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "8   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "9   [[Trollhunters, 0, Trollhunters is an American...   \n",
       "10  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "11  [[Trollhunters, 3, The first two episodes of t...   \n",
       "12  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "13  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "14  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "15  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "16  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "17  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "18  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "19  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "20  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "21  [[Trollhunters, 0, Trollhunters is an American...   \n",
       "\n",
       "    evidence_sentence_count      id  \\\n",
       "0                         1   60685   \n",
       "1                         1   46475   \n",
       "2                         1   53330   \n",
       "3                         1   18523   \n",
       "4                         1   28716   \n",
       "5                         1   82973   \n",
       "6                         1   32923   \n",
       "7                         2  101434   \n",
       "8                         1  115863   \n",
       "9                         1   68828   \n",
       "10                        2   10041   \n",
       "11                        1   53331   \n",
       "12                        1  150096   \n",
       "13                        1   18524   \n",
       "14                        1  124363   \n",
       "15                        1   41935   \n",
       "16                        1  126466   \n",
       "17                        1  141476   \n",
       "18                        1   43378   \n",
       "19                        1   10040   \n",
       "20                        1   58707   \n",
       "21                        1  116890   \n",
       "\n",
       "                                            traj_keys  \\\n",
       "0   [task, thought, action, observation, observati...   \n",
       "1   [task, thought, action, observation, observati...   \n",
       "2   [task, thought, action, observation, observati...   \n",
       "3   [task, thought, action, observation, observati...   \n",
       "4   [task, thought, action, observation, observati...   \n",
       "5   [task, thought, action, observation, observati...   \n",
       "6   [task, thought, action, observation, observati...   \n",
       "7   [task, thought, action, observation, observati...   \n",
       "8   [task, thought, action, observation, observati...   \n",
       "9   [task, thought, action, observation, observati...   \n",
       "10  [task, thought, action, observation, observati...   \n",
       "11  [task, thought, action, observation, observati...   \n",
       "12  [task, thought, action, observation, observati...   \n",
       "13  [task, thought, action, observation, observati...   \n",
       "14  [task, thought, action, observation, observati...   \n",
       "15  [task, thought, action, observation, observati...   \n",
       "16  [task, thought, action, observation, observati...   \n",
       "17  [task, thought, action, observation, observati...   \n",
       "18  [task, thought, action, observation, observati...   \n",
       "19  [task, thought, action, observation, observati...   \n",
       "20  [task, thought, action, observation, observati...   \n",
       "21  [task, thought, action, observation, observati...   \n",
       "\n",
       "                                          traj_values  \\\n",
       "0   [On June 2017, the following claim was made: T...   \n",
       "1   [On June 2017, the following claim was made: D...   \n",
       "2   [On June 2017, the following claim was made: T...   \n",
       "3   [On June 2017, the following claim was made: T...   \n",
       "4   [On June 2017, the following claim was made: T...   \n",
       "5   [On June 2017, the following claim was made: N...   \n",
       "6   [On June 2017, the following claim was made: T...   \n",
       "7   [On June 2017, the following claim was made: T...   \n",
       "8   [On June 2017, the following claim was made: T...   \n",
       "9   [On June 2017, the following claim was made: T...   \n",
       "10  [On June 2017, the following claim was made: T...   \n",
       "11  [On June 2017, the following claim was made: T...   \n",
       "12  [On June 2017, the following claim was made: T...   \n",
       "13  [On June 2017, the following claim was made: T...   \n",
       "14  [On June 2017, the following claim was made: T...   \n",
       "15  [On June 2017, the following claim was made: T...   \n",
       "16  [On June 2017, the following claim was made: T...   \n",
       "17  [On June 2017, the following claim was made: T...   \n",
       "18  [On June 2017, the following claim was made: T...   \n",
       "19  [On June 2017, the following claim was made: T...   \n",
       "20  [On June 2017, the following claim was made: T...   \n",
       "21  [On June 2017, the following claim was made: T...   \n",
       "\n",
       "                                      rewoo_traj_keys  \\\n",
       "0   [task, thought, action, observation, observation]   \n",
       "1   [task, thought, action, observation, observation]   \n",
       "2   [task, thought, action, observation, observation]   \n",
       "3   [task, thought, action, observation, observation]   \n",
       "4   [task, thought, action, observation, observation]   \n",
       "5   [task, thought, action, observation, observation]   \n",
       "6   [task, thought, action, observation, observation]   \n",
       "7   [task, thought, action, observation, observati...   \n",
       "8   [task, thought, action, observation, observation]   \n",
       "9   [task, thought, action, observation, observation]   \n",
       "10  [task, thought, action, observation, observati...   \n",
       "11  [task, thought, action, observation, observation]   \n",
       "12  [task, thought, action, observation, observation]   \n",
       "13  [task, thought, action, observation, observation]   \n",
       "14  [task, thought, action, observation, observation]   \n",
       "15  [task, thought, action, observation, observation]   \n",
       "16  [task, thought, action, observation, observation]   \n",
       "17  [task, thought, action, observation, observation]   \n",
       "18  [task, thought, action, observation, observation]   \n",
       "19  [task, thought, action, observation, observation]   \n",
       "20  [task, thought, action, observation, observation]   \n",
       "21  [task, thought, action, observation, observation]   \n",
       "\n",
       "                                    rewoo_traj_values  all_wiki_success  \\\n",
       "0   [On June 2017, the following claim was made: T...             False   \n",
       "1   [On June 2017, the following claim was made: D...             False   \n",
       "2   [On June 2017, the following claim was made: T...             False   \n",
       "3   [On June 2017, the following claim was made: T...             False   \n",
       "4   [On June 2017, the following claim was made: T...             False   \n",
       "5   [On June 2017, the following claim was made: N...             False   \n",
       "6   [On June 2017, the following claim was made: T...             False   \n",
       "7   [On June 2017, the following claim was made: T...             False   \n",
       "8   [On June 2017, the following claim was made: T...             False   \n",
       "9   [On June 2017, the following claim was made: T...             False   \n",
       "10  [On June 2017, the following claim was made: T...             False   \n",
       "11  [On June 2017, the following claim was made: T...             False   \n",
       "12  [On June 2017, the following claim was made: T...             False   \n",
       "13  [On June 2017, the following claim was made: T...             False   \n",
       "14  [On June 2017, the following claim was made: T...             False   \n",
       "15  [On June 2017, the following claim was made: T...             False   \n",
       "16  [On June 2017, the following claim was made: T...             False   \n",
       "17  [On June 2017, the following claim was made: T...             False   \n",
       "18  [On June 2017, the following claim was made: T...             False   \n",
       "19  [On June 2017, the following claim was made: T...             False   \n",
       "20  [On June 2017, the following claim was made: T...             False   \n",
       "21  [On June 2017, the following claim was made: T...             False   \n",
       "\n",
       "    wiki_worked                                           articles  \\\n",
       "0         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "1         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "2         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "3         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "4         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "5         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "6         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "7         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "8         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "9         False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "10        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "11        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "12        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "13        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "14        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "15        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "16        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "17        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "18        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "19        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "20        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "21        False  [\"Trollhunters\" may refer to one of ['Tales of...   \n",
       "\n",
       "              statuses                                                cot  \n",
       "0            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "1            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "2            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "3            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "4            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "5            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "6            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "7   [disambg, success]  Trollhunters is an American computer-animated ...  \n",
       "8            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "9            [disambg]  Trollhunters is an American computer-animated ...  \n",
       "10  [disambg, success]  Trollhunters is an American computer-animated ...  \n",
       "11           [disambg]  The first two episodes of the series premiered...  \n",
       "12           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "13           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "14           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "15           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "16           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "17           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "18           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "19           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "20           [disambg]  Trollhunters is an American computer-animated ...  \n",
       "21           [disambg]  Trollhunters is an American computer-animated ...  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "failed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## End redo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "badapple = ds[\"train_react_wiki_positive\"].filter(lambda x: \"Bronson (film) is a fictionalized crime film based on someone's life\" in x[\"claim\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "badapple[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(map(int, ds[\"train_react\"][\"wiki_worked\"]))/len(ds[\"train_react\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds[\"paper_test_bigbench\"] = ds[\"paper_test\"].filter(lambda x: x[\"in_bigbench\"] == True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds.save_to_disk(\"var/fever_agentic\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_ds = DatasetDict()\n",
    "for k,v in ds_c.items():\n",
    "    if len(v) > 0:\n",
    "        new_ds[k] = v\n",
    "new_ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_ds_bb = DatasetDict()\n",
    "for k,v in new_ds.items():\n",
    "    new_ds_bb[f\"{k}_bigbench\"] = v.filter(), num_proc=32)\n",
    "    new_ds_bb[f\"{k}_not_bigbench\"] = v.filter(lambda x: all(x[\"claim\"] not in y[\"input\"] for y in js[\"examples\"]), num_proc=32)\n",
    "new_ds_bb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ref_claim = new_ds.filter(lambda x: any(x[\"claim\"] in y[\"input\"] for y in js[\"examples\"]), num_proc=32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ref_claim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "ex = search(\"spain\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ex.args"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rewoo_trajectory(row):\n",
    "    claim = row[\"claim\"]\n",
    "    task = f\"On June 2017, the following claim was made: {claim}\\nQ: Was this claim true or false?\"\n",
    "    answer = row[\"bool_label\"]\n",
    "    wiki, worked = search(row[\"wikipedia_title\"])\n",
    "    trajectory = [\n",
    "        {\"task\": task.strip()},\n",
    "        {\"thought\": f\"Search for more information about {row['wikipedia_title']}.\"},\n",
    "        {\"action\": f\"Search[{row['wikipedia_title']}]\"},\n",
    "        {\"observation\": f\"[Document]\\n{wiki}\\n[End]\"},\n",
    "    ]\n",
    "\n",
    "    traj_keys = [next(iter(t.keys())) for t in trajectory]\n",
    "    traj_values = [next(iter(t.values())) for t in trajectory]\n",
    "\n",
    "    return {\n",
    "            \"rewoo_traj_keys\": traj_keys,\n",
    "            \"rewoo_traj_values\": traj_values,\n",
    "        }\n",
    "\n",
    "ref_claim_react = ref_claim_react.map(rewoo_trajectory).filter(lambda x: x[\"wiki_worked\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ref_claim_react"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "\n",
    "ref_claim_t[\"test\"][random.randint(0,4)][\"claim\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ref_claim_t = DatasetDict()\n",
    "ref_claim_t[\"train\"] = ref_claim_react\n",
    "ref_claim_t[\"test\"] = new_ds[\"paper_dev\"]\n",
    "ref_claim_t.save_to_disk(\"var/ref_claim_fever_react\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ref_claim.save_to_disk(\"var/ref_claim_fever\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for k,v in ds.items():\n",
    "    print(set(v[\"label\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ds_f = ds.filter(lambda x: \"Ukrainian Soviet Socialist Republic\" in x[\"claim\"] )\n",
    "ds_f = ds.filter(lambda x: x[\"label\"] in [\"SUPPORTS\", \"REFUTES\"] )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_f"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_f[\"labelled_dev\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "for row in ds[\"train\"]:\n",
    "    if \"Ukrainian Soviet Socialist Republic\" in row[\"claim\"]:\n",
    "        print(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(js[\"examples\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "js[\"examples\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import Dataset\n",
    "fever_orig = Dataset.from_json(\"shared_task_dev.jsonl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fever_train_ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fever_train_ds[\"train\"][\"evidence_sentences\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df = train_df.astype(\n",
    "    {\"verifiable\": }\n",
    ")\n",
    "train_df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[(df.verifiable != \"VERIFIABLE\") & (df[\"claim_in_bigbench\"] == True)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "wiki_pages_df.index = wiki_pages_df.index.str.encode('utf-8').str.decode('utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wiki_pages_df[wiki_pages_df.index.str.contains(\"Sim.*n_Bol.*var\") ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Simón_Bolívar\".encode())\n",
    "print(\"Simón_Bolívar\".encode())\n",
    "print(unicodedata.normalize('NFD', \"Simón_Bolívar\").encode())\n",
    "print(unicodedata.normalize('NFD', \"Simón_Bolívar\").encode())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wiki_pages_df.loc[\"Simón_Bolívar\".encode('utf-8').decode('utf-8')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wiki_pages_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"ev_len1\"] = df[\"unique_evidence\"].map(len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"ev_len1\"].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[19993]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[19993]['evidence']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hc = wikipages[\"train\"].filter(lambda x: x[\"id\"] == \"Hermit_crab\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hc_lines = hc[0][\"lines\"]\n",
    "hc_lines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "list(filter(lambda x: x != '', re.split(r\"\\d+\\t\", hc_lines)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = {\"webact_simple3\": \"\\nDetermine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. \\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nAction 1: Search[Nikolaj Coster-Waldau]\\nObservation 1: Nikolaj William Coster-Waldau (born 27 July 1970) is a Danish actor and producer. He graduated from the Danish National School of Performing Arts in Copenhagen in 1993,[1] and had his breakthrough role in Denmark with the film Nightwatch (1994). He played Jaime Lannister in the HBO fantasy drama series Game of Thrones, for which he received two Primetime Emmy Award nominations for Outstanding Supporting Actor in a Drama Series.. Coster-Waldau has appeared in numerous films in his native Denmark and Scandinavia, including Headhunters (2011) and A Thousand Times Good Night (2013). In the U.S, his debut film role was in the war film Black Hawk Down (2001), playing Medal of Honor recipient Gary Gordon.[2] He then played a detective in the short-lived Fox television series New Amsterdam (2008), and appeared in the 2009 Fox television film Virtuality, originally intended as a pilot.\\nAction 2: Finish[SUPPORTS]\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nAction 1: Search[Stranger Things]\\nObservation 1: Stranger Things is an American science fiction horror drama television series created by the Duffer Brothers. Set in the 1980s, primarily in the fictional town of Hawkins, Indiana, the series centers on a number of mysteries and supernatural events occurring around the town and their impact on an ensemble of child and adult characters. \\nAction 2: Finish[REFUTES]\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nAction 1: Search[Beautiful]\\nObservation 1: Could not find [Beautiful]. 
Similar: ['Beautiful', 'Beautiful, Beautiful', 'A Beautiful Mind (film)', 'Beautiful (Christina Aguilera song)', 'Life Is Beautiful'].\\nAction 2: Search[Beautiful (Christina Aguilera song)]\\nObservation 2: \\\"Beautiful\\\" is a song recorded by American singer Christina Aguilera for her fourth studio album, Stripped (2002).\\nAction 3: Lookup[Billboard Hot 100]\\nObservation 3: (Result 1 / 3) The song peaked at number two on the Billboard Hot 100 in the United States, where it was certified Gold for 500,000 units shipped.\\nAction 4: Finish[NOT ENOUGH INFO]\\n\\n\", \"cotqa_simple3\": \"Determine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. \\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nThought: Nikolaj William Coster-Waldau appeared in the 2009 Fox television film Virtuality, so he has worked with the Fox Broadcasting Company.\\nAnswer: SUPPORTS\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nThought: Stranger Things is in the fictional town of Hawkins, Indiana, not in Bloomington, Indiana.\\nAnswer:REFUTES\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nThought: The song peaked at number two on the Billboard Hot 100 in the United States, but not sure if it was in 2003.\\nAnswer: NOT ENOUGH INFO\\n\", \"webqa_simple3\": \"Determine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. \\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nAnswer: SUPPORTS\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nAnswer:REFUTES\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nAnswer: NOT ENOUGH INFO\\n\", \"webthink_simple3\": \"\\nDetermine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. 
\\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nThought 1: I need to search Nikolaj Coster-Waldau and find if he has worked with the Fox Broadcasting Company.\\nAction 1: Search[Nikolaj Coster-Waldau]\\nObservation 1: Nikolaj William Coster-Waldau (born 27 July 1970) is a Danish actor and producer. He graduated from the Danish National School of Performing Arts in Copenhagen in 1993,[1] and had his breakthrough role in Denmark with the film Nightwatch (1994). He played Jaime Lannister in the HBO fantasy drama series Game of Thrones, for which he received two Primetime Emmy Award nominations for Outstanding Supporting Actor in a Drama Series.. Coster-Waldau has appeared in numerous films in his native Denmark and Scandinavia, including Headhunters (2011) and A Thousand Times Good Night (2013). In the U.S, his debut film role was in the war film Black Hawk Down (2001), playing Medal of Honor recipient Gary Gordon.[2] He then played a detective in the short-lived Fox television series New Amsterdam (2008), and appeared in the 2009 Fox television film Virtuality, originally intended as a pilot.\\nThought 2: Because he \\\"appeared in the 2009 Fox television film Virtuality\\\", he should have worked with the Fox Broadcasting Company.\\nAction 2: Finish[SUPPORTS]\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nThought 1: I should search for Stranger Things, and see if it is set in Bloomington, Indiana.\\nAction 1: Search[Stranger Things]\\nObservation 1: Stranger Things is an American science fiction horror drama television series created by the Duffer Brothers. Set in the 1980s, primarily in the fictional town of Hawkins, Indiana, the series centers on a number of mysteries and supernatural events occurring around the town and their impact on an ensemble of child and adult characters. 
\\nThought 2: The observation says that it is set in a \\\"fictional town of Hawkins, Indiana\\\", so it is not set in Bloomington.\\nAction 2: Finish[REFUTES]\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nThought 1: I need to search the song Beautiful and find if it reached number two on the Billboard Hot 100 in 2003.\\nAction 1: Search[Beautiful]\\nObservation 1: Could not find [Beautiful]. Similar: ['Beautiful', 'Beautiful, Beautiful', 'A Beautiful Mind (film)', 'Beautiful (Christina Aguilera song)', 'Life Is Beautiful'].\\nThought 2: From suggestions, I should search \\\"Beautiful (Christina Aguilera song)\\\" to find the song.\\nAction 2: Search[Beautiful (Christina Aguilera song)]\\nObservation 2: \\\"Beautiful\\\" is a song recorded by American singer Christina Aguilera for her fourth studio album, Stripped (2002).\\nThought 3: It does not mention Billboard, so I need to look up \\\"Billboard Hot 100\\\" to find if it reached number two on it in 2003.\\nAction 3: Lookup[Billboard Hot 100]\\nObservation 3: (Result 1 / 3) The song peaked at number two on the Billboard Hot 100 in the United States, where it was certified Gold for 500,000 units shipped.\\nThought 4: It only says the song peaked at number two on the Billboard Hot 100, but not if it was in 2003. I am not sure if this claim is true or not.\\nAction 4: Finish[NOT ENOUGH INFO]\\n\\n\"}\n",
    "pprint(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Few-shot claim-verification prompts (labels: SUPPORTS / REFUTES / NOT ENOUGH INFO).\n",
    "# Each template holds the same three worked claims in a different format:\n",
    "#   webact_simple3   - Action / Observation steps only\n",
    "#   cotqa_simple3    - one Thought followed by an Answer\n",
    "#   webqa_simple3    - Answer only\n",
    "#   webthink_simple3 - interleaved Thought / Action / Observation steps\n",
    "# NOTE(review): 'Answer:REFUTES' (no space) and the '.?' ending of the third claim are kept verbatim - confirm they are intentional.\n",
    "react = {\n",
    "    \"webact_simple3\": \"\\nDetermine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. \\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nAction 1: Search[Nikolaj Coster-Waldau]\\nObservation 1: Nikolaj William Coster-Waldau (born 27 July 1970) is a Danish actor and producer. He graduated from the Danish National School of Performing Arts in Copenhagen in 1993,[1] and had his breakthrough role in Denmark with the film Nightwatch (1994). He played Jaime Lannister in the HBO fantasy drama series Game of Thrones, for which he received two Primetime Emmy Award nominations for Outstanding Supporting Actor in a Drama Series.. Coster-Waldau has appeared in numerous films in his native Denmark and Scandinavia, including Headhunters (2011) and A Thousand Times Good Night (2013). In the U.S, his debut film role was in the war film Black Hawk Down (2001), playing Medal of Honor recipient Gary Gordon.[2] He then played a detective in the short-lived Fox television series New Amsterdam (2008), and appeared in the 2009 Fox television film Virtuality, originally intended as a pilot.\\nAction 2: Finish[SUPPORTS]\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nAction 1: Search[Stranger Things]\\nObservation 1: Stranger Things is an American science fiction horror drama television series created by the Duffer Brothers. Set in the 1980s, primarily in the fictional town of Hawkins, Indiana, the series centers on a number of mysteries and supernatural events occurring around the town and their impact on an ensemble of child and adult characters. \\nAction 2: Finish[REFUTES]\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nAction 1: Search[Beautiful]\\nObservation 1: Could not find [Beautiful]. Similar: ['Beautiful', 'Beautiful, Beautiful', 'A Beautiful Mind (film)', 'Beautiful (Christina Aguilera song)', 'Life Is Beautiful'].\\nAction 2: Search[Beautiful (Christina Aguilera song)]\\nObservation 2: \\\"Beautiful\\\" is a song recorded by American singer Christina Aguilera for her fourth studio album, Stripped (2002).\\nAction 3: Lookup[Billboard Hot 100]\\nObservation 3: (Result 1 / 3) The song peaked at number two on the Billboard Hot 100 in the United States, where it was certified Gold for 500,000 units shipped.\\nAction 4: Finish[NOT ENOUGH INFO]\\n\\n\",\n",
    "    \"cotqa_simple3\": \"Determine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. \\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nThought: Nikolaj William Coster-Waldau appeared in the 2009 Fox television film Virtuality, so he has worked with the Fox Broadcasting Company.\\nAnswer: SUPPORTS\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nThought: Stranger Things is in the fictional town of Hawkins, Indiana, not in Bloomington, Indiana.\\nAnswer:REFUTES\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nThought: The song peaked at number two on the Billboard Hot 100 in the United States, but not sure if it was in 2003.\\nAnswer: NOT ENOUGH INFO\\n\",\n",
    "    \"webqa_simple3\": \"Determine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. \\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nAnswer: SUPPORTS\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nAnswer:REFUTES\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nAnswer: NOT ENOUGH INFO\\n\",\n",
    "    \"webthink_simple3\": \"\\nDetermine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. \\nClaim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\\nThought 1: I need to search Nikolaj Coster-Waldau and find if he has worked with the Fox Broadcasting Company.\\nAction 1: Search[Nikolaj Coster-Waldau]\\nObservation 1: Nikolaj William Coster-Waldau (born 27 July 1970) is a Danish actor and producer. He graduated from the Danish National School of Performing Arts in Copenhagen in 1993,[1] and had his breakthrough role in Denmark with the film Nightwatch (1994). He played Jaime Lannister in the HBO fantasy drama series Game of Thrones, for which he received two Primetime Emmy Award nominations for Outstanding Supporting Actor in a Drama Series.. Coster-Waldau has appeared in numerous films in his native Denmark and Scandinavia, including Headhunters (2011) and A Thousand Times Good Night (2013). In the U.S, his debut film role was in the war film Black Hawk Down (2001), playing Medal of Honor recipient Gary Gordon.[2] He then played a detective in the short-lived Fox television series New Amsterdam (2008), and appeared in the 2009 Fox television film Virtuality, originally intended as a pilot.\\nThought 2: Because he \\\"appeared in the 2009 Fox television film Virtuality\\\", he should have worked with the Fox Broadcasting Company.\\nAction 2: Finish[SUPPORTS]\\n\\nClaim: Stranger Things is set in Bloomington, Indiana.\\nThought 1: I should search for Stranger Things, and see if it is set in Bloomington, Indiana.\\nAction 1: Search[Stranger Things]\\nObservation 1: Stranger Things is an American science fiction horror drama television series created by the Duffer Brothers. Set in the 1980s, primarily in the fictional town of Hawkins, Indiana, the series centers on a number of mysteries and supernatural events occurring around the town and their impact on an ensemble of child and adult characters. \\nThought 2: The observation says that it is set in a \\\"fictional town of Hawkins, Indiana\\\", so it is not set in Bloomington.\\nAction 2: Finish[REFUTES]\\n\\nClaim: Beautiful reached number two on the Billboard Hot 100 in 2003.?\\nThought 1: I need to search the song Beautiful and find if it reached number two on the Billboard Hot 100 in 2003.\\nAction 1: Search[Beautiful]\\nObservation 1: Could not find [Beautiful]. Similar: ['Beautiful', 'Beautiful, Beautiful', 'A Beautiful Mind (film)', 'Beautiful (Christina Aguilera song)', 'Life Is Beautiful'].\\nThought 2: From suggestions, I should search \\\"Beautiful (Christina Aguilera song)\\\" to find the song.\\nAction 2: Search[Beautiful (Christina Aguilera song)]\\nObservation 2: \\\"Beautiful\\\" is a song recorded by American singer Christina Aguilera for her fourth studio album, Stripped (2002).\\nThought 3: It does not mention Billboard, so I need to look up \\\"Billboard Hot 100\\\" to find if it reached number two on it in 2003.\\nAction 3: Lookup[Billboard Hot 100]\\nObservation 3: (Result 1 / 3) The song peaked at number two on the Billboard Hot 100 in the United States, where it was certified Gold for 500,000 units shipped.\\nThought 4: It only says the song peaked at number two on the Billboard Hot 100, but not if it was in 2003. I am not sure if this claim is true or not.\\nAction 4: Finish[NOT ENOUGH INFO]\\n\\n\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show every template under its key with real line breaks. pprint would print the\n",
    "# escaped repr wrapped into short quoted fragments, which hides the prompt layout.\n",
    "print(\"\\n\".join(f\"===== {key} =====\\n{text}\" for key, text in react.items()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show only the template names; the full dictionary repr is several KB of text.\n",
    "list(react)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "react_naive = {\"webthink_simple\": \"\\nQuestion: Musician and satirist Allie Goertz wrote a song about the \\\"The Simpsons\\\" character Milhouse, who Matt Groening named after who?\\nThought 1: I only need to search Milhouse and find who it is named after.\\nAction 1: Search[Milhouse]\\nObservation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening. Milhouse is Bart Simpson's best friend in Mrs. Krabappel's fourth grade class at Springfield Elementary School. He is an insecure, gullible, and less popular child than Bart who is often led into trouble by Bart, who takes advantage of his friend's na\\u00c3\\u00afvet\\u00c3\\u00a9. Milhouse is a regular target for school bully Nelson Muntz and his friends Jimbo Jones, Dolph Starbeam and Kearney Zzyzwicz. Milhouse has a crush on Bart's sister, Lisa, a common plot element.\\nThought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \\\"named after\\\".\\nAction 2: Lookup[named after]\\nObservation 2: (Result 1 / 1) Milhouse was designed by Matt Groening for a planned series on NBC, which was abandoned.[4] The design was then used for a Butterfinger commercial, and it was decided to use the character in the series.[5][6] Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. The name was the most \\\"unfortunate name Matt Groening could think of for a kid\\\".[1] Years earlier, in a 1986 Life in Hell comic entitled \\\"What to Name the Baby\\\", Groening listed Milhouse as a name \\\"no longer recommended\\\".[7] Milhouse is a favorite among the staff as Al Jean noted \\\"most of the writers are more like Milhouse than Bart\\\".[1] His last name was given to him by Howard Gewirtz, a freelance writer who wrote the episode \\\"Homer Defined\\\". 
Gewirtz got the name from one of his wife's friends.[8] According to Arden Myrin and Dana Gould (former writer and co-executive producer of The Simpsons), Rob Cohen (Simpsons writer) inspired Milhouse's look.[2]\\nThought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\\nAction 3: Finish[Richard Nixon]\\nQuestion: Guitars for Wounded Warriors is an album that was recorded in the village in which New York county?\\nThought 1: I need to search Guitars for Wounded Warriors, then find in which New York county it was recorded.\\nAction 1: Search[Guitars for Wounded Warriors]\\nObservation 1: Guitars for Wounded Warriors is the sixth solo studio album by guitarist Brian Tarquin, released in August 2014 by Cleopatra Records. In 2016, it received a Best Compilation Album nomination by the Independent Music Awards. All tracks were recorded at Tarquin's Jungle Room Studios in New Paltz (village), New York. Being moved by the lack of support for returning veterans through his life Tarquin decided to compose a heavy instrumental rock album as a way to show his appreciation to all veterans. So once again he enlisted top notch players to guest with him on the album, Billy Sheehan on Black Hawk, Ron \\\"Bumblefoot\\\" Thal and Reb Beach on Taliban Terror, Steve Morse on Freedom, Chris Poland on Charlie Surfs and Hunting, Chuck Loeb on Escape Kabul, Hal Lindes on Sand & Blood, Gary Hoey on 5 Klicks To Hell and Baghdad, Alex De Rosso Dokken on Broken Arrow, and The Flyin' Ryan Brothers on Surgical Strike.  The entire album was engineered, produced and composed by Tarquin especially for each one of the guest guitarists. Partial proceeds are donated to the Fisher House Foundation from sales.\\nThought 2: Guitars for Wounded Warriors was recorded in New Paltz (village), New York. 
I need to search New Paltz (village), New York and find which county it is in.\\nAction 2: Search[New Paltz (village), New York]\\nObservation 2: New Paltz is a village in Ulster County located in the U.S. state of New York. It is approximately 80 miles (130 km) north of New York City and 70 miles (110 km) south of Albany. The population was 7,324 at the 2020 census.[3]\\nThought 3: New Paltz (village), New York is in Ulster County, so the answer is Ulster County.\\nAction 3: Finish[Ulster County]\\nQuestion: This American actor who starred in \\\"Veep\\\" was also in an action comedy film directed by Seth Rogen and Evan Goldberg heavily inspired by what?\\nThought 1: There could be many American actors who starred in \\\"Veep\\\". So I can first search \\\"directed by Seth Rogen and Evan Goldberg\\\" and find action comedy films directed by Seth Rogen and Evan Goldberg.\\nAction 1: Search[directed by Seth Rogen and Evan Goldberg]\\nObservation 1: Could not find directed by Seth Rogen and Evan Goldberg. 
Similar: ['The Interview (redirect from The Interview (Seth Rogen film))', 'Evan Goldberg', 'Jay and Seth Versus the Apocalypse', 'This Is the End (category Films directed by Seth Rogen)', 'Seth Rogen filmography', 'Superbad (category Films with screenplays by Seth Rogen)', 'Pineapple Express (film) (category Films with screenplays by Seth Rogen)', 'Sausage Party (category Films produced by Seth Rogen)', 'Untitled Teenage Mutant Ninja Turtles film (category Films produced by Seth Rogen)', 'The Green Hornet (2011 film) (category Films produced by Seth Rogen)', 'Good Boys (film) (category Films produced by Seth Rogen)', 'Seth Rogen', 'Neighbors 2: Sorority Rising (category Films produced by Seth Rogen)', 'An American Pickle (category Films produced by Seth Rogen)', 'The Watch (2012 film) (category Films with screenplays by Seth Rogen)', 'Neighbors (2014 film) (category Films produced by Seth Rogen)', 'The Night Before (2015 film) (category Films produced by Seth Rogen)', 'Martin Starr', 'Preacher (TV series) (category Television shows directed by Steph Green)', 'Long Shot (2019 film) (category Films produced by Seth Rogen)'].\\nThought 2: I can check search The Interview (Seth Rogen film), Jay and Seth Versus the Apocalypse, This Is the End (category Films directed by Seth Rogen) and find which is an action comedy film directed by Seth Rogen and Evan Goldberg.\\nAction 2: Search[The Interview (Seth Rogen film)]\\nObservation 2: The Interview is a 2014 action-adventure comedy film co-produced and directed by Seth Rogen and Evan Goldberg in their second directorial work, following This Is the End (2013). The screenplay was written by Dan Sterling, based on a story he co-wrote with Rogen and Goldberg. The film stars Rogen and James Franco as journalists who set up an interview with North Korean leader Kim Jong-un (Randall Park), and are then recruited by the CIA to assassinate him. 
The film is inspired by a 2012 Vice documentary.\\nThought 3: The Interview is an action comedy film directed by Seth Rogen and Evan Goldberg, and it is inspired by a 2012 Vice documentary. So I can probably stop now without checking other films, and the answer is probably a 2012 Vice documentary.\\nAction 3: Finish[a 2012 Vice documentary]\\nQuestion: The head coach during the 1997\\u201398 Indiana Pacers season retired as a player from what NBA team?\\nThought 1: I can search Indiana Pacers, find the head coach during the 1997\\u201398 season, then find from what NBA team it is retired as a player.\\nAction 1: Search[Indiana Pacers]\\nObservation 1: The Indiana Pacers are an American professional basketball team based in Indianapolis. The Pacers compete in the National Basketball Association (NBA) as a member of the league's Eastern Conference Central Division. The Pacers were first established in 1967 as a member of the American Basketball Association (ABA) and became a member of the NBA in 1976 as a result of the ABA\\u00e2\\u0080\\u0093NBA merger. They play their home games at Gainbridge Fieldhouse. The team is named after the state of Indiana's history with the Indianapolis 500's pace cars and with the harness racing industry.[8][9]\\nThought 2: I can look up \\\"1997\\u201398\\\" to find the head coach during the 1997\\u201398 season.\\nAction 2: Lookup[1997\\u201398]\\nObservation 2: No more results.\\nThought 3: I can look up \\\"1997\\\" instead to find the head coach during the 1997\\u201398 season\\nAction 3: Lookup[1997]\\nObservation 3: (Result 1 / 5) In the 1997\\u00e2\\u0080\\u009398 NBA season, Indiana native and former Boston Celtics great Larry Bird was hired as head coach. He led the Pacers to a 19-game improvement over the previous season, finishing 58\\u00e2\\u0080\\u009324 \\u00e2\\u0080\\u0093 at the time, the most the franchise had ever won as an NBA team, and tying the 1970\\u00e2\\u0080\\u009371 ABA Pacers for the franchise record. 
Chris Mullin joined the team in the off-season and immediately became a valuable part of the Pacers lineup\\u00e2\\u0080\\u0094and starting small forward. Assistant coaches Rick Carlisle, in charge of the offense, and Dick Harter, who coached the defense, were key in getting the most out of the Pacers' role players such as Dale Davis, Derrick McKey and a young Antonio Davis. Miller and Rik Smits both made the All-Star team that year, and in the playoffs, the Pacers breezed past the Cleveland Cavaliers and New York Knicks before falling to the Chicago Bulls in a seven-game Eastern Conference Finals.\\nThought 4: In the 1997-98 season, former Boston Celtics great Larry Bird was hired as head coach. So the answer is Boston Celtics.\\nAction 4: Finish[Boston Celtics]\\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\\nThought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\\nAction 1: Search[Nicholas Ray]\\nObservation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 \\u2013 June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause. He is appreciated for many narrative features produced between 1947 and 1963 including They Live By Night, In A Lonely Place, Johnny Guitar, and Bigger Than Life, as well as an experimental work produced throughout the 1970s titled We Can't Go Home Again, which was unfinished at the time of Ray's death.\\nThought 2: Professions of Nicholas Ray are director, screenwriter, and actor. 
I need to search Elia Kazan next and find his professions.\\nAction 2: Search[Elia Kazan]\\nObservation 2: Elia Kazan (/\\u02c8i\\u02d0li\\u0259 k\\u0259\\u02c8z\\u00e6n/;[1][2] born Elias Kazantzoglou (Greek: \\u0397\\u03bb\\u03af\\u03b1\\u03c2 \\u039a\\u03b1\\u03b6\\u03b1\\u03bd\\u03c4\\u03b6\\u03cc\\u03b3\\u03bb\\u03bf\\u03c5);[3] September 7, 1909 \\u2013 September 28, 2003) was an American film and theatre director, producer, screenwriter and actor, described by The New York Times as \\\"one of the most honored and influential directors in Broadway and Hollywood history\\\".[4]\\nThought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\\nAction 3: Finish[director, screenwriter, actor]\\nQuestion: Which magazine was started first Arthur's Magazine or First for Women?\\nThought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\\nAction 1: Search[Arthur's Magazine]\\nObservation 1: Arthur's Magazine (1844\\u00e2\\u0080\\u00931846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others.[1][2] In May 1846 it was merged into Godey's Lady's Book.[3]\\nThought 2: Arthur's Magazine was started in 1844. I need to search First for Women next.\\nAction 2: Search[First for Women]\\nObservation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey.[2] In 2011 the circulation of the magazine was 1,310,696 copies.[3]\\nThought 3: First for Women was started in 1989. 
1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\\nAction 3: Finish[Arthur's Magazine]\\nQuestion: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nThought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\\nAction 1: Search[Pavel Urysohn]\\nObservation 1: Pavel Samuilovich Urysohn (\\u00d0\\u009f\\u00d0\\u00b0\\u00cc\\u0081\\u00d0\\u00b2\\u00d0\\u00b5\\u00d0\\u00bb \\u00d0\\u00a1\\u00d0\\u00b0\\u00d0\\u00bc\\u00d1\\u0083\\u00d0\\u00b8\\u00cc\\u0081\\u00d0\\u00bb\\u00d0\\u00be\\u00d0\\u00b2\\u00d0\\u00b8\\u00d1\\u0087 \\u00d0\\u00a3\\u00d1\\u0080\\u00d1\\u008b\\u00d1\\u0081\\u00d0\\u00be\\u00cc\\u0081\\u00d0\\u00bd) (February 3, 1898 \\u00e2\\u0080\\u0093 August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory, and for developing Urysohn's metrization theorem and Urysohn's lemma, both of which are fundamental results in topology. His name is also commemorated in the terms Urysohn universal space, Fr\\u00c3\\u00a9chet\\u00e2\\u0080\\u0093Urysohn space, Menger\\u00e2\\u0080\\u0093Urysohn dimension and Urysohn integral equation. He and Pavel Alexandrov formulated the modern definition of compactness in 1923.\\nThought 2: Pavel Urysohn is a mathematician. 
I need to search Leonid Levin next and find its type of work.\\nAction 2: Search[Leonid Levin]\\nObservation 2: Leonid Anatolievich Levin (/le\\u00c9\\u00aa.o\\u00ca\\u008a\\u00cb\\u0088ni\\u00cb\\u0090d \\u00cb\\u0088l\\u00c9\\u009bv\\u00c9\\u00aan/ lay-oh-NEED LEV-in; Russian: \\u00d0\\u009b\\u00d0\\u00b5\\u00d0\\u00be\\u00d0\\u00bd\\u00d0\\u00b8\\u00cc\\u0081\\u00d0\\u00b4 \\u00d0\\u0090\\u00d0\\u00bd\\u00d0\\u00b0\\u00d1\\u0082\\u00d0\\u00be\\u00cc\\u0081\\u00d0\\u00bb\\u00d1\\u008c\\u00d0\\u00b5\\u00d0\\u00b2\\u00d0\\u00b8\\u00d1\\u0087 \\u00d0\\u009b\\u00d0\\u00b5\\u00cc\\u0081\\u00d0\\u00b2\\u00d0\\u00b8\\u00d0\\u00bd; Ukrainian: \\u00d0\\u009b\\u00d0\\u00b5\\u00d0\\u00be\\u00d0\\u00bd\\u00d1\\u0096\\u00cc\\u0081\\u00d0\\u00b4 \\u00d0\\u0090\\u00d0\\u00bd\\u00d0\\u00b0\\u00d1\\u0082\\u00d0\\u00be\\u00cc\\u0081\\u00d0\\u00bb\\u00d1\\u0096\\u00d0\\u00b9\\u00d0\\u00be\\u00d0\\u00b2\\u00d0\\u00b8\\u00d1\\u0087 \\u00d0\\u009b\\u00d0\\u00b5\\u00cc\\u0081\\u00d0\\u00b2\\u00d1\\u0096\\u00d0\\u00bd; born November 2, 1948) is a Soviet-American mathematician and computer scientist. \\nThought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. \\nAction 3: Finish[yes]\\nQuestion: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nThought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\\nAction 1: Search[Adam Clayton Powell]\\nObservation 1: Could not find [Adam Clayton Powell]. Similar: ['Adam Clayton Powell Jr.', 'Adam Clayton Powell Sr.', 'Adam Clayton Powell IV', 'Adam Clayton Powell III', 'Seventh Avenue (Manhattan) (redirect from Adam Clayton Powell Jr. Boulevard)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito', 'Harlem', 'St. 
Nicholas Historic District', 'Abyssinian Baptist Church', 'Powell (surname)', 'Adam (musical)', 'Statue of Adam Clayton Powell Jr.', 'List of people from Harlem', 'Adam (given name)', 'Five-Percent Nation', 'List of American liberals'].\\nThought 2: I can search Adam Clayton Powell (film) to find the documentary.\\nAction 2: Search[Adam Clayton Powell (film)]\\nObservation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\\nThe film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\\nThought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\\nAction 3: Finish[The Saimaa Gesture]\\n\", \"cotqa_simple\": \"\\nQuestion: Musician and satirist Allie Goertz wrote a song about the \\\"The Simpsons\\\" character Milhouse, who Matt Groening named after who?\\nThought: Let's think step by step. Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\\nAnswer: Richard Nixon\\nQuestion: Guitars for Wounded Warriors is an album that was recorded in the village in which New York county?\\nThought: Let's think step by step. Guitars for Wounded Warriors was recorded at Tarquin's Jungle Room Studios in New Paltz (village), New York. New Paltz is a village in Ulster County located in the U.S. state of New York. So the answer is Ulster County.\\nAnswer: Ulster County\\nQuestion: This American actor who starred in \\\"Veep\\\" was also in an action comedy film directed by Seth Rogen and Evan Goldberg heavily inspired by what?\\nThought: Let's think step by step. The Interview is an action comedy film directed by Seth Rogen and Evan Goldberg. It is inspired by a 2012 Vice documentary. 
So the answer is a 2012 Vice documentary.\\nAnswer: a 2012 Vice documentary\\nQuestion: The head coach during the 1997\\u201398 Indiana Pacers season retired as a player from what NBA team?\\nThought: Let's think step by step. The head coach during the 1997\\u201398 Indiana Pacers season was Larry Bird. Larry Bird retired as a player from Boston Celtics. So the answer is Boston Celtics.\\nAnswer: Boston Celtics\\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\\nThought: Let's think step by step. Professions of Nicholas Ray are director, screenwriter, and actor. Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\\nAnswer: director, screenwriter, actor\\nQuestion: Which band was founded first, Hole, the rock band that Courtney Love was a frontwoman of, or The Wolfhounds?\\nThought: Let's think step by step. Hole was founded in 1989. The Wolfhounds was founded in 1985. 1989 (Hole) > 1985 (The Wolfhounds), so The Wolfhounds was founded first.\\nAnswer: The Wolfhounds\\nQuestion: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nThought: Let's think step by step. Pavel Urysohn is a mathematician. Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work.\\nAnswer: Yes\\nQuestion: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nThought: Let's think step by step. Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. 
So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\\nAnswer: The Saimaa Gesture\\n\", \"webqa_simple\": \"\\nQuestion: Musician and satirist Allie Goertz wrote a song about the \\\"The Simpsons\\\" character Milhouse, who Matt Groening named after who?\\nAnswer: Richard Nixon\\nQuestion: Guitars for Wounded Warriors is an album that was recorded in the village in which New York county?\\nAnswer: Ulster County\\nQuestion: This American actor who starred in \\\"Veep\\\" was also in an action comedy film directed by Seth Rogen and Evan Goldberg heavily inspired by what?\\nAnswer: 2012 Vice documentary\\nQuestion: The head coach during the 1997\\u201398 Indiana Pacers season retired as a player from what NBA team?\\nAnswer: Boston Celtics\\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\\nAnswer: director, screenwriter, actor\\nQuestion: Which magazine was started first Arthur's Magazine or First for Women?\\nAnswer: Arthur's Magazine\\nQuestion: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nAnswer: yes\\nQuestion: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nAnswer: The Saimaa Gesture\\n\", \"webthink_simple_3\": \"\\nQuestion: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\\nThought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\\nAction 1: Search[Colorado orogeny]\\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas. This took place from 1780 to 1650 million years ago (Mya), during the Paleoproterozoic (Statherian Period). It is recorded in the Colorado orogen, a >500-km-wide belt of oceanic arc rock that extends southward into New Mexico. 
The Colorado orogeny was likely part of the larger Yavapai orogeny.\\nThought 2: It does not mention the eastern sector of the Colorado orogeny. I need to look up eastern sector.\\nAction 2: Lookup[eastern sector]\\nObservation 2: (Result 1 / 1) The Colorado orogen, formerly called the Colorado province, is a >500-km-wide belt of oceanic arc rock (1.78\\u20131.65 Ga) that extends southward into New Mexico and composes a major part of the Proterozoic provinces of southwestern United States. This transcontinental collisional event occurred during the  Paleoproterozoic (Statherian Period).[1] The Wyoming sector of the Colorado orogeny was formerly called the Medicine Bow orogeny. The eastern sector extends into the High Plains and is called the Central Plains orogeny. The boundary between the Colorado orogeny and the Wyoming craton is the Cheyenne belt, a 5-km-wide mylonitic shear zone that verges northward. The Cheyenne belt transects and cuts off the south edge of the older Trans-Hudson orogeny.[2]\\nThought 3: The eastern sector of Colorado orogeny extends into the High Plains, so High Plains is the area. I need to search High Plains and find its elevation range.\\nAction 3: Search[High Plains]\\nObservation 3: High Plains refers to one of two distinct land regions:\\nThought 4: I need to search High Plains (United States).\\nAction 4: Search[High Plains (United States)]\\nObservation 4: The High Plains are a subregion of the Great Plains, mainly in the Western United States, but also partly in the Midwest states of Nebraska, Kansas, and South Dakota, generally encompassing the western part of the Great Plains before the region reaches the Rocky Mountains. 
The High Plains are located in eastern Montana, southeastern Wyoming, southwestern South Dakota, western Nebraska, eastern Colorado, western Kansas, eastern New Mexico, western Oklahoma, and to just south of the Texas Panhandle.[4] The southern region of the Western High Plains ecology region contains the geological formation known as Llano Estacado which can be seen from a short distance or on satellite maps.[5] From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\\nThought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\\nAction 5: Finish[1,800 to 7,000 ft]\\nQuestion: Guitars for Wounded Warriors is an album that was recorded in the village in which New York county?\\nThought 1: I need to search Guitars for Wounded Warriors, then find in which New York county it was recorded.\\nAction 1: Search[Guitars for Wounded Warriors]\\nObservation 1: Guitars for Wounded Warriors is the sixth solo studio album by guitarist Brian Tarquin, released in August 2014 by Cleopatra Records. In 2016, it received a Best Compilation Album nomination by the Independent Music Awards. All tracks were recorded at Tarquin's Jungle Room Studios in New Paltz (village), New York. Being moved by the lack of support for returning veterans through his life Tarquin decided to compose a heavy instrumental rock album as a way to show his appreciation to all veterans. So once again he enlisted top notch players to guest with him on the album, Billy Sheehan on Black Hawk, Ron \\\"Bumblefoot\\\" Thal and Reb Beach on Taliban Terror, Steve Morse on Freedom, Chris Poland on Charlie Surfs and Hunting, Chuck Loeb on Escape Kabul, Hal Lindes on Sand & Blood, Gary Hoey on 5 Klicks To Hell and Baghdad, Alex De Rosso Dokken on Broken Arrow, and The Flyin' Ryan Brothers on Surgical Strike.  The entire album was engineered, produced and composed by Tarquin especially for each one of the guest guitarists. 
Partial proceeds are donated to the Fisher House Foundation from sales.\\nThought 2: Guitars for Wounded Warriors was recorded in New Paltz (village), New York. I need to search New Paltz (village), New York and find which county it is in.\\nAction 2: Search[New Paltz (village), New York]\\nObservation 2: New Paltz is a village in Ulster County located in the U.S. state of New York. It is approximately 80 miles (130 km) north of New York City and 70 miles (110 km) south of Albany. The population was 7,324 at the 2020 census.[3]\\nThought 3: New Paltz (village), New York is in Ulster County, so the answer is Ulster County.\\nAction 3: Finish[Ulster County]\\nQuestion: Which magazine was started first Arthur's Magazine or First for Women?\\nThought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\\nAction 1: Search[Arthur's Magazine]\\nObservation 1: Arthur's Magazine (1844\\u00e2\\u0080\\u00931846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others.[1][2] In May 1846 it was merged into Godey's Lady's Book.[3]\\nThought 2: Arthur's Magazine was started in 1844. I need to search First for Women next.\\nAction 2: Search[First for Women]\\nObservation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey.[2] In 2011 the circulation of the magazine was 1,310,696 copies.[3]\\nThought 3: First for Women was started in 1989. 
1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\\nAction 3: Finish[Arthur's Magazine]\\n\", \"webthink_simple6\": \"\\nQuestion: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\\nThought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\\nAction 1: Search[Colorado orogeny]\\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\\nThought 2: It does not mention the eastern sector. So I need to look up eastern sector.\\nAction 2: Lookup[eastern sector]\\nObservation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\\nThought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I need to search High Plains and find its elevation range.\\nAction 3: Search[High Plains]\\nObservation 3: High Plains refers to one of two distinct land regions:\\nThought 4: I need to instead search High Plains (United States).\\nAction 4: Search[High Plains (United States)]\\nObservation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\\nThought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\\nAction 5: Finish[1,800 to 7,000 ft]\\nQuestion: Musician and satirist Allie Goertz wrote a song about the \\\"The Simpsons\\\" character Milhouse, who Matt Groening named after who?\\nThought 1: The question simplifies to \\\"The Simpsons\\\" character Milhouse is named after who. 
I only need to search Milhouse and find who it is named after.\\nAction 1: Search[Milhouse]\\nObservation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\\nThought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \\\"named after\\\".\\nAction 2: Lookup[named after]\\nObservation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \\nThought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\\nAction 3: Finish[Richard Nixon]\\nQuestion: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nThought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\\nAction 1: Search[Adam Clayton Powell]\\nObservation 1: Could not find [Adam Clayton Powell]. Similar: ['Adam Clayton Powell III', 'Seventh Avenue (Manhattan)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito'].\\nThought 2: To find the documentary, I can search Adam Clayton Powell (film).\\nAction 2: Search[Adam Clayton Powell (film)]\\nObservation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\\nThe film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\\nThought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. 
So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\\nAction 3: Finish[The Saimaa Gesture]\\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\\nThought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\\nAction 1: Search[Nicholas Ray]\\nObservation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 \\u2013 June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\\nThought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\\nAction 2: Search[Elia Kazan]\\nObservation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\\nThought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\\nAction 3: Finish[director, screenwriter, actor]\\nQuestion: Which magazine was started first Arthur's Magazine or First for Women?\\nThought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\\nAction 1: Search[Arthur's Magazine]\\nObservation 1: Arthur's Magazine (1844-\\u0080\\u00931846) was an American literary periodical published in Philadelphia in the 19th century. \\nThought 2: Arthur's Magazine was started in 1844. I need to search First for Women next.\\nAction 2: Search[First for Women]\\nObservation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \\nThought 3: First for Women was started in 1989. 
1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\\nAction 3: Finish[Arthur's Magazine]\\nQuestion: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nThought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\\nAction 1: Search[Pavel Urysohn]\\nObservation 1: Pavel Samuilovich Urysohn (February 3, 1898 \\u00e2\\u0080\\u0093 August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\\nThought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\\nAction 2: Search[Leonid Levin]\\nObservation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \\nThought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. \\nAction 3: Finish[yes]\\n\", \"webact_simple6\": \"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\\nAction 1: Search[Colorado orogeny]\\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\\nAction 2: Lookup[eastern sector]\\nObservation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\\nAction 3: Search[High Plains]\\nObservation 3: High Plains refers to one of two distinct land regions:\\nAction 4: Search[High Plains (United States)]\\nObservation 4: The High Plains are a subregion of the Great Plains. 
From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\\nAction 5: Finish[1,800 to 7,000 ft]\\nQuestion: Musician and satirist Allie Goertz wrote a song about the \\\"The Simpsons\\\" character Milhouse, who Matt Groening named after who?\\nAction 1: Search[Milhouse]\\nObservation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\\nAction 2: Lookup[named after]\\nObservation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \\nAction 3: Finish[Richard Nixon]\\nQuestion: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nAction 1: Search[Adam Clayton Powell]\\nObservation 1: Could not find [Adam Clayton Powell]. Similar: ['Adam Clayton Powell III', 'Seventh Avenue (Manhattan)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito'].\\nAction 2: Search[Adam Clayton Powell (film)]\\nObservation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\\nThe film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\\nAction 3: Finish[The Saimaa Gesture]\\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\\nAction 1: Search[Nicholas Ray]\\nObservation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 \\u2013 June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\\nAction 2: Search[Elia Kazan]\\nObservation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\\nAction 3: Finish[director, screenwriter, actor]\\nQuestion: Which magazine was 
started first Arthur's Magazine or First for Women?\\nAction 1: Search[Arthur's Magazine]\\nObservation 1: Arthur's Magazine (1844-\\u0080\\u00931846) was an American literary periodical published in Philadelphia in the 19th century. \\nAction 2: Search[First for Women]\\nObservation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \\nAction 3: Finish[Arthur's Magazine]\\nQuestion: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nAction 1: Search[Pavel Urysohn]\\nObservation 1: Pavel Samuilovich Urysohn (February 3, 1898 \\u00e2\\u0080\\u0093 August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\\nAction 2: Search[Leonid Levin]\\nObservation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \\nAction 3: Finish[yes]\\n\\n\", \"cotqa_simple6\": \"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\\nThought: Let's think step by step. The eastern sector of Colorado orogeny extends into the High Plains. High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\\nAnswer: 1,800 to 7,000 ft\\nQuestion: Musician and satirist Allie Goertz wrote a song about the \\\"The Simpsons\\\" character Milhouse, who Matt Groening named after who?\\nThought: Let's think step by step. Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\\nAnswer: Richard Nixon\\nQuestion: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nThought: Let's think step by step. Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. 
So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\\nAnswer: The Saimaa Gesture\\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\\nThought: Let's think step by step. Professions of Nicholas Ray are director, screenwriter, and actor. Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\\nAnswer: director, screenwriter, actor\\nQuestion: Which magazine was started first Arthur's Magazine or First for Women?\\nThought: Let's think step by step. Arthur's Magazine was started in 1844. First for Women was started in 1989. 1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\\nAnswer: Arthur's Magazine\\nQuestion: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nThought: Let's think step by step. Pavel Urysohn is a mathematician. Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work.\\nAnswer: Yes\\n\", \"webqa_simple6\": \"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\\nAnswer: 1,800 to 7,000 ft\\nQuestion: Musician and satirist Allie Goertz wrote a song about the \\\"The Simpsons\\\" character Milhouse, who Matt Groening named after who?\\nAnswer: Richard Nixon\\nQuestion: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nAnswer: The Saimaa Gesture\\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\\nAnswer: director, screenwriter, actor\\nQuestion: Which magazine was started first Arthur's Magazine or First for Women?\\nAnswer: Arthur's Magazine\\nQuestion: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nAnswer: Yes\\n\"}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "react_naive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wikipages.save_to_disk(\"var/wiki-pages\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wikipages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "notebook",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
