{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Sanity check: every record's answer must be one of its own options.\n",
    "# JSON text is read as UTF-8 explicitly rather than with the platform default.\n",
    "with open(\"eval_labels.json\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "for d in data:\n",
    "    if d[\"answer\"] not in d[\"options\"]:\n",
    "        print(\"Invalid answer\")\n",
    "        # Show the offending record so it can be located and fixed.\n",
    "        print(d)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Give every record a sequential integer id (0-based, in file order)\n",
    "# and write the result back to the same file.\n",
    "with open(\"fewshot_labels.json\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "# enumerate() supplies the counter without rebinding the builtin `id`\n",
    "# in the kernel's global namespace.\n",
    "for record_id, d in enumerate(data):\n",
    "    d[\"id\"] = record_id\n",
    "\n",
    "with open(\"fewshot_labels.json\", \"w\", encoding=\"utf-8\") as f:\n",
    "    json.dump(data, f, indent=4)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The is not guaranteed, e.g. The mafia could attack. Please be careful.\n"
     ]
    }
   ],
   "source": [
    "import textwrap\n",
    "import nltk\n",
    "\n",
    "# Sentence splitter shared by capitalize_sentence below.\n",
    "sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')\n",
    "\n",
    "\n",
    "def capitalize_sentence(text):\n",
    "    \"\"\"Return `text` with each sentence passed through `str.capitalize`.\n",
    "\n",
    "    The text is split with the module-level `sent_tokenizer`; every piece\n",
    "    has its first character upper-cased and the rest lower-cased, and the\n",
    "    pieces are re-joined with a single space.\n",
    "    \"\"\"\n",
    "    return \" \".join(\n",
    "        piece.capitalize() for piece in sent_tokenizer.tokenize(text)\n",
    "    )\n",
    "\n",
    "\n",
    "# Quick demonstration on a mixed-case, multi-sentence input.\n",
    "text = \"tHe is not guaranteed, e.g. the mafia could attack. please be careful.\"\n",
    "print(capitalize_sentence(text))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "\n",
    "def _normalize_choice(choice):\n",
    "    \"\"\"Return `choice` with terminal punctuation and sentence capitalization.\n",
    "\n",
    "    A period is appended only when `choice` does not already end in a\n",
    "    period, question mark or exclamation mark, so questions are not\n",
    "    turned into '?.'. The result is then passed to capitalize_sentence.\n",
    "    \"\"\"\n",
    "    if not choice.endswith((\".\", \"?\", \"!\")):\n",
    "        choice = choice + \".\"\n",
    "    return capitalize_sentence(choice)\n",
    "\n",
    "\n",
    "with open(\"fewshot_labels.json\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "for d in data:\n",
    "    d[\"question\"] = capitalize_sentence(d[\"question\"])\n",
    "    # Options and answer go through the same normalizer so an answer that\n",
    "    # matched one of its options before still matches it afterwards.\n",
    "    d[\"options\"] = [_normalize_choice(option) for option in d[\"options\"]]\n",
    "    d[\"answer\"] = _normalize_choice(d[\"answer\"])\n",
    "\n",
    "    if d[\"answer\"] not in d[\"options\"]:\n",
    "        print(\"Invalid answer\")\n",
    "        print(d)\n",
    "\n",
    "# The normalized records overwrite the input file.\n",
    "with open(\"fewshot_labels.json\", \"w\", encoding=\"utf-8\") as f:\n",
    "    json.dump(data, f, indent=4)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
