{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "NKB7cAmGbEEu"
   },
   "source": [
    "# With history"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "id": "wEJGn6opZM1s"
   },
   "outputs": [],
   "source": [
    "# !pip install openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "id": "2TgJrr2TZ4Cz"
   },
   "outputs": [],
   "source": [
    "# import os\n",
    "# import openai\n",
    "# import pandas as pd\n",
    "# from collections import deque\n",
    "# from tqdm import tqdm\n",
    "\n",
    "# openai.api_key = \"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\" #Use cautiously "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "id": "kqj8xw7bZ_8O"
   },
   "outputs": [],
   "source": [
    "# from google.colab import drive\n",
    "# drive.mount('/content/drive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "id": "cvCjKdexaB8v"
   },
   "outputs": [],
   "source": [
    "# def promptGen(oneshot, premise):\n",
    "#   p = oneshot + \"\\n\\nPremise: \"+ premise+\"\\n\\n\"\n",
    "#   return p\n",
    "\n",
    "# def gpt3(oneshot, dq, dqlen, premise,prmpt):\n",
    "#   allData = \"\"\n",
    "#   if len(dq)>dqlen:\n",
    "#     dq.popleft()\n",
    "#   history = \"\\n\".join(dq)\n",
    "#   ppt = oneshot + \"\\n\" + history + \"\\n\\n\" + prmpt  + \"\\n\\nPremise: \" + premise + \"\\n\\nHypothesis: \"\n",
    "#   # print(\"ppt----------> \",ppt)\n",
    "#   response = openai.Completion.create(\n",
    "#   model=\"text-davinci-002\",\n",
    "#   prompt=ppt,\n",
    "#   temperature=0.7,\n",
    "#   max_tokens=512,\n",
    "#   top_p=1,\n",
    "#   frequency_penalty=2,\n",
    "#   presence_penalty=2\n",
    "#   )\n",
    "#   print(\"-------------------------------------------------\")\n",
    "#   print(\"ppt----------> \",ppt, \"\\nGPT3:\\n\", response['choices'][0]['text'])\n",
    "#   allData = {\"Premise\":premise,\"GPT3 Response\":\"Hypothesis:\\n\"+ response['choices'][0]['text']}\n",
    "  \n",
    "#   dq.append(prmpt+\"\\n\\nPremise:\\n\"+ premise+\"\\n\\nHypothesis:\\n\"+response['choices'][0]['text'])\n",
    "\n",
    "#   return allData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "id": "fDyfyunyannE"
   },
   "outputs": [],
   "source": [
    "# df = pd.read_csv(\"/content/drive/MyDrive/Colab Notebooks/DataGen/CB_premise_list.csv\")\n",
    "# df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "id": "zQTgr6-GaPG0"
   },
   "outputs": [],
   "source": [
    "# data = []\n",
    "# dq = deque()\n",
    "# pathtosave = \"/content/drive/MyDrive/Colab Notebooks/DataGen/Commit\"\n",
    "# shardno = 1\n",
    "# num = 5 #Num of Passage or data to generate\n",
    "# oneshot = '''Given a premise, the task is to generate a hypothesis and a label if it is \"entailment\", \"Contradiction\", or \"neutral\". 3 examples for each kind of label are given below to help you out. \\n\\nPremise: \\nShe said good morning to Alice and Alice said hallo. She was thin and rather tall with a very lined gentle face and hair that was white but which Alice could see had once been blonde. She could also have told this was Tina's mother before Mrs Darne went off down the passage that led to the Headmaster's Flat.\\n\\nHypothesis:\\nthis was Tina's mother\\n\\nLabel:\\nentailment\\n\\nPremise:\\nB: And I've worked in the hospital for fifteen years and I've taken care of a few AIDS patients. A: Uh-huh. B: Uh, when they asked us did we want to, uh, keep it the same or, uh, spend more, spend less, uh, I think right now what they're spending is adequate. Uh, for my personal opinion. Uh, because I think it's something that's going to take them a while to come up with a, uh, vaccine for. A: Yeah.  Uh-huh.  Uh-huh. B: I don't think it's going to be that easy to come up with.\\n\\nHypothesis:\\nit is going to be that easy to come up with.\\n\\nLabel:\\ncontradiction\\n\\nPremise:\\nB: All right, well. A: Um, short term, I don't think anything's going to be done about it or probably should be done about it. B: Right.  Uh, are you saying you don't think anything should be done in the short term?\\n\\nHypothesis:\\nanything should be done in the short term\\n\\nLabel:\\nneutral\\n'''\n",
    "# #change ppt\n",
    "# ppt = '''\\nGenerate hypothesis and label now. Be diverse in label generation.  Note that the label can be \"entailment\", \"Contradiction\" or \"neutral\".\\n '''\n",
    "# for i in (range(df.shape[0])):\n",
    "#   if data!=[] and len(data)%100==0:\n",
    "#     print(\"Shard no: \", shardno)\n",
    "#     dataDF = pd.DataFrame(data)\n",
    "#     dataDF.to_csv(pathtosave + f\"/shard_w_history_{shardno}.csv\", index=False)\n",
    "#     shardno+=1\n",
    "#     data = []\n",
    "#   if num==0:\n",
    "#     break\n",
    "#   # prompt = promptGen(oneshot, df.iloc[i]['Premise List'])\n",
    "#   # print(prompt)\n",
    "#   d = gpt3(oneshot,dq,10,df.iloc[i]['Premise List'],ppt)\n",
    "#   data.append(d)\n",
    "#   num-=1\n",
    "# dataDF = pd.DataFrame(data)\n",
    "# dataDF.to_csv(pathtosave + f\"/shard_w_history_{shardno}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "id": "sJRza0wsofzr"
   },
   "outputs": [],
   "source": [
    "# dataDF = pd.DataFrame(data)\n",
    "# dataDF.to_csv(pathtosave + f\"/shard_{shardno}.csv\", index=False)\n",
    "# dataDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "id": "C5910dthoqLt"
   },
   "outputs": [],
   "source": [
    "# dataDF.iloc[4][1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "id": "uY2bVjdshE0B"
   },
   "outputs": [],
   "source": [
    "# for i in range(5):\n",
    "#   print(f\"{i+1} \"+df['Premise List'].iloc[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "id": "-f3uiWCzhyZu"
   },
   "outputs": [],
   "source": [
    "# df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "O2C-tOrC_sSh"
   },
   "source": [
    "# <b>Oneshot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "6xGCT1julNIy",
    "outputId": "acc8f141-2b36-4aa2-c551-fa959886de90"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
      "Requirement already satisfied: openai in /usr/local/lib/python3.7/dist-packages (0.23.1)\n",
      "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.7/dist-packages (from openai) (2.23.0)\n",
      "Requirement already satisfied: openpyxl>=3.0.7 in /usr/local/lib/python3.7/dist-packages (from openai) (3.0.10)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from openai) (1.21.6)\n",
      "Requirement already satisfied: pandas-stubs>=1.1.0.11 in /usr/local/lib/python3.7/dist-packages (from openai) (1.2.0.62)\n",
      "Requirement already satisfied: pandas>=1.2.3 in /usr/local/lib/python3.7/dist-packages (from openai) (1.3.5)\n",
      "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from openai) (4.64.1)\n",
      "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from openai) (4.1.1)\n",
      "Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.7/dist-packages (from openpyxl>=3.0.7->openai) (1.1.0)\n",
      "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.2.3->openai) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.2.3->openai) (2022.2.1)\n",
      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=1.2.3->openai) (1.15.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->openai) (2022.6.15)\n",
      "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->openai) (2.10)\n",
      "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->openai) (3.0.4)\n",
      "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20->openai) (1.24.3)\n"
     ]
    }
   ],
   "source": [
    "!pip install openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "id": "iHdhpZnyvJ6x"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import openai\n",
    "import pandas as pd\n",
    "from collections import deque\n",
    "from tqdm import tqdm\n",
    "\n",
    "openai.api_key = \"INSERT API TOKEN\" #Use cautiously "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "kmuYu-Vc_zKB",
    "outputId": "94acc5fc-89fa-47bd-fbe1-77ff93b04163"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
     ]
    }
   ],
   "source": [
    "from google.colab import drive\n",
    "drive.mount('/content/drive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "id": "K4WJZbaC_32S"
   },
   "outputs": [],
   "source": [
    "def promptGen(oneshot, premise):\n",
    "  p = oneshot + \"\\n\\nPremise: \"+ premise+\"\\n\\n\"\n",
    "  return p\n",
    "\n",
    "def gpt3(oneshot, premise,prmpt):\n",
    "  allData = \"\"\n",
    "  ppt = oneshot + \"\\n\" + \"\\n\\n\" + prmpt  + \"\\n\\nPremise: \\n\" + premise + \"\\n\\nHypothesis: \\n\"\n",
    "  \n",
    "  response = openai.Completion.create(\n",
    "  model=\"text-davinci-002\",\n",
    "  prompt=ppt,\n",
    "  temperature=0.7,\n",
    "  max_tokens=512,\n",
    "  top_p=1,\n",
    "  frequency_penalty=2,\n",
    "  presence_penalty=2\n",
    "  )\n",
    "  # print(\"+++++++++++++++++++++++++++++++\")\n",
    "  # print(\"ppt----------> \",ppt, \"\\n GPT3:\\n\", response['choices'][0]['text'])\n",
    "  allData = {\"Premise\":premise,\"GPT3 Response\":\"Hypothesis:\\n\"+ response['choices'][0]['text']}\n",
    "\n",
    "  return allData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "id": "u_VshJ38__1Y"
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"/content/drive/MyDrive/Colab Notebooks/DataGen/CB_premise_list.csv\")\n",
    "df.head()\n",
    "dfcopy = df.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "zITI1BZ3njJ1",
    "outputId": "5e5a8b9f-741b-4463-e5fb-fdac35a2bc6f"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1204, 1)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfcopy.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "5OSTFf-tlXS3"
   },
   "source": [
    "## neutral"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0A2kl9cMADIl"
   },
   "outputs": [],
   "source": [
    "data = []\n",
    "pathtosave = \"/content/drive/MyDrive/Colab Notebooks/DataGen/Commit\"\n",
    "shardno = 1\n",
    "num = 400 #Num of Passage or data to generate\n",
    "oneshot = '''Given a premise, the task is to generate a hypothesis and a label if it is \"neutral\". An example for is given below to help you out. \n",
    "\n",
    "Premise:\n",
    "B: All right, well. A: Um, short term, I don't think anything's going to be done about it or probably should be done about it. B: Right.  Uh, are you saying you don't think anything should be done in the short term?\n",
    "\n",
    "Hypothesis:\n",
    "anything should be done in the short term\n",
    "\n",
    "Label:\n",
    "neutral'''\n",
    "#change ppt\n",
    "ppt = '''Generate hypothesis and label now. Be diverse in example generation.  Note that the label should be \"neutral\". '''\n",
    "\n",
    "df = dfcopy[:400]\n",
    "for i in tqdm(range(df.shape[0])):\n",
    "  if data!=[] and len(data)%100==0:\n",
    "    print(\"Shard no: \", shardno)\n",
    "    dataDF = pd.DataFrame(data)\n",
    "    dataDF.to_csv(pathtosave + f\"/shard_oneshot_neutral_{shardno}.csv\", index=False)\n",
    "    shardno+=1\n",
    "    data = []\n",
    "  if num==0:\n",
    "    break\n",
    "  # prompt = promptGen(oneshot, df.iloc[i]['Premise List'])\n",
    "  # print(prompt)\n",
    "  d = gpt3(oneshot,df.iloc[i]['Premise List'],ppt)\n",
    "  data.append(d)\n",
    "  num-=1\n",
    "dataDF = pd.DataFrame(data)\n",
    "dataDF.to_csv(pathtosave + f\"/shard_oneshot_neutral_{shardno}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "OHO4rGZUl5yM"
   },
   "source": [
    "## entailment "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "28WO_6IAXsU-",
    "outputId": "b37db76e-d1a9-4351-8d52-48f1c2ba8857"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  1%|          | 4/400 [00:07<11:34,  1.75s/it]\n"
     ]
    }
   ],
   "source": [
    "data = []\n",
    "pathtosave = \"/content/drive/MyDrive/Colab Notebooks/DataGen/Commit\"\n",
    "shardno = 1\n",
    "num = 400 #Num of Passage or data to generate\n",
    "oneshot = '''Given a premise, the task is to generate a hypothesis and a label if it is \"entailment\". An example for is given below to help you out. \n",
    "\n",
    "Premise: \n",
    "She said good morning to Alice and Alice said hallo. She was thin and rather tall with a very lined gentle face and hair that was white but which Alice could see had once been blonde. She could also have told this was Tina's mother before Mrs Darne went off down the passage that led to the Headmaster's Flat.\n",
    "\n",
    "Hypothesis:\n",
    "this was Tina's mother\n",
    "\n",
    "Label:\n",
    "entailment'''\n",
    "#change ppt\n",
    "ppt = '''Generate hypothesis and label now. Be diverse in example generation.  Note that the label should be \"entailment\". '''\n",
    "df = dfcopy[400:800]\n",
    "for i in tqdm(range(df.shape[0])):\n",
    "  if data!=[] and len(data)%100==0:\n",
    "    print(\"Shard no: \", shardno)\n",
    "    dataDF = pd.DataFrame(data)\n",
    "    dataDF.to_csv(pathtosave + f\"/shard_oneshot_entailment_{shardno}.csv\", index=False)\n",
    "    shardno+=1\n",
    "    data = []\n",
    "  if num==0:\n",
    "    break\n",
    "  # prompt = promptGen(oneshot, df.iloc[i]['Premise List'])\n",
    "  # print(prompt)\n",
    "  d = gpt3(oneshot,df.iloc[i]['Premise List'],ppt)\n",
    "  data.append(d)\n",
    "  num-=1\n",
    "dataDF = pd.DataFrame(data)\n",
    "dataDF.to_csv(pathtosave + f\"/shard_oneshot_entailment_{shardno}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "KZr2IS2amPOr"
   },
   "source": [
    "## Contradiction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "9OoSnXhAmOtg",
    "outputId": "03faf5ec-d6d9-46c6-8568-f6dfac8f5512"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  1%|          | 4/400 [00:06<11:10,  1.69s/it]\n"
     ]
    }
   ],
   "source": [
    "data = []\n",
    "pathtosave = \"/content/drive/MyDrive/Colab Notebooks/DataGen/Commit\"\n",
    "shardno = 1\n",
    "num = 400 #Num of Passage or data to generate\n",
    "oneshot = '''Given a premise, the task is to generate a hypothesis and a label if it is \"contradiction\". An example for is given below to help you out. \n",
    "\n",
    "Premise:\n",
    "B: And I've worked in the hospital for fifteen years and I've taken care of a few AIDS patients. A: Uh-huh. B: Uh, when they asked us did we want to, uh, keep it the same or, uh, spend more, spend less, uh, I think right now what they're spending is adequate. Uh, for my personal opinion. Uh, because I think it's something that's going to take them a while to come up with a, uh, vaccine for. A: Yeah.  Uh-huh.  Uh-huh. B: I don't think it's going to be that easy to come up with.\n",
    "\n",
    "Hypothesis:\n",
    "it is going to be that easy to come up with.\n",
    "\n",
    "Label:\n",
    "contradiction'''\n",
    "#change ppt\n",
    "ppt = '''Generate hypothesis and label now. Be diverse in example generation.  Note that the label should be \"contradiction\". '''\n",
    "df = dfcopy[800:1200]\n",
    "for i in tqdm(range(df.shape[0])):\n",
    "  if data!=[] and len(data)%100==0:\n",
    "    print(\"Shard no: \", shardno)\n",
    "    dataDF = pd.DataFrame(data)\n",
    "    dataDF.to_csv(pathtosave + f\"/shard_oneshot_contradiction_{shardno}.csv\", index=False)\n",
    "    shardno+=1\n",
    "    data = []\n",
    "  if num==0:\n",
    "    break\n",
    "  # prompt = promptGen(oneshot, df.iloc[i]['Premise List'])\n",
    "  # print(prompt)\n",
    "  d = gpt3(oneshot,df.iloc[i]['Premise List'],ppt)\n",
    "  data.append(d)\n",
    "  num-=1\n",
    "dataDF = pd.DataFrame(data)\n",
    "dataDF.to_csv(pathtosave + f\"/shard_oneshot_contradiction_{shardno}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "yEOynQFhnIpB"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [
    "NKB7cAmGbEEu"
   ],
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
