{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8fefb4c7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:09:31.896959Z",
     "start_time": "2023-03-03T06:09:30.099608Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c92758da",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:09:31.915315Z",
     "start_time": "2023-03-03T06:09:31.899142Z"
    }
   },
   "outputs": [],
   "source": [
    "# Collect the 'Premise' and 'GPT3 Response' columns of contradiction shards 1-4.\n",
    "c_premise_list, c_response_list = [], []\n",
    "\n",
    "for shard_no in range(1, 5):\n",
    "    shard_df = pd.read_csv(f\"shard_oneshot_contradiction_{shard_no}.csv\")\n",
    "    c_premise_list.extend(shard_df['Premise'])\n",
    "    c_response_list.extend(shard_df['GPT3 Response'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bea615ab",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:09:31.920186Z",
     "start_time": "2023-03-03T06:09:31.917034Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "400\n",
      "400\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: number of contradiction premises, then number of responses.\n",
    "print(len(c_premise_list), len(c_response_list), sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9dd0d8f9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:09:32.750076Z",
     "start_time": "2023-03-03T06:09:32.706597Z"
    }
   },
   "outputs": [],
   "source": [
    "# Collect the 'Premise' and 'GPT3 Response' columns of entailment shards 1-10.\n",
    "e_premise_list, e_response_list = [], []\n",
    "\n",
    "for shard_no in range(1, 11):\n",
    "    shard_df = pd.read_csv(f\"shard_oneshot_entailment_{shard_no}.csv\")\n",
    "    e_premise_list.extend(shard_df['Premise'])\n",
    "    e_response_list.extend(shard_df['GPT3 Response'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "54975f8b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:09:33.243527Z",
     "start_time": "2023-03-03T06:09:33.238329Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "400\n",
      "400\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: number of entailment premises, then number of responses.\n",
    "print(len(e_premise_list), len(e_response_list), sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "daa3a866",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:09:34.084258Z",
     "start_time": "2023-03-03T06:09:34.052881Z"
    }
   },
   "outputs": [],
   "source": [
    "# Collect the 'Premise' and 'GPT3 Response' columns of neutral shards 1-4.\n",
    "n_premise_list, n_response_list = [], []\n",
    "\n",
    "for shard_no in range(1, 5):\n",
    "    shard_df = pd.read_csv(f\"shard_oneshot_neutral_{shard_no}.csv\")\n",
    "    n_premise_list.extend(shard_df['Premise'])\n",
    "    n_response_list.extend(shard_df['GPT3 Response'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c2305435",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:09:35.187390Z",
     "start_time": "2023-03-03T06:09:35.182311Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "400\n",
      "400\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: number of neutral premises, then number of responses.\n",
    "print(len(n_premise_list), len(n_response_list), sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "4da068b1",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:11:41.848644Z",
     "start_time": "2023-03-03T06:11:41.839991Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1200\n",
      "1200\n"
     ]
    }
   ],
   "source": [
    "# Stack the three label groups in a fixed order: contradiction, entailment, neutral.\n",
    "final_p_list = c_premise_list + e_premise_list + n_premise_list\n",
    "final_resp_list = c_response_list + e_response_list + n_response_list\n",
    "\n",
    "# The parsing cell zips these two lists, and zip() silently truncates to the\n",
    "# shorter one, so fail loudly here if they ever get out of step.\n",
    "assert len(final_p_list) == len(final_resp_list), \"premise/response count mismatch\"\n",
    "\n",
    "print(len(final_p_list))\n",
    "print(len(final_resp_list))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "3fcf11fe",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T07:41:01.821894Z",
     "start_time": "2023-03-03T07:41:01.803701Z"
    }
   },
   "outputs": [],
   "source": [
    "def _build_example(premise, response, row):\n",
    "    \"\"\"Turn one (premise, raw response) pair into an (input text, label) pair.\n",
    "\n",
    "    The response is expected to contain 'Hypothesis:' followed by 'Label:'.\n",
    "    The text after 'Hypothesis:' up to 'Label:' is the hypothesis; the text\n",
    "    after 'Label:' is the label, which is stripped and lower-cased.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `response` is not a string or a marker is missing.\n",
    "            `row` is quoted in the message so the bad record can be located.\n",
    "    \"\"\"\n",
    "    if not isinstance(response, str):\n",
    "        raise ValueError(f\"row {row}: response is not text: {response!r}\")\n",
    "    after_marker = response.split('Hypothesis:')\n",
    "    if len(after_marker) < 2:\n",
    "        raise ValueError(f\"row {row}: no 'Hypothesis:' marker in {response!r}\")\n",
    "    hypothesis_and_label = after_marker[1].split(\"Label:\")\n",
    "    if len(hypothesis_and_label) < 2:\n",
    "        raise ValueError(f\"row {row}: no 'Label:' marker in {response!r}\")\n",
    "    model_input = \"Premise :\" + premise + \"\\nHypothesis :\" + hypothesis_and_label[0].strip()\n",
    "    return model_input, hypothesis_and_label[1].strip().lower()\n",
    "\n",
    "\n",
    "ip_list = []\n",
    "op_list = []\n",
    "\n",
    "for row, (premise, response) in enumerate(zip(final_p_list, final_resp_list)):\n",
    "    model_input, label = _build_example(premise, response, row)\n",
    "    ip_list.append(model_input)\n",
    "    op_list.append(label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "b53c8d8c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:28:25.384791Z",
     "start_time": "2023-03-03T06:28:25.360440Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Input</th>\n",
       "      <th>Output</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Premise :The immune system is a system of many...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Premise :Disorders of the immune system can re...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Premise :Immunology is a science that examines...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Premise :The immune system protects organisms ...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Premise :Both innate and adaptive immunity dep...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1195</th>\n",
       "      <td>Premise :Despite their soft, gelatinous bodies...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1196</th>\n",
       "      <td>Premise :Ctenophores form an animal phylum tha...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1197</th>\n",
       "      <td>Premise :Like sponges and cnidarians, ctenopho...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1198</th>\n",
       "      <td>Premise :Ranging from about 1 millimeter (0.03...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1199</th>\n",
       "      <td>Premise :For a phylum with relatively few spec...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  Input         Output\n",
       "0     Premise :The immune system is a system of many...  contradiction\n",
       "1     Premise :Disorders of the immune system can re...  contradiction\n",
       "2     Premise :Immunology is a science that examines...  contradiction\n",
       "3     Premise :The immune system protects organisms ...  contradiction\n",
       "4     Premise :Both innate and adaptive immunity dep...  contradiction\n",
       "...                                                 ...            ...\n",
       "1195  Premise :Despite their soft, gelatinous bodies...        neutral\n",
       "1196  Premise :Ctenophores form an animal phylum tha...        neutral\n",
       "1197  Premise :Like sponges and cnidarians, ctenopho...        neutral\n",
       "1198  Premise :Ranging from about 1 millimeter (0.03...        neutral\n",
       "1199  Premise :For a phylum with relatively few spec...        neutral\n",
       "\n",
       "[1200 rows x 2 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pair every model input with its label in a two-column frame and display it.\n",
    "data = dict(Input=ip_list, Output=op_list)\n",
    "df_ans = pd.DataFrame(data)\n",
    "df_ans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "1efdab14",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T06:32:29.360837Z",
     "start_time": "2023-03-03T06:32:29.314304Z"
    }
   },
   "outputs": [],
   "source": [
    "# Write the frame to disk without the row index column.\n",
    "df_ans.to_csv(path_or_buf=\"syn_train.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "52157724",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T07:52:20.195409Z",
     "start_time": "2023-03-03T07:52:20.181010Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Location of the original cb/test.csv file. The default is the path this\n",
    "# notebook was authored with; set the CB_TEST_CSV environment variable to\n",
    "# point at the file on another machine.\n",
    "cb_test_path = os.environ.get(\n",
    "    \"CB_TEST_CSV\",\n",
    "    \"/Users/him1411/Desktop/ra_work/project_5_gpt3 dataset generation/data/original/cb/test.csv\",\n",
    ")\n",
    "df = pd.read_csv(cb_test_path)\n",
    "\n",
    "# NOTE: from here on ip_list / op_list hold the rows of this file, replacing\n",
    "# the lists of the same name built earlier in the notebook.\n",
    "ip_list = list(df['Input'])\n",
    "op_list = list(df['Output'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "25a68ef8",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T22:38:12.456591Z",
     "start_time": "2023-03-03T22:38:12.451932Z"
    }
   },
   "outputs": [],
   "source": [
    "# Task instruction placed at the start of every prompt.\n",
    "definition = (\n",
    "    'Definition :Given a premise and a hypothesis, the task is to predict '\n",
    "    'if the premise and hypothesis are in \"contradiction\", \"entailment\" '\n",
    "    'or \"neutral\" with respect to each other.'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "50567a8f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T22:38:12.748807Z",
     "start_time": "2023-03-03T22:38:12.742708Z"
    }
   },
   "outputs": [],
   "source": [
    "# First in-context example (answer: entailment).\n",
    "example1 = \"\\n\".join([\n",
    "    '',\n",
    "    'Example 1-',\n",
    "    '',\n",
    "    '''Premise : Lin Piao, after all, was the creator of Mao's \"Little Red Book\" of quotations.''',\n",
    "    '''Hypothesis : Lin Piao wrote the \"Little Red Book\".''',\n",
    "    '',\n",
    "    'Answer : entailment',\n",
    "    '',\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "419286ac",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T22:38:13.115777Z",
     "start_time": "2023-03-03T22:38:13.109419Z"
    }
   },
   "outputs": [],
   "source": [
    "# Second in-context example. The answer is \"contradiction\" so that it stays\n",
    "# within the label set named in the task definition (\"contradiction\",\n",
    "# \"entailment\", \"neutral\"). This block also carries the hand-off line that\n",
    "# precedes the actual query.\n",
    "example2 ='''\n",
    "Example 2-\n",
    "\n",
    "Premise : No Weapons of Mass Destruction Found in Iraq Yet.\n",
    "Hypothesis : Weapons of Mass Destruction Found in Iraq.\n",
    "\n",
    "Answer : contradiction\n",
    "\n",
    "Now complete the following example:\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "19654328",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T07:52:23.500147Z",
     "start_time": "2023-03-03T07:52:23.496061Z"
    }
   },
   "outputs": [],
   "source": [
    "# Third in-context example (answer: neutral), ending with the hand-off line.\n",
    "example3 = \"\\n\".join([\n",
    "    '',\n",
    "    'Example 3-',\n",
    "    '',\n",
    "    \"Premise :``I hope you are settling down and the cat is well.'' This was a lie. She did not hope the cat was well.\",\n",
    "    'Hypothesis :the cat was well',\n",
    "    '',\n",
    "    'Answer : neutral',\n",
    "    '',\n",
    "    'Now complete the following example: ',\n",
    "    '',\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "0724ca29",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T07:52:25.283312Z",
     "start_time": "2023-03-03T07:52:25.278165Z"
    }
   },
   "outputs": [],
   "source": [
    "# Everything that precedes the query in each prompt.\n",
    "prompt_prefix = definition + \"\\n\" + example1 + \"\\n\" + example2 + \"\\n\"\n",
    "\n",
    "# Build a new list instead of rewriting entries in place, and leave entries\n",
    "# that already carry the prefix untouched, so re-running this cell cannot\n",
    "# stack the instructions a second time.\n",
    "ip_list = [\n",
    "    item if item.startswith(prompt_prefix) else prompt_prefix + item + \"\\nAnswer : \"\n",
    "    for item in ip_list\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "042d9887",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T22:38:48.194738Z",
     "start_time": "2023-03-03T22:38:48.085043Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'ip_list' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/xf/l5jsvn9s4y73c622dmjv9p0r0000gn/T/ipykernel_77226/3562472177.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mip_list\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'ip_list' is not defined"
     ]
    }
   ],
   "source": [
    "# Preview only the first few prompts; each one repeats the full instruction\n",
    "# block, so showing the whole list floods the output.\n",
    "ip_list[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec2d0aca",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efa5735b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "af88eb55",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T07:52:26.308219Z",
     "start_time": "2023-03-03T07:52:26.288519Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Input</th>\n",
       "      <th>Output</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>entailment</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>entailment</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>entailment</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>entailment</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>entailment</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>entailment</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>entailment</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>205</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>206</th>\n",
       "      <td>Definition :Given a premise and a hypothesis, ...</td>\n",
       "      <td>contradiction</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>207 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 Input         Output\n",
       "0    Definition :Given a premise and a hypothesis, ...     entailment\n",
       "1    Definition :Given a premise and a hypothesis, ...     entailment\n",
       "2    Definition :Given a premise and a hypothesis, ...     entailment\n",
       "3    Definition :Given a premise and a hypothesis, ...     entailment\n",
       "4    Definition :Given a premise and a hypothesis, ...     entailment\n",
       "..                                                 ...            ...\n",
       "202  Definition :Given a premise and a hypothesis, ...     entailment\n",
       "203  Definition :Given a premise and a hypothesis, ...  contradiction\n",
       "204  Definition :Given a premise and a hypothesis, ...     entailment\n",
       "205  Definition :Given a premise and a hypothesis, ...  contradiction\n",
       "206  Definition :Given a premise and a hypothesis, ...  contradiction\n",
       "\n",
       "[207 rows x 2 columns]"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pair every prompt with its expected label in a two-column frame and display it.\n",
    "data = dict(Input=ip_list, Output=op_list)\n",
    "df_ans = pd.DataFrame(data)\n",
    "df_ans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "60c0e67c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-03-03T07:52:33.990298Z",
     "start_time": "2023-03-03T07:52:33.969225Z"
    }
   },
   "outputs": [],
   "source": [
    "# Write the frame to disk without the row index column.\n",
    "df_ans.to_csv(path_or_buf=\"test_w_inst.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46dea061",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:root] *",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
