{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "75541950",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T04:48:15.807168Z",
     "start_time": "2023-07-28T04:48:15.803714Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "278351ef",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T04:45:22.853831Z",
     "start_time": "2023-07-28T04:45:22.845734Z"
    }
   },
   "outputs": [],
   "source": [
    "prompt1 = '''For a given subject pair (Subject 1, Subject 2), generate 2 passages, S1 and S2. Each passage must be 2 sentences or fewer. You are also given the gender of both subjects\n",
    "For every pronoun in the sentences, identify which subject is being referred to.\n",
    "\n",
    "Example:\n",
    "Input:\n",
    "Subject 1: Teacher, Subject 2: Student\n",
    "Pronouns: He/him\n",
    "Output:\n",
    "S1: The teacher was disappointed in the student because [he=teacher] had high hopes for [him=student).\n",
    "S2: The teacher and the student are not on good terms. [He=student] is very rebellious, and does not show up to classes.\n",
    "Explanation:\n",
    "In S1: It is clear that the student has disappointed the teacher, who had high hopes for the student. Hence, “he had high hopes” - “he” refers to the teacher, “for him” - “him” refers to the student. In S2: The student is rebellious and does not show up to classes - hence the “he” in “he is very rebellious” refers to the student\n",
    "\n",
    "Input:\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cb1869e7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T04:45:23.494171Z",
     "start_time": "2023-07-28T04:45:23.485073Z"
    }
   },
   "outputs": [],
   "source": [
    "def chat_gpt_response(sample):\n",
    "    final_prompt = sample\n",
    "    \n",
    "    response=openai.ChatCompletion.create(\n",
    "        model=\"gpt-3.5-turbo\",\n",
    "        messages= [{\"role\": \"user\", \"content\": final_prompt}],\n",
    "        temperature=0.7,\n",
    "        max_tokens=4096,\n",
    "        top_p=1,\n",
    "        frequency_penalty=0,\n",
    "        presence_penalty=0,\n",
    "        stop=None)\n",
    "    \n",
    "    reply = response[\"choices\"][0][\"message\"][\"content\"]\n",
    "    return reply"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d7a72131",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T04:45:51.567432Z",
     "start_time": "2023-07-28T04:45:51.531057Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Singular Subjects</th>\n",
       "      <th>Plural Subjects</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a) Subject 1: Barista, Subject 2: Customer</td>\n",
       "      <td>a) Subject 1: Friends, Subject 2: Chatting Cus...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b) Subject 1: Waiter/Waitress, Subject 2: Diner</td>\n",
       "      <td>b) Subject 1: Families, Subject 2: Enjoying Br...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c) Subject 1: Chef, Subject 2: Food Blogger</td>\n",
       "      <td>c) Subject 1: Tourists, Subject 2: Taking Photos</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>d) Subject 1: Cafe Owner, Subject 2: Regular P...</td>\n",
       "      <td>d) Subject 1: Students, Subject 2: Discussing ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>e) Subject 1: Baker, Subject 2: Pastry Lover</td>\n",
       "      <td>e) Subject 1: Colleagues, Subject 2: Having a ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>a) Subject 1: Judge, Subject 2: Defendant</td>\n",
       "      <td>a) Subject 1: Judges, Subject 2: Jurors</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>b) Subject 1: Lawyer, Subject 2: Client</td>\n",
       "      <td>b) Subject 1: Lawyers, Subject 2: Defendants</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>c) Subject 1: Bailiff, Subject 2: Witness</td>\n",
       "      <td>c) Subject 1: Bailiffs, Subject 2: Witnesses</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>d) Subject 1: Court Clerk, Subject 2: Prosecutor</td>\n",
       "      <td>d) Subject 1: Court Clerks, Subject 2: Prosecu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>e) Subject 1: Court Reporter, Subject 2: Plain...</td>\n",
       "      <td>e) Subject 1: Court Reporters, Subject 2: Plai...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    Singular Subjects  \\\n",
       "0          a) Subject 1: Barista, Subject 2: Customer   \n",
       "1     b) Subject 1: Waiter/Waitress, Subject 2: Diner   \n",
       "2         c) Subject 1: Chef, Subject 2: Food Blogger   \n",
       "3   d) Subject 1: Cafe Owner, Subject 2: Regular P...   \n",
       "4        e) Subject 1: Baker, Subject 2: Pastry Lover   \n",
       "..                                                ...   \n",
       "95          a) Subject 1: Judge, Subject 2: Defendant   \n",
       "96            b) Subject 1: Lawyer, Subject 2: Client   \n",
       "97          c) Subject 1: Bailiff, Subject 2: Witness   \n",
       "98   d) Subject 1: Court Clerk, Subject 2: Prosecutor   \n",
       "99  e) Subject 1: Court Reporter, Subject 2: Plain...   \n",
       "\n",
       "                                      Plural Subjects  \n",
       "0   a) Subject 1: Friends, Subject 2: Chatting Cus...  \n",
       "1   b) Subject 1: Families, Subject 2: Enjoying Br...  \n",
       "2    c) Subject 1: Tourists, Subject 2: Taking Photos  \n",
       "3   d) Subject 1: Students, Subject 2: Discussing ...  \n",
       "4   e) Subject 1: Colleagues, Subject 2: Having a ...  \n",
       "..                                                ...  \n",
       "95            a) Subject 1: Judges, Subject 2: Jurors  \n",
       "96       b) Subject 1: Lawyers, Subject 2: Defendants  \n",
       "97       c) Subject 1: Bailiffs, Subject 2: Witnesses  \n",
       "98  d) Subject 1: Court Clerks, Subject 2: Prosecu...  \n",
       "99  e) Subject 1: Court Reporters, Subject 2: Plai...  \n",
       "\n",
       "[100 rows x 2 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"wsc_part1_input.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "eb1c83d6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T05:12:22.149137Z",
     "start_time": "2023-07-28T05:12:22.141275Z"
    }
   },
   "outputs": [],
   "source": [
    "pronoun_list = [\"he/him\",\"she/her\",\"they/them\"]\n",
    "random_number = random.randint(0, 2)\n",
    "# print(random_number)\n",
    "# pronoun_list[random_number]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "1718373e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T05:01:17.510782Z",
     "start_time": "2023-07-28T05:01:17.506493Z"
    }
   },
   "outputs": [],
   "source": [
    "l1 = list(df['Singular Subjects'])\n",
    "l2 = list(df['Plural Subjects'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "d486c535",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T05:01:18.148802Z",
     "start_time": "2023-07-28T05:01:18.141140Z"
    }
   },
   "outputs": [],
   "source": [
    "for i,item in enumerate(l1):\n",
    "    random_number = random.randint(0, 2)\n",
    "    l1[i] = l1[i] + \" \\nPronouns: \" + pronoun_list[random_number]\n",
    "    \n",
    "for i,item in enumerate(l2):\n",
    "    l2[i] = l2[i] + \" \\nPronouns: they/them\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "b6bfe481",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T05:02:49.959605Z",
     "start_time": "2023-07-28T05:02:49.954055Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "200"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ip_list2 = l1 + l2\n",
    "len(ip_list2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "36795d75",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-28T05:02:12.194124Z",
     "start_time": "2023-07-28T05:02:12.168322Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'openai' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/xf/l5jsvn9s4y73c622dmjv9p0r0000gn/T/ipykernel_47166/4263613609.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_list\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m         \u001b[0moutput_list\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchat_gpt_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m     \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'Input'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtemp_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Output'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0moutput_list\u001b[0m \u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/var/folders/xf/l5jsvn9s4y73c622dmjv9p0r0000gn/T/ipykernel_47166/2048028925.py\u001b[0m in \u001b[0;36mchat_gpt_response\u001b[0;34m(sample)\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0mfinal_prompt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msample\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m     response=openai.ChatCompletion.create(\n\u001b[0m\u001b[1;32m      5\u001b[0m         \u001b[0mmodel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"gpt-3.5-turbo\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m         \u001b[0mmessages\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"role\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"user\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"content\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfinal_prompt\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'openai' is not defined"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "\n",
    "batch_size = 100  #Tunable parameter\n",
    "\n",
    "while len(ip_list2) > 0:\n",
    "    temp_list = ip_list2[:batch_size] \n",
    "    output_list = []\n",
    "\n",
    "    for item in temp_list:\n",
    "        output_list.append(chat_gpt_response(item))\n",
    "    \n",
    "    data = {'Input': temp_list,'Output': output_list }\n",
    "    df_ans = pd.DataFrame(data)\n",
    "    df_ans.to_csv(\"output_wsc_part_2_true_shard_\" + str(count) + \".csv\", index = False)\n",
    "    count+=1\n",
    "    print(\"shard number:\",count,\"Done\")\n",
    "    ip_list2 = ip_list2[batch_size:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c870fcd7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:root] *",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
