{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3f347b19",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-21T02:30:43.842214Z",
     "start_time": "2023-07-21T02:30:43.121933Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import openai\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8fac28cb",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-21T02:30:50.799467Z",
     "start_time": "2023-07-21T02:30:50.792103Z"
    }
   },
   "outputs": [],
   "source": [
    "prompt1 = '''Given a paragraph, frame a question that requires information from multiple sentences of the paragraph to be answered correctly. \n",
    "Then, generate a set of options. The correct answer may be a combination of one, some, or all options. \n",
    "Also include options that do not answer the above question. Finally, output the combination of options that form the correct answer.\n",
    "\n",
    "Paragraph:\n",
    "'''\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40fce8d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def chat_gpt_response(sample, max_retries=5, initial_delay=2.0):\n",
    "    \"\"\"Send one passage to gpt-3.5-turbo with the ``prompt1`` instructions and return the reply.\n",
    "\n",
    "    Args:\n",
    "        sample: Passage text; it is appended to the module-level ``prompt1`` header.\n",
    "        max_retries: Number of retries allowed after a transient API error.\n",
    "        initial_delay: Seconds slept before the first retry; doubled after each retry.\n",
    "\n",
    "    Returns:\n",
    "        The message content of the first choice in the chat completion.\n",
    "\n",
    "    Raises:\n",
    "        openai.error.OpenAIError: A transient error is re-raised once ``max_retries``\n",
    "            retries are used up; any other API error propagates immediately.\n",
    "    \"\"\"\n",
    "    final_prompt = prompt1 + sample\n",
    "\n",
    "    # Only failures that may succeed on a later attempt are retried; authentication\n",
    "    # and invalid-request errors are left to surface on the first occurrence.\n",
    "    transient_errors = (\n",
    "        openai.error.RateLimitError,\n",
    "        openai.error.APIError,\n",
    "        openai.error.APIConnectionError,\n",
    "        openai.error.Timeout,\n",
    "        openai.error.ServiceUnavailableError,\n",
    "    )\n",
    "\n",
    "    delay = initial_delay\n",
    "    for attempt in range(max_retries + 1):\n",
    "        try:\n",
    "            response = openai.ChatCompletion.create(\n",
    "                model=\"gpt-3.5-turbo\",\n",
    "                messages=[{\"role\": \"user\", \"content\": final_prompt}],\n",
    "                temperature=0.7,\n",
    "                max_tokens=2048,\n",
    "                top_p=1,\n",
    "                frequency_penalty=0,\n",
    "                presence_penalty=0,\n",
    "                stop=None,\n",
    "            )\n",
    "            return response[\"choices\"][0][\"message\"][\"content\"]\n",
    "        except transient_errors:\n",
    "            if attempt == max_retries:\n",
    "                raise\n",
    "            # Exponential backoff so a rate-limited run does not hammer the API.\n",
    "            time.sleep(delay)\n",
    "            delay *= 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8d9919b8",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-07-21T02:33:52.560948Z",
     "start_time": "2023-07-21T02:33:52.495672Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5651"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the passage CSV and pull the 'passage_list' column into a plain Python list.\n",
    "df = pd.read_csv(\"multi_rc_passage_list.csv\")\n",
    "# Both names are bound to the same list object (no copy is made).\n",
    "ip_list = ip_list2 = list(df[\"passage_list\"])\n",
    "len(ip_list2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd38e644",
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 100\n",
    "start_shard = 0  # set to the first missing shard number to resume an interrupted run\n",
    "\n",
    "# Slice ip_list2 by offset instead of shrinking it, so shard N always covers passages\n",
    "# [N * batch_size, (N + 1) * batch_size). Consuming the list while resetting the counter\n",
    "# would make a re-run after an interruption write later passages into shard 0.\n",
    "count = start_shard\n",
    "for start in range(start_shard * batch_size, len(ip_list2), batch_size):\n",
    "    temp_list = ip_list2[start:start + batch_size]\n",
    "    output_list = [chat_gpt_response(item) for item in temp_list]\n",
    "\n",
    "    # One CSV per batch, so a failure costs at most the batch in progress.\n",
    "    df_ans = pd.DataFrame({'Input': temp_list, 'Output': output_list})\n",
    "    df_ans.to_csv(\"output_multi_rc_qa_pair_shard_\" + str(count) + \".csv\")\n",
    "    count += 1\n",
    "    print(\"shard number:\", count, \"Done\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:root] *",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
