{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import random\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FigQA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>startphrase</th>\n",
       "      <th>ending1</th>\n",
       "      <th>ending2</th>\n",
       "      <th>labels</th>\n",
       "      <th>valid</th>\n",
       "      <th>qid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Her word had the strength of titanium.</td>\n",
       "      <td>Her promises can be believed.</td>\n",
       "      <td>Her promises cannot be trusted.</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Her word had the strength of a wine glass.</td>\n",
       "      <td>Her promises can be believed.</td>\n",
       "      <td>Her promises cannot be trusted.</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>His kisses have the passion of lovers meeting ...</td>\n",
       "      <td>His kisses are demonstrative and intense.</td>\n",
       "      <td>His kiss is unemotional.</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>His kisses have the passion of a couple in a l...</td>\n",
       "      <td>His kisses are demonstrative and intense.</td>\n",
       "      <td>His kiss is unemotional.</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>This winter is as cold as my mother-in-law tow...</td>\n",
       "      <td>It's very cold</td>\n",
       "      <td>It's pretty warm</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         startphrase  \\\n",
       "0             Her word had the strength of titanium.   \n",
       "1         Her word had the strength of a wine glass.   \n",
       "2  His kisses have the passion of lovers meeting ...   \n",
       "3  His kisses have the passion of a couple in a l...   \n",
       "4  This winter is as cold as my mother-in-law tow...   \n",
       "\n",
       "                                     ending1                          ending2  \\\n",
       "0              Her promises can be believed.  Her promises cannot be trusted.   \n",
       "1              Her promises can be believed.  Her promises cannot be trusted.   \n",
       "2  His kisses are demonstrative and intense.         His kiss is unemotional.   \n",
       "3  His kisses are demonstrative and intense.         His kiss is unemotional.   \n",
       "4                             It's very cold                 It's pretty warm   \n",
       "\n",
       "   labels  valid qid  \n",
       "0       0      1  10  \n",
       "1       1      1  10  \n",
       "2       0      1  11  \n",
       "3       1      1  11  \n",
       "4       0      1  14  "
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "figqa = pd.read_csv(\"./../data/fig_qa.csv\")\n",
    "figqa.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Agreement Disagreement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "ans = []\n",
    "ops = []\n",
    "cts = []\n",
    "qid = []\n",
    "for index,row in figqa.iterrows():\n",
    "    ct1 = \"\"\n",
    "    ct1 += \"Speaker_1: \" + row['startphrase'] + \"\\n\"\n",
    "    ct2 = ct1 + \"Speaker_2: \" + row['ending2']\n",
    "    ct1 += \"Speaker_2: \" + row['ending1']\n",
    "    options = ['Speaker_2 agrees with Speaker_1','Speaker_2 disagrees with Speaker_1']\n",
    "    if row['labels']==0:\n",
    "        ans.append(options[0])\n",
    "        ans.append(options[1])\n",
    "    else:\n",
    "        ans.append(options[1])\n",
    "        ans.append(options[0])\n",
    "    ops.extend([options]*2)\n",
    "    cts.extend([ct1,ct2])\n",
    "    qid.extend([row['qid']]*2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>correct answer</th>\n",
       "      <th>options</th>\n",
       "      <th>pretext</th>\n",
       "      <th>qid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: Her word had the strength of titani...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: Her word had the strength of titani...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: Her word had the strength of a wine...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: Her word had the strength of a wine...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: His kisses have the passion of love...</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       correct answer  \\\n",
       "0     Speaker_2 agrees with Speaker_1   \n",
       "1  Speaker_2 disagrees with Speaker_1   \n",
       "2  Speaker_2 disagrees with Speaker_1   \n",
       "3     Speaker_2 agrees with Speaker_1   \n",
       "4     Speaker_2 agrees with Speaker_1   \n",
       "\n",
       "                                             options  \\\n",
       "0  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "1  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "2  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "3  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "4  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "\n",
       "                                             pretext qid  \n",
       "0  Speaker_1: Her word had the strength of titani...  10  \n",
       "1  Speaker_1: Her word had the strength of titani...  10  \n",
       "2  Speaker_1: Her word had the strength of a wine...  10  \n",
       "3  Speaker_1: Her word had the strength of a wine...  10  \n",
       "4  Speaker_1: His kisses have the passion of love...  11  "
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fig_agree = pd.DataFrame()\n",
    "fig_agree['correct answer'] = ans\n",
    "fig_agree['options'] = ops\n",
    "fig_agree['pretext'] = cts\n",
    "fig_agree['qid'] = qid\n",
    "fig_agree.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('Speaker_1: Her word had the strength of titanium.\\nSpeaker_2: Her promises can be believed.',\n",
       " ['Speaker_2 agrees with Speaker_1', 'Speaker_2 disagrees with Speaker_1'],\n",
       " 'Speaker_2 agrees with Speaker_1')"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cts[0],ops[0],ans[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "narrowed_df = pd.DataFrame()\n",
    "sampled_indices = []\n",
    "for cat in fig_agree['correct answer'].unique():\n",
    "    cat_df = fig_agree[fig_agree['correct answer'] == cat]\n",
    "    sampled_cat_df = cat_df.sample(n = 1000, random_state=42)\n",
    "    narrowed_df = pd.concat([narrowed_df,sampled_cat_df])\n",
    "    sampled_indices.extend(sampled_cat_df.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "931"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(set(narrowed_df['qid']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(narrowed_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>correct answer</th>\n",
       "      <th>options</th>\n",
       "      <th>pretext</th>\n",
       "      <th>qid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14024</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The argument was as logical as a ra...</td>\n",
       "      <td>X2777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>636</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The defense's argument had the simp...</td>\n",
       "      <td>392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5379</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: My legs are Tree stalks\\nSpeaker_2:...</td>\n",
       "      <td>X615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12423</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The watermelon was as sweet as a ch...</td>\n",
       "      <td>X2376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13179</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: They felt the emotion as a warm bat...</td>\n",
       "      <td>X2565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6820</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The woman was as explosive as a fat...</td>\n",
       "      <td>X976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11789</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: She is worth 10 XP\\nSpeaker_2: She ...</td>\n",
       "      <td>X2218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4490</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: This razor is sharp like a pillow\\n...</td>\n",
       "      <td>X393</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6950</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: my mustache is a sphynx cat\\nSpeake...</td>\n",
       "      <td>X1008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13678</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The sun is happy\\nSpeaker_2: The we...</td>\n",
       "      <td>X2690</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                           correct answer  \\\n",
       "14024     Speaker_2 agrees with Speaker_1   \n",
       "636       Speaker_2 agrees with Speaker_1   \n",
       "5379      Speaker_2 agrees with Speaker_1   \n",
       "12423     Speaker_2 agrees with Speaker_1   \n",
       "13179     Speaker_2 agrees with Speaker_1   \n",
       "...                                   ...   \n",
       "6820   Speaker_2 disagrees with Speaker_1   \n",
       "11789  Speaker_2 disagrees with Speaker_1   \n",
       "4490   Speaker_2 disagrees with Speaker_1   \n",
       "6950   Speaker_2 disagrees with Speaker_1   \n",
       "13678  Speaker_2 disagrees with Speaker_1   \n",
       "\n",
       "                                                 options  \\\n",
       "14024  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "636    [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "5379   [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "12423  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "13179  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "...                                                  ...   \n",
       "6820   [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "11789  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "4490   [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "6950   [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "13678  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "\n",
       "                                                 pretext    qid  \n",
       "14024  Speaker_1: The argument was as logical as a ra...  X2777  \n",
       "636    Speaker_1: The defense's argument had the simp...    392  \n",
       "5379   Speaker_1: My legs are Tree stalks\\nSpeaker_2:...   X615  \n",
       "12423  Speaker_1: The watermelon was as sweet as a ch...  X2376  \n",
       "13179  Speaker_1: They felt the emotion as a warm bat...  X2565  \n",
       "...                                                  ...    ...  \n",
       "6820   Speaker_1: The woman was as explosive as a fat...   X976  \n",
       "11789  Speaker_1: She is worth 10 XP\\nSpeaker_2: She ...  X2218  \n",
       "4490   Speaker_1: This razor is sharp like a pillow\\n...   X393  \n",
       "6950   Speaker_1: my mustache is a sphynx cat\\nSpeake...  X1008  \n",
       "13678  Speaker_1: The sun is happy\\nSpeaker_2: The we...  X2690  \n",
       "\n",
       "[2000 rows x 4 columns]"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "narrowed_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "shuffled_df = narrowed_df.sample(frac=1, random_state=42)  # Set random_state for reproducibility\n",
    "shuffled_df.reset_index(drop=True, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>correct answer</th>\n",
       "      <th>options</th>\n",
       "      <th>pretext</th>\n",
       "      <th>qid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The chair was comfortable like a pi...</td>\n",
       "      <td>X1607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: the man's idea had the creativity o...</td>\n",
       "      <td>X3149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The girl's taste was as rich as cha...</td>\n",
       "      <td>1367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The student has the insight of a go...</td>\n",
       "      <td>873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: Her eyes are real intoxication.\\nSp...</td>\n",
       "      <td>X700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1995</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The shirt was as bright as a dark c...</td>\n",
       "      <td>X2046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: I have the face of a model\\nSpeaker...</td>\n",
       "      <td>X363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1997</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The chair was comfortable like a pi...</td>\n",
       "      <td>X1607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1998</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The lizard was as ugly as a dead sq...</td>\n",
       "      <td>X2571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1999</th>\n",
       "      <td>Speaker_2 disagrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 di...</td>\n",
       "      <td>Speaker_1: The cracker was as crunchy as Bitin...</td>\n",
       "      <td>X2733</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                          correct answer  \\\n",
       "0     Speaker_2 disagrees with Speaker_1   \n",
       "1        Speaker_2 agrees with Speaker_1   \n",
       "2     Speaker_2 disagrees with Speaker_1   \n",
       "3        Speaker_2 agrees with Speaker_1   \n",
       "4     Speaker_2 disagrees with Speaker_1   \n",
       "...                                  ...   \n",
       "1995  Speaker_2 disagrees with Speaker_1   \n",
       "1996  Speaker_2 disagrees with Speaker_1   \n",
       "1997     Speaker_2 agrees with Speaker_1   \n",
       "1998  Speaker_2 disagrees with Speaker_1   \n",
       "1999  Speaker_2 disagrees with Speaker_1   \n",
       "\n",
       "                                                options  \\\n",
       "0     [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "1     [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "2     [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "3     [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "4     [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "...                                                 ...   \n",
       "1995  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "1996  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "1997  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "1998  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "1999  [Speaker_2 agrees with Speaker_1, Speaker_2 di...   \n",
       "\n",
       "                                                pretext    qid  \n",
       "0     Speaker_1: The chair was comfortable like a pi...  X1607  \n",
       "1     Speaker_1: the man's idea had the creativity o...  X3149  \n",
       "2     Speaker_1: The girl's taste was as rich as cha...   1367  \n",
       "3     Speaker_1: The student has the insight of a go...    873  \n",
       "4     Speaker_1: Her eyes are real intoxication.\\nSp...   X700  \n",
       "...                                                 ...    ...  \n",
       "1995  Speaker_1: The shirt was as bright as a dark c...  X2046  \n",
       "1996  Speaker_1: I have the face of a model\\nSpeaker...   X363  \n",
       "1997  Speaker_1: The chair was comfortable like a pi...  X1607  \n",
       "1998  Speaker_1: The lizard was as ugly as a dead sq...  X2571  \n",
       "1999  Speaker_1: The cracker was as crunchy as Bitin...  X2733  \n",
       "\n",
       "[2000 rows x 4 columns]"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "shuffled_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "shuffled_df.to_csv(\"./../global_datasets/task_4.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sarcasm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "addon = ['Yeah, ', 'True, ', 'Of course, ', 'Yes, ']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "ans = []\n",
    "ops = []\n",
    "cts = []\n",
    "qid = []\n",
    "for index,row in figqa.iterrows():\n",
    "    ct1 = \"Speaker_1: \" + row['ending1'] + \"\\n\"\n",
    "    ct2 = \"Speaker_1: \" + row['ending2'] + \"\\n\"\n",
    "    ct1 += \"Speaker_2: \" + random.choice(addon) + row['startphrase']\n",
    "    ct2 += \"Speaker_2: \" + random.choice(addon) + row['startphrase']\n",
    "    options = ['Speaker_2 agrees with Speaker_1','Speaker_2 is being sarcastic with Speaker_1']\n",
    "    if row['labels']==0:\n",
    "        ans.append(options[0])\n",
    "        ans.append(options[1])\n",
    "    else:\n",
    "        ans.append(options[1])\n",
    "        ans.append(options[0])\n",
    "    ops.extend([options]*2)\n",
    "    cts.extend([ct1,ct2])\n",
    "    qid.extend([row['qid']]*2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>correct answer</th>\n",
       "      <th>options</th>\n",
       "      <th>pretext</th>\n",
       "      <th>qid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: Her promises can be believed.\\nSpea...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: Her promises cannot be trusted.\\nSp...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: Her promises can be believed.\\nSpea...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: Her promises cannot be trusted.\\nSp...</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: His kisses are demonstrative and in...</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                correct answer  \\\n",
       "0              Speaker_2 agrees with Speaker_1   \n",
       "1  Speaker_2 is being sarcastic with Speaker_1   \n",
       "2  Speaker_2 is being sarcastic with Speaker_1   \n",
       "3              Speaker_2 agrees with Speaker_1   \n",
       "4              Speaker_2 agrees with Speaker_1   \n",
       "\n",
       "                                             options  \\\n",
       "0  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "1  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "2  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "3  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "4  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "\n",
       "                                             pretext qid  \n",
       "0  Speaker_1: Her promises can be believed.\\nSpea...  10  \n",
       "1  Speaker_1: Her promises cannot be trusted.\\nSp...  10  \n",
       "2  Speaker_1: Her promises can be believed.\\nSpea...  10  \n",
       "3  Speaker_1: Her promises cannot be trusted.\\nSp...  10  \n",
       "4  Speaker_1: His kisses are demonstrative and in...  11  "
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fig_sar = pd.DataFrame()\n",
    "fig_sar['correct answer'] = ans\n",
    "fig_sar['options'] = ops\n",
    "fig_sar['pretext'] = cts\n",
    "fig_sar['qid'] = qid\n",
    "fig_sar.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Speaker_1: Money is good\n",
      "Speaker_2: Yes, Money is a murderer\n",
      "['Speaker_2 agrees with Speaker_1', 'Speaker_2 is being sarcastic with Speaker_1']\n",
      "Speaker_2 is being sarcastic with Speaker_1\n"
     ]
    }
   ],
   "source": [
    "j = 38\n",
    "print(cts[j])\n",
    "print(ops[j])\n",
    "print(ans[j])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw a class-balanced subset: the same number of rows for each distinct\n",
    "# 'correct answer' value, seeded so the draw is reproducible.\n",
    "SAMPLES_PER_CLASS = 1000\n",
    "sampled_frames = []\n",
    "for cat in fig_sar['correct answer'].unique():\n",
    "    cat_df = fig_sar[fig_sar['correct answer'] == cat]\n",
    "    sampled_cat_df = cat_df.sample(n=SAMPLES_PER_CLASS, random_state=42)\n",
    "    sampled_frames.append(sampled_cat_df)\n",
    "# Concatenate once after the loop rather than growing a frame on every pass;\n",
    "# the row labels of fig_sar are preserved in the result.\n",
    "narrowed_df = pd.concat(sampled_frames) if sampled_frames else pd.DataFrame()\n",
    "sampled_indices = list(narrowed_df.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "931"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct question ids present in the sampled frame.\n",
    "narrowed_df['qid'].nunique(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row count of the sampled frame.\n",
    "narrowed_df.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>correct answer</th>\n",
       "      <th>options</th>\n",
       "      <th>pretext</th>\n",
       "      <th>qid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14024</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The argument was not logical\\nSpeak...</td>\n",
       "      <td>X2777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>636</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The argument was complicated.\\nSpea...</td>\n",
       "      <td>392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5379</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: My legs are thick and large\\nSpeake...</td>\n",
       "      <td>X615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12423</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The watermelon was sweet\\nSpeaker_2...</td>\n",
       "      <td>X2376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13179</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The emotion felt gentle and pleasan...</td>\n",
       "      <td>X2565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6820</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The woman was very explosive.\\nSpea...</td>\n",
       "      <td>X976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11789</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: She is experienced.\\nSpeaker_2: Yes...</td>\n",
       "      <td>X2218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4490</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: This razor is very sharp\\nSpeaker_2...</td>\n",
       "      <td>X393</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6950</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: my mustache is bushy and long\\nSpea...</td>\n",
       "      <td>X1008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13678</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The weather is really hot!\\nSpeaker...</td>\n",
       "      <td>X2690</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    correct answer  \\\n",
       "14024              Speaker_2 agrees with Speaker_1   \n",
       "636                Speaker_2 agrees with Speaker_1   \n",
       "5379               Speaker_2 agrees with Speaker_1   \n",
       "12423              Speaker_2 agrees with Speaker_1   \n",
       "13179              Speaker_2 agrees with Speaker_1   \n",
       "...                                            ...   \n",
       "6820   Speaker_2 is being sarcastic with Speaker_1   \n",
       "11789  Speaker_2 is being sarcastic with Speaker_1   \n",
       "4490   Speaker_2 is being sarcastic with Speaker_1   \n",
       "6950   Speaker_2 is being sarcastic with Speaker_1   \n",
       "13678  Speaker_2 is being sarcastic with Speaker_1   \n",
       "\n",
       "                                                 options  \\\n",
       "14024  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "636    [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "5379   [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "12423  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "13179  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "...                                                  ...   \n",
       "6820   [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "11789  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "4490   [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "6950   [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "13678  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "\n",
       "                                                 pretext    qid  \n",
       "14024  Speaker_1: The argument was not logical\\nSpeak...  X2777  \n",
       "636    Speaker_1: The argument was complicated.\\nSpea...    392  \n",
       "5379   Speaker_1: My legs are thick and large\\nSpeake...   X615  \n",
       "12423  Speaker_1: The watermelon was sweet\\nSpeaker_2...  X2376  \n",
       "13179  Speaker_1: The emotion felt gentle and pleasan...  X2565  \n",
       "...                                                  ...    ...  \n",
       "6820   Speaker_1: The woman was very explosive.\\nSpea...   X976  \n",
       "11789  Speaker_1: She is experienced.\\nSpeaker_2: Yes...  X2218  \n",
       "4490   Speaker_1: This razor is very sharp\\nSpeaker_2...   X393  \n",
       "6950   Speaker_1: my mustache is bushy and long\\nSpea...  X1008  \n",
       "13678  Speaker_1: The weather is really hot!\\nSpeaker...  X2690  \n",
       "\n",
       "[2000 rows x 4 columns]"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the sampled frame; its index still carries the row labels from fig_sar.\n",
    "narrowed_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shuffle every row with a fixed seed, then renumber the index from 0.\n",
    "shuffled_df = narrowed_df.sample(frac=1, random_state=42).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>correct answer</th>\n",
       "      <th>options</th>\n",
       "      <th>pretext</th>\n",
       "      <th>qid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The chair was uncomfortable.\\nSpeak...</td>\n",
       "      <td>X1607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The idea was very creative\\nSpeaker...</td>\n",
       "      <td>X3149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The girl had cheap and tacky taste....</td>\n",
       "      <td>1367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The student is unwise\\nSpeaker_2: O...</td>\n",
       "      <td>873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: Her eyes are ugly.\\nSpeaker_2: True...</td>\n",
       "      <td>X700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1995</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The shirt was bright\\nSpeaker_2: Of...</td>\n",
       "      <td>X2046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: My face is hideous\\nSpeaker_2: True...</td>\n",
       "      <td>X363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1997</th>\n",
       "      <td>Speaker_2 agrees with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The chair was very comfortable.\\nSp...</td>\n",
       "      <td>X1607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1998</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: it was cute\\nSpeaker_2: True, The l...</td>\n",
       "      <td>X2571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1999</th>\n",
       "      <td>Speaker_2 is being sarcastic with Speaker_1</td>\n",
       "      <td>[Speaker_2 agrees with Speaker_1, Speaker_2 is...</td>\n",
       "      <td>Speaker_1: The cracker made loud crunching noi...</td>\n",
       "      <td>X2733</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   correct answer  \\\n",
       "0     Speaker_2 is being sarcastic with Speaker_1   \n",
       "1                 Speaker_2 agrees with Speaker_1   \n",
       "2     Speaker_2 is being sarcastic with Speaker_1   \n",
       "3                 Speaker_2 agrees with Speaker_1   \n",
       "4     Speaker_2 is being sarcastic with Speaker_1   \n",
       "...                                           ...   \n",
       "1995  Speaker_2 is being sarcastic with Speaker_1   \n",
       "1996  Speaker_2 is being sarcastic with Speaker_1   \n",
       "1997              Speaker_2 agrees with Speaker_1   \n",
       "1998  Speaker_2 is being sarcastic with Speaker_1   \n",
       "1999  Speaker_2 is being sarcastic with Speaker_1   \n",
       "\n",
       "                                                options  \\\n",
       "0     [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "1     [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "2     [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "3     [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "4     [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "...                                                 ...   \n",
       "1995  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "1996  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "1997  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "1998  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "1999  [Speaker_2 agrees with Speaker_1, Speaker_2 is...   \n",
       "\n",
       "                                                pretext    qid  \n",
       "0     Speaker_1: The chair was uncomfortable.\\nSpeak...  X1607  \n",
       "1     Speaker_1: The idea was very creative\\nSpeaker...  X3149  \n",
       "2     Speaker_1: The girl had cheap and tacky taste....   1367  \n",
       "3     Speaker_1: The student is unwise\\nSpeaker_2: O...    873  \n",
       "4     Speaker_1: Her eyes are ugly.\\nSpeaker_2: True...   X700  \n",
       "...                                                 ...    ...  \n",
       "1995  Speaker_1: The shirt was bright\\nSpeaker_2: Of...  X2046  \n",
       "1996  Speaker_1: My face is hideous\\nSpeaker_2: True...   X363  \n",
       "1997  Speaker_1: The chair was very comfortable.\\nSp...  X1607  \n",
       "1998  Speaker_1: it was cute\\nSpeaker_2: True, The l...  X2571  \n",
       "1999  Speaker_1: The cracker made loud crunching noi...  X2733  \n",
       "\n",
       "[2000 rows x 4 columns]"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the shuffled frame.\n",
    "shuffled_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the shuffled sample; to_csv writes the index as the first column by default.\n",
    "shuffled_df.to_csv(\"./../global_datasets/task_5.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Flute"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2023-09-11 21:44:05--  https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/idiomNLI/idiom_train.jsonl\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 746690 (729K) [text/plain]\n",
      "Saving to: ‘idiom_train.jsonl’\n",
      "\n",
      "idiom_train.jsonl   100%[===================>] 729.19K  2.91MB/s    in 0.2s    \n",
      "\n",
      "2023-09-11 21:44:05 (2.91 MB/s) - ‘idiom_train.jsonl’ saved [746690/746690]\n",
      "\n",
      "--2023-09-11 21:44:06--  https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/metaphorNLI/metaphor_train.jsonl\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 381917 (373K) [text/plain]\n",
      "Saving to: ‘metaphor_train.jsonl’\n",
      "\n",
      "metaphor_train.json 100%[===================>] 372.97K  2.22MB/s    in 0.2s    \n",
      "\n",
      "2023-09-11 21:44:06 (2.22 MB/s) - ‘metaphor_train.jsonl’ saved [381917/381917]\n",
      "\n",
      "--2023-09-11 21:44:06--  https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/SarcasmNLI/sarcasm_train.jsonl\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 1358730 (1.3M) [text/plain]\n",
      "Saving to: ‘sarcasm_train.jsonl’\n",
      "\n",
      "sarcasm_train.jsonl 100%[===================>]   1.29M  4.69MB/s    in 0.3s    \n",
      "\n",
      "2023-09-11 21:44:07 (4.69 MB/s) - ‘sarcasm_train.jsonl’ saved [1358730/1358730]\n",
      "\n",
      "--2023-09-11 21:44:07--  https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/simileNLI/simile_train.jsonl\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 402910 (393K) [text/plain]\n",
      "Saving to: ‘simile_train.jsonl’\n",
      "\n",
      "simile_train.jsonl  100%[===================>] 393.47K  --.-KB/s    in 0.06s   \n",
      "\n",
      "2023-09-11 21:44:07 (6.83 MB/s) - ‘simile_train.jsonl’ saved [402910/402910]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Fetch the four *_train.jsonl files read by the cells below. -nc skips a file that is\n",
    "# already present, so re-running this cell does not leave *.jsonl.1 duplicates behind.\n",
    "!wget -nc https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/idiomNLI/idiom_train.jsonl\n",
    "!wget -nc https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/metaphorNLI/metaphor_train.jsonl\n",
    "!wget -nc https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/SarcasmNLI/sarcasm_train.jsonl\n",
    "!wget -nc https://raw.githubusercontent.com/tuhinjubcse/model-in-the-loop-fig-lang/main/simileNLI/simile_train.jsonl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from an empty frame; each loader cell below appends its rows to it.\n",
    "flute = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the idiom NLI records by id so that each id collects one entailed and one\n",
    "# contradictory premise, then append the resulting rows to flute.\n",
    "fpath = \"./idiom_train.jsonl\"\n",
    "data_list = {}\n",
    "# JSON text is UTF-8; do not depend on the platform default encoding.\n",
    "with open(fpath, \"r\", encoding=\"utf-8\") as json_file:\n",
    "    for line in json_file:\n",
    "        data = json.loads(line)\n",
    "        # Dict membership is a hash lookup; no key list is rebuilt for every line.\n",
    "        if data['id'] not in data_list:\n",
    "            # First record for this id: its premise fills 'meaning1', and its label\n",
    "            # decides which slot is recorded as the entailed (correct) one.\n",
    "            entry = {\n",
    "                'hypothesis' : data['hypothesis'],\n",
    "                'meaning1' : data['premise'],\n",
    "                'done' : 'no'\n",
    "            }\n",
    "            if data['label']=='Entailment':\n",
    "                entry['correct_answer'] = 'meaning1'\n",
    "                entry['entailed_exp'] = data['explanation']\n",
    "            else:\n",
    "                entry['correct_answer'] = 'meaning2'\n",
    "                entry['contradictory_exp'] = data['explanation']\n",
    "            data_list[data['id']] = entry\n",
    "        else:\n",
    "            entry = data_list[data['id']]\n",
    "            if entry['correct_answer'] == 'meaning1':\n",
    "                if data['label'] == 'Contradiction':\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['contradictory_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "            else:\n",
    "                if data['label'] == 'Entailment':\n",
    "                    # 'meaning1' already holds the contradictory premise, so the entailed\n",
    "                    # one goes into 'meaning2', the slot correct_answer points at.\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['entailed_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "df = pd.DataFrame.from_dict(data_list, orient='index')\n",
    "flute = pd.concat([flute,df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the metaphor NLI records by id so that each id collects one entailed and one\n",
    "# contradictory premise, then append the resulting rows to flute.\n",
    "fpath = \"./metaphor_train.jsonl\"\n",
    "data_list = {}\n",
    "# JSON text is UTF-8; do not depend on the platform default encoding.\n",
    "with open(fpath, \"r\", encoding=\"utf-8\") as json_file:\n",
    "    for line in json_file:\n",
    "        data = json.loads(line)\n",
    "        # Dict membership is a hash lookup; no key list is rebuilt for every line.\n",
    "        if data['id'] not in data_list:\n",
    "            # First record for this id: its premise fills 'meaning1', and its label\n",
    "            # decides which slot is recorded as the entailed (correct) one.\n",
    "            entry = {\n",
    "                'hypothesis' : data['hypothesis'],\n",
    "                'meaning1' : data['premise'],\n",
    "                'done' : 'no'\n",
    "            }\n",
    "            if data['label']=='Entailment':\n",
    "                entry['correct_answer'] = 'meaning1'\n",
    "                entry['entailed_exp'] = data['explanation']\n",
    "            else:\n",
    "                entry['correct_answer'] = 'meaning2'\n",
    "                entry['contradictory_exp'] = data['explanation']\n",
    "            data_list[data['id']] = entry\n",
    "        else:\n",
    "            entry = data_list[data['id']]\n",
    "            if entry['correct_answer'] == 'meaning1':\n",
    "                if data['label'] == 'Contradiction':\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['contradictory_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "            else:\n",
    "                if data['label'] == 'Entailment':\n",
    "                    # 'meaning1' already holds the contradictory premise, so the entailed\n",
    "                    # one goes into 'meaning2', the slot correct_answer points at.\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['entailed_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "df = pd.DataFrame.from_dict(data_list, orient='index')\n",
    "flute = pd.concat([flute,df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the sarcasm NLI records by id so that each id collects one entailed and one\n",
    "# contradictory premise, then append the resulting rows to flute.\n",
    "fpath = \"./sarcasm_train.jsonl\"\n",
    "data_list = {}\n",
    "# JSON text is UTF-8; do not depend on the platform default encoding.\n",
    "with open(fpath, \"r\", encoding=\"utf-8\") as json_file:\n",
    "    for line in json_file:\n",
    "        data = json.loads(line)\n",
    "        # Dict membership is a hash lookup; no key list is rebuilt for every line.\n",
    "        if data['id'] not in data_list:\n",
    "            # First record for this id: its premise fills 'meaning1', and its label\n",
    "            # decides which slot is recorded as the entailed (correct) one.\n",
    "            entry = {\n",
    "                'hypothesis' : data['hypothesis'],\n",
    "                'meaning1' : data['premise'],\n",
    "                'done' : 'no'\n",
    "            }\n",
    "            if data['label']=='Entailment':\n",
    "                entry['correct_answer'] = 'meaning1'\n",
    "                entry['entailed_exp'] = data['explanation']\n",
    "            else:\n",
    "                entry['correct_answer'] = 'meaning2'\n",
    "                entry['contradictory_exp'] = data['explanation']\n",
    "            data_list[data['id']] = entry\n",
    "        else:\n",
    "            entry = data_list[data['id']]\n",
    "            if entry['correct_answer'] == 'meaning1':\n",
    "                if data['label'] == 'Contradiction':\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['contradictory_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "            else:\n",
    "                if data['label'] == 'Entailment':\n",
    "                    # 'meaning1' already holds the contradictory premise, so the entailed\n",
    "                    # one goes into 'meaning2', the slot correct_answer points at.\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['entailed_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "df = pd.DataFrame.from_dict(data_list, orient='index')\n",
    "flute = pd.concat([flute,df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the simile NLI records by id so that each id collects one entailed and one\n",
    "# contradictory premise, then append the resulting rows to flute.\n",
    "fpath = \"./simile_train.jsonl\"\n",
    "data_list = {}\n",
    "# JSON text is UTF-8; do not depend on the platform default encoding.\n",
    "with open(fpath, \"r\", encoding=\"utf-8\") as json_file:\n",
    "    for line in json_file:\n",
    "        data = json.loads(line)\n",
    "        # Dict membership is a hash lookup; no key list is rebuilt for every line.\n",
    "        if data['id'] not in data_list:\n",
    "            # First record for this id: its premise fills 'meaning1', and its label\n",
    "            # decides which slot is recorded as the entailed (correct) one.\n",
    "            entry = {\n",
    "                'hypothesis' : data['hypothesis'],\n",
    "                'meaning1' : data['premise'],\n",
    "                'done' : 'no'\n",
    "            }\n",
    "            if data['label']=='Entailment':\n",
    "                entry['correct_answer'] = 'meaning1'\n",
    "                entry['entailed_exp'] = data['explanation']\n",
    "            else:\n",
    "                entry['correct_answer'] = 'meaning2'\n",
    "                entry['contradictory_exp'] = data['explanation']\n",
    "            data_list[data['id']] = entry\n",
    "        else:\n",
    "            entry = data_list[data['id']]\n",
    "            if entry['correct_answer'] == 'meaning1':\n",
    "                if data['label'] == 'Contradiction':\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['contradictory_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "            else:\n",
    "                if data['label'] == 'Entailment':\n",
    "                    # 'meaning1' already holds the contradictory premise, so the entailed\n",
    "                    # one goes into 'meaning2', the slot correct_answer points at.\n",
    "                    entry['meaning2'] = data['premise']\n",
    "                    entry['entailed_exp'] = data['explanation']\n",
    "                    entry['done'] = 'yes'\n",
    "df = pd.DataFrame.from_dict(data_list, orient='index')\n",
    "flute = pd.concat([flute,df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "correct_answer\n",
       "meaning1    625\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label balance of df, i.e. the frame built by whichever loader cell ran last.\n",
    "df.loc[:, 'correct_answer'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3388"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total number of rows accumulated in flute so far.\n",
    "flute.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "correct_answer\n",
       "meaning1    3163\n",
       "meaning2     225\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label balance across everything accumulated in flute.\n",
    "flute.loc[:, 'correct_answer'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only rows with no missing values, then renumber the index from 0.\n",
    "flute = flute.dropna().reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2716"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Discard rows where either paraphrase contains ' not ' (with the surrounding spaces).\n",
    "mentions_not = flute['meaning1'].str.contains(' not ') | flute['meaning2'].str.contains(' not ')\n",
    "filtered_df = flute[~mentions_not]\n",
    "len(filtered_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hypothesis</th>\n",
       "      <th>meaning1</th>\n",
       "      <th>done</th>\n",
       "      <th>correct_answer</th>\n",
       "      <th>entailed_exp</th>\n",
       "      <th>meaning2</th>\n",
       "      <th>contradictory_exp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>To add insult to injury, a boy was leading a h...</td>\n",
       "      <td>To make things worse, a boy was leading a hand...</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>To add insult to injury means to make a bad si...</td>\n",
       "      <td>In order to make things a lot better, a boy wa...</td>\n",
       "      <td>To add insult to injury means to make a bad si...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Oh, you know how it is, mother, when you spend...</td>\n",
       "      <td>Oh, you know how it is, mother, when you spend...</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>An old flame is a former lover, so spending th...</td>\n",
       "      <td>Oh, you know how it is, mother, when you spend...</td>\n",
       "      <td>An old flame is a former lover, but spending t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Definitely a good person, but one could tell s...</td>\n",
       "      <td>Definitely a good person, but one could tell s...</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>And all that jazz means and everything else, s...</td>\n",
       "      <td>Definitely a good person, but one could tell s...</td>\n",
       "      <td>And all that jazz means and everything else, b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>To ask what's it like to be a warden in d.c., ...</td>\n",
       "      <td>To ask what's it like to be a warden in d.c., ...</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>And all that jazz means and everything else, s...</td>\n",
       "      <td>To ask what's it like to be a warden in d.c., ...</td>\n",
       "      <td>And all that jazz means and everything else, b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>He would smash into it at full tilt.</td>\n",
       "      <td>He would smash into it as fast as he could.</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>To be at full tilt means to be going as fast a...</td>\n",
       "      <td>He would smash into it as slowly as he could.</td>\n",
       "      <td>To be at full tilt means to be going as fast a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3130</th>\n",
       "      <td>You're loyal as Benedict Arnold</td>\n",
       "      <td>You're disloyal</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>Benedict Arnold was a traitor, so to say someo...</td>\n",
       "      <td>You're loyal</td>\n",
       "      <td>Benedict Arnold was a traitor, so being as loy...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3131</th>\n",
       "      <td>The cat was as white as charcoal</td>\n",
       "      <td>The cat was black</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>White and black are opposite colors on the col...</td>\n",
       "      <td>The cat was white</td>\n",
       "      <td>Charcoal is a black material, not a white one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3132</th>\n",
       "      <td>You're as thin as a balloon</td>\n",
       "      <td>You're fat</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>A balloon is filled with air, and is therefore...</td>\n",
       "      <td>You're thin</td>\n",
       "      <td>A balloon is round, so saying someone is as th...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3133</th>\n",
       "      <td>The woman was as useful as People Magazine</td>\n",
       "      <td>The woman was useless.</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>People Magazine is a magazine full of pictures...</td>\n",
       "      <td>The woman was useful.</td>\n",
       "      <td>People Magazine is not useful, as it is a goss...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3134</th>\n",
       "      <td>The conversation was as juicy as a business me...</td>\n",
       "      <td>The conversation was very dull.</td>\n",
       "      <td>yes</td>\n",
       "      <td>meaning1</td>\n",
       "      <td>A business meeting is a formal occasion where ...</td>\n",
       "      <td>The conversation was very juicy.</td>\n",
       "      <td>A business meeting would not be considered a j...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2716 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             hypothesis  \\\n",
       "0     To add insult to injury, a boy was leading a h...   \n",
       "2     Oh, you know how it is, mother, when you spend...   \n",
       "3     Definitely a good person, but one could tell s...   \n",
       "4     To ask what's it like to be a warden in d.c., ...   \n",
       "7                  He would smash into it at full tilt.   \n",
       "...                                                 ...   \n",
       "3130                    You're loyal as Benedict Arnold   \n",
       "3131                   The cat was as white as charcoal   \n",
       "3132                        You're as thin as a balloon   \n",
       "3133         The woman was as useful as People Magazine   \n",
       "3134  The conversation was as juicy as a business me...   \n",
       "\n",
       "                                               meaning1 done correct_answer  \\\n",
       "0     To make things worse, a boy was leading a hand...  yes       meaning1   \n",
       "2     Oh, you know how it is, mother, when you spend...  yes       meaning1   \n",
       "3     Definitely a good person, but one could tell s...  yes       meaning1   \n",
       "4     To ask what's it like to be a warden in d.c., ...  yes       meaning1   \n",
       "7           He would smash into it as fast as he could.  yes       meaning1   \n",
       "...                                                 ...  ...            ...   \n",
       "3130                                    You're disloyal  yes       meaning1   \n",
       "3131                                  The cat was black  yes       meaning1   \n",
       "3132                                         You're fat  yes       meaning1   \n",
       "3133                             The woman was useless.  yes       meaning1   \n",
       "3134                    The conversation was very dull.  yes       meaning1   \n",
       "\n",
       "                                           entailed_exp  \\\n",
       "0     To add insult to injury means to make a bad si...   \n",
       "2     An old flame is a former lover, so spending th...   \n",
       "3     And all that jazz means and everything else, s...   \n",
       "4     And all that jazz means and everything else, s...   \n",
       "7     To be at full tilt means to be going as fast a...   \n",
       "...                                                 ...   \n",
       "3130  Benedict Arnold was a traitor, so to say someo...   \n",
       "3131  White and black are opposite colors on the col...   \n",
       "3132  A balloon is filled with air, and is therefore...   \n",
       "3133  People Magazine is a magazine full of pictures...   \n",
       "3134  A business meeting is a formal occasion where ...   \n",
       "\n",
       "                                               meaning2  \\\n",
       "0     In order to make things a lot better, a boy wa...   \n",
       "2     Oh, you know how it is, mother, when you spend...   \n",
       "3     Definitely a good person, but one could tell s...   \n",
       "4     To ask what's it like to be a warden in d.c., ...   \n",
       "7         He would smash into it as slowly as he could.   \n",
       "...                                                 ...   \n",
       "3130                                       You're loyal   \n",
       "3131                                  The cat was white   \n",
       "3132                                        You're thin   \n",
       "3133                              The woman was useful.   \n",
       "3134                   The conversation was very juicy.   \n",
       "\n",
       "                                      contradictory_exp  \n",
       "0     To add insult to injury means to make a bad si...  \n",
       "2     An old flame is a former lover, but spending t...  \n",
       "3     And all that jazz means and everything else, b...  \n",
       "4     And all that jazz means and everything else, b...  \n",
       "7     To be at full tilt means to be going as fast a...  \n",
       "...                                                 ...  \n",
       "3130  Benedict Arnold was a traitor, so being as loy...  \n",
       "3131      Charcoal is a black material, not a white one  \n",
       "3132  A balloon is round, so saying someone is as th...  \n",
       "3133  People Magazine is not useful, as it is a goss...  \n",
       "3134  A business meeting would not be considered a j...  \n",
       "\n",
       "[2716 rows x 7 columns]"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1770"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the rows whose two candidate meanings differ, then renumber from 0.\n",
    "meanings_differ = filtered_df['meaning1'].ne(filtered_df['meaning2'])\n",
    "filtered_df = filtered_df.loc[meanings_differ].reset_index(drop=True)\n",
    "filtered_df.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [],
   "source": [
    "SHUFFLE_SEED = 42  # fixed so the option order is identical on every run\n",
    "option_rng = random.Random(SHUFFLE_SEED)\n",
    "\n",
    "def shuffle_meanings(row):\n",
    "    \"\"\"Return the row's 'meaning1' and 'meaning2' values in a random order.\n",
    "\n",
    "    The result is a Series labelled 'meaning1'/'meaning2', so applying this\n",
    "    row-wise yields two columns in which the position of each meaning is random.\n",
    "    The order is drawn from the seeded `option_rng` rather than the global\n",
    "    `random` state, so re-running this cell reproduces the same shuffle.\n",
    "    \"\"\"\n",
    "    meanings = [row['meaning1'], row['meaning2']]\n",
    "    option_rng.shuffle(meanings)\n",
    "    return pd.Series(meanings, index=['meaning1', 'meaning2'])\n",
    "\n",
    "new_df = filtered_df[['hypothesis', 'entailed_exp', 'contradictory_exp']].copy()\n",
    "# The correct answer is stored as text, copied from the unshuffled 'meaning1' column.\n",
    "# NOTE(review): this assumes 'meaning1' is the right option for every row - confirm.\n",
    "new_df['correct_answer'] = filtered_df['meaning1']\n",
    "new_df[['meaning1', 'meaning2']] = filtered_df.apply(shuffle_meanings, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "# From here on `filtered_df` refers to the shuffled frame with a fresh 0..n-1 index.\n",
    "shuffled_df = new_df.reset_index(drop=True)\n",
    "filtered_df = shuffled_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hyp :  Mike nods and trots over c better late than never.\n",
      "option a :  Mike nods and trots over c although it would have been better if it happened earlier, at least it's happening now.\n",
      "option b :  Mike nods and trots over c  because it's happening now and there's no need for it to have happened earlier.\n",
      "correct answer :  Mike nods and trots over c although it would have been better if it happened earlier, at least it's happening now.\n",
      "exp1 :  The idiom better late than never means that the delayed occurrence or achievement of something is better than it not happening at all, which is what is happening in this sentence.\n",
      "exp2 :  The idiom better late than never means that the delayed occurrence or achievement of something is better than it not happening at all, but this sentence describes a situation where there is no need for it to have happened earlier.\n"
     ]
    }
   ],
   "source": [
    "j = 50\n",
    "# Spot-check one example: each label is printed next to the matching column value.\n",
    "labelled_columns = [\n",
    "    (\"hyp : \", 'hypothesis'),\n",
    "    (\"option a : \", 'meaning1'),\n",
    "    (\"option b : \", 'meaning2'),\n",
    "    (\"correct answer : \", 'correct_answer'),\n",
    "    (\"exp1 : \", 'entailed_exp'),\n",
    "    (\"exp2 : \", 'contradictory_exp'),\n",
    "]\n",
    "for label, column in labelled_columns:\n",
    "    print(label, filtered_df[column][j])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pretext</th>\n",
       "      <th>options</th>\n",
       "      <th>correct answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Sentence : To add insult to injury, a boy was ...</td>\n",
       "      <td>[To make things worse, a boy was leading a han...</td>\n",
       "      <td>To make things worse, a boy was leading a hand...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sentence : Oh, you know how it is, mother, whe...</td>\n",
       "      <td>[Oh, you know how it is, mother, when you spen...</td>\n",
       "      <td>Oh, you know how it is, mother, when you spend...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sentence : Definitely a good person, but one c...</td>\n",
       "      <td>[Definitely a good person, but one could tell ...</td>\n",
       "      <td>Definitely a good person, but one could tell s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sentence : To ask what's it like to be a warde...</td>\n",
       "      <td>[To ask what's it like to be a warden in d.c.,...</td>\n",
       "      <td>To ask what's it like to be a warden in d.c., ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sentence : He would smash into it at full tilt.</td>\n",
       "      <td>[He would smash into it as fast as he could., ...</td>\n",
       "      <td>He would smash into it as fast as he could.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1765</th>\n",
       "      <td>Sentence : You're loyal as Benedict Arnold</td>\n",
       "      <td>[You're disloyal, You're loyal]</td>\n",
       "      <td>You're disloyal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1766</th>\n",
       "      <td>Sentence : The cat was as white as charcoal</td>\n",
       "      <td>[The cat was white, The cat was black]</td>\n",
       "      <td>The cat was black</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1767</th>\n",
       "      <td>Sentence : You're as thin as a balloon</td>\n",
       "      <td>[You're thin, You're fat]</td>\n",
       "      <td>You're fat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1768</th>\n",
       "      <td>Sentence : The woman was as useful as People M...</td>\n",
       "      <td>[The woman was useless., The woman was useful.]</td>\n",
       "      <td>The woman was useless.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1769</th>\n",
       "      <td>Sentence : The conversation was as juicy as a ...</td>\n",
       "      <td>[The conversation was very juicy., The convers...</td>\n",
       "      <td>The conversation was very dull.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1770 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                pretext  \\\n",
       "0     Sentence : To add insult to injury, a boy was ...   \n",
       "1     Sentence : Oh, you know how it is, mother, whe...   \n",
       "2     Sentence : Definitely a good person, but one c...   \n",
       "3     Sentence : To ask what's it like to be a warde...   \n",
       "4       Sentence : He would smash into it at full tilt.   \n",
       "...                                                 ...   \n",
       "1765         Sentence : You're loyal as Benedict Arnold   \n",
       "1766        Sentence : The cat was as white as charcoal   \n",
       "1767             Sentence : You're as thin as a balloon   \n",
       "1768  Sentence : The woman was as useful as People M...   \n",
       "1769  Sentence : The conversation was as juicy as a ...   \n",
       "\n",
       "                                                options  \\\n",
       "0     [To make things worse, a boy was leading a han...   \n",
       "1     [Oh, you know how it is, mother, when you spen...   \n",
       "2     [Definitely a good person, but one could tell ...   \n",
       "3     [To ask what's it like to be a warden in d.c.,...   \n",
       "4     [He would smash into it as fast as he could., ...   \n",
       "...                                                 ...   \n",
       "1765                    [You're disloyal, You're loyal]   \n",
       "1766             [The cat was white, The cat was black]   \n",
       "1767                          [You're thin, You're fat]   \n",
       "1768    [The woman was useless., The woman was useful.]   \n",
       "1769  [The conversation was very juicy., The convers...   \n",
       "\n",
       "                                         correct answer  \n",
       "0     To make things worse, a boy was leading a hand...  \n",
       "1     Oh, you know how it is, mother, when you spend...  \n",
       "2     Definitely a good person, but one could tell s...  \n",
       "3     To ask what's it like to be a warden in d.c., ...  \n",
       "4           He would smash into it as fast as he could.  \n",
       "...                                                 ...  \n",
       "1765                                    You're disloyal  \n",
       "1766                                  The cat was black  \n",
       "1767                                         You're fat  \n",
       "1768                             The woman was useless.  \n",
       "1769                    The conversation was very dull.  \n",
       "\n",
       "[1770 rows x 3 columns]"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Prompt table: the bare sentence, both (shuffled) meanings as options, and the answer text.\n",
    "option_pairs = filtered_df[['meaning1', 'meaning2']].values.tolist()\n",
    "df_final = pd.DataFrame({\n",
    "    'pretext': 'Sentence : ' + filtered_df['hypothesis'],\n",
    "    'options': option_pairs,\n",
    "    'correct answer': filtered_df['correct_answer'],\n",
    "})\n",
    "df_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `index` is left at its default, so the row index is written as the first CSV column.\n",
    "df_final.to_csv(path_or_buf=\"./../global_datasets/task_6.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pretext</th>\n",
       "      <th>options</th>\n",
       "      <th>correct answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Sentence : To add insult to injury, a boy was ...</td>\n",
       "      <td>[To make things worse, a boy was leading a han...</td>\n",
       "      <td>To make things worse, a boy was leading a hand...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sentence : Oh, you know how it is, mother, whe...</td>\n",
       "      <td>[Oh, you know how it is, mother, when you spen...</td>\n",
       "      <td>Oh, you know how it is, mother, when you spend...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sentence : Definitely a good person, but one c...</td>\n",
       "      <td>[Definitely a good person, but one could tell ...</td>\n",
       "      <td>Definitely a good person, but one could tell s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sentence : To ask what's it like to be a warde...</td>\n",
       "      <td>[To ask what's it like to be a warden in d.c.,...</td>\n",
       "      <td>To ask what's it like to be a warden in d.c., ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sentence : He would smash into it at full tilt...</td>\n",
       "      <td>[He would smash into it as fast as he could., ...</td>\n",
       "      <td>He would smash into it as fast as he could.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1765</th>\n",
       "      <td>Sentence : You're loyal as Benedict Arnold\\nHi...</td>\n",
       "      <td>[You're disloyal, You're loyal]</td>\n",
       "      <td>You're disloyal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1766</th>\n",
       "      <td>Sentence : The cat was as white as charcoal\\nH...</td>\n",
       "      <td>[The cat was white, The cat was black]</td>\n",
       "      <td>The cat was black</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1767</th>\n",
       "      <td>Sentence : You're as thin as a balloon\\nHint :...</td>\n",
       "      <td>[You're thin, You're fat]</td>\n",
       "      <td>You're fat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1768</th>\n",
       "      <td>Sentence : The woman was as useful as People M...</td>\n",
       "      <td>[The woman was useless., The woman was useful.]</td>\n",
       "      <td>The woman was useless.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1769</th>\n",
       "      <td>Sentence : The conversation was as juicy as a ...</td>\n",
       "      <td>[The conversation was very juicy., The convers...</td>\n",
       "      <td>The conversation was very dull.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1770 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                pretext  \\\n",
       "0     Sentence : To add insult to injury, a boy was ...   \n",
       "1     Sentence : Oh, you know how it is, mother, whe...   \n",
       "2     Sentence : Definitely a good person, but one c...   \n",
       "3     Sentence : To ask what's it like to be a warde...   \n",
       "4     Sentence : He would smash into it at full tilt...   \n",
       "...                                                 ...   \n",
       "1765  Sentence : You're loyal as Benedict Arnold\\nHi...   \n",
       "1766  Sentence : The cat was as white as charcoal\\nH...   \n",
       "1767  Sentence : You're as thin as a balloon\\nHint :...   \n",
       "1768  Sentence : The woman was as useful as People M...   \n",
       "1769  Sentence : The conversation was as juicy as a ...   \n",
       "\n",
       "                                                options  \\\n",
       "0     [To make things worse, a boy was leading a han...   \n",
       "1     [Oh, you know how it is, mother, when you spen...   \n",
       "2     [Definitely a good person, but one could tell ...   \n",
       "3     [To ask what's it like to be a warden in d.c.,...   \n",
       "4     [He would smash into it as fast as he could., ...   \n",
       "...                                                 ...   \n",
       "1765                    [You're disloyal, You're loyal]   \n",
       "1766             [The cat was white, The cat was black]   \n",
       "1767                          [You're thin, You're fat]   \n",
       "1768    [The woman was useless., The woman was useful.]   \n",
       "1769  [The conversation was very juicy., The convers...   \n",
       "\n",
       "                                         correct answer  \n",
       "0     To make things worse, a boy was leading a hand...  \n",
       "1     Oh, you know how it is, mother, when you spend...  \n",
       "2     Definitely a good person, but one could tell s...  \n",
       "3     To ask what's it like to be a warden in d.c., ...  \n",
       "4           He would smash into it as fast as he could.  \n",
       "...                                                 ...  \n",
       "1765                                    You're disloyal  \n",
       "1766                                  The cat was black  \n",
       "1767                                         You're fat  \n",
       "1768                             The woman was useless.  \n",
       "1769                    The conversation was very dull.  \n",
       "\n",
       "[1770 rows x 3 columns]"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Prompt table whose pretext is the sentence followed by the entailed explanation as a hint.\n",
    "sentence_part = 'Sentence : ' + filtered_df['hypothesis']\n",
    "hint_part = '\\nHint : ' + filtered_df['entailed_exp']\n",
    "df_final1 = pd.DataFrame({\n",
    "    'pretext': sentence_part + hint_part,\n",
    "    'options': filtered_df[['meaning1', 'meaning2']].values.tolist(),\n",
    "    'correct answer': filtered_df['correct_answer'],\n",
    "})\n",
    "df_final1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `index` is left at its default, so the row index is written as the first CSV column.\n",
    "df_final1.to_csv(path_or_buf=\"./../global_datasets/task_7.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pretext</th>\n",
       "      <th>options</th>\n",
       "      <th>correct answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Sentence : To add insult to injury, a boy was ...</td>\n",
       "      <td>[To make things worse, a boy was leading a han...</td>\n",
       "      <td>To make things worse, a boy was leading a hand...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sentence : Oh, you know how it is, mother, whe...</td>\n",
       "      <td>[Oh, you know how it is, mother, when you spen...</td>\n",
       "      <td>Oh, you know how it is, mother, when you spend...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sentence : Definitely a good person, but one c...</td>\n",
       "      <td>[Definitely a good person, but one could tell ...</td>\n",
       "      <td>Definitely a good person, but one could tell s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sentence : To ask what's it like to be a warde...</td>\n",
       "      <td>[To ask what's it like to be a warden in d.c.,...</td>\n",
       "      <td>To ask what's it like to be a warden in d.c., ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sentence : He would smash into it at full tilt...</td>\n",
       "      <td>[He would smash into it as fast as he could., ...</td>\n",
       "      <td>He would smash into it as fast as he could.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1765</th>\n",
       "      <td>Sentence : You're loyal as Benedict Arnold\\nHi...</td>\n",
       "      <td>[You're disloyal, You're loyal]</td>\n",
       "      <td>You're disloyal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1766</th>\n",
       "      <td>Sentence : The cat was as white as charcoal\\nH...</td>\n",
       "      <td>[The cat was white, The cat was black]</td>\n",
       "      <td>The cat was black</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1767</th>\n",
       "      <td>Sentence : You're as thin as a balloon\\nHint :...</td>\n",
       "      <td>[You're thin, You're fat]</td>\n",
       "      <td>You're fat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1768</th>\n",
       "      <td>Sentence : The woman was as useful as People M...</td>\n",
       "      <td>[The woman was useless., The woman was useful.]</td>\n",
       "      <td>The woman was useless.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1769</th>\n",
       "      <td>Sentence : The conversation was as juicy as a ...</td>\n",
       "      <td>[The conversation was very juicy., The convers...</td>\n",
       "      <td>The conversation was very dull.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1770 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                pretext  \\\n",
       "0     Sentence : To add insult to injury, a boy was ...   \n",
       "1     Sentence : Oh, you know how it is, mother, whe...   \n",
       "2     Sentence : Definitely a good person, but one c...   \n",
       "3     Sentence : To ask what's it like to be a warde...   \n",
       "4     Sentence : He would smash into it at full tilt...   \n",
       "...                                                 ...   \n",
       "1765  Sentence : You're loyal as Benedict Arnold\\nHi...   \n",
       "1766  Sentence : The cat was as white as charcoal\\nH...   \n",
       "1767  Sentence : You're as thin as a balloon\\nHint :...   \n",
       "1768  Sentence : The woman was as useful as People M...   \n",
       "1769  Sentence : The conversation was as juicy as a ...   \n",
       "\n",
       "                                                options  \\\n",
       "0     [To make things worse, a boy was leading a han...   \n",
       "1     [Oh, you know how it is, mother, when you spen...   \n",
       "2     [Definitely a good person, but one could tell ...   \n",
       "3     [To ask what's it like to be a warden in d.c.,...   \n",
       "4     [He would smash into it as fast as he could., ...   \n",
       "...                                                 ...   \n",
       "1765                    [You're disloyal, You're loyal]   \n",
       "1766             [The cat was white, The cat was black]   \n",
       "1767                          [You're thin, You're fat]   \n",
       "1768    [The woman was useless., The woman was useful.]   \n",
       "1769  [The conversation was very juicy., The convers...   \n",
       "\n",
       "                                         correct answer  \n",
       "0     To make things worse, a boy was leading a hand...  \n",
       "1     Oh, you know how it is, mother, when you spend...  \n",
       "2     Definitely a good person, but one could tell s...  \n",
       "3     To ask what's it like to be a warden in d.c., ...  \n",
       "4           He would smash into it as fast as he could.  \n",
       "...                                                 ...  \n",
       "1765                                    You're disloyal  \n",
       "1766                                  The cat was black  \n",
       "1767                                         You're fat  \n",
       "1768                             The woman was useless.  \n",
       "1769                    The conversation was very dull.  \n",
       "\n",
       "[1770 rows x 3 columns]"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assemble df_final2 from filtered_df: prompt text, the two candidate meanings, and the answer.\n",
    "df_final2 = pd.DataFrame()\n",
    "df_final2['pretext'] = 'Sentence : ' + filtered_df['hypothesis'] + '\\nHint : '+filtered_df['contradictory_exp']\n",
    "# Pair the meanings row by row. Looking values up with filtered_df['meaning1'][i] for\n",
    "# i in range(len(filtered_df)) is a *label* lookup and breaks (KeyError or mis-paired rows)\n",
    "# as soon as filtered_df's index is not exactly 0..n-1, e.g. after filtering without a reset.\n",
    "df_final2['options'] = filtered_df[['meaning1', 'meaning2']].values.tolist()\n",
    "df_final2['correct answer'] = filtered_df['correct_answer']\n",
    "df_final2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the assembled frame; with pandas' defaults the row index is written as the first column.\n",
    "df_final2.to_csv(path_or_buf=\"./../global_datasets/task_8.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Renumber rows 0..n-1 and discard the old index. Rebinding (rather than inplace=True)\n",
    "# keeps the cell safe to re-run and avoids mutating a frame an earlier cell displayed.\n",
    "df_final2 = df_final2.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df_final2 has no 'meaning1'/'meaning2' columns (asking for them raises KeyError);\n",
    "# the two meanings live together in the 'options' column as a Python list.\n",
    "# Lists are unhashable, so convert each pair to a tuple before looking for repeats.\n",
    "option_pairs = df_final2['options'].map(tuple)\n",
    "# keep=False marks every occurrence of a repeated pair, not just the later ones.\n",
    "duplicate_rows = df_final2[option_pairs.duplicated(keep=False)]\n",
    "len(duplicate_rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Boolean flag per row: True when the first two options are the same string.\n",
    "# NOTE(review): this reads df_final, while the neighbouring cells build df_final2 - confirm the intended frame.\n",
    "li = list(map(lambda pair: pair[0] == pair[1], df_final['options']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[\"The key will be unclaimed and available to anyone, but he 'll be too weak to make a play for it, leaving the field wide open for you.\",\n",
       " \"The key will be unclaimed and available to anyone, but he 'll be too weak to make a play for it, leaving the field wide open for you.\"]"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Among the rows flagged in `li`, show the option pair stored under index label 583.\n",
    "df_final.loc[li, 'options'][583]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check whether both meanings of row 583 are the same text in filtered_df itself.\n",
    "filtered_df.loc[583, 'meaning1'] == filtered_df.loc[583, 'meaning2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"The key will be unclaimed and available to anyone, but he 'll be too weak to make a play for it, leaving the field wide open for you.\""
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The key will be up for grabs, but he 'll be too weak to make a play for it, leaving the field wide open for you.\n",
      "Up for grabs means available to anyone, and in this context the key is unclaimed and available to anyone.\n",
      "Up for grabs means available to anyone, but in this context the key is not available to anyone.\n",
      "The key will be unclaimed and available to anyone, but he 'll be too weak to make a play for it, leaving the field wide open for you.\n",
      "The key will be unclaimed and available to anyone, but he 'll be too weak to make a play for it, leaving the field wide open for you.\n",
      "The key will be unclaimed and available to anyone, but he 'll be too weak to make a play for it, leaving the field wide open for you.\n"
     ]
    }
   ],
   "source": [
    "# Print every field of row 583, one value per line, to inspect the full record.\n",
    "temp = filtered_df.loc[583]\n",
    "for _, field_value in temp.items():\n",
    "    print(field_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
