{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a893f9bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae5e1230",
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_hypothesis_premise(text: str):\n",
    "    pattern = r\"Hypothesis\\s*:\\s*(.*?)\\s*Premise\\s*:\\s*(.*)\"\n",
    "    match = re.search(pattern, text, re.DOTALL)\n",
    "    if match:\n",
    "        hypothesis = match.group(1).strip()\n",
    "        premise = match.group(2).strip()\n",
    "        return hypothesis, premise\n",
    "    return None, None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95ff7469",
   "metadata": {},
   "outputs": [],
   "source": [
    "idx = 2\n",
    "datasets = ['idiom', 'metaphor', 'simile']\n",
    "df = pd.read_csv(f'./{datasets[idx]}_test.csv')\n",
    "#df = pd.read_csv(f'./{datasets[idx]}_train.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5329871",
   "metadata": {},
   "outputs": [],
   "source": [
    "senarios_list, option_list, answer_list = [], [], [] # convert NLI format to Scenario - options\n",
    "for i in range(len(df)//2):\n",
    "    senarios = df['sentence'][2*i]\n",
    "    hyp, prem1 = extract_hypothesis_premise(senarios)\n",
    "    senarios = df['sentence'][2*i + 1]\n",
    "    _, prem2 = extract_hypothesis_premise(senarios)\n",
    "    senarios_list.append(hyp)\n",
    "    option_list.append([prem1, prem2])\n",
    "    answer_list.append(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a87b8668",
   "metadata": {},
   "outputs": [],
   "source": [
    "so_df = pd.DataFrame({'scenarios':senarios_list, 'options':option_list, 'answer':answer_list})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40360fd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "so_df.to_csv(f'./{datasets[idx]}_test_for_new_method.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "topo",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
