{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clone this sub-repository onto your machine, then follow the instructions below.\n",
    "# BERT: to use the code for BERT, go to the BERT sub-directory and run the following command:\n",
    "# python BERT.py <name of the dataset> <max sequence length> <batch size>\n",
    "\n",
    "# For example, to run the code for the QQP dataset with a maximum sequence length of 512 and a batch size of 32, use:\n",
    "# python BERT.py qqp 512 32\n",
    "# GPT-2: to use the code for GPT-2, go to the GPT-2 sub-directory and run the following command:\n",
    "# python GPT-2.py <name of the dataset> <max sequence length> <batch size>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset to evaluate. Later cells lowercase it for the GPT-2.py argument and\n",
    "# use it verbatim for the model folder and the <dataset_name>.tsv file names.\n",
    "dataset_name = \"MNLI\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "rm: cannot remove 'addText': No such file or directory\n",
      "rm: cannot remove 'bias': No such file or directory\n",
      "rm: cannot remove 'changeChar': No such file or directory\n",
      "rm: cannot remove 'noFirst': No such file or directory\n",
      "rm: cannot remove 'noLast': No such file or directory\n",
      "rm: cannot remove 'noNouns': No such file or directory\n",
      "rm: cannot remove 'noVerbs': No such file or directory\n",
      "rm: cannot remove 'swapText': No such file or directory\n",
      "rm: cannot remove '*.tsv': No such file or directory\n",
      "/home/CAMPUS/wcheng7/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
      "  warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
      "/home/CAMPUS/wcheng7/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
      "  warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 5 GPU(s) available.\n",
      "We will use the GPU: Tesla V100S-PCIE-32GB\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 307/307 [00:43<00:00,  7.03it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Validation Matched Accuracy:  0.8185430463576159\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 307/307 [00:47<00:00,  6.47it/s]\n",
      "100%|██████████| 308/308 [01:06<00:00,  4.66it/s]\n",
      "100%|██████████| 35/35 [00:02<00:00, 12.17it/s]\n",
      "100%|██████████| 307/307 [00:53<00:00,  5.74it/s]\n",
      "100%|██████████| 308/308 [00:47<00:00,  6.55it/s]\n",
      "100%|██████████| 35/35 [00:02<00:00, 13.71it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed noNouns perturbation\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 307/307 [00:54<00:00,  5.64it/s]\n",
      "100%|██████████| 308/308 [01:12<00:00,  4.23it/s]\n",
      "100%|██████████| 35/35 [00:02<00:00, 12.87it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed noVerbs perturbation\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 307/307 [01:04<00:00,  4.74it/s]\n",
      "100%|██████████| 308/308 [01:13<00:00,  4.17it/s]\n",
      "100%|██████████| 35/35 [00:03<00:00, 11.22it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed noFirst perturbation\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 307/307 [01:05<00:00,  4.67it/s]\n",
      "100%|██████████| 308/308 [01:13<00:00,  4.20it/s]\n",
      "100%|██████████| 35/35 [00:03<00:00, 11.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed noLast perturbation\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 307/307 [01:12<00:00,  4.22it/s]\n",
      "100%|██████████| 308/308 [01:18<00:00,  3.94it/s]\n",
      "100%|██████████| 35/35 [00:03<00:00, 11.59it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed swapText perturbation\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 307/307 [01:25<00:00,  3.57it/s]\n",
      "100%|██████████| 308/308 [02:54<00:00,  1.77it/s]\n",
      " 86%|████████▌ | 30/35 [00:06<00:01,  3.84it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed addText perturbation\n",
      "Completed changeChar perturbation\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 35/35 [00:08<00:00,  4.30it/s]\n",
      "100%|██████████| 307/307 [03:38<00:00,  1.40it/s]s]\n",
      "100%|██████████| 308/308 [01:52<00:00,  2.73it/s]s]\n",
      "100%|██████████| 35/35 [00:05<00:00,  5.95it/s]s]\n",
      "100%|██████████| 307/307 [01:05<00:00,  4.68it/s]s]\n",
      "100%|██████████| 308/308 [3:22:30<00:00, 39.45s/it]      \n",
      "100%|██████████| 35/35 [00:02<00:00, 14.94it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed bias perturbation\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CompletedProcess(args=['python', 'GPT-2.py', 'mnli', '512', '32', '/home/CAMPUS/wcheng7/test_trans/transformer/Robustness-of-Transformers-models/GPT-2/MNLI/gpt2-mnli'], returncode=0)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import glob\n",
    "import os\n",
    "import shutil\n",
    "import subprocess\n",
    "\n",
    "# Clear artifacts left by a previous run before regenerating them.\n",
    "# `rm` launched via subprocess without a shell receives '*.tsv' literally, so the\n",
    "# pattern is never expanded and nothing is deleted; expand it in Python instead.\n",
    "for stale_dir in [\"addText\", \"bias\", \"changeChar\", \"noFirst\", \"noLast\", \"noNouns\", \"noVerbs\", \"swapText\"]:\n",
    "    shutil.rmtree(stale_dir, ignore_errors=True)  # a missing folder on a fresh run is fine\n",
    "for stale_tsv in glob.glob(\"*.tsv\"):\n",
    "    if os.path.isfile(stale_tsv):\n",
    "        os.remove(stale_tsv)\n",
    "\n",
    "# Run the script with arguments: <dataset> <max sequence length> <batch size>,\n",
    "# followed by a fourth argument (presumably the model checkpoint directory - confirm in GPT-2.py).\n",
    "# NOTE(review): the checkpoint path below is an absolute, machine-specific path.\n",
    "# subprocess.run([\"python\", \"GPT-2.py\", f\"{dataset_name.lower()}\", \"512\", '32', f'/home/CAMPUS/wcheng7/test_trans/transformer/Robustness-of-Transformers-models/GPT-2/{dataset_name}/gpt2-{dataset_name.lower()}-SAGMAN'])\n",
    "subprocess.run([\"python\", \"GPT-2.py\", f\"{dataset_name.lower()}\", \"512\", '32', f'/home/CAMPUS/wcheng7/test_trans/transformer/Robustness-of-Transformers-models/GPT-2/{dataset_name}/gpt2-{dataset_name.lower()}'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "addText => robustness: 0.9057\n",
      "bias => robustness: 0.8090\n",
      "changeChar => robustness: 0.6870\n",
      "noFirst => robustness: 0.8970\n",
      "noLast => robustness: 0.9393\n",
      "noNouns => robustness: 0.7773\n",
      "noVerbs => robustness: 0.8100\n",
      "swapText => robustness: 0.8617\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "\n",
    "def compute_robustness(m_clean, m_perturbed):\n",
    "    \"\"\"Score how closely the perturbed predictions track the clean ones.\n",
    "\n",
    "    Both arguments are indexed with [\"prediction\"] and must hold numeric values.\n",
    "    Returns 1 - sum(clean - perturbed) / sum(clean). When the inputs are pandas\n",
    "    DataFrames (as in this notebook) the subtraction pairs rows by index label.\n",
    "    \"\"\"\n",
    "    clean_pred = m_clean[\"prediction\"]\n",
    "    total_drop = (clean_pred - m_perturbed[\"prediction\"]).sum()\n",
    "    return 1 - total_drop / clean_pred.sum()\n",
    "\n",
    "\n",
    "# Integer encoding applied to the string labels in each file's \"prediction\" column.\n",
    "# NOTE(review): only these two labels are encoded while dataset_name is 'MNLI' -\n",
    "# confirm they match the labels GPT-2.py actually writes.\n",
    "LABEL_TO_INT = {\"entailment\": 1, \"not_entailment\": 0}\n",
    "\n",
    "\n",
    "def _load_predictions(tsv_path):\n",
    "    \"\"\"Read a tab-separated file and integer-encode its \"prediction\" column.\n",
    "\n",
    "    Labels missing from LABEL_TO_INT become NaN and are then skipped when the\n",
    "    column is summed, which would silently skew the score, so their count is\n",
    "    printed as a warning.\n",
    "    \"\"\"\n",
    "    frame = pd.read_csv(tsv_path, sep=\"\\t\")\n",
    "    encoded = frame[\"prediction\"].map(LABEL_TO_INT)\n",
    "    # Count only NaNs introduced by the mapping, not ones already in the file.\n",
    "    n_unmapped = int(encoded.isna().sum() - frame[\"prediction\"].isna().sum())\n",
    "    if n_unmapped:\n",
    "        print(f\"[Warning] {n_unmapped} prediction(s) in {tsv_path} are not in {sorted(LABEL_TO_INT)}\")\n",
    "    frame[\"prediction\"] = encoded\n",
    "    return frame\n",
    "\n",
    "\n",
    "clean_file = f\"{dataset_name}.tsv\"  # Path to the original/clean predictions file\n",
    "df_clean = _load_predictions(clean_file)\n",
    "\n",
    "perturbation_folders = [\n",
    "    \"addText\",\n",
    "    \"bias\",\n",
    "    \"changeChar\",\n",
    "    \"noFirst\",\n",
    "    \"noLast\",\n",
    "    \"noNouns\",\n",
    "    \"noVerbs\",\n",
    "    \"swapText\"\n",
    "]\n",
    "\n",
    "robustness_scores = {}\n",
    "\n",
    "for folder in perturbation_folders:\n",
    "    # Each perturbation folder holds its own <dataset_name>.tsv\n",
    "    perturbed_file = os.path.join('./'+folder, f\"{dataset_name}.tsv\")\n",
    "\n",
    "    # Skip perturbations whose file is missing instead of aborting the whole loop.\n",
    "    if not os.path.exists(perturbed_file):\n",
    "        print(f\"[Warning] File not found: {perturbed_file}\")\n",
    "        continue\n",
    "\n",
    "    df_perturbed = _load_predictions(perturbed_file)\n",
    "\n",
    "    robustness_score = compute_robustness(df_clean, df_perturbed)\n",
    "\n",
    "    robustness_scores[folder] = robustness_score\n",
    "\n",
    "for folder, score in robustness_scores.items():\n",
    "    print(f\"{folder} => robustness: {score:.4f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "index         1.000018\n",
       "prediction    1.017932\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show every perturbation's score; the bare loop variable `score` would expose only\n",
    "# the last one and is undefined when no perturbed file was found.\n",
    "robustness_scores"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
