{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8dfd35a-41e1-4c08-8f36-da98f848dc04",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading annotations into memory...\n",
      "Done (t=0.08s)\n",
      "creating index...\n",
      "index created!\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-09-22 22:28:02.864462: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2025-09-22 22:28:03.065702: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2025-09-22 22:28:04.872190: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "calculating scores...\n",
      "computing bert embedding.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c82d77396d804cce9e3db1ce45eb32e7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/156 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/gpfs/projects/punim2198/Aryan/hallucination-attack/virtualenv/Python3.11.3/lib/python3.11/site-packages/torch/nn/modules/module.py:1762: FutureWarning: `encoder_attention_mask` is deprecated and will be removed in version 4.55.0 for `RobertaSdpaSelfAttention.forward`.\n",
      "  return forward_call(*args, **kwargs)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "computing greedy matching.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d5f91d74ba14b3c80aa3f4882c94a34",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/79 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "done in 6.50 seconds, 769.43 sentences/sec\n",
      "BERTScore (F1): 0.894\n",
      "P: 0.879, R: 0.910\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from pycocotools.coco import COCO\n",
    "from bert_score import score\n",
    "\n",
    "\n",
    "# Inputs: COCO caption annotations (references) and the predictions file to score.\n",
    "ANN_FILE  = \"../PathtoCOCO/COCO/annotations/captions_val2017.json\"\n",
    "PRED_FILE = \"./pred_captions_val2017_qwen_lora.json\"\n",
    "# False -> compare each prediction with the first annotated caption of its image only.\n",
    "# True  -> hand bert_score every annotated caption of the image (multi-reference scoring).\n",
    "USE_ALL_REFERENCES = False\n",
    "\n",
    "coco = COCO(ANN_FILE)\n",
    "# JSON text is UTF-8; be explicit instead of relying on the platform default encoding.\n",
    "with open(PRED_FILE, \"r\", encoding=\"utf-8\") as f:\n",
    "    preds = json.load(f)\n",
    "\n",
    "pred_captions = []\n",
    "ref_captions  = []\n",
    "skipped_ids   = []  # image ids of predictions that have no annotated caption\n",
    "\n",
    "# Each prediction entry is read for its \"image_id\" and \"caption\" keys.\n",
    "for p in preds:\n",
    "    img_id = int(p[\"image_id\"])\n",
    "\n",
    "    # .get() avoids an IndexError below for ids missing from the annotation index.\n",
    "    anns = coco.imgToAnns.get(img_id, [])\n",
    "    ref_texts = [a[\"caption\"] for a in anns]\n",
    "    if not ref_texts:\n",
    "        # Skip the whole pair so pred_captions and ref_captions stay index-aligned.\n",
    "        skipped_ids.append(img_id)\n",
    "        continue\n",
    "\n",
    "    pred_captions.append(p[\"caption\"])\n",
    "    ref_captions.append(ref_texts if USE_ALL_REFERENCES else ref_texts[0])\n",
    "\n",
    "if skipped_ids:\n",
    "    print(f\"Skipped {len(skipped_ids)} prediction(s) without reference captions, e.g. image_id={skipped_ids[0]}\")\n",
    "if not pred_captions:\n",
    "    raise ValueError(\"No prediction has a reference caption; check ANN_FILE and PRED_FILE.\")\n",
    "\n",
    "P, R, F1 = score(pred_captions, ref_captions, lang=\"en\", verbose=True)\n",
    "\n",
    "print(f\"BERTScore (F1): {F1.mean().item():.3f}\")\n",
    "print(f\"P: {P.mean().item():.3f}, R: {R.mean().item():.3f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "b5b2d7df-b786-46e2-9140-bf2222423d4a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting bert-score\n",
      "  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)\n",
      "Requirement already satisfied: torch>=1.0.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (2.7.1)\n",
      "Requirement already satisfied: pandas>=1.0.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (2.3.2)\n",
      "Requirement already satisfied: transformers>=3.0.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (4.54.0)\n",
      "Requirement already satisfied: numpy in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (2.1.3)\n",
      "Requirement already satisfied: requests in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (2.32.4)\n",
      "Requirement already satisfied: tqdm>=4.31.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (4.67.1)\n",
      "Requirement already satisfied: matplotlib in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (3.10.3)\n",
      "Requirement already satisfied: packaging>=20.9 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from bert-score) (25.0)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from pandas>=1.0.1->bert-score) (2.9.0.post0)\n",
      "Requirement already satisfied: pytz>=2020.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from pandas>=1.0.1->bert-score) (2025.2)\n",
      "Requirement already satisfied: tzdata>=2022.7 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from pandas>=1.0.1->bert-score) (2025.2)\n",
      "Requirement already satisfied: six>=1.5 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.0.1->bert-score) (1.17.0)\n",
      "Requirement already satisfied: filelock in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (3.18.0)\n",
      "Requirement already satisfied: typing-extensions>=4.10.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (4.14.1)\n",
      "Requirement already satisfied: sympy>=1.13.3 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (1.14.0)\n",
      "Requirement already satisfied: networkx in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (3.5)\n",
      "Requirement already satisfied: jinja2 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (3.1.6)\n",
      "Requirement already satisfied: fsspec in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (2025.7.0)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (12.6.77)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (12.6.77)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (12.6.80)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==9.5.1.17 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (9.5.1.17)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (12.6.4.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (11.3.0.4)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (10.3.7.77)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (11.7.1.2)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (12.5.4.2)\n",
      "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (0.6.3)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (2.26.2)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (12.6.77)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (12.6.85)\n",
      "Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (1.11.1.6)\n",
      "Requirement already satisfied: triton==3.3.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from torch>=1.0.0->bert-score) (3.3.1)\n",
      "Requirement already satisfied: setuptools>=40.8.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from triton==3.3.1->torch>=1.0.0->bert-score) (80.9.0)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from sympy>=1.13.3->torch>=1.0.0->bert-score) (1.3.0)\n",
      "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from transformers>=3.0.0->bert-score) (0.34.2)\n",
      "Requirement already satisfied: pyyaml>=5.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from transformers>=3.0.0->bert-score) (6.0.2)\n",
      "Requirement already satisfied: regex!=2019.12.17 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from transformers>=3.0.0->bert-score) (2024.11.6)\n",
      "Requirement already satisfied: tokenizers<0.22,>=0.21 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from transformers>=3.0.0->bert-score) (0.21.2)\n",
      "Requirement already satisfied: safetensors>=0.4.3 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from transformers>=3.0.0->bert-score) (0.5.3)\n",
      "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from huggingface-hub<1.0,>=0.34.0->transformers>=3.0.0->bert-score) (1.1.5)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from jinja2->torch>=1.0.0->bert-score) (3.0.2)\n",
      "Requirement already satisfied: contourpy>=1.0.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from matplotlib->bert-score) (1.3.3)\n",
      "Requirement already satisfied: cycler>=0.10 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from matplotlib->bert-score) (0.12.1)\n",
      "Requirement already satisfied: fonttools>=4.22.0 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from matplotlib->bert-score) (4.59.0)\n",
      "Requirement already satisfied: kiwisolver>=1.3.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from matplotlib->bert-score) (1.4.8)\n",
      "Requirement already satisfied: pillow>=8 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from matplotlib->bert-score) (11.3.0)\n",
      "Requirement already satisfied: pyparsing>=2.3.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from matplotlib->bert-score) (3.2.3)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from requests->bert-score) (3.4.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from requests->bert-score) (3.10)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from requests->bert-score) (2.5.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in ./virtualenv/Python3.11.3/lib/python3.11/site-packages (from requests->bert-score) (2025.7.14)\n",
      "Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)\n",
      "Installing collected packages: bert-score\n",
      "Successfully installed bert-score-0.3.13\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.1.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Install the BERTScore dependency; run this once before the scoring cell that imports bert_score.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# The pin matches the version recorded in this notebook's saved install output.\n",
    "%pip install -q bert-score==0.3.13\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "95d04b6f-a856-4554-ace4-c3ede4c3458e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/bin/bash: line 1: java: command not found\n"
     ]
    }
   ],
   "source": [
    "# Environment probe: asks the shell for the installed Java version.\n",
    "# The saved output shows `java` was not found on PATH on the machine this was run on.\n",
    "# NOTE(review): nothing else in this notebook uses Java; presumably a prerequisite check\n",
    "# for a Java-based tool run elsewhere - confirm, or delete this cell.\n",
    "!java --version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b42175a-56a2-4536-9d5f-7473ccb4b47b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
