{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "61c539a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM\n",
    "from datasets import load_from_disk, list_datasets, load_dataset, disable_caching, load_metric\n",
    "import pandas as pd\n",
    "import ast\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1c7e4b8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def blue_score(results, tokenizer):\n",
    "    \"\"\"Compute the corpus-level sacreBLEU score of decoded outputs against decoded targets.\n",
    "\n",
    "    Args:\n",
    "        results: DataFrame with `output_ids` and `target_ids` columns. Each cell is a\n",
    "            Python-literal string that is parsed with `ast.literal_eval` and then\n",
    "            passed to `tokenizer.decode`.\n",
    "        tokenizer: Object exposing `decode(ids, skip_special_tokens=True)`.\n",
    "\n",
    "    Returns:\n",
    "        The `score` entry of the sacreBLEU metric result.\n",
    "    \"\"\"\n",
    "    predictions = []\n",
    "    references = []\n",
    "\n",
    "    # Walk the column values directly instead of `results[col][i]`: an integer `i`\n",
    "    # is a *label* lookup on a Series, which is only correct for a default\n",
    "    # RangeIndex and fails on filtered or re-indexed frames.\n",
    "    # No `torch.no_grad()` is needed: decoding is pure tokenizer work.\n",
    "    for output_ids_str, target_ids_str in zip(results[\"output_ids\"], results[\"target_ids\"]):\n",
    "        output_ids = ast.literal_eval(output_ids_str)\n",
    "        target_ids = ast.literal_eval(target_ids_str)\n",
    "\n",
    "        predictions.append(tokenizer.decode(output_ids, skip_special_tokens=True))\n",
    "        # Each prediction gets a list of references (here exactly one).\n",
    "        references.append([tokenizer.decode(target_ids, skip_special_tokens=True)])\n",
    "\n",
    "    # NOTE: `load_metric` is deprecated in `datasets` and emits a FutureWarning;\n",
    "    # it is kept because it is the loader this notebook imports.\n",
    "    bleu = load_metric(\"sacrebleu\")\n",
    "    bleu.add_batch(predictions=predictions, references=references)\n",
    "    return bleu.compute()[\"score\"]\n",
    "\n",
    "\n",
    "def compute_fnr(results):\n",
    "    count = 0\n",
    "    z_list = []    \n",
    "    return (results[\"z_score\"] < 4).sum()/len(results)\n",
    "\n",
    "def compute_fpr(results):\n",
    "    count = 0\n",
    "    z_list = []    \n",
    "    return (results[\"z_score\"] >= 4).sum()/len(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "46a1f294",
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_NAME=\"facebook/nllb-200-3.3B\"\n",
    "#model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=\"/data1/takezawa/huggingface\", torch_dtype=torch.float16, revision=\"1a07f7d195896b2114afcb79b7b57ab512e7b43e\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c671aa5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation settings: translation direction and the gamma value that is\n",
    "# embedded in the result file names.\n",
    "language, gamma = \"de-en\", 0.0001\n",
    "\n",
    "# Z-score results for texts generated by the NS-Watermark.\n",
    "ns_watermark_csv = f\"results/wmt_{language}/z_score/ns_watermark_{gamma}_1.csv.gz\"\n",
    "results_ns_watermark = pd.read_csv(ns_watermark_csv)\n",
    "\n",
    "# Z-score results for texts written by humans.\n",
    "human_csv = f\"results/wmt_{language}/z_score/human_{gamma}.csv.gz\"\n",
    "results_human = pd.read_csv(human_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3d07351c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source-language code handed to the tokenizer for each translation direction.\n",
    "# `None` means no `src_lang` argument is passed, so the tokenizer default applies.\n",
    "SRC_LANG_BY_DIRECTION = {\n",
    "    \"en-fr\": None,\n",
    "    \"en-de\": None,\n",
    "    \"de-en\": \"deu_Latn\",\n",
    "    \"fr-en\": \"fra_Latn\",\n",
    "}\n",
    "\n",
    "# Fail loudly on an unknown direction; otherwise `tokenizer` would be left\n",
    "# undefined and the problem would only surface as a NameError in a later cell.\n",
    "if language not in SRC_LANG_BY_DIRECTION:\n",
    "    raise ValueError(\n",
    "        f\"Unsupported language pair {language!r}; expected one of {sorted(SRC_LANG_BY_DIRECTION)}\"\n",
    "    )\n",
    "\n",
    "# Arguments common to every direction: local cache location and pinned revision.\n",
    "tokenizer_kwargs = {\n",
    "    \"cache_dir\": \"/data1/takezawa/huggingface\",\n",
    "    \"revision\": \"1a07f7d195896b2114afcb79b7b57ab512e7b43e\",\n",
    "}\n",
    "src_lang = SRC_LANG_BY_DIRECTION[language]\n",
    "if src_lang is not None:\n",
    "    tokenizer_kwargs[\"src_lang\"] = src_lang\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, **tokenizer_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "cae5007b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NS-Watermark (gamma=0.0001)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_8856/1741476300.py:15: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n",
      "  bleu = load_metric(\"sacrebleu\")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t BLEU=38.85980449519435, FNR=0.0\n",
      "Human\n",
      "\t FPR=0.0\n"
     ]
    }
   ],
   "source": [
    "# Watermarked outputs: BLEU of the decoded text and the false-negative rate.\n",
    "print(f\"NS-Watermark (gamma={gamma})\")\n",
    "ns_bleu = blue_score(results_ns_watermark, tokenizer)\n",
    "ns_fnr = compute_fnr(results_ns_watermark)\n",
    "print(f\"\\t BLEU={ns_bleu}, FNR={ns_fnr}\")\n",
    "\n",
    "# Human-written texts: false-positive rate.\n",
    "print(\"Human\")\n",
    "human_fpr = compute_fpr(results_human)\n",
    "print(f\"\\t FPR={human_fpr}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
