{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "61c539a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorWithPadding\n",
    "\n",
    "from ns_watermark import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "af8c090b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c742049080f64d619e963f4ebe00b8b1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "MODEL_NAME = \"meta-llama/Llama-2-7b-chat-hf\"\n",
    "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, dtype=torch.float16, local_files_only=True, padding_side='left')\n",
    "model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\", local_files_only=True)\n",
    "tokenizer.pad_token_id = model.config.pad_token_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0ac54c0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def classify(z_score, z=4):\n",
    "    if z_score >= z:\n",
    "        print(\"It is a LLMs-generated text.\")\n",
    "    else:\n",
    "        print(\"It is a human-written text.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c19e24b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = [\"Write a 50-word article on 'Benefits of Open-Source in AI research'\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6f2c6fd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "input_ids = tokenizer(prompt, padding=True, return_tensors=\"pt\").input_ids"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ef53171b",
   "metadata": {},
   "source": [
    "## NS-Watermark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b410821f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Write a 50-word article on 'Benefits of Open-Source in AI research'\n",
      "\n",
      "\n",
      "Open-source AI research has numerous benefits, including increased collaboration and knowledge sharing among researchers, faster development and deployment of AI models, and improved transparency and reproducibility of results. Additionally, open-source AI research can lead to more operator-friendly and user-friendly AI systems, as well as increased innovation and creativity in the field.\n",
      "----------\n",
      "z-score: 10.953146427116755\n",
      "It is a LLMs-generated text.\n"
     ]
    }
   ],
   "source": [
    "watermark = NecessaryAndSufficientWatermark(gamma=0.0001)\n",
    "output_ids = watermark.generate(model, input_ids.to(model.device), max_length=100, alpha=1)\n",
    "\n",
    "\n",
    "print(prompt[0])\n",
    "print(tokenizer.decode(output_ids[0], skip_special_tokens=True))\n",
    "print(\"-\"*10)\n",
    "\n",
    "z_score = watermark.compute_z_score(output_ids)[0]\n",
    "print(f\"z-score: {z_score}\")\n",
    "classify(z_score)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a3af23f3",
   "metadata": {},
   "source": [
    "## w/o Watermark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d7d8bdd1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Write a 50-word article on 'Benefits of Open-Source in AI research'\n",
      "\n",
      "Open-source AI research has numerous benefits, including increased collaboration and knowledge sharing among researchers, faster development and deployment of AI models, and improved transparency and reproducibility of results. Additionally, open-source AI research can lead to more diverse and innovative solutions, as well as increased accessibility to AI technology for individuals and organizations.\n",
      "----------\n",
      "z-score: -0.10000500037503124\n",
      "It is a human-written text.\n"
     ]
    }
   ],
   "source": [
    "output_ids = model.generate(input_ids.to(model.device), max_new_tokens=100, length_penalty=0)\n",
    "print(tokenizer.decode(output_ids[0], skip_special_tokens=True))\n",
    "print(\"-\"*10)\n",
    "\n",
    "z_score = watermark.compute_z_score(output_ids)[0]\n",
    "print(f\"z-score: {z_score}\")\n",
    "classify(z_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61f7a4ba",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
