{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "# Zero-width characters: ZERO WIDTH SPACE, ZERO WIDTH NON-JOINER, ZERO WIDTH JOINER.\n",
    "ZWSP, ZWNJ, ZWJ = (chr(code_point) for code_point in (0x200B, 0x200C, 0x200D))\n",
    "# Invisible operators: INVISIBLE TIMES, INVISIBLE SEPARATOR, INVISIBLE PLUS.\n",
    "IT, IS, IP = (chr(code_point) for code_point in (0x2062, 0x2063, 0x2064))\n",
    "\n",
    "# Alphabet the watermark is drawn from; a symbol's position is its encoded digit.\n",
    "characters = [ZWSP, ZWNJ, ZWJ, IT, IS, IP]\n",
    "WATERMARK_LEN = 10\n",
    "\n",
    "# Encoded watermarks issued so far (starts empty).\n",
    "duplicate_check = set()\n",
    "\n",
    "def random_generate(characters, watermark_length, duplicate_check):\n",
    "    \"\"\"Generate a random watermark whose encoding is not yet in duplicate_check.\n",
    "\n",
    "    Args:\n",
    "        characters: Sequence of symbols to sample from, with replacement.\n",
    "        watermark_length: Number of symbols in the watermark. Negative values\n",
    "            behave like 0, exactly as range() treats them.\n",
    "        duplicate_check: Set of encodings already issued; updated in place.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (duplicate_check, random_watermark, encoded_watermark). The\n",
    "        encoding concatenates the decimal index of each chosen symbol, so it\n",
    "        is only unambiguous for alphabets of at most 10 symbols.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no unused encoding remains (this includes an empty\n",
    "            alphabet combined with a positive length).\n",
    "    \"\"\"\n",
    "    import itertools\n",
    "\n",
    "    alphabet_size = len(characters)\n",
    "    length = max(watermark_length, 0)\n",
    "\n",
    "    # Without this guard the retry loop below never terminates once every\n",
    "    # encoding is taken. The size comparison is a cheap pre-filter; the full\n",
    "    # enumeration only runs when the set is large enough to cover every code.\n",
    "    if len(duplicate_check) >= alphabet_size ** length:\n",
    "        all_codes = (\n",
    "            ''.join(map(str, combo))\n",
    "            for combo in itertools.product(range(alphabet_size), repeat=length)\n",
    "        )\n",
    "        if all(code in duplicate_check for code in all_codes):\n",
    "            raise ValueError('no unused watermark of this length remains')\n",
    "\n",
    "    # Rejection sampling: redraw until the encoding has not been issued before.\n",
    "    while True:\n",
    "        indices = [random.randrange(alphabet_size) for _ in range(length)]\n",
    "        encoded_watermark = ''.join(map(str, indices))\n",
    "        if encoded_watermark not in duplicate_check:\n",
    "            break\n",
    "\n",
    "    random_watermark = ''.join(characters[index] for index in indices)\n",
    "    duplicate_check.add(encoded_watermark)\n",
    "    return duplicate_check, random_watermark, encoded_watermark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# random_generate adds the new encoding to duplicate_check in place and returns the same set.\n",
    "duplicate_check, watermark, encoded_watermark = random_generate(characters, WATERMARK_LEN, duplicate_check)\n",
    "# The watermark is WATERMARK_LEN invisible characters (not bits), inserted just before \"with\".\n",
    "watermarked_sentence = f\"This sentence is embedded {watermark}with a {WATERMARK_LEN}-character watermark.\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This sentence is embedded <span style=\"color: red;\">U+200B U+200D U+2063 U+200C U+200C U+2064 U+2064 U+2062 U+2064 U+2063</span> with a 10-character watermark."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This sentence is embedded &#x200b;&#x200d;&#x2063;&#x200c;&#x200c;&#x2064;&#x2064;&#x2062;&#x2064;&#x2063;with a 10-character watermark."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This sentence is not embedded with a 10-character watermark."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This sentence is embedded⁤⁣⁣⁤‍⁤⁣⁤‌⁣with a 10-bit watermark.\n"
     ]
    }
   ],
   "source": [
    "# Show the watermarked sentence; the embedded characters are zero-width/invisible code points.\n",
    "print(watermarked_sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This sentence is not embedded with a 10-bit watermark.\n"
     ]
    }
   ],
   "source": [
    "# Control sentence with no watermark; the length is counted in characters, not bits.\n",
    "print(f\"This sentence is not embedded with a {WATERMARK_LEN}-character watermark.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nlp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
