{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Helper notebook to generate configuration file.\n",
    "\n",
    "The template of configuration file is adapted from: https://github.com/eth-sri/watermark-stealing/tree/main.\n",
    "\n",
    "For Stealing, please refer to watermark-stealing documentation.\n",
    "\n",
    "For Learning, setting the Server model ($\\mathcal{M}$), the spoofer LM, the seeding scheme and $\\delta$ below in the function below is all you need to generate a configuration file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_config(server_model, attacker_model, seeding_scheme, delta):\n",
    "    \n",
    "    if \"llama\" in server_model:\n",
    "        out_dir = \"out_llama/\"\n",
    "    elif \"mistral\" in server_model:\n",
    "        out_dir = \"out_mistral/\"\n",
    "    \n",
    "    config = f\"\"\"\n",
    "meta:\n",
    "  device: 'cuda'\n",
    "  seed: [123] \n",
    "  out_root_dir: '{out_dir}'\n",
    "  use_neptune: false\n",
    "server:\n",
    "  model:\n",
    "    cfg_path: '{server_model}'\n",
    "    skip: true\n",
    "  watermark:\n",
    "    scheme: 'kgw'\n",
    "    generation:\n",
    "      - seeding_scheme:  [\"{seeding_scheme}\"]\n",
    "        gamma: 0.25\n",
    "        delta: {delta}\n",
    "    detection:\n",
    "      normalizers: []\n",
    "      ignore_repeated_ngrams: true \n",
    "      z_threshold: 4.0 \n",
    "attacker:\n",
    "  algo: 'our'\n",
    "  model:\n",
    "    cfg_path: '{attacker_model}'\n",
    "    skip: false\n",
    "    use_sampling: true\n",
    "  querying:\n",
    "    skip: true\n",
    "    dataset: 'c4'\n",
    "    batch_size: 64\n",
    "    start_from_batch_num: 0\n",
    "    end_at_batch_num: 500\n",
    "    apply_watermark: true \n",
    "  learning:\n",
    "    skip: false\n",
    "    mode: \"fast\"\n",
    "    nb_queries: 30000\n",
    "  generation:\n",
    "    sysprompt: null\n",
    "    spoofer_strength: 9.5\n",
    "    w_abcd: 2.0\n",
    "    w_partials: 1.0\n",
    "    w_empty: 0.5\n",
    "    w_future: 0.0\n",
    "    min_wm_count_nonempty: 2 \n",
    "    min_wm_mass_empty: 0.00007\n",
    "    future_num_cands: 5\n",
    "    future_num_options_per_fillin: 10\n",
    "    future_prefix_size: 10 \n",
    "    future_local_w_decay: 0.9 \n",
    "    panic_from: 750\n",
    "    repetition_penalty: 1.6\n",
    "    use_ft_counts: true\n",
    "    prevent_eos_if_zest_bad: true\n",
    "    use_graceful_conclusion: true \n",
    "evaluator:\n",
    "  skip: true\n",
    "  batch_size: 4\n",
    "  eval_class: \"spoofing\"\n",
    "  metrics:\n",
    "    - \"detector\"\n",
    "  eval_mode:\n",
    "    - \"dolly-writing-100-long\"\n",
    "gradio:\n",
    "  skip: false \n",
    "  make_public: false \n",
    "  port: 7860\n",
    "  default_prompt: \"Write a long essay about war.\"\n",
    "\n",
    "\"\"\"\n",
    "    \n",
    "    return config\n",
    "\n",
    "\n",
    "def get_script(configs, distilled, n_prompts, dataset):\n",
    "    script = \"#!/bin/bash\\n\"\n",
    "    for config in configs:\n",
    "        script += f\"bash reprompting_pipeline.sh {config} {distilled} {n_prompts} {dataset} Y\\n\"\n",
    "    return script"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "server_models =  []\n",
    "attacker_models = []\n",
    "seeding_schemes = []\n",
    "delta = []\n",
    "\n",
    "for server_model in server_models:\n",
    "    for attacker_model in attacker_models:\n",
    "        for seeding_scheme in seeding_schemes:\n",
    "            for d in delta:\n",
    "                config = get_config(server_model, attacker_model, seeding_scheme, d)\n",
    "                config_name = f\"configs/generated/config_{os.path.basename(server_model)}_{os.path.basename(attacker_model)}_{seeding_scheme}_{d}.yaml\"\n",
    "                os.makedirs(\"configs/generated\", exist_ok=True)\n",
    "                with open(config_name, \"w\") as f:\n",
    "                    f.write(config)\n",
    "                print(f\"Config file {config_name} created\")\n",
    "                \n",
    "                "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ws2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
