{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5f7f87bd-6b96-49e2-a682-2f278f36307e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/scratch/hht9zt/conda-envs/reg/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "2026-01-15 19:18:11.917116: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2026-01-15 19:18:11.919518: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
      "2026-01-15 19:18:11.961975: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2026-01-15 19:18:13.187906: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     ]
    }
   ],
   "source": [
    "from pypdf import PdfReader\n",
    "import pickle\n",
    "import faiss\n",
    "import numpy as np\n",
    "import openai\n",
    "from sentence_transformers import SentenceTransformer\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "29c8e201-59ba-4e23-a344-7f5d86c92c1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "EMBED_MODEL_NAME = \"all-MiniLM-L6-v2\"\n",
    "embedder = SentenceTransformer(EMBED_MODEL_NAME, device=\"cuda\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "48103229-9377-4ff1-a6df-7c788be088de",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_pdf_text(path):\n",
    "    reader = PdfReader(path)\n",
    "    text = []\n",
    "    for page in reader.pages:\n",
    "        page_text = page.extract_text()\n",
    "        if page_text:\n",
    "            text.append(page_text)\n",
    "    return \"\\n\".join(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c01bc42c-a484-4412-9b5a-1970dfc81cea",
   "metadata": {},
   "outputs": [],
   "source": [
    "def chunk_by_subsection(text):\n",
    "\n",
    "    pattern = r\"(\\n?\\d+\\.\\d+\\.\\d+\\s+[A-Z][^\\n]+)\"\n",
    "\n",
    "    parts = re.split(pattern, text)\n",
    "\n",
    "    chunks = []\n",
    "\n",
    "    current_title = None\n",
    "    current_body = []\n",
    "\n",
    "    for part in parts:\n",
    "        part = part.strip()\n",
    "        if not part:\n",
    "            continue\n",
    "\n",
    "        # If this part looks like a subsection header\n",
    "        if re.match(r\"\\d+\\.\\d+\\.\\d+\\s+\", part):\n",
    "            if current_title is not None:\n",
    "                chunks.append({\n",
    "                    \"title\": current_title,\n",
    "                    \"text\": \"\\n\".join(current_body)\n",
    "                })\n",
    "            current_title = part\n",
    "            current_body = []\n",
    "        else:\n",
    "            current_body.append(part)\n",
    "\n",
    "    # Add the last chunk\n",
    "    if current_title is not None:\n",
    "        chunks.append({\n",
    "            \"title\": current_title,\n",
    "            \"text\": \"\\n\".join(current_body)\n",
    "        })\n",
    "\n",
    "    return chunks\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d9524279-f5ff-4557-a1ac-1577bd6204a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def embed_chunks(subsections):\n",
    "\n",
    "    texts = []\n",
    "    for i, sec in enumerate(subsections, 1):\n",
    "        print(f\"Embedding subsection {i}: {sec['title']}\")\n",
    "        content = sec[\"title\"] + \"\\n\\n\" + sec[\"text\"]\n",
    "        texts.append(content)\n",
    "\n",
    "    # Batch embedding (fast & stable)\n",
    "    embeddings = embedder.encode(\n",
    "        texts,\n",
    "        batch_size=8,\n",
    "        show_progress_bar=True,\n",
    "        normalize_embeddings=True\n",
    "    )\n",
    "\n",
    "    return np.array(embeddings, dtype=\"float32\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d8bc7399-7b7b-4625-bf8e-668acb937d6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_faiss_index(embeddings):\n",
    "    dim = len(embeddings[0])\n",
    "    index = faiss.IndexFlatL2(dim)\n",
    "    index.add(np.array(embeddings).astype(\"float32\"))\n",
    "    return index\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b306770a-609d-4738-9a95-04405c84effc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "def save_vector_store(index, chunks, path_prefix=\"brauer_ch2\"):\n",
    "    faiss.write_index(index, f\"{path_prefix}.index\")\n",
    "    with open(f\"{path_prefix}_chunks.pkl\", \"wb\") as f:\n",
    "        pickle.dump(chunks, f)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "eadab08c-a6f4-4b34-b39d-cd045973b537",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total chunks: 19\n",
      "Embedding subsection 1: 2.1.1 Simple Epidemic Models\n",
      "Embedding subsection 2: 2.1.2 The Kermack–McKendrick Model\n",
      "Embedding subsection 3: 2.1.3 Kermack–McKendrick Models with General\n",
      "Embedding subsection 4: 2.1.4 Exposed Periods\n",
      "Embedding subsection 5: 2.1.5 Treatment Models\n",
      "Embedding subsection 6: 2.1.6 An Epidemic Management\n",
      "Embedding subsection 7: 2.1.7 Stochastic Models for Disease Outbreaks\n",
      "Embedding subsection 8: 2.2.1 The SIR Model\n",
      "Embedding subsection 9: 2.2.2 The SIS Model\n",
      "Embedding subsection 10: 2.3.1 Herd Immunity\n",
      "Embedding subsection 11: 2.3.2 Age at Infection\n",
      "Embedding subsection 12: 2.3.3 The Interepidemic Period\n",
      "Embedding subsection 13: 2.3.5 Disease as Population Control\n",
      "Embedding subsection 14: 2.4.1 The Basic SI∗R Model\n",
      "Embedding subsection 15: 2.4.2 Equilibria\n",
      "Embedding subsection 16: 2.4.3 The Characteristic Equation\n",
      "Embedding subsection 17: 2.4.4 The Endemic Equilibrium\n",
      "Embedding subsection 18: 2.4.5 An SI∗S Model\n",
      "Embedding subsection 19: 2.4.6 An Age of Infection Epidemic Model\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Batches: 100%|██████████| 3/3 [00:00<00:00, 12.58it/s]\n"
     ]
    }
   ],
   "source": [
    "if __name__ == \"__main__\":\n",
    "    text = load_pdf_text(\"data/Chapter_2_epi.pdf\")\n",
    "    chunks = chunk_by_subsection(text)\n",
    "\n",
    "    print(\"Total chunks:\", len(chunks))\n",
    "\n",
    "    embeddings = embed_chunks(chunks)\n",
    "    index = build_faiss_index(embeddings)\n",
    "\n",
    "    save_vector_store(index, chunks)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59ce30ec-8ba5-49ef-b3a5-fa7239e5a551",
   "metadata": {},
   "source": [
    "### RETRIEVAL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d06d1166-d790-4366-ad40-6824550226b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_vector_store(path=\"brauer_ch2.pkl\"):\n",
    "    with open(path, \"rb\") as f:\n",
    "        return pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "273a82be-1fe4-4e54-b8cb-31b10825d489",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_vector_store(path_prefix=\"brauer_ch2\"):\n",
    "    index = faiss.read_index(f\"{path_prefix}.index\")\n",
    "    with open(f\"{path_prefix}_chunks.pkl\", \"rb\") as f:\n",
    "        chunks = pickle.load(f)\n",
    "    return index, chunks\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d074d629-4b6c-496b-a5fa-b2c2172782b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def embed_query(query):\n",
    "    emb = embedder.encode(\n",
    "        query,\n",
    "        normalize_embeddings=True\n",
    "    )\n",
    "    return emb\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "aa9f7257-5019-4d13-80d7-6926cd5c8739",
   "metadata": {},
   "outputs": [],
   "source": [
    "# def embed_query(query, model=\"text-embedding-3-large\"):\n",
    "#     resp = openai.Embedding.create(\n",
    "#         model=model,\n",
    "#         input=query\n",
    "#     )\n",
    "#     return resp[\"data\"][0][\"embedding\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "244db119-6326-43de-b694-2b28935fbca0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def retrieve_chunks(query, k=3):\n",
    "    index, chunks = load_vector_store()\n",
    "\n",
    "    q_emb = embed_query(query)\n",
    "    q_emb = np.array([q_emb]).astype(\"float32\")\n",
    "\n",
    "    distances, indices = index.search(q_emb, k)\n",
    "\n",
    "    results = []\n",
    "    for rank, idx in enumerate(indices[0]):\n",
    "        results.append({\n",
    "            \"rank\": rank + 1,\n",
    "            \"distance\": float(distances[0][rank]),\n",
    "            \"text\": chunks[idx]\n",
    "        })\n",
    "\n",
    "    return results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "59782b9c-7ab3-4e37-b300-08af680baf37",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "--- RESULT 1 ---\n",
      "Distance: 0.95531165599823\n",
      "{'title': '2.1.2 The Kermack–McKendrick Model', 'text': 'We formulate our descriptions ascompartmental models, with the population\\nunder study being divided into compartments and with assumptions about\\nthe nature and time rate of transfer from one compartment to another. Dis-\\neases that confer immunity have a diﬀerent compartmental structure from\\ndiseases without immunity. We will use the terminology SIR to describe a\\ndisease which confers immunity against re-infection, to indicate that the pas-\\nsage of individuals is from the susceptible class S to the infective class I to\\nthe removed class R. On the other hand, we will use the terminology SIS\\nto describe a disease with no immunity against re-infection, to indicate that\\nthe passage of individuals is from the susceptible class to the infective class\\nand then back to the susceptible class. Other possibilities includeSEIR and\\nSEIS models, with an exposed period between being infected and becom-\\ning infective, and SIRS models, with temporary immunity on recovery from\\ninfection.\\nThe independent variable in our compartmental models is the time t and\\nthe rates of transfer between compartments are expressed mathematically as\\nderivatives with respect to time of the sizes of the compartments, and as a\\nresult our models are formulated initially as diﬀerential equations. Possible\\n2 Compartmental Models 25\\ngeneralizations, which we shall not explore in these notes, include models in\\nwhich the rates of transfer depend on the sizes of compartments over the past\\nas well as at the instant of transfer, leading to more general types of func-\\ntional equations, such as diﬀerential-diﬀerence equations, integral equations,\\nor integro-diﬀerential equations.\\nIn order to model such an epidemic we divide the population being studied\\ninto three classes labeledS, I,a n dR.W el e tS(t) denote the number of indi-\\nviduals who are susceptible to the disease, that is, who are not (yet) infected\\nat time t. I(t) denotes the number of infected individuals, assumed infectious\\nand able to spread the disease by contact with susceptibles.R(t) denotes the\\nnumber of individuals who have been infected and then removed from the\\npossibility of being infected again or of spreading infection. Removal is car-\\nried out either through isolation from the rest of the population or through\\nimmunization against infection or through recovery from the disease with full\\nimmunity against reinfection or through death caused by the disease. These\\ncharacterizations of removed members are diﬀerent from an epidemiological\\nperspective but are often equivalent from a modeling point of view which\\ntakes into account only the state of an individual with respect to the disease.\\nIn formulating models in terms of the derivatives of the sizes of each com-\\npartment we are assuming that the number of members in a compartment\\nis a diﬀerentiable function of time. This may be a reasonable approximation\\nif there are many members in a compartment, but it is certainly suspect\\notherwise. In formulating models as diﬀerential equations, we are assuming\\nthat the epidemic process is deterministic, that is, that the behaviour of a\\npopulation is determined completely by its history and by the rules which\\ndescribe the model. In other chapters of this volume Linda Allen and Ping\\nYan describe the study of stochastic models in which probabilistic concepts\\nare used and in which there is a distribution of possible behaviours. The de-\\nveloping study of network science, introduced in Chap. 4 of this volume and\\ndescribed in [28,30,33], is another approach.\\nThe basic compartmental models to describe the transmission of commu-\\nnicable diseases are contained in a sequence of three papers by W.O. Ker-\\nmack and A.G. McKendrick in 1927, 1932, and 1933 [21–23]. The ﬁrst of\\nthese papers described epidemic models. What is often called the Kermack–\\nMcKendrick epidemic model is actually a special case of the general model\\nintroduced in this paper. The general model included dependence on age of\\ninfection, that is, the time since becoming infected. Curiously, Kermack and\\nMcKendrick did not explore this situation further in their later models which\\nincluded demographic eﬀects. Age of infection models have become important\\nin the study of HIV/AIDS, and we will return to them in the last section of\\nthis chapter.\\nThe special case of the model proposed by Kermack and McKendrick in\\n1927 which is the starting point for our study of epidemic models is\\n26 F. Brauer\\nS′ = −βSI\\nI′ = βSI − αI\\nR′ = αI .\\nA ﬂow chart is shown in Fig. 2.1. It is based on the following assumptions:\\nS IR\\nFig. 2.1 Flow chart for the SIR model\\n(1) An average member of the population makes contact suﬃcient to trans-\\nmit infection with βN others per unit time, where N represents total\\npopulation size (mass action incidence) .\\n(2) Infectives leave the infective class at rate αI per unit time.\\n(3) There is no entry into or departure from the population, except possibly\\nthrough death from the disease.\\nAccording to (1), since the probability that a random contact by an in-\\nfective is with a susceptible, who can then transmit infection, is S/N,t h e\\nnumber of new infections in unit time per infective is ( βN)(S/N), giving\\na rate of new infections ( βN)(S/N)I = βSI. Alternately, we may argue\\nthat for a contact by a susceptible the probability that this contact is with\\nan infective is I/N and thus the rate of new infections per susceptible is\\n(βN)(I/N), giving a rate of new infections ( βN)(I/N)S = βSI. Note that\\nboth approaches give the same rate of new infections; there are situations\\nwhich we shall encounter where one is more appropriate than the other. We\\nneed not give an algebraic expression for N since it cancels out of the ﬁnal\\nmodel, but we should note that for a disease that is fatal to all who are in-\\nfected N = S+I; while, for a disease from which all infected members recover\\nwith immunity,N = S +I +R. Later, we will allow the possibility that some\\ninfectives recover while others die of the disease. The hypothesis (3) really\\nsays that the time scale of the disease is much faster than the time scale\\nof births and deaths so that demographic eﬀects on the population may be\\nignored. An alternative view is that we are only interested in studying the\\ndynamics of a single epidemic outbreak. In later sections we shall consider\\nmodels that are the same as those considered in this ﬁrst section except for\\nthe incorporation of demographic eﬀects (births and deaths) along with the\\ncorresponding epidemiological assumptions.\\n2 Compartmental Models 27\\nThe assumption (2) requires a fuller mathematical explanation, since the\\nassumption of a recovery rate proportional to the number of infectives has\\nno clear epidemiological meaning. We consider the “cohort” of members who\\nwere all infected at one time and let u(s) denote the number of these who\\nare still infective s time units after having been infected. If a fraction α of\\nthese leave the infective class in unit time then\\nu′ = −αu ,\\nand the solution of this elementary diﬀerential equation is\\nu(s)= u(0)e−αs .\\nThus, the fraction of infectives remaining infective s time units after having\\nbecome infective is e−αs, so that the length of the infective period is dis-\\ntributed exponentially with mean\\n∫ ∞\\n0 e−αsds =1 /α, and this is what (2)\\nreally assumes.\\nThe assumptions of a rate of contacts proportional to population size N\\nwith constant of proportionalityβ, and of an exponentially distributed recov-\\nery rate are unrealistically simple. More general models can be constructed\\nand analyzed, but our goal here is to show what may be deduced from ex-\\ntremely simple models. It will turn out that many more realistic models\\nexhibit very similar qualitative behaviours.\\nIn our model R is determined once S and I are known, and we can drop\\nthe R equation from our model, leaving the system of two equations\\nS′ = −βSI (2.1)\\nI′ =( βS − α)I.\\nWe are unable to solve this system analytically but we learn a great deal\\nabout the behaviour of its solutions by the following qualitative approach.\\nTo begin, we remark that the model makes sense only so long asS(t)a n dI(t)\\nremain non-negative. Thus if either S(t)o r I(t) reaches zero we consider the\\nsystem to have terminated. We observe thatS′ < 0 for all t and I′ > 0i fa n d\\nonly ifS>α / β.T h u sI increases so long asS>α / βbut sinceS decreases for\\nall t, I ultimately decreases and approaches zero. If S(0) <α / β ,Idecreases\\nto zero (no epidemic), while if S(0) >α / β , Iﬁrst increases to a maximum\\nattained when S = α/β and then decreases to zero (epidemic). We think of\\nintroducing a small number of infectives into a population of susceptibles and\\nask whether there will be an epidemic. The quantity βS(0)/α is a threshold\\nquantity, called the basic reproduction number and denoted by R0,w h i c h\\ndetermines whether there is an epidemic or not. If R0 < 1 the infection dies\\nout, while if R0 > 1 there is an epidemic.\\nThe deﬁnition of the basic reproduction number R0 is that the basic re-\\nproduction number is the number of secondary infections caused by a single\\ninfective introduced into a wholly susceptible population of size K ≈ S(0)\\n28 F. Brauer\\nover the course of the infection of this single infective. In this situation, an\\ninfective makes βK contacts in unit time, all of which are with susceptibles\\nand thus produce new infections, and the mean infective period is 1/α;t h u s\\nthe basic reproduction number is actually βK/α rather than βS(0)/α.\\nInstead of trying to solve for S and I as functions of t, we divide the two\\nequations of the model to give\\nI′\\nS′ = dI\\ndS = (βS − α)I\\n−βSI = −1+ α\\nβS ,\\nand integrate to ﬁnd the orbits (curves in the ( S, I)-plane, or phase plane)\\nI = −S + α\\nβ logS + c, (2.2)\\nwith c an arbitrary constant of integration. Here, we are using log to denote\\nthe natural logarithm. Another way to describe the orbits is to deﬁne the\\nfunction\\nV (S, I)= S + I − α\\nβ log S\\nand note that each orbit is a curve given implicitly by the equationV (S, I)= c\\nfor some choice of the constant c. The constant c is determined by the ini-\\ntial values S(0), I(0) of S and I, respectively, because c = V (S(0),I (0)) =\\nS(0)+I(0)−αlog S(0)/β. Note that the maximum value ofI on each of these\\norbits is attained when S = α/β. Note also that since none of these orbits\\nreaches theI -a x i s ,S> 0 for all times. In particular,S∞ = limt→∞ S(t) > 0,\\nwhich implies that part of the population escapes infection.\\nLet us think of a population of size K into which a small number of\\ninfectives is introduced, so that S0 ≈ K, I0 ≈ 0, and R0 = βK/α.I fw eu s e\\nthe fact that limt→∞ I(t) = 0, and let S∞ = limt→∞ S(t), then the relation\\nV (S0,I0)= V (S∞ ,0) gives\\nK − α\\nβ log S0 = S∞ − α\\nβ log S∞ ,\\nfrom which we obtain an expression for β/α in terms of the measurable\\nquantities S0 and S∞ , namely\\nβ\\nα = (log S0 − log S∞ )\\nK − S∞\\n.\\nWe may rewrite this in terms of R0 as the ﬁnal size relation\\nlog S0 − log S∞ = R0\\n[\\n1 − S∞\\nK\\n]\\n. (2.3)\\nIn particular, since the right side of (2.3) is ﬁnite, the left side is also ﬁnite,\\nand this shows that S∞ > 0.\\n2 Compartmental Models 29\\nIt is generally diﬃcult to estimate the contact rate β which depends on\\nthe particular disease being studied but may also depend on social and be-\\nhavioural factors. The quantitiesS0 and S∞ may be estimated by serological\\nstudies (measurements of immune responses in blood samples) before and\\nafter an epidemic, and from these data the basic reproduction number R0\\nmay be estimated by using (2.3). This estimate, however, is a retrospective\\none which can be determined only after the epidemic has run its course.\\nInitially, the number of infectives grows exponentially because the equation\\nfor I may be approximated by\\nI′ =( βK − α)I\\nand the initial growth rate is\\nr = βK − α = α(R0 − 1) .\\nThis initial growth rate r may be determined experimentally when an epi-\\ndemic begins. Then since K and α may be measured β may be calculated as\\nβ = r + α\\nK .\\nHowever, because of incomplete data and under-reporting of cases this esti-\\nmate may not be very accurate. This inaccuracy is even more pronounced for\\nan outbreak of a previously unknown disease, where early cases are likely to\\nbe misdiagnosed.\\nThe maximum number of infectives at any time is the number of infectives\\nwhen the derivative of I is zero, that is, when S = α/β. This maximum is\\ngiven by\\nImax = S0 + I0 − α\\nβ log S0 − α\\nβ + α\\nβ log α\\nβ , (2.4)\\nobtained by substituting S = α/β, I = Imax into (2.2).\\nExample. (The Great Plague in Eyam) The village of Eyam near Sheﬃeld,\\nEngland suﬀered an outbreak of bubonic plague in 1665–1666 the source of\\nwhich is generally believed to be the Great Plague of London. The Eyam\\nplague was survived by only 83 of an initial population of 350 persons. As\\ndetailed records were preserved and as the community was persuaded to\\nquarantine itself to try to prevent the spread of disease to other communities,\\nthe disease in Eyam has been used as a case study for modeling [31]. Detailed\\nexamination of the data indicates that there were actually two outbreaks\\nof which the ﬁrst was relatively mild. Thus we shall try to ﬁt the model\\n(2.1) over the period from mid-May to mid-October 1666, measuring time in\\nmonths with an initial population of seven infectives and 254 susceptibles,\\nand a ﬁnal population of 83. Values of susceptibles and infectives in Eyam are\\ngiven in [31] for various dates, beginning with S(0) = 254,I (0) = 7, shown\\nin Table 2.1.\\n30 F. Brauer\\n0\\n5\\n10\\n15\\n20\\n25\\n30\\nl(t)\\n50 100 150 200 250\\nS(t)\\nFig. 2.2 The S– I plane\\nTable 2.1 Eyam Plague data\\nDate (1666) Susceptibles Infectives\\nJuly 3/4 235 14.5\\nJuly 19 201 22\\nAugust 3/4 153.5 29\\nAugust 19 121 21\\nSeptember 3/4 108 8\\nSeptember 19 97 8\\nOctober 4/5 Unknown Unknown\\nOctober 20 83 0\\nThe relation (2.3) with S0 = 254, I0 =7 , S∞ = 83 gives β/α =6 .54 ×\\n10−3, α/β = 153. The infective period was 11 days, or 0.3667 month, so that\\nα =2 .73. Then β =0 .0178. The relation (2.4) gives an estimate of 30.4 for\\nthe maximum number of infectives. We use the values obtained here for the\\nparameters β and α in the model (2.1) for simulations of both the phase\\nplane, the (S, I)-plane, and for graphs of S and I as functions of t (Figs. 2.2,\\n2.3, and 2.4). Figure 2.5 plots these data points together with the phase\\nportrait given in Fig. 2.2 for the model (2.1).\\nThe actual data for the Eyam epidemic are remarkably close to the predic-\\ntions of this very simple model. However, the model is really too good to be\\ntrue. Our model assumes that infection is transmitted directly between peo-\\nple. While this is possible, bubonic plague is transmitted mainly by rat ﬂeas.\\nWhen an infected rat is bitten by a ﬂea, the ﬂea becomes extremely hungry\\nand bites the host rat repeatedly, spreading the infection in the rat. When\\nthe host rat dies its ﬂeas move on to other rats, spreading the disease further.\\nAs the number of available rats decreases the ﬂeas move to human hosts, and\\nthis is how plague starts in a human population (although the second phase\\nof the epidemic may have been the pneumonic form of bubonic plague, which\\n2 Compartmental Models 31\\n0\\n50\\n100\\n150\\n200\\n250\\nS(t)\\n12345\\nt\\nFig. 2.3 S as a function of t\\n0\\n5\\n10\\n15\\n20\\n25\\n30\\nl(t)\\n12345\\nt\\nFig. 2.4 I as a function of t\\ncan be spread from person to person). One of the main reasons for the spread\\nof plague from Asia into Europe was the passage of many trading ships; in\\nmedieval times ships were invariably infested with rats. An accurate model\\nof plague transmission would have to include ﬂea and rat populations, as well\\nas movement in space. Such a model would be extremely complicated and\\nits predictions might well not be any closer to observations than our simple\\nunrealistic model. In [31] a stochastic model was also used to ﬁt the data,\\nbut the ﬁt was rather poorer than the ﬁt for the simple deterministic model\\n(2.1).\\n32 F. Brauer\\n2\\n4\\n6\\n8\\n10\\n12\\n14\\n16\\n18\\n20\\n22\\n24\\n26\\n28\\n30\\n32\\nl(t)\\nS(t)\\n50 100 150 200 250\\nFig. 2.5 The S– I plane, model and data\\nIn the village of Eyam the rector persuaded the entire community to quar-\\nantine itself to prevent the spread of disease to other communities. This policy\\nactually increased the infection rate in the village by keeping ﬂeas, rats, and\\npeople in close contact with one another, and the mortality rate from bubonic\\nplague was much higher in Eyam than in London. Further, the quarantine\\ncould do nothing to prevent the travel of rats and thus did little to prevent the\\nspread of disease to other communities. One message this suggests to math-\\nematical modelers is that control strategies based on false models may be\\nharmful, and it is essential to distinguish between assumptions that simplify\\nbut do not alter the predicted eﬀects substantially, and wrong assumptions\\nwhich make an important diﬀerence.'}\n",
      "\n",
      "--- RESULT 2 ---\n",
      "Distance: 1.1873576641082764\n",
      "{'title': '2.2.2 The SIS Model', 'text': 'In order to describe a model for a disease from which infectives recover with\\nimmunity against reinfection and that includes births and deaths as in the\\nmodel (2.16), we may modify the model (2.16) by removing the equation for\\nR and moving the term fαI describing the rate of recovery from infection to\\nthe equation for S. This gives the model\\nS′ = Λ(N) − β(N)SI − µS + fαI (2.20)\\nI′ = β(N)SI − αI − µI\\ndescribing a population with a density-dependent birth rate Λ(N) per unit\\ntime, a proportional death rate µ in each class, and with a rate α of depar-\\nture from the infective class through recovery or disease death and with a\\nfraction f of infectives recovering with no immunity against reinfection. In\\nthis model, if f< 1 the total population size is not constant and K repre-\\nsents a carrying capacity, or maximum possible population size, rather than\\na constant population size.\\nIt is easy to verify that\\nR0 = Kβ(K)\\nµ + α .\\nIf we add the two equations of (2.20), and use N = S + I we obtain\\nN ′ = Λ(N) − µN − (1 − f)αI .\\nFor theSIS model we are able to carry out the analysis with a general contact\\nrate. If f = 1 the equation for N is\\nN ′ = Λ(N) − µN\\nand N approaches the limit K. The system (2.20) is asymptotically au-\\ntonomous and its asymptotic behaviour is the same as that of the single\\ndiﬀerential equation\\nI′ = β(K)I(K − I) − (α + µ)I, (2.21)\\nwhere S has been replaced by K − I. But (2.21) is a logistic equation which\\nis easily solved analytically by separation of variables or qualitatively by an\\nequilibrium analysis. We ﬁnd that I → 0i f Kβ(K) < (µ+α), or R0 < 1a n d\\nI → I∞ > 0 with\\nI∞ = K − µ + α\\nβ(K) = K(1 − 1\\nR0\\n)\\nif Kβ(K) > (µ + α)o r R0 > 1.\\n2 Compartmental Models 53\\nTo analyze the SIS model if f< 1, it is convenient to use I and N as\\nvariables instead of S and I, with S replaced by N − I. This gives the model\\nI′ = β(N)I(N − I) − (µ + α)I (2.22)\\nN ′ = Λ(N) − µN − (1 − f)αI .\\nEquilibria are found by setting the right sides of the two diﬀerential equations\\nequal to zero. The ﬁrst of the resulting algebraic equations factors, giving\\ntwo alternatives. The ﬁrst alternative is I = 0, which will give a disease-free\\nequilibrium I =0 ,N = K, and the second alternative is β(N)(N − I)=\\nµ + α, which may give an endemic equilibrium. For an endemic equilibrium\\n(I∞ ,N ∞ ) the ﬁrst equation gives\\nI∞ β(N∞ )= N∞ β(N∞ ) − (µ + α) .\\nSubstitution into the other equilibrium condition gives\\nΛ(N∞ )= µN∞ +( 1 − f)α[N∞ − µ + α\\nβ(N∞ )] ,\\nwhich can be simpliﬁed to\\nβ(N∞ )Λ(N∞ )= µN∞ β(N∞ )+( 1 − f)α[N∞ β(N∞ ) − (µ + α)] . (2.23)\\nAt N = 0 the left side of (2.23) is β(0)Λ(0) ≥ 0, while the right side is\\n−(1 − f)α(µ + α), which is negative since f< 1. At N = K the left side of\\n(2.23) is\\nβ(K)Λ(K)= µKβ(K)\\nwhile the right side of (2.23) is\\nµKβ(K)+( 1 − f)α[Kβ(K) − (µ + α)] .\\nSince\\nR0 = Kβ(K)\\nµ + α ,\\nif R0 > 1 the left side of (2.23) is less than the right side of (2.23), and\\nthis implies that (2.23) has a solution for N, 0 <N<K .T h u st h e r ei sa n\\nendemic equilibrium if R0 > 1. If R0 < 1 this reasoning may be used to show\\nthat there is no endemic equilibrium.\\nThe linearization of (2.22) at an equilibrium (I∞ ,N ∞ ) has coeﬃcient ma-\\ntrix\\n[ β(N∞ )(N∞ − 2I∞ ) − (µ + α) β(N∞ )I∞ + β′(N∞ )I∞ (N∞ − I∞ )\\n−(1 − f)αΛ ′(N∞ ) − µ.\\n]\\nAt the disease-free equilibrium the matrix is\\n54 F. Brauer\\n[\\nKβ(K) − (µ + α)0\\n−(1 − f)αΛ ′(K) − µ\\n]\\n,\\nwhich has eigenvalues Λ′(K) − µ and KβK − (µ + α). Thus, the disease-\\nfree equilibrium is asymptotically stable if Kβ(K) <µ + α,o r R0 < 1, and\\nunstable if Kβ(K) >µ +α,o r R0 > 1. Note that the condition for instability\\nof the disease-free equilibrium is the same as the condition for the existence\\nof an endemic equilibrium.\\nAt an endemic equilibrium, since β(N∞ )(N∞ − I∞ )= µ + α, the matrix\\nis [ −Iβ(N∞ ) I∞ β(N∞ )+ I∞ (N∞ − I∞ )β′(N∞ )\\n−(1 − f)αΛ ′(N∞ ) − µ\\n]\\n.\\nSince β′(N∞ ) ≤ 0\\nβ(N∞ )+( N∞ − I∞ )β′(N∞ ) ≥ β(N∞ )+ N∞ β′(N∞ ) ≥ 0 .\\nThus if Λ′(N∞ ) <µ the coeﬃcient matrix has sign structure\\n[\\n− +\\n−−\\n]\\n.\\nIt is clear that the coeﬃcient matrix has negative trace and positive determi-\\nnant if Λ′(N) <µ and this implies that the endemic equilibrium is asymp-\\ntotically stable. Thus, the endemic equilibrium, which exists if R0 > 1, is\\nalways asymptotically stable. If R0 < 1 the system has only the disease-free\\nequilibrium and this equilibrium is asymptotically stable. In the case f =1\\nthe veriﬁcation of these properties remains valid if there are no births and\\ndeaths. This suggests that a requirement for the existence of an endemic\\nequilibrium is a ﬂow of new susceptibles either through births, as in theSIR\\nmodel or through recovery without immunity against reinfection, as in the\\nSIS model with or without births and deaths.\\nIf the epidemiological and demographic time scales are very diﬀerent, for\\nthe SIR model we observed that the approach to endemic equilibrium is like a\\nrapid and severe epidemic. The same happens in theSIS model, especially if\\nthere is a signiﬁcant number of deaths due to disease. If there are few disease\\ndeaths the number of infectives at endemic equilibrium may be substantial,\\nand there may be damped oscillations of large amplitude about the endemic\\nequilibrium.\\nFor both the SIR and SIS models we may write the diﬀerential equation\\nfor I as\\nI′ = I[β(N)S − (µ + α)] = β(N)I[S − S∞ ] ,\\nwhich implies that whenever S exceeds its endemic equilibrium value S∞ , I\\nis increasing and epidemic-like behaviour is possible. If R0 < 1a n dS<K\\nit follows that I′ < 0, and thus I is decreasing. Thus, if R0 < 1, I cannot\\nincrease and no epidemic can occur.\\n2 Compartmental Models 55\\nNext, we will turn to some applications of SIR and SIS models, taken\\nmainly from [3].\\n2.3 Some Applications'}\n"
     ]
    }
   ],
   "source": [
    "results = retrieve_chunks(\n",
    "    \"loss of immunity and immune escape in SIR models\",\n",
    "    k=2\n",
    ")\n",
    "\n",
    "for r in results:\n",
    "    print(f\"\\n--- RESULT {r['rank']} ---\")\n",
    "    print(\"Distance:\", r[\"distance\"])\n",
    "    print(r[\"text\"])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb22ee95-b8e8-496d-a92e-7394ee7e0c9d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (reg)",
   "language": "python",
   "name": "your_env_name"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
