{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bc0400e1",
   "metadata": {},
   "source": [
    "# Map your concept to UMLS CUI"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0280561",
   "metadata": {},
   "source": [
    "We assume that you have some related medical concepts (keywords) paired with images in your dataset.\n",
    "\n",
    "If so, we recommend using the [UMLS Metathesaurus Browser](https://uts.nlm.nih.gov/uts/umls/home) to map your concepts to existing CUIs, as shown below.\n",
    "\n",
    "e.g. nodule corresponds to C0028259\n",
    "\n",
    "<img src=\"img/mapping.png\">"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01a84d64",
   "metadata": {},
   "source": [
    "# For practical usage, we advise you to use the following API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "600fe55b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No.1 => CUI = C0028259, Concept Name: Nodule\n",
      "No.2 => CUI = C2700389, Concept Name: Plant nodule\n",
      "No.3 => CUI = C0228505, Concept Name: Nodulus cerebelli\n",
      "C0028259\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "import requests\n",
    "from cachetools import cached, TTLCache\n",
    "from lxml.html import fromstring\n",
    "# Cache for ticket-granting-ticket (TGT) URLs, keyed by Auth instance.\n",
    "# NOTE(review): UTS documents a TGT lifetime of 8 hours; 7 hours leaves a margin - confirm.\n",
    "TTL_7HRS = TTLCache(maxsize=2, ttl=25200)\n",
    "\n",
    "class Auth:\n",
    "    \"\"\"Exchange a UTS API key for single-use service tickets.\n",
    "\n",
    "    The ticket-granting-ticket (TGT) URL is cached per instance in\n",
    "    ``TTL_7HRS``. The service ticket itself is requested anew on every\n",
    "    call, because a single-use ticket must not be handed out twice.\n",
    "\n",
    "    Args:\n",
    "        api_key: UTS API key sent to the login endpoint.\n",
    "    \"\"\"\n",
    "\n",
    "    LOGIN_URL = 'https://utslogin.nlm.nih.gov/cas/v1/api-key'\n",
    "    SERVICE = 'http://umlsks.nlm.nih.gov'\n",
    "    HEADERS = {\n",
    "        'Content-type': 'application/x-www-form-urlencoded',\n",
    "        'Accept': 'text/plain',\n",
    "        'User-Agent': 'python'\n",
    "    }\n",
    "    # Seconds to wait for the login server; without it a stalled\n",
    "    # connection would block the notebook indefinitely.\n",
    "    TIMEOUT = 30\n",
    "\n",
    "    def __init__(self, api_key):\n",
    "        self._api_key = api_key\n",
    "\n",
    "    @cached(TTL_7HRS)\n",
    "    def _get_ticket_granting_ticket_url(self):\n",
    "        \"\"\"Return the TGT URL for this API key, cached in ``TTL_7HRS``.\n",
    "\n",
    "        Raises:\n",
    "            requests.HTTPError: if the login endpoint returns an error status.\n",
    "            IndexError: if the response contains no ``<form action=...>``.\n",
    "        \"\"\"\n",
    "        resp = requests.post(\n",
    "            self.LOGIN_URL,\n",
    "            data={'apikey': self._api_key},\n",
    "            headers=self.HEADERS,\n",
    "            timeout=self.TIMEOUT\n",
    "        )\n",
    "        resp.raise_for_status()\n",
    "        # The TGT URL is read from the action attribute of the returned form.\n",
    "        html = fromstring(resp.text)\n",
    "        return html.xpath('//form/@action')[0]\n",
    "\n",
    "    def get_single_use_service_ticket(self):\n",
    "        \"\"\"Request a fresh service ticket and return it as text.\n",
    "\n",
    "        Deliberately not cached: only the TGT URL lookup is memoised, so\n",
    "        every caller receives a ticket that has not been used before.\n",
    "\n",
    "        Raises:\n",
    "            requests.HTTPError: if either endpoint returns an error status.\n",
    "        \"\"\"\n",
    "        resp = requests.post(\n",
    "            self._get_ticket_granting_ticket_url(),\n",
    "            data={'service': self.SERVICE},\n",
    "            headers=self.HEADERS,\n",
    "            timeout=self.TIMEOUT\n",
    "        )\n",
    "        resp.raise_for_status()\n",
    "        return resp.text\n",
    "\n",
    "class API:\n",
    "    BASE_URL = 'https://uts-ws.nlm.nih.gov/rest'\n",
    "\n",
    "    def __init__(self, *, api_key, version='current'):\n",
    "        self._auth = Auth(api_key=api_key)\n",
    "        self._version = version\n",
    "\n",
    "    def get_cui(self, cui):\n",
    "        url = f'{self.BASE_URL}/content/{self._version}/CUI/{cui}'\n",
    "        return self._get(url=url)\n",
    "    \n",
    "    def get_cui_code(self, keyword):\n",
    "        url = f'{self.BASE_URL}/search/{self._version}/?string={keyword}'\n",
    "        candidates= self._get(url=url)['result']['results']\n",
    "        for i in range(3):\n",
    "            cui=candidates[i]['ui']\n",
    "            name=candidates[i]['name']\n",
    "            print(f'No.{i+1} => CUI = {cui}, Concept Name: {name}')\n",
    "        return candidates[0]['ui']\n",
    "\n",
    "    def _get(self, url):\n",
    "        ticket = self._auth.get_single_use_service_ticket()\n",
    "        resp = requests.get(url, params={'ticket': ticket})\n",
    "        resp.raise_for_status()\n",
    "        return resp.json()\n",
    "    \n",
    "    \n",
    "# Read the UTS API key from the UMLS_API_KEY environment variable so that\n",
    "# no credential is stored in the notebook; a KeyError means it is not set.\n",
    "cui_code = API(api_key=os.environ['UMLS_API_KEY']).get_cui_code(keyword='nodule')\n",
    "print(cui_code)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5f29a1d",
   "metadata": {},
   "source": [
    "# If you do not have concepts paired with your image"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6759ee6",
   "metadata": {},
   "source": [
    "In this case, we advise you to extract CUIs from the paired text.\n",
    "\n",
    "There are several tools available (e.g. [QuickUMLS](https://github.com/Georgetown-IR-Lab/QuickUMLS), [MedCAT](https://github.com/CogStack/MedCAT), [Scispacy](https://github.com/allenai/scispacy)).\n",
    "\n",
    "Here we show an example of how to use **Scispacy** below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "17c5cf38",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(90, 97, 'grouped', 'C0439745'), (98, 116, 'microcalcification', 'C0520594'), (152, 164, 'right breast', 'C0222600')]\n"
     ]
    }
   ],
   "source": [
    "import spacy\n",
    "import scispacy\n",
    "from scispacy.linking import EntityLinker\n",
    "\n",
    "# Ask spaCy to use a GPU if it can, then load the scientific model.\n",
    "spacy.prefer_gpu()\n",
    "nlp = spacy.load('en_core_sci_scibert')\n",
    "# Append the scispaCy entity linker, configured for the UMLS knowledge base.\n",
    "nlp.add_pipe(\n",
    "    'scispacy_linker',\n",
    "    config={'resolve_abbreviations': True, 'linker_name': 'umls'},\n",
    ")\n",
    "linker = nlp.get_pipe('scispacy_linker')\n",
    "\n",
    "def parse_a_text(text):\n",
    "    entities = []\n",
    "    doc = nlp(text)\n",
    "    for ent in doc.ents:\n",
    "        # Noise Filtering\n",
    "        if len(ent.text) == 1:\n",
    "            continue\n",
    "        # Link to UMLS\n",
    "        if len(ent._.kb_ents) == 0:\n",
    "            continue\n",
    "        start_id = ent.start_char\n",
    "        end_id = ent.end_char\n",
    "        cuis = ent._.kb_ents\n",
    "        cuis = [cui[0] for cui in cuis if cui[1] >= 0.95]\n",
    "        if len(cuis) == 0:\n",
    "            continue\n",
    "        entities.append((start_id, end_id, ent.text, cuis[0]))\n",
    "    return entities\n",
    "\n",
    "# Demo: annotate one mammography report sentence and show the linked entities.\n",
    "CUI = parse_a_text(\n",
    "    'The breast mammogram (cranio-caudal view) showing an interval '\n",
    "    'development of a suspicious grouped microcalcification in the '\n",
    "    'upper outer quadrant of the right breast'\n",
    ")\n",
    "print(CUI)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:arl] *",
   "language": "python",
   "name": "conda-env-arl-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
