{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NLM/LMMS paths and parameters\n",
    "vecs_path = f'{basePath}data/vectors/lmms-sp-wsd.roberta-large.vectors.txt'\n",
    "wsd_encoder_cfg = {\n",
    "    'model_name_or_path': 'roberta-large',\n",
    "    'min_seq_len': 0,\n",
    "    'max_seq_len': 512,\n",
    "    'layers': [-n for n in range(1, 24 + 1)],  # all layers, with reversed indices\n",
    "    'layer_op': 'ws',\n",
    "    'weights_path': f'{basePath}data/weights/lmms-sp-wsd.roberta-large.weights.txt',\n",
    "    'subword_op': 'mean'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']\n",
      "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "model_str = 'roberta-large'\n",
    "tokenizer = RobertaTokenizer.from_pretrained(model_str)\n",
    "model = RobertaModel.from_pretrained(model_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading NLM and sense embeddings ...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']\n",
      "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done\n"
     ]
    }
   ],
   "source": [
    "print('Loading NLM and sense embeddings ...')  # (takes a while)\n",
    "wsd_encoder = TransformersEncoder(wsd_encoder_cfg)\n",
    "senses_vsm = SensesVSM(vecs_path, normalize=True)\n",
    "print('Done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def modify_join_tokens(lemma, next_lemma, pos, skip_words):\n",
    "    \n",
    "    # key is lemma to modify\n",
    "    # value is a list with three components \n",
    "    # 1) updated lemma, 2) updated pos, 3) whether to skip upcoming words \n",
    "    join_tokens_dict = {'brand': ['brand-new', 'ADJ', ['-', 'new']], 'pedalling': ['pedal', 'VERB', []], \n",
    "                        'scissor': ['scissors', 'NOUN', []], 'stabbing': ['stab', 'VERB', []], \n",
    "                        'all': ['all-night', 'ADJ', ['-', 'nighter']], 'Scissor': ['scissors', 'NOUN', []], \n",
    "                        'Polar': ['Polar', 'ADJ', []], 'tailless': ['tailless', 'ADJ', []], \n",
    "                        'underwater': ['underwater', 'ADJ', []], \n",
    "                        'dairy': ['dairy', 'NOUN', []], \n",
    "                        'spacewalk': ['spacewalk', 'VERB', []], \n",
    "                        'underneath': ['underneath', 'ADV', []], 'U.S.A.': ['U.S.A.', 'NOUN', []], \n",
    "                        'seventh': ['seventh', 'ADJ', []], \n",
    "                        'higher': ['higher', 'ADJ', []], 'showcase': ['showcase', 'NOUN', []], \n",
    "                        'artisan': ['artisan', 'NOUN', []],  \n",
    "                        'surrounding': ['surroundings', 'NOUN', []]}\n",
    "    \n",
    "    # these don't exist in LMMS\n",
    "    if pos == 'PROPN' or pos == 'NUM':\n",
    "        pos = 'NOUN'\n",
    "\n",
    "    if lemma in join_tokens_dict.keys():\n",
    "        lemma_new = join_tokens_dict[lemma][0]\n",
    "        pos_new = join_tokens_dict[lemma][1]\n",
    "        # in some cases, we want to join words together (like brand new -> brand-new)\n",
    "        # in this case we need to skip the sense embedding for the words that follow (e.g. new)\n",
    "        # so we add them to skip_words. However, we also need to check to make sure \n",
    "        # the upcoming words match, and if not, don't perform the update.\n",
    "        remove_tokens = join_tokens_dict[lemma][2]\n",
    "        if len(remove_tokens) != 0: \n",
    "            if next_lemma not in remove_tokens: # no match, don't perform an update\n",
    "                return lemma, pos, skip_words     \n",
    "        skip_words.extend(remove_tokens)\n",
    "        return lemma_new, pos_new, skip_words\n",
    "    else:\n",
    "        return lemma, pos, skip_words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# words to merge with the next token(s)\n",
    "merge_word = {'brand_new': 'brand-new', 'all_-': 'all-night', 'united_states': \\\n",
    "                'united_states', 'ear_piercings': 'earring', 'tear_gas': 'tear_gas'}\n",
    "\n",
    "# how many tokens to skip after merging words \n",
    "num_skip = {'brand-new': 1, 'all-night': 2, 'united_states': 1, 'earring': 1, 'tear_gas': 1}\n",
    "\n",
    "def modify_pos_word(word, pos, next_word, merge_word=merge_word, num_skip=num_skip):\n",
    "    \n",
    "    # words to modify \n",
    "    update_words = {'artisanal': 'artisan', 'lawnmower': 'mower', 'fulltime': 'full-time', \n",
    "                    'waterbed': 'water_bed', 'airbed': 'air_mattress', 'videogame': 'video_game', '1970': 'seventies', \n",
    "                    'micromanager': 'manager', 'showcase': 'show_off', 'scissor': 'scissors', 'vikings': 'viking', \n",
    "                    'wildland': 'land', 'landform': 'terrain', 'stabbing': 'stab', 'shorter': 'short', \n",
    "                    'pedalling': 'pedal', 'freezing': 'frigid', 'tong': 'tongs', 'feet': 'ft'}\n",
    "    \n",
    "    # words to modify pos tagging \n",
    "    pos_tag = {'mild': 'ADJ', 'spacewalk': 'VERB', 'underwater': 'ADJ', 'artisan': 'NOUN', \n",
    "               'surrounding': 'ADJ', 'brand-new': 'ADJ', 'all-night': 'ADJ', 'underneath': 'ADV', 'polar': 'ADJ', \n",
    "               'stab': 'VERB', 'tear_gas': 'NOUN', 'dairy': 'NOUN', 'seventh': 'ADJ', 'tailless': 'ADJ'}\n",
    "    \n",
    "    modified = False\n",
    "    \n",
    "    # for words that are combined with their next token\n",
    "    if f'{word}_{next_word}' in merge_word.keys():\n",
    "        print(f\"Merging: {word}_{next_word}\")\n",
    "        word = merge_word[f'{word}_{next_word}']\n",
    "        modified = True\n",
    "        \n",
    "    # words we updated\n",
    "    if word in update_words.keys():\n",
    "        word = update_words[word]\n",
    "        modified = True\n",
    "    \n",
    "    # get updated pos tag based on the (potentially) modified representation\n",
    "    if word in pos_tag.keys():\n",
    "        pos = pos_tag[word]\n",
    "        \n",
    "    # for merged or updated words, check if we should skip the next token(s)\n",
    "    if word in num_skip.keys() and modified:\n",
    "        skip_next = num_skip[word]\n",
    "    else:\n",
    "        skip_next = 0\n",
    "\n",
    "   \n",
    "    return word, pos, skip_next\n",
    "        \n",
    "        \n",
    "def split_words_and_combine(word_list):\n",
    "    \n",
    "    split_word_dict = {'wildland': 'wild land', \n",
    "                       'landform': 'land form'}\n",
    "    word_list_new = []\n",
    "    for w in word_list:\n",
    "        if w in split_word_dict.keys():\n",
    "            print(w)\n",
    "            word_list_new.extend(split_word_dict[w].split())\n",
    "        else:\n",
    "            word_list_new.append(w)\n",
    "    \n",
    "    sentence = ' '.join(word_list_new)\n",
    "    sentence += '.'\n",
    "    return sentence\n",
    "\n",
    "def merge_tokens(tokens, merge_words=merge_word, num_skip_dict=num_skip):\n",
    "\n",
    "    new_tokens = []\n",
    "    num_skip = 0\n",
    "    for idx, t in enumerate(tokens):\n",
    "        \n",
    "        if num_skip > 0:\n",
    "            num_skip -= 1\n",
    "            continue\n",
    "        try:\n",
    "            next_token = tokens[idx+1]\n",
    "        except:\n",
    "            next_token = 'none'\n",
    "\n",
    "        if f'{t.lower()}_{next_token.lower()}' in merge_words.keys():\n",
    "            modified_word = merge_words[f'{t.lower()}_{next_token.lower()}']\n",
    "            print(\"MODIFYING FOR ROBERTA: \", modified_word)\n",
    "            num_skip = num_skip_dict[modified_word]\n",
    "            contextual_token = ''\n",
    "            for i in range(num_skip+1):\n",
    "                contextual_token += tokens[idx+i]\n",
    "            new_tokens.append(contextual_token)\n",
    "        else:\n",
    "            new_tokens.append(t)\n",
    "\n",
    "    return new_tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load pereria text\n",
    "pereira_path = \"/home3/ebrahim/what-is-brainscore/pereira_data/sentences_ordered_dereferenced.txt\"\n",
    "with open(pereira_path, \"r\") as file:\n",
    "    # Read the contents line by line into a list\n",
    "    pereira_text = [line.strip() for line in file]\n",
    "    \n",
    "no_sense_words = []\n",
    "no_sense_sentences = []\n",
    "end_for_loop = False\n",
    "lemma_arr = []\n",
    "postag_arr = []\n",
    "skip_next = False\n",
    "for j, sentence in enumerate(pereira_text):\n",
    "    \n",
    "    doc = en_nlp(str(sentence))\n",
    "    tokens = [t.text for t in doc]\n",
    "    skip_words = []\n",
    "    if j % 100 == 0:\n",
    "        print(j)\n",
    "    skip_next = 0\n",
    "    \n",
    "    merged_tokens = merge_tokens(tokens)\n",
    "\n",
    "    # retrieve contextual embeddings  \n",
    "    #ctx_embeddings = wsd_encoder.token_embeddings([tokens])[0]\n",
    "    \n",
    "    for i, d in enumerate(doc):\n",
    "        \n",
    "        if skip_next > 0:\n",
    "            print(\"Skipping: \", d)\n",
    "            skip_next = skip_next - 1\n",
    "            continue\n",
    "        \n",
    "        if d.pos_ == 'PUNCT':\n",
    "            continue\n",
    "        \n",
    "        pos = d.pos_\n",
    "        \n",
    "        try:\n",
    "            next_tok =  str(d.nbor())\n",
    "        except:\n",
    "            next_tok = 'NONE'\n",
    "            \n",
    "        word, pos, skip_next = modify_pos_word(d.lemma_.lower(), pos, next_tok.lower())\n",
    "        \n",
    "         # these don't exist in LMMS\n",
    "        if pos == 'PROPN' or pos == 'NUM':\n",
    "            pos = 'NOUN'\n",
    "            \n",
    "        \n",
    "        # don't add if it's a stop word and it was not changed\n",
    "        if d.is_stop and word == d.lemma_:\n",
    "            continue\n",
    "            \n",
    "        #lemma, pos, skip_words = modify_join_tokens(d.lemma_, next_lemma, d.pos_, skip_words)\n",
    "        lemma_arr.append(word)\n",
    "        postag_arr.append(pos)\n",
    "\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dictionary where key is word_pos, and value is number of senses\n",
    "num_senses = senses_vsm.num_senses_fast(lemma_arr=lemma_arr, postag_arr=postag_arr)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAt7ElEQVR4nO3dfVRVdd7//xeCIKJgqHAgAe9KRMVMDU85ZsmAxGU60qSNKZZjVy5oUmbMmPEmbQq1O8sxnbpMa4qsZqUllYamWEu8w7i86yJ1LC05UJqgOKLC/v0xX88vStMDm/hAz8daey3O3p/z3u9dbnn52fvs42VZliUAAACDNGvoBgAAAH6IgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMI5PQzdQG9XV1Tp69Khat24tLy+vhm4HAABcAcuydPLkSYWHh6tZs5+eI2mUAeXo0aOKiIho6DYAAEAtHDlyRB06dPjJMY0yoLRu3VrSfw4wMDCwgbsBAABXory8XBEREe7f4z+lUQaUC5d1AgMDCSgAADQyV3J7BjfJAgAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABjHx5PBixcv1uLFi/XFF19Iknr06KGZM2cqKSlJkjR48GDl5eXVeM9///d/a8mSJe7Xhw8f1qRJk7Rhwwa1atVKqampysrKko+PR63gEjo+/J5H47+Ym1xPnQAAUHsepYIOHTpo7ty5uuaaa2RZll5++WUNHz5cn376qXr06CFJmjhxoubMmeN+T8uWLd0/V1VVKTk5WQ6HQ5s3b1ZxcbHGjRun5s2b6/HHH7fpkAAAQGPnUUAZNmxYjdePPfaYFi9erC1btrgDSsuWLeVwOC76/g8//FD79u3TunXrFBoaquuuu06PPvqopk2bpkceeUS+vr61PIzGgxkOAAAur9b3oFRVVWnFihWqqKiQ0+l0r3/ttdfUrl079ezZU5mZmTp9+rR7W35+vnr16qXQ0FD3usTERJWXl2vv3r2X3FdlZaXKy8trLAAAoOny+MaP3bt3y+l06syZM2rVqpVWrlypmJgYSdLvfvc7RUVFKTw8XLt27dK0adNUVFSkt99+W5LkcrlqhBNJ7tcul+uS+8zKytLs2bM9bRUAADRSHgeUbt26qbCwUGVlZfrnP/+p1NRU5eXlKSYmRvfdd597XK9evRQWFqYhQ4bo4MGD6tKlS62bzMzMVEZGhvt1eXm5IiIial0PAACYzeNLPL6+vuratav69u2rrKws9e7dW88+++xFx8bFxUmSDhw4IElyOBwqKSmpMebC60vdtyJJfn5+CgwMrLEAAICmq87PQamurlZlZeVFtxUWFkqSwsLCJElOp1O7d+9WaWmpe0xubq4CAwPdl4kAAAA8usSTmZmppKQkRUZG6uTJk8rOztbGjRu1du1aHTx4UNnZ2brtttvUtm1b7dq1S1OmTNGgQYMUGxsrSUpISFBMTIzGjh2r+fPny+Vyafr06UpLS5Ofn1+9HCAAAGh8PAoopaWlGjdunIqLixUUFKTY2FitXbtWv/71r3XkyBGtW7dOCxYsUEVFhSIiIpSSkqLp06e73+/t7a2cnBxNmjRJTqdTAQEBSk1NrfHcFAAAAI8CytKlSy+5LSIi4kdPkb2YqKgovf/++57sFgAA/MLwXTwAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA43gUUBYvXqzY2FgFBgYqMDBQTqdTH3zwgXv7mTNnlJaWprZt26pVq1ZKSUlRSUlJjRqHDx9WcnKyWrZsqZCQEE2dOlXnz5+352gAAECT4FFA6dChg+bOnauCggLt2LFDt956q4YPH669e/dKkqZMmaLVq1frrbfeUl5eno4ePaqRI0e6319VVaXk5GSdPXtWmzdv1ssvv6zly5dr5syZ9h4VAABo1Lwsy7LqUiA4OFhPPPGE7rjjDrVv317Z2dm64447JEn/93//p+7duys/P18DBgzQBx98oP/6r//S0aNHFRoaKklasmSJpk2bpm+++Ua+vr5XtM/y8nIFBQWprKxMgYGBdWn/Z9fx4fc8Gv/F3GSj6gMAUFue/P6u9T0oVVVVWrFihSoqKuR0OlVQUKBz584pPj7ePSY6OlqRkZHKz8+XJOXn56tXr17ucCJJiYmJKi8vd8/CXExlZaXKy8trLAAAoOnyOKDs3r1brVq1kp+fn+6//36tXLlSMTExcrlc8vX1VZs2bWqMDw0NlcvlkiS5XK4a4eTC9gvbLiUrK0tBQUHuJSIiwtO2AQBAI+JxQOnWrZsKCwu1detWTZo0Sampqdq3b1999OaWmZmpsrIy93LkyJF63R8AAGhYPp6+wdfXV127dpUk9e3bV9u3b9ezzz6rUaNG6ezZszpx4kSNWZSSkhI5HA5JksPh0LZt22rUu/ApnwtjLsbPz09+fn6etgoAABqpOj8Hpbq6WpWVlerbt6+aN2+u9evXu7cVFRXp8OHDcjqdkiSn06ndu3ertLTUPSY3N1eBgYGKiYmpaysAAKCJ8GgGJTMzU0lJSYqMjNTJkyeVnZ2tjRs3au3atQoKCtKECROUkZGh4OBgBQYG6oEHHpDT6dSAAQMkSQkJCYqJidHYsWM1f/58uVwuTZ8+XWlpacyQAAAAN48CSmlpqcaNG6fi4mIFBQUpNjZWa9eu1a9//WtJ0jPPPKNmzZopJSVFlZWVSkxM1PPPP+9+v7e3t3JycjRp0iQ5nU4FBAQoNTVVc+bMsfeoAABAo1bn56A0BJ6D0nD1AQCorZ/lOSgAAAD1hYACAACMQ0ABAADG8fg5KPhl4x4XAMDPgRkUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABjHo4CSlZWl/v37q3Xr1goJCdGIESNUVFRUY8zgwYPl5eVVY7n//vtrjDl8+LCSk5PVsmVLhYSEaOrUqTp//nzdjwYAADQJPp4MzsvLU1pamvr376/z58/rz3/+sxISErRv3z4FBAS4x02cOFFz5sxxv27ZsqX756qqKiUnJ8vhcGjz5s0qLi7WuHHj1Lx5cz3++OM2HBIAAGjsPAooa9asqfF6+fLlCgkJUUFBgQYNGuRe37JlSzkcjovW+PDDD7Vv3z6tW7dOoaGhuu666/Too49q2rRpeuSRR+Tr61uLwwAAAE1Jne5BKSsrkyQFBwfXWP/aa6+pXbt26tmzpzIzM3X69Gn3tvz8fPXq1UuhoaHudYmJiSovL9fevXsvup/KykqVl5fXWAAAQNPl0QzK91VXV2vy5Mm66aab1LNnT/f63/3ud4qKilJ4eLh27dqladOmqaioSG+//bYkyeVy1QgnktyvXS7XRfeVlZWl2bNn17ZVAADQyNQ6oKSlpWnPnj365JNPaqy/77773D/36tVLYWFhGjJkiA4ePKguXbrUal+ZmZnKyMhwvy4vL1dERETtGgcAAMar1SWe9PR05eTkaMOGDerQocNPjo2Li5MkHThwQJLkcDhUUlJSY8yF15e6b8XPz0+BgYE1FgAA0HR5FFAsy1J6erpWrlypjz76SJ06dbrsewoLCyVJYWFhkiSn06ndu3ertLTUPSY3N1eBgYGKiYnxpB0AANBEeXSJJy0tTdnZ2XrnnXfUunVr9z0jQUFB8vf318GDB5Wdna3bbrtNbdu21a5duzRlyhQNGjRIsbGxkqSEhATFxMRo7Nixmj9/vlwul6ZPn660tDT5+fnZf4QAAKDR8WgGZfHixSorK9PgwYMVFhbmXt544w1Jkq+vr9atW6eEhARFR0frj3/8o1JSUrR69Wp3DW9vb+Xk5Mjb21tOp1N33323xo0bV+O5KQAA4JfNoxkUy7J+cntERITy8vIuWycqKkrvv/++J7sGAAC/IHwXDwAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADG8WnoBoDv6/jwe1c89ou5yfXYCQCgITGDAgAAjONRQMnKylL//v3VunVrhYSEaMSIESoqKqox5syZM0pLS1Pbtm3VqlUrpaSkqKSkpMaYw4cPKzk5WS1btlRISIimTp2q8+fP1/1oAABAk+BRQMnLy1NaWpq2bNmi3NxcnTt3TgkJCaqoqHCPmTJlilavXq233npLeXl5Onr0qEaOHOneXlVVpeTkZJ09e1abN2/Wyy+/rOXLl2vmzJn2HRUAAGjUPLoHZc2aNTVeL1++XCEhISooKNCgQYNUVlampUuXKjs7W7feeqskadmyZerevbu2bNmiAQMG6MMPP9S+ffu0bt06hYaG6rrrrtOjjz6qadOm6ZFHHpGvr699RwcAABqlOt2DUlZWJkkKDg6WJBUUFOjcuXOKj493j4mOjlZkZKTy8/MlSfn5+erVq5dCQ0PdYxITE1VeXq69e/dedD+VlZUqLy+vsQAAgKar1gGlurpakydP1k033aSePXtKklwul3x9fdWmTZsaY0NDQ+Vyudxjvh9OLmy/sO1isrKyFBQU5F4iIiJq2zYAAGgEah1Q0tLStGfPHq1YscLOfi4qMzNTZWVl7uXIkSP1vk8AANBwavUclPT0dOXk5GjTpk3q0KGDe73D4dDZs2d14sSJGrMoJSUlcjgc7jHbtm2rUe/Cp3wujPkhPz8/+fn51aZVAADQCHk0g2JZltLT07Vy5Up99NFH6tSpU43tffv2VfPmzbV+/Xr3uqKiIh0+fFhOp1OS5HQ6tXv3bpWWlrrH5ObmKjAwUDExMXU5FgAA0ER4NIOSlpam7OxsvfPOO2rdurX7npGgoCD5+/srKChIEyZMUEZGhoKDgxUYGKgHHnhATqdTAwYMkCQlJCQoJiZGY8eO1fz58+VyuTR9+nSlpaUxSwIAACR5GFAWL14sSRo8eHCN9cuWLdP48eMlSc8884yaNWumlJQUVVZWKjExUc8//7x7rLe3t3JycjRp0iQ5nU4FBAQoNTVVc+bMqduRAACAJsOjgGJZ1mXHtGjRQosWLdKiRYsuOSYqKkrvv/++J7sGAAC/IHwXDwAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIzj09ANAD+Xjg+/59H4L+Ym11MnAIDLYQYFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxvE4oGzatEnDhg1TeHi4vLy8tGrVqhrbx48fLy8vrxrL0KFDa4w5fvy4xowZo8DAQLVp00YTJkzQqVOn6nQgAACg6fA4oFRUVKh3795atGjRJccMHTpUxcXF7uX111+vsX3MmDHau3evcnNzlZOTo02bNum+++7zvHsAANAk+Xj6hqSkJCUlJf3kGD8/Pzkcjotu++yzz7RmzRpt375d/fr1kyQtXLhQt912m5588kmFh4d72hIAAGhi6uUelI0bNyokJETdunXTpEmTdOzYMfe2/Px8tWnTxh1OJCk+Pl7NmjXT1q1bL1qvsrJS5eXlNRYAANB02R5Qhg4dqldeeUXr16/XvHnzlJeXp6SkJFVVVUmSXC6XQkJCarzHx8dHwcHBcrlcF62ZlZWloKAg9xIREWF32wAAwCAeX+K5nNGjR7t/7tWrl2JjY9WlSxdt3LhRQ4YMqVXNzMxMZWRkuF+Xl5cTUgAAaMLq/WPGnTt3Vrt27XTgwAFJksPhUGlpaY0x58+f1/Hjxy9534qfn58CAwNrLAAAoOmq94Dy1Vdf6dixYwoLC5MkOZ1OnThxQgUFBe4xH330kaqrqxUXF1ff7QAAgEbA40s8p06dcs+GSNKhQ4dUWFio4OBgBQcHa/bs2UpJSZHD4dDBgwf10EMPqWvXrkpMTJQkde/eXUOHDtXEiRO1ZMkSnTt3Tunp6Ro9ejSf4AEAAJJqMYOyY8cO9enTR3369JEkZWRkqE+fPpo5c6a8vb21a9cu3X777br22ms1YcIE9e3bVx9//LH8/PzcNV577TVFR0dryJAhuu222zRw4EC98MIL9h0VAABo1DyeQRk8eLAsy7rk9rVr1162RnBwsLKzsz3dNQAA+IXgu3gAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOP4NHQDQFPR8eH3PBr/xdzkeuoEABo/ZlAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIzjcUDZtGmThg0bpvDwcHl5eWnVqlU1tluWpZkzZyosLEz+/v6Kj4/X/v37a4w5fvy4xowZo8DAQLVp00YTJkzQqVOn6nQgAACg6fA4oFRUVKh3795atGjRRbfPnz9fzz33nJYsWaKtW7cqICBAiYmJOnPmjHvMmDFjtHfvXuXm5ionJ0ebNm3SfffdV/ujAAAATYrHD2pLSkpSUlLSRbdZlqUFCxZo+vTpGj58uCTplVdeUWhoqFatWqXRo0frs88+05o1a7R9+3b169dPkrRw4ULddtttevLJJxUeHl6HwwEAAE2BrfegHDp0SC6XS/Hx8e51QUFBiouLU35+viQpPz9fbdq0cYcTSYqPj1ezZs20devWi9atrKxUeXl5jQUAADRdtgYUl8slSQoNDa2xPjQ01L3N5XIpJCSkxnYfHx8FBwe7x/xQVlaWgoKC3EtERISdbQMAAMM0ik/xZGZmqqyszL0cOXKkoVsCAAD1yNaA4nA4JEklJSU11peUlLi3ORwOlZaW1th+/vx5HT9+3D3mh/z8/BQYGFhjAQAATZetAaVTp05yOBxav369e115ebm2bt0qp9MpSXI6nTpx4oQKCgrcYz766CNVV1crLi7OznYAAEAj5fGneE6dOqUDBw64Xx86dEiFhYUKDg5WZGSkJk+erL/+9a+65ppr1KlTJ82YMUPh4eEaMWKEJKl79+4aOnSoJk6cqCVLlujcuXNKT0/X6NGj+QQPAACQVIuAsmPHDt1yyy3u1xkZGZKk1NRULV++XA899JAqKip033336cSJExo4cKDWrFmjFi1auN/z2muvKT09XUOGDFGzZs2UkpKi5557zobDAQAATYHHAWXw4MGyLOuS2728vDRnzhzNmTPnkmOCg4OVnZ3t6a6BX7SOD7/n0fgv5ibXUycAUP8axad4AADALwsBBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOD4N3QCAhtfx4fc8Gv/F3OR66gQA/oMZFAAAYBwCCgAAMA6XeADUOy4hAfAUMygAAMA4BBQAAGAcAgoAADCO7QHlkUcekZeXV40lOjravf3MmTNKS0tT27Zt1apVK6WkpKikpMTuNgAAQCNWLzMoPXr0UHFxsXv55JNP3NumTJmi1atX66233lJeXp6OHj2qkSNH1kcbAACgkaqXT/H4+PjI4XD8aH1ZWZmWLl2q7Oxs3XrrrZKkZcuWqXv37tqyZYsGDBhQH+0AAIBGpl5mUPbv36/w8HB17txZY8aM0eHDhyVJBQUFOnfunOLj491jo6OjFRkZqfz8/PpoBQAANEK2z6DExcVp+fLl6tatm4qLizV79mz96le/0p49e+RyueTr66s2bdrUeE9oaKhcLtcla1ZWVqqystL9ury83O62AQCAQWwPKElJSe6fY2NjFRcXp6ioKL355pvy9/evVc2srCzNnj3brhYBAIDh6v1jxm3atNG1116rAwcOyOFw6OzZszpx4kSNMSUlJRe9Z+WCzMxMlZWVuZcjR47Uc9cAAKAh1fuj7k+dOqWDBw9q7Nix6tu3r5o3b67169crJSVFklRUVKTDhw/L6XResoafn5/8/Pzqu1UAjRSP0geaHtsDyp/+9CcNGzZMUVFROnr0qGbNmiVvb2/dddddCgoK0oQJE5SRkaHg4GAFBgbqgQcekNPp5BM8AADAzfaA8tVXX+muu+7SsWPH1L59ew0cOFBbtmxR+/btJUnPPPOMmjVrppSUFFVWVioxMVHPP/+83W0AAIBGzPaAsmLFip/c3qJFCy1atEiLFi2ye9cAAKCJqPd7UACgseMeF+Dnx5cFAgAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOP4NHQDAPBL1vHh9zwa/8Xc5HrqBDALMygAAMA4BBQAAGAcAgoAADAOAQUAABiHm2QBoAnjJlw0VsygAAAA4zCDchH8iwMAgIbFDAoAADAOAQUAABiHSzwAgFrjkjjqCzMoAADAOMygAACM5ckMDbMzTQszKAAAwDgEFAAAYBwCCgAAMA73oAAAUA/4hFPdMIMCAACMwwwKAOAXiRkOszGDAgAAjENAAQAAxmnQgLJo0SJ17NhRLVq0UFxcnLZt29aQ7QAAAEM02D0ob7zxhjIyMrRkyRLFxcVpwYIFSkxMVFFRkUJCQhqqLQAAGoWmfg9Ng82gPP3005o4caLuuecexcTEaMmSJWrZsqVeeumlhmoJAAAYokFmUM6ePauCggJlZma61zVr1kzx8fHKz8//0fjKykpVVla6X5eVlUmSysvL66W/6srTHo33pI/6rP1Lq9+YezetfmPuvbHXb8y9m1a/MfduYv36cKEHy7IuP9hqAF9//bUlydq8eXON9VOnTrVuuOGGH42fNWuWJYmFhYWFhYWlCSxHjhy5bFZoFM9ByczMVEZGhvt1dXW1jh8/rrZt28rLy8vWfZWXlysiIkJHjhxRYGBgo6lN/Yar3djrN+beG3v9xtx7Y6/fmHtvzPUty9LJkycVHh5+2bENElDatWsnb29vlZSU1FhfUlIih8Pxo/F+fn7y8/Orsa5Nmzb12aICAwPr5X96fdemfsPVbuz1G3Pvjb1+Y+69sddvzL031vpBQUFXNK5BbpL19fVV3759tX79eve66upqrV+/Xk6nsyFaAgAABmmwSzwZGRlKTU1Vv379dMMNN2jBggWqqKjQPffc01AtAQAAQzRYQBk1apS++eYbzZw5Uy6XS9ddd53WrFmj0NDQhmpJ0n8uJ82aNetHl5RMr039hqvd2Os35t4be/3G3Htjr9+Ye28K9a+El2VdyWd9AAAAfj58Fw8AADAOAQUAABiHgAIAAIxDQAEAAMYhoHzPokWL1LFjR7Vo0UJxcXHatm2bLXU3bdqkYcOGKTw8XF5eXlq1apUtdS/IyspS//791bp1a4WEhGjEiBEqKiqyrf7ixYsVGxvrfmCP0+nUBx98YFv975s7d668vLw0efJkW+o98sgj8vLyqrFER0fbUvuCr7/+Wnfffbfatm0rf39/9erVSzt27LCldseOHX/Uv5eXl9LS0upcu6qqSjNmzFCnTp3k7++vLl266NFHH72y78i4QidPntTkyZMVFRUlf39/3Xjjjdq+fXutal3uPLIsSzNnzlRYWJj8/f0VHx+v/fv321L77bffVkJCgvvp1YWFhbb1fu7cOU2bNk29evVSQECAwsPDNW7cOB09etSW+tJ/zoPo6GgFBAToqquuUnx8vLZu3Wpb/e+7//775eXlpQULFthSe/z48T/68z906FBbe//ss890++23KygoSAEBAerfv78OHz5sS/2Lnb9eXl564oknbKl/6tQppaenq0OHDvL393d/+a4dtUtKSjR+/HiFh4erZcuWGjp06BWfU3YgoPw/b7zxhjIyMjRr1izt3LlTvXv3VmJiokpLS+tcu6KiQr1799aiRYts6PTH8vLylJaWpi1btig3N1fnzp1TQkKCKioqbKnfoUMHzZ07VwUFBdqxY4duvfVWDR8+XHv37rWl/gXbt2/X3//+d8XGxtpat0ePHiouLnYvn3zyiW21v/vuO910001q3ry5PvjgA+3bt09PPfWUrrrqKlvqb9++vUbvubm5kqTf/va3da49b948LV68WH/729/02Wefad68eZo/f74WLlxY59oX/P73v1dubq7+8Y9/aPfu3UpISFB8fLy+/vprj2td7jyaP3++nnvuOS1ZskRbt25VQECAEhMTdebMmTrXrqio0MCBAzVv3jyP+75c/dOnT2vnzp2aMWOGdu7cqbfffltFRUW6/fbbbakvSddee63+9re/affu3frkk0/UsWNHJSQk6JtvvrGl/gUrV67Uli1brugx5p7UHjp0aI3z4PXXX7et/sGDBzVw4EBFR0dr48aN2rVrl2bMmKEWLVrYUv/7fRcXF+ull16Sl5eXUlJSbKmfkZGhNWvW6NVXX9Vnn32myZMnKz09Xe+++26daluWpREjRuhf//qX3nnnHX366aeKiopSfHy8bb9bLsuG7/5rEm644QYrLS3N/bqqqsoKDw+3srKybN2PJGvlypW21vyh0tJSS5KVl5dXb/u46qqrrP/5n/+xrd7Jkyeta665xsrNzbVuvvlm68EHH7Sl7qxZs6zevXvbUutipk2bZg0cOLDe6v/Qgw8+aHXp0sWqrq6uc63k5GTr3nvvrbFu5MiR1pgxY+pc27Is6/Tp05a3t7eVk5NTY/31119v/eUvf6lT7R+eR9XV1ZbD4bCeeOIJ97oTJ05Yfn5+1uuvv16n2t936NAhS5L16aef1qLry9e/YNu2bZYk68svv6yX+mVlZZYka926dbbV/+qrr6yrr77a2rNnjxUVFWU988wzttROTU21hg8f7nGtK60/atQo6+677663+j80fPhw69Zbb7Wtfo8ePaw5c+bUWFebc+yHtYuKiixJ1p49e9zrqqqqrPbt21svvviix73XBjMoks6ePauCggLFx8e71zVr1kzx8fHKz89vwM5qp6ysTJIUHBxse+2qqiqtWLFCFRUVtn4tQVpampKTk2v8P7DL/v37FR4ers6dO2vMmDFXPHV7Jd59913169dPv/3tbxUSEqI+ffroxRdftK3+9509e1avvvqq7r33Xlu+JPPGG2/U+vXr9fnnn0uS/vd//1effPKJkpKS6lxbks6fP6+qqqof/UvU39/f1lksSTp06JBcLleNPz9BQUGKi4trtOewl5dXvXzn2NmzZ/XCCy8oKChIvXv3tqVmdXW1xo4dq6lTp6pHjx621Py+jRs3KiQkRN26ddOkSZN07NgxW+pWV1frvffe07XXXqvExESFhIQoLi7O9svwF5SUlOi9997ThAkTbKt544036t1339XXX38ty7K0YcMGff7550pISKhT3crKSkmqcf42a9ZMfn5+tp+/l0JAkfTtt9+qqqrqR0+xDQ0NlcvlaqCuaqe6ulqTJ0/WTTfdpJ49e9pWd/fu3WrVqpX8/Px0//33a+XKlYqJibGl9ooVK7Rz505lZWXZUu/74uLitHz5cq1Zs0aLFy/WoUOH9Ktf/UonT560pf6//vUvLV68WNdcc43Wrl2rSZMm6Q9/+INefvllW+p/36pVq3TixAmNHz/elnoPP/ywRo8erejoaDVv3lx9+vTR5MmTNWbMGFvqt27dWk6nU48++qiOHj2qqqoqvfrqq8rPz1dxcbEt+7jgwnnaFM7hM2fOaNq0abrrrrts/ZK2nJwctWrVSi1atNAzzzyj3NxctWvXzpba8+bNk4+Pj/7whz/YUu/7hg4dqldeeUXr16/XvHnzlJeXp6SkJFVVVdW5dmlpqU6dOqW5c+dq6NCh+vDDD/Wb3/xGI0eOVF5eng3d1/Tyyy+rdevWGjlypG01Fy5cqJiYGHXo0EG+vr4aOnSoFi1apEGDBtWpbnR0tCIjI5WZmanvvvtOZ8+e1bx58/TVV1/Zfv5eSoM96h71Iy0tTXv27LE94Xbr1k2FhYUqKyvTP//5T6WmpiovL6/OIeXIkSN68MEHlZube8XXfD3x/dmA2NhYxcXFKSoqSm+++aYt/4qprq5Wv3799Pjjj0uS+vTpoz179mjJkiVKTU2tc/3vW7p0qZKSkjy6vv9T3nzzTb322mvKzs5Wjx49VFhYqMmTJys8PNy23v/xj3/o3nvv1dVXXy1vb29df/31uuuuu1RQUGBL/abm3LlzuvPOO2VZlhYvXmxr7VtuuUWFhYX69ttv9eKLL+rOO+/U1q1bFRISUqe6BQUFevbZZ7Vz505bZvZ+aPTo0e6fe/XqpdjYWHXp0kUbN27UkCFD6lS7urpakjR8+HBNmTJFknTddddp8+bNWrJkiW6++eY61f+hl156SWPGjLH177qFCxdqy5YtevfddxUVFaVNmzYpLS1N4eHhdZqRbt68ud5++21NmDBBwcHB8vb2Vnx8vJKSkmy9kf6nMIMiqV27dvL29lZJSUmN9SUlJXI4HA3UlefS09OVk5OjDRs2qEOHDrbW9vX1VdeuXdW3b19lZWWpd+/eevbZZ+tct6CgQKWlpbr++uvl4+MjHx8f5eXl6bnnnpOPj48t/0r6vjZt2ujaa6/VgQMHbKkXFhb2o5DWvXt3Wy8jSdKXX36pdevW6fe//71tNadOneqeRenVq5fGjh2rKVOm2DqT1aVLF+Xl5enUqVM6cuSItm3bpnPnzqlz58627UOS+zxtzOfwhXDy5ZdfKjc31/avuA8ICFDXrl01YMAALV26VD4+Plq6dGmd63788ccqLS1VZGSk+xz+8ssv9cc//lEdO3ase+M/0LlzZ7Vr186Wc7hdu3by8fH5Wc7hjz/+WEVFRbaew//+97/15z//WU8//bSGDRum2NhYpaena9SoUXryySfrXL9v374qLCzUiRMnVFxcrDVr1ujYsWO2n7+XQkDRf3759u3bV+vXr3evq66u1vr16229z6K+WJal9PR0rVy5Uh999JE6depU7/usrq52X6OsiyFDhmj37t0qLCx0L/369dOYMWNUWFgob29vG7r9/506dUoHDx5UWFiYLfVuuummH32k+/PPP1dUVJQt9S9YtmyZQkJClJycbFvN06dPq1mzmn8FeHt7u/9VaaeAgACFhYXpu+++09q1azV8+HBb63fq1EkOh6PGOVxeXq6tW7c2inP4QjjZv3+/1q1bp7Zt29b7Pu06h8eOHatdu3bVOIfDw8M1depUrV271oZOa/rqq6907NgxW85hX19f9e/f/2c5h5cuXaq+ffvadt+P9J8/N+fOnav38zgoKEjt27fX/v37tWPHDtvP30vhEs//k5GRodTUVPXr10833HCDFixYoIqKCt1zzz11rn3q1Kkaaf/QoUMqLCxUcHCwIiMj61w/LS1N2dnZeuedd9S6dWv3NfegoCD5+/vXuX5mZqaSkpIUGRmpkydPKjs7Wxs3brTlL5/WrVv/6F6ZgIAAtW3b1pZ7aP70pz9p2LBhioqK0tGjRzVr1ix5e3vrrrvuqnNtSZoyZYpuvPFGPf7447rzzju1bds2vfDCC3rhhRdsqS/95xfJsmXLlJqaKh8f+07ZYcOG6bHHHlNkZKR69OihTz/9VE8//bTuvfde2/axdu1aWZalbt266cCBA5o6daqio6NrdV5d7jyaPHmy/vrXv+qaa65Rp06dNGPGDIWHh2vEiBF1rn38+HEdPnzY/WySC7/QHA7HFc3Q/FT9sLAw3XHHHdq5c6dycnJUVVXlPoeDg4Pl6+tbp/pt27bVY489pttvv11hYWH69ttvtWjRIn399ddX/HH1y/33+WGgat68uRwOh7p161an2sHBwZo9e7ZSUlLkcDh08OBBPfTQQ+ratasSExNt6X3q1KkaNWqUBg0apFtuuUVr1qzR6tWrtXHjRlvqS/8Jy2+99ZaeeuqpK6rpSf2bb75ZU6dOlb+/v6KiopSXl6dXXnlFTz/9dJ1rv/XWW2rfvr0iIyO1e/duPfjggxoxYkSdb8C9Yj/LZ4UaiYULF1qRkZGWr6+vdcMNN1hbtmyxpe6GDRssST9aUlNTbal/sdqSrGXLltlS/95777WioqIsX19fq3379taQIUOsDz/80JbaF2Pnx4xHjRplhYWFWb6+vtbVV19tjRo1yjpw4IAttS9YvXq11bNnT8vPz8+Kjo62XnjhBVvrr1271pJkFRUV2Vq3vLzcevDBB63IyEirRYsWVufOna2//OUvVmVlpW37eOONN6zOnTtbvr6+lsPhsNLS0qwTJ07UqtblzqPq6mprxowZVmhoqOXn52cNGTLkiv+bXa72smXLLrp91qxZda5/4aPLF1s2bNhQ5/r//ve/rd/85jdWeHi45evra4WFhVm33367tW3btiuqfSX/fX7Ik48Z/1Tt06dPWwkJCVb79u2t5s2bW1FRUdbEiRMtl8tla+9Lly61unbtarVo0cLq3bu3tWrVKlvr//3vf7f8/f1r9Wf/cvWLi4ut8ePHW+Hh4VaLFi2sbt26WU899dQVPYrgcrWfffZZq0OHDlbz5s2tyMhIa/r06bb+/XA5Xpb1M93tAgAAcIW4BwUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4/x/XTCMXADX4MEAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "plt.hist(list(num_senses.values()), bins=np.arange(0,20,1), width=0.5)\n",
    "plt.xticks(np.arange(0,20))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    }
   ],
   "source": [
    "# load pereria text\n",
    "fed_path = \"/home3/ebrahim/what-is-brainscore/federonko_data/sentences_ordered.txt\"\n",
    "with open(fed_path, \"r\") as file:\n",
    "    # Read the contents line by line into a list\n",
    "    sentence_text = [line.strip() for line in file]\n",
    "    \n",
    "no_sense_words = []\n",
    "no_sense_sentences = []\n",
    "end_for_loop = False\n",
    "lemma_arr = []\n",
    "postag_arr = []\n",
    "skip_next = False\n",
    "for j, sentence in enumerate(sentence_text):\n",
    "    \n",
    "    doc = en_nlp(str(sentence))\n",
    "    tokens = [t.text for t in doc]\n",
    "    skip_words = []\n",
    "    if j % 100 == 0:\n",
    "        print(j)\n",
    "    skip_next = 0\n",
    "    \n",
    "    for i, d in enumerate(doc):\n",
    "        \n",
    "        \n",
    "        if d.pos_ == 'PUNCT':\n",
    "            continue\n",
    "        \n",
    "        pos = d.pos_\n",
    "        \n",
    "        word = d.lemma_.lower()\n",
    "        \n",
    "        if word == 'clothe':\n",
    "            word = d.text\n",
    "            \n",
    "        \n",
    "         # these don't exist in LMMS\n",
    "        if pos == 'PROPN' or pos == 'NUM':\n",
    "            pos = 'NOUN'\n",
    "            \n",
    "        \n",
    "        # don't add if it's a stop word and it was not changed\n",
    "        if d.is_stop and word == d.lemma_:\n",
    "            continue\n",
    "            \n",
    "        #lemma, pos, skip_words = modify_join_tokens(d.lemma_, next_lemma, d.pos_, skip_words)\n",
    "        lemma_arr.append(word)\n",
    "        postag_arr.append(pos)\n",
    "\n",
    "# dictionary where key is word_pos, and value is number of senses\n",
    "num_senses = senses_vsm.num_senses_fast(lemma_arr=lemma_arr, postag_arr=postag_arr)      \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(list, {})"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "senses_vsm.word_pos_sk['Alex']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['alex_NOUN',\n",
       " 'amy_NOUN',\n",
       " 'ann_NOUN',\n",
       " 'charlie_NOUN',\n",
       " 'emma_NOUN',\n",
       " 'eva_NOUN',\n",
       " 'timmy_NOUN',\n",
       " 'jessie_NOUN',\n",
       " 'near_ADP',\n",
       " 'jill_NOUN',\n",
       " 'liz_NOUN',\n",
       " 'larry_NOUN',\n",
       " 'i_PRON',\n",
       " 'steve_NOUN',\n",
       " 'susan_NOUN',\n",
       " 'outside_ADP',\n",
       " 'hannah_NOUN',\n",
       " 'brand_ADJ']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "zero_senses = [key for key, val in num_senses.items() if val==0]\n",
    "zero_senses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
