{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "52dcc1d2-7b76-45cf-843b-4a15a367f91d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "from glob import glob\n",
    "from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer\n",
    "from scipy.special import softmax\n",
    "from scipy.stats import entropy\n",
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt\n",
    "from tqdm import tqdm\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fef9f00c-32af-42ba-a369-30dce332c3b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "402920df23d2473e82bbfe59bbda0315",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/745 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a6a27fe496064b1ea1e7cece71db4146",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "032f6f0a72054ebf87df79de1f0d676a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c6e56b7a75964804910e6ae1f8a35486",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model_name = 'daryl149/llama-2-7b-hf'\n",
    "\n",
    "# This cell instantiates only the tokenizer for the checkpoint named above.\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
    "# Invert the tokenizer vocabulary so that integer ids map back to token strings.\n",
    "Token = {token_id: token_str for token_str, token_id in tokenizer.get_vocab().items()}\n",
    "\n",
    "def topk(v, k=40):\n",
    "    \"\"\"Return the k largest entries of v together with their vocabulary tokens.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    v : array-like\n",
    "        Scores (e.g. logits). The input is flattened before ranking, and each\n",
    "        flat position is looked up in the module-level Token mapping.\n",
    "    k : int, default 40\n",
    "        Number of rows to return; must be non-negative. Values larger than\n",
    "        the number of entries simply return every entry.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Columns 'token' and 'logit', ordered from largest to smallest value.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If k is negative.\n",
    "    \"\"\"\n",
    "    if k < 0:\n",
    "        raise ValueError(f'k must be non-negative, got {k}')\n",
    "    v = np.asarray(v).flatten()\n",
    "    # Reverse first, then truncate: slicing with [-k:] before reversing returns\n",
    "    # every entry when k == 0, because [-0:] is the full slice.\n",
    "    idxs = v.argsort()[::-1][:k]\n",
    "    ret = [(Token[int(i)], v[i]) for i in idxs]\n",
    "    return pd.DataFrame(ret, columns=['token', 'logit'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a32201b7-2b96-48ae-97a7-054950281864",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load three precomputed arrays from the SVD/ directory.\n",
    "# NOTE(review): presumably the U, S, V factors of an SVD computed elsewhere - confirm.\n",
    "U = np.load('SVD/e_U.npy')\n",
    "S = np.load('SVD/e_S.npy')\n",
    "V = np.load('SVD/e_V.npy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "5348d995-04f0-4314-8c38-1a31ded8374e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inf\n",
      "inf\n",
      "inf\n",
      "inf\n",
      "inf\n",
      "inf\n",
      "inf\n",
      "inf\n",
      "inf\n",
      "inf\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_87336/3486293557.py:2: RuntimeWarning: divide by zero encountered in scalar divide\n",
      "  print(np.linalg.norm(V[i,:32000]) / np.linalg.norm(V[i,32000:]))\n"
     ]
    }
   ],
   "source": [
    "# For each of the first 10 rows of V, compare the norm of columns [0, 32000)\n",
    "# with the norm of the remaining columns.\n",
    "# NOTE(review): 32000 is presumably the vocabulary size - confirm.\n",
    "for i in range(10):\n",
    "    head_norm = np.linalg.norm(V[i, :32000])\n",
    "    tail_norm = np.linalg.norm(V[i, 32000:])\n",
    "    if tail_norm == 0:\n",
    "        # Zero denominator (e.g. V has no columns past 32000): report the\n",
    "        # limiting value explicitly instead of dividing, which would emit a\n",
    "        # divide-by-zero RuntimeWarning.\n",
    "        ratio = np.inf if head_norm > 0 else np.nan\n",
    "    else:\n",
    "        ratio = head_norm / tail_norm\n",
    "    print(ratio)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a1b919aa-5a07-4d20-bb95-b623da0039d2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 S 132.8874648671305\n",
      "0 POS\n",
      "         token     logit           token     logit\n",
      "0          ouc  0.026253           textt  0.023534\n",
      "1       Static  0.023621         archivi  0.022365\n",
      "2       bullet  0.023017    ▁Хронологија  0.021102\n",
      "3      Example  0.021523         Portail  0.019069\n",
      "4     ▁spatial  0.020898               Ḩ  0.018919\n",
      "5       ▁scarc  0.019717    AccessorImpl  0.018715\n",
      "6       Cursor  0.019017         données  0.018518\n",
      "7         ▁sup  0.018933        ViewById  0.018455\n",
      "8      Manager  0.018902        makeText  0.018139\n",
      "9         Lond  0.018823       ▁Мексичка  0.017802\n",
      "10             0.018774             <s>  0.017687\n",
      "11    ▁Example  0.018622            eerd  0.017566\n",
      "12        ular  0.018434          brázky  0.017337\n",
      "13       tfrac  0.018300            $}}%  0.017126\n",
      "14     Builder  0.018168      ▁onderwerp  0.016955\n",
      "15        hbar  0.018150    rinningsområ  0.016913\n",
      "16         ▁zá  0.018109       ▁Kontrola  0.016733\n",
      "17          }[  0.018104           bolds  0.016698\n",
      "18       nabla  0.018051          ▁prüfe  0.016637\n",
      "19      Submit  0.017985   ▁bezeichneter  0.016431\n",
      "20        ▁règ  0.017763           achiv  0.016344\n",
      "21        ount  0.017738               ѫ  0.016310\n",
      "22        olia  0.017726          ździer  0.016290\n",
      "23          }'  0.017688      ▁nederbörd  0.016252\n",
      "24       ▁Cons  0.017446               𝓝  0.016252\n",
      "25          }\"  0.017439      ZygoteInit  0.016233\n",
      "26          (-  0.017438       ▁entferne  0.016137\n",
      "27      ▁chant  0.017314           IABot  0.016127\n",
      "28     ▁metric  0.017269      archiviato  0.015974\n",
      "29      ▁acres  0.017269        Webachiv  0.015967\n",
      "30    ▁Notable  0.017169           xtart  0.015930\n",
      "31          (_  0.017122  ActivityThread  0.015906\n",
      "32      ▁Allow  0.016977          Zygote  0.015882\n",
      "33    Argument  0.016909          ▁zvuky  0.015876\n",
      "34          }(  0.016891         ▁regnig  0.015828\n",
      "35  ▁principle  0.016764           itmap  0.015764\n",
      "36      ▁kilom  0.016400               ߬  0.015742\n",
      "37     unnable  0.016364       ▁varmaste  0.015730\n",
      "38        fund  0.016305       ightarrow  0.015700\n",
      "39        umph  0.016244        ▁Савезне  0.015674\n",
      "0 NEG\n",
      "               token     logit   token     logit\n",
      "0               ▁XII  0.024029       ▁  0.018500\n",
      "1              leben  0.023278       ,  0.017106\n",
      "2            ▁Wieder  0.023033       .  0.015425\n",
      "3           ▁players  0.021086       -  0.015181\n",
      "4                ▁ví  0.020956      ▁(  0.014895\n",
      "5               ▁kde  0.020665  <0x0A>  0.014851\n",
      "6           ▁prendre  0.020538       :  0.012708\n",
      "7                ▁tf  0.020308      ▁-  0.012428\n",
      "8                лта  0.020295     ▁in  0.011687\n",
      "9            ▁dentro  0.020218    ▁and  0.011411\n",
      "10               iał  0.020135      ▁A  0.011360\n",
      "11          ▁deutsch  0.020048      ▁a  0.011227\n",
      "12             affen  0.019750       /  0.011169\n",
      "13              ▁Rey  0.019383      ▁C  0.011134\n",
      "14            ▁quien  0.019121       (  0.011126\n",
      "15            ▁hasta  0.019007       1  0.010975\n",
      "16  ▁characteristics  0.018967      ▁I  0.010877\n",
      "17             ▁выше  0.018912      ▁T  0.010802\n",
      "18           ▁Luther  0.018874      ▁B  0.010692\n",
      "19           ▁prowad  0.018667      ▁s  0.010422\n",
      "20      ▁Deutschland  0.018558      ▁M  0.010316\n",
      "21              ichi  0.018512       ;  0.010296\n",
      "22             ▁vuel  0.018493      ▁S  0.010263\n",
      "23             ▁като  0.018484       '  0.010182\n",
      "24             ▁inne  0.018451      ▁e  0.010104\n",
      "25          histoire  0.018380      ▁D  0.010084\n",
      "26         ▁tropical  0.018326      ▁\"  0.009964\n",
      "27         ▁entering  0.018276      ▁F  0.009934\n",
      "28              lied  0.018265      ▁.  0.009896\n",
      "29            ▁juego  0.018058      ▁P  0.009883\n",
      "30                 汉  0.018051       2  0.009722\n",
      "31              ▁анг  0.018013      ▁–  0.009635\n",
      "32         ▁erstmals  0.017986     ▁to  0.009616\n",
      "33              ката  0.017982      ▁K  0.009613\n",
      "34               ший  0.017982       …  0.009600\n",
      "35            ▁fecha  0.017946      ▁c  0.009572\n",
      "36            ▁circa  0.017762      ▁H  0.009552\n",
      "37          ▁Quellen  0.017696      ▁G  0.009508\n",
      "38            ▁Bobby  0.017541      ▁E  0.009499\n",
      "39              ktur  0.017517     ...  0.009467\n",
      "1 S 83.78998201082841\n",
      "1 POS\n",
      "     token     logit     token     logit\n",
      "0      ▁IN  0.001646       vil  0.025341\n",
      "1      ▁le  0.001582        ux  0.023876\n",
      "2      ▁th  0.001376       cur  0.023646\n",
      "3       ▁I  0.001311        UM  0.022862\n",
      "4      ▁не  0.001248       lus  0.022581\n",
      "5   <0xE8>  0.001148       iot  0.022272\n",
      "6   <0xE2>  0.001121      ilus  0.021911\n",
      "7      ▁ob  0.001088        bo  0.021507\n",
      "8      ▁al  0.001068      pace  0.020904\n",
      "9       ▁а  0.001017      roph  0.020015\n",
      "10      ▁t  0.001014    ▁Natal  0.020010\n",
      "11     ▁tf  0.001011       com  0.020005\n",
      "12      ▁г  0.000991    typeof  0.019944\n",
      "13     ▁UN  0.000897      asts  0.019862\n",
      "14      ▁r  0.000869       сор  0.019795\n",
      "15     ▁по  0.000830       ten  0.019663\n",
      "16      ▁g  0.000799       oci  0.019534\n",
      "17     ▁ла  0.000738        tr  0.019261\n",
      "18  <0xE4>  0.000708       ino  0.019257\n",
      "19      ▁T  0.000688       vor  0.019159\n",
      "20      ▁G  0.000653      bert  0.019156\n",
      "21     ▁se  0.000635    ▁Chief  0.019065\n",
      "22      ▁O  0.000624      ▁wra  0.019032\n",
      "23      ▁V  0.000546    <0xA0>  0.019014\n",
      "24     ▁wh  0.000537       nes  0.018957\n",
      "25  <0xE7>  0.000531        ум  0.018846\n",
      "26    ▁sch  0.000529       oki  0.018777\n",
      "27    ▁pre  0.000499       zon  0.018703\n",
      "28       ’  0.000491       yan  0.018661\n",
      "29     ▁ad  0.000470       ver  0.018488\n",
      "30      ▁j  0.000463        uv  0.018482\n",
      "31      ▁Т  0.000419        añ  0.018436\n",
      "32     ▁ne  0.000410       ena  0.018296\n",
      "33     ▁на  0.000406      apis  0.018232\n",
      "34      ▁Р  0.000389      enne  0.018223\n",
      "35       P  0.000372        ot  0.018212\n",
      "36     ▁gl  0.000347  ▁Profile  0.018175\n",
      "37    ▁mak  0.000315      ▁hat  0.018139\n",
      "38     ▁ле  0.000300        vo  0.018095\n",
      "39       T  0.000290         Љ  0.018075\n",
      "1 NEG\n",
      "             token     logit        token     logit\n",
      "0         ymnasium  0.012229          <s>  0.023891\n",
      "1   ▁StringBuilder  0.012141  scriptstyle  0.023521\n",
      "2         Argument  0.011789        quipe  0.022201\n",
      "3              ouc  0.011600          ttp  0.022121\n",
      "4   ▁functionality  0.011360        faces  0.022105\n",
      "5            emplo  0.011287     ▁Leonard  0.022024\n",
      "6           bullet  0.011143           là  0.021940\n",
      "7         Defaults  0.011120     packages  0.021938\n",
      "8          Builder  0.011098          nea  0.021890\n",
      "9    Configuration  0.011071         imar  0.021852\n",
      "10       ClassName  0.011057          eln  0.021707\n",
      "11        pository  0.011051        olare  0.021291\n",
      "12        ▁Notable  0.011001          cji  0.021231\n",
      "13           legen  0.010954         iele  0.021219\n",
      "14     ▁excitement  0.010953   ▁longitude  0.020920\n",
      "15    architecture  0.010937     ▁unicode  0.020829\n",
      "16        ensemble  0.010892          ław  0.020819\n",
      "17          Logger  0.010886        alone  0.020727\n",
      "18  ▁compatibility  0.010861          raj  0.020645\n",
      "19          vector  0.010850        ▁axes  0.020454\n",
      "20     getInstance  0.010836     essional  0.020453\n",
      "21         Manager  0.010835           őd  0.020374\n",
      "22            iale  0.010832         stwo  0.020139\n",
      "23            вана  0.010817        widet  0.019756\n",
      "24         atorial  0.010784         iffe  0.019696\n",
      "25     orientation  0.010773         mary  0.019580\n",
      "26  implementation  0.010756         bere  0.019493\n",
      "27      ▁Barcelona  0.010738           DC  0.019352\n",
      "28         lemagne  0.010732         inci  0.019089\n",
      "29          kommen  0.010726  ▁Einzelnach  0.019052\n",
      "30     ▁simplicity  0.010717       <0xF3>  0.018975\n",
      "31        geometry  0.010709         nika  0.018922\n",
      "32            icus  0.010698        ▁Fich  0.018858\n",
      "33     destination  0.010697         ließ  0.018685\n",
      "34            ügel  0.010669           ▁Á  0.018661\n",
      "35     ondissement  0.010663   ▁Traceback  0.018614\n",
      "36  ▁documentation  0.010646          ugo  0.018589\n",
      "37          Static  0.010626          fte  0.018405\n",
      "38         atience  0.010624         тура  0.018401\n",
      "39          ackage  0.010596    ▁judgment  0.018381\n",
      "2 S 44.806065792223286\n",
      "2 POS\n",
      "       token     logit            token     logit\n",
      "0       ství  0.015198      ▁Einzelnach  0.026067\n",
      "1       stwa  0.014645           ARCHAR  0.025329\n",
      "2       erei  0.014456     ▁Хронологија  0.025282\n",
      "3     igkeit  0.014439              rès  0.023848\n",
      "4       acji  0.014290            atica  0.023582\n",
      "5       stwo  0.013980                ѫ  0.023068\n",
      "6      ování  0.013592              ysz  0.022956\n",
      "7       lera  0.013520            ▁pros  0.022304\n",
      "8       igte  0.013514            poque  0.021714\n",
      "9       ynie  0.013473         ▁Einzeln  0.021478\n",
      "10       ání  0.013363         enschaft  0.021453\n",
      "11       ził  0.013350          ▁Column  0.021173\n",
      "12       ění  0.013329                ズ  0.020787\n",
      "13     лении  0.013318               BA  0.020571\n",
      "14      zeti  0.013281            toire  0.020457\n",
      "15     nosti  0.013238               ći  0.020276\n",
      "16        ню  0.013179            ittee  0.020249\n",
      "17      aniu  0.013142             atol  0.020187\n",
      "18      ному  0.013092              esk  0.020119\n",
      "19     ności  0.012911         entlicht  0.020106\n",
      "20     resse  0.012899            ições  0.019990\n",
      "21       нию  0.012899             ющим  0.019911\n",
      "22        ři  0.012884            ätter  0.019690\n",
      "23    nahmen  0.012876       ▁angularjs  0.019459\n",
      "24    izzato  0.012831            ziale  0.019406\n",
      "25       niu  0.012794          idenote  0.019181\n",
      "26      kiem  0.012791               FI  0.019170\n",
      "27       сті  0.012736              roz  0.019053\n",
      "28       ení  0.012703          ▁column  0.019027\n",
      "29     ление  0.012685             atre  0.018997\n",
      "30      мена  0.012639              ▁ds  0.018974\n",
      "31  igkeiten  0.012622             cian  0.018936\n",
      "32      стей  0.012612              emi  0.018904\n",
      "33     nahme  0.012573              edi  0.018869\n",
      "34      ność  0.012570              вня  0.018865\n",
      "35     лення  0.012569          ▁Bedeut  0.018810\n",
      "36       нии  0.012538  ▁Unterscheidung  0.018757\n",
      "37    ciente  0.012537            igten  0.018661\n",
      "38     igung  0.012490             conv  0.018328\n",
      "39      ając  0.012482               rá  0.018271\n",
      "2 NEG\n",
      "     token     logit     token     logit\n",
      "0     ▁Wol  0.019457       ier  0.023472\n",
      "1     ▁Laz  0.016549      iera  0.023312\n",
      "2    ▁Jess  0.016172      case  0.022754\n",
      "3    ▁Nich  0.016143        oo  0.022686\n",
      "4     ▁Pat  0.016128      ▁Zug  0.022548\n",
      "5     ▁Ash  0.016114       pot  0.022530\n",
      "6     ▁Haw  0.015985    stream  0.022266\n",
      "7     ▁Lav  0.015873        ie  0.021894\n",
      "8     ▁Jah  0.015715        st  0.021866\n",
      "9     ▁Dum  0.015647     point  0.021385\n",
      "10    ▁Fol  0.015644       SEE  0.021224\n",
      "11   ▁Glad  0.015626   ▁насеље  0.020999\n",
      "12  ▁Steph  0.015552       ies  0.020791\n",
      "13    ▁Hob  0.015550       ney  0.020640\n",
      "14    ▁Lem  0.015548        lm  0.020429\n",
      "15    ▁Rod  0.015433   ▁linker  0.020253\n",
      "16   ▁Hard  0.015406      mont  0.020188\n",
      "17     ▁Ли  0.015403         └  0.020185\n",
      "18    ▁Moh  0.015373       hem  0.020125\n",
      "19    ▁Red  0.015334      pass  0.020073\n",
      "20    ▁Dor  0.015278      ▁Оте  0.019579\n",
      "21    ▁Rol  0.015235      cano  0.019475\n",
      "22   ▁Brad  0.015227  ▁formula  0.019349\n",
      "23    ▁Nik  0.015225       kin  0.019325\n",
      "24    ▁Mus  0.015222  ▁Formula  0.019305\n",
      "25    ▁Cos  0.015218         例  0.018982\n",
      "26    ▁Fre  0.015217       man  0.018633\n",
      "27    ▁Pol  0.015205       enc  0.018548\n",
      "28    ▁Nav  0.015197        ne  0.018496\n",
      "29   ▁Soph  0.015196      ▁Moh  0.018492\n",
      "30    ▁Tar  0.015185     ▁twee  0.018353\n",
      "31    ▁Rud  0.015171        ha  0.018294\n",
      "32    ▁Sto  0.015165       nab  0.018272\n",
      "33     ▁Ol  0.015156      shot  0.018242\n",
      "34    ▁Bal  0.015146   ▁Sultan  0.018215\n",
      "35    ▁Tas  0.015138         у  0.018190\n",
      "36    ▁Roc  0.015116    ▁Muham  0.018137\n",
      "37    ▁Bus  0.015111      nach  0.018124\n",
      "38    ▁Gor  0.015049        ny  0.018083\n",
      "39     ▁Cl  0.015038      ment  0.017823\n",
      "3 S 40.16791304523909\n",
      "3 POS\n",
      "               token     logit         token     logit\n",
      "0    ▁initialization  0.021799          inci  0.025324\n",
      "1    ▁authentication  0.020466        ▁Klein  0.024926\n",
      "2   ▁implementations  0.020403        <0xAE>  0.022177\n",
      "3   ▁transformations  0.020349           efe  0.022156\n",
      "4     ▁interpolation  0.020295          Pair  0.021237\n",
      "5     ▁authorization  0.019902          sson  0.021209\n",
      "6      ▁asynchronous  0.019635          town  0.021191\n",
      "7   ▁characteristics  0.019368            ei  0.021154\n",
      "8     authentication  0.019078           nas  0.021086\n",
      "9    ▁implementation  0.019061           arc  0.020340\n",
      "10    implementation  0.018834          ship  0.020290\n",
      "11   ▁multiplication  0.018590           tte  0.020263\n",
      "12    ▁documentation  0.018396           ▁Ny  0.019970\n",
      "13  ▁Representatives  0.018375      Category  0.019869\n",
      "14     ▁dependencies  0.018362           ▁eu  0.019775\n",
      "15    ▁functionality  0.018253           гар  0.019743\n",
      "16   ▁administrative  0.018213         mania  0.019717\n",
      "17  ▁representations  0.018069            pa  0.019312\n",
      "18   ▁representation  0.018065           пан  0.019280\n",
      "19    Authentication  0.017991             符  0.019251\n",
      "20    ▁decomposition  0.017934          obar  0.018908\n",
      "21   ▁recommendation  0.017909          ▁Pam  0.018819\n",
      "22      ▁credentials  0.017863         arium  0.018736\n",
      "23    ▁relationships  0.017701            dy  0.018696\n",
      "24    ▁administrator  0.017537             s  0.018630\n",
      "25   ▁identification  0.017503            go  0.018276\n",
      "26      dependencies  0.017402         aters  0.018190\n",
      "27     documentation  0.017394            dh  0.018190\n",
      "28    ▁Massachusetts  0.017379           лен  0.018177\n",
      "29   ▁configurations  0.017370  ▁competition  0.018159\n",
      "30   ▁transformation  0.017341           nar  0.018140\n",
      "31     ▁repositories  0.017337           ven  0.018031\n",
      "32    ▁notifications  0.017325       ▁Norweg  0.017887\n",
      "33  ▁straightforward  0.017276           SSN  0.017862\n",
      "34      ▁disappeared  0.017249           чий  0.017808\n",
      "35   ▁Authentication  0.017238          oven  0.017702\n",
      "36    ▁participation  0.017048            eu  0.017646\n",
      "37     ▁transactions  0.017038          burg  0.017561\n",
      "38       ▁deprecated  0.017002             њ  0.017547\n",
      "39      ▁assumptions  0.016798           dal  0.017516\n",
      "3 NEG\n",
      "   token     logit         token     logit\n",
      "0     ær  0.013471        ipedia  0.026152\n",
      "1     Bo  0.013174          andr  0.023298\n",
      "2      ö  0.013104         assoc  0.023137\n",
      "3     vy  0.013093      igkeiten  0.023063\n",
      "4    wen  0.012961             Ἐ  0.021819\n",
      "5     zu  0.012938           eso  0.020694\n",
      "6     nu  0.012904        ▁grado  0.020559\n",
      "7     ni  0.012837          éric  0.020522\n",
      "8     ál  0.012828            EQ  0.020502\n",
      "9     rz  0.012791            @\"  0.020490\n",
      "10   oko  0.012781             ✿  0.020192\n",
      "11    Li  0.012772            шу  0.020129\n",
      "12    Da  0.012762          ugin  0.020123\n",
      "13   aru  0.012708           fér  0.019839\n",
      "14    Op  0.012681       циональ  0.019807\n",
      "15   Mer  0.012679           uno  0.019769\n",
      "16    vu  0.012662     ▁mistakes  0.019615\n",
      "17    sn  0.012534       othèque  0.019609\n",
      "18    ře  0.012533        igkeit  0.019449\n",
      "19   Ber  0.012529       ▁équipe  0.018803\n",
      "20    Fl  0.012485        efined  0.018688\n",
      "21     ą  0.012476            AK  0.018687\n",
      "22    Gr  0.012460             Ć  0.018658\n",
      "23    ív  0.012435          orem  0.018569\n",
      "24    Na  0.012353     Wikimedia  0.018484\n",
      "25   gal  0.012308      ▁profile  0.018444\n",
      "26    Ne  0.012308           kit  0.018281\n",
      "27   san  0.012292          endo  0.018264\n",
      "28   roz  0.012288    ----------  0.018257\n",
      "29    kn  0.012273          clud  0.018146\n",
      "30    ei  0.012256         actly  0.018069\n",
      "31   mil  0.012233          wiki  0.017906\n",
      "32    Sm  0.012222           еру  0.017868\n",
      "33    Te  0.012213           cou  0.017853\n",
      "34    Ma  0.012167         igger  0.017762\n",
      "35   nar  0.012166             若  0.017703\n",
      "36   rob  0.012153         ensis  0.017634\n",
      "37   raz  0.012125       profile  0.017595\n",
      "38     ц  0.012125  ▁Хронологија  0.017593\n",
      "39    yc  0.012112          ules  0.017543\n",
      "4 S 39.607160201125886\n",
      "4 POS\n",
      "       token     logit            token     logit\n",
      "0     stream  0.014900            ▁Ward  0.025697\n",
      "1   building  0.014420             ▁civ  0.022144\n",
      "2     direct  0.013724              cov  0.021647\n",
      "3     Spring  0.013413                ̲  0.021390\n",
      "4    project  0.013109                a  0.020706\n",
      "5     ground  0.013009                ă  0.020342\n",
      "6   location  0.012830              QUE  0.020296\n",
      "7      child  0.012639          ▁Arbeit  0.019757\n",
      "8   jections  0.012626             othe  0.019547\n",
      "9      water  0.012562           ▁invån  0.019512\n",
      "10   rolling  0.012535            ucker  0.019455\n",
      "11       ray  0.012432                Ľ  0.019008\n",
      "12   subject  0.012417            ▁Beck  0.018936\n",
      "13     patch  0.012391        avascript  0.018799\n",
      "14      west  0.012371             ▁Tru  0.018757\n",
      "15  Security  0.012347              ▁PM  0.018732\n",
      "16     power  0.012335             hely  0.018672\n",
      "17     atter  0.012304            argin  0.018585\n",
      "18      hold  0.012296         toString  0.018450\n",
      "19    illing  0.012294               мп  0.018349\n",
      "20  learning  0.012277               ře  0.018192\n",
      "21  communic  0.012261                ט  0.018183\n",
      "22    Master  0.012195                ↓  0.018108\n",
      "23    Target  0.012189        ▁invånare  0.018077\n",
      "24    action  0.012182                ǫ  0.017986\n",
      "25   Channel  0.012178             atre  0.017967\n",
      "26   country  0.012138             cido  0.017919\n",
      "27     ision  0.012135              дон  0.017839\n",
      "28     Match  0.012128         ▁closure  0.017821\n",
      "29      ward  0.012108           ▁gener  0.017816\n",
      "30    screen  0.012108             ▁wis  0.017793\n",
      "31    Mobile  0.012094              ▁tu  0.017669\n",
      "32     cover  0.012031              che  0.017597\n",
      "33  Register  0.012026           ▁thous  0.017548\n",
      "34    Stream  0.011998             ▁Års  0.017546\n",
      "35      flow  0.011991                ך  0.017494\n",
      "36   jection  0.011955  PropertyChanged  0.017438\n",
      "37     light  0.011950           ▁Mason  0.017429\n",
      "38   natural  0.011880            ▁Carl  0.017416\n",
      "39    inding  0.011854                ↳  0.017403\n",
      "4 NEG\n",
      "      token     logit    token     logit\n",
      "0       ▁zá  0.017823     eval  0.028726\n",
      "1       ▁új  0.017602     mask  0.028701\n",
      "2      ▁nej  0.017505   achine  0.026170\n",
      "3      ▁při  0.017477       ec  0.024613\n",
      "4     ▁rozp  0.017401      iva  0.023882\n",
      "5     ▁einz  0.017355       rr  0.023818\n",
      "6     ▁több  0.017114     egen  0.023508\n",
      "7     ▁voet  0.016963     Mask  0.023456\n",
      "8       ▁či  0.016930    ▁mask  0.023177\n",
      "9      ▁dém  0.016853      tar  0.022869\n",
      "10    ▁oraz  0.016841     usch  0.022787\n",
      "11     ▁köz  0.016757     lets  0.022449\n",
      "12  ▁został  0.016754      sin  0.021864\n",
      "13   ▁проис  0.016546     phal  0.021715\n",
      "14    ▁azon  0.016407     ires  0.021651\n",
      "15      ▁mű  0.016393      acz  0.021306\n",
      "16   ▁przez  0.016373   onnées  0.020925\n",
      "17    ▁című  0.016360   ectors  0.020601\n",
      "18      ▁év  0.016316     itet  0.020490\n",
      "19     ▁bör  0.016110    ▁Сред  0.020279\n",
      "20     ▁sjö  0.016056      мец  0.020222\n",
      "21    ▁beim  0.016030      ütt  0.020194\n",
      "22   ▁szere  0.015991     ired  0.020181\n",
      "23      ▁fö  0.015947    ▁verf  0.020148\n",
      "24      ▁są  0.015812     lach  0.019837\n",
      "25     ▁był  0.015807     oupe  0.019826\n",
      "26    ▁vagy  0.015786   ▁Swift  0.019793\n",
      "27  ▁además  0.015760     arel  0.019689\n",
      "28    ▁höch  0.015716    ittel  0.019623\n",
      "29    ▁przy  0.015698   <0xA1>  0.019568\n",
      "30     ▁наи  0.015698  tersuch  0.019567\n",
      "31    ▁jeho  0.015691    Marie  0.019466\n",
      "32     ▁byl  0.015648     buff  0.019360\n",
      "33    ▁csal  0.015646     uals  0.019253\n",
      "34     ▁moż  0.015593   Prefix  0.019250\n",
      "35    ▁jsou  0.015349      eda  0.019135\n",
      "36    ▁wyst  0.015321       là  0.018959\n",
      "37   ▁außer  0.015252    ▁eval  0.018920\n",
      "38      ▁ér  0.015243     aces  0.018900\n",
      "39     ▁zus  0.015215       él  0.018872\n",
      "5 S 37.25744392965878\n",
      "5 POS\n",
      "            token     logit          token     logit\n",
      "0   ▁Philadelphia  0.018099       ispecies  0.030445\n",
      "1       ▁Victoria  0.016537       ▁listade  0.028748\n",
      "2       ▁Virginia  0.016345        ▁länkar  0.026723\n",
      "3      ▁continent  0.016332   ▁Хронологија  0.026458\n",
      "4      ▁surprised  0.015765      ▁Kontrola  0.025138\n",
      "5       ▁nouvelle  0.015633         ▁zvuky  0.024422\n",
      "6        ▁Toronto  0.015541  ▁Audiodateien  0.024146\n",
      "7        ▁ancient  0.015539        ▁odkazy  0.024013\n",
      "8        ▁Maurice  0.015321           äler  0.023737\n",
      "9       ▁mountain  0.015304           ikus  0.023322\n",
      "10        ▁Europa  0.015263            ída  0.022727\n",
      "11    ▁celebrated  0.015179           naio  0.021862\n",
      "12      ▁Harrison  0.015129      andenburg  0.021745\n",
      "13     ▁Mountains  0.015090           átum  0.021244\n",
      "14        ▁Vienna  0.015089       fficiale  0.021227\n",
      "15        ▁Europe  0.014890           unft  0.021165\n",
      "16     ▁Elizabeth  0.014856            iei  0.021153\n",
      "17      ▁Florence  0.014805              ễ  0.021137\n",
      "18    ▁succession  0.014723          orith  0.020678\n",
      "19      ▁northern  0.014665          emark  0.020663\n",
      "20    ▁California  0.014635           fico  0.020465\n",
      "21      ▁Canadian  0.014633       ▁månaden  0.020357\n",
      "22        ▁George  0.014613              ญ  0.020259\n",
      "23       ▁proprio  0.014598         ▁prüfe  0.020218\n",
      "24        ▁London  0.014585     ▁nederbörd  0.020093\n",
      "25       ▁village  0.014560           ▁Hab  0.020056\n",
      "26     ▁Alexander  0.014549          edeut  0.019951\n",
      "27         ▁Maria  0.014503           mina  0.019878\n",
      "28      ▁European  0.014455           iore  0.019845\n",
      "29       ▁country  0.014413         ▁Censo  0.019827\n",
      "30    ▁Australian  0.014410       egyzetek  0.019645\n",
      "31   ▁Deutschland  0.014403    ▁eredetiből  0.019639\n",
      "32       ▁Hermann  0.014379      ▁entferne  0.019629\n",
      "33  ▁Pennsylvania  0.014308         ▁videa  0.019625\n",
      "34       ▁juillet  0.014287            lax  0.019509\n",
      "35        ▁Carlos  0.014262         atform  0.019467\n",
      "36   ▁possibility  0.014223         oreign  0.019391\n",
      "37       ▁dernier  0.014195          après  0.019353\n",
      "38    ▁Petersburg  0.014169           érez  0.019238\n",
      "39      ▁François  0.014111          férés  0.019215\n",
      "5 NEG\n",
      "      token     logit  token     logit\n",
      "0     props  0.023798    our  0.027857\n",
      "1      JSON  0.021301    ern  0.024510\n",
      "2      (...  0.021125    ton  0.024153\n",
      "3        (.  0.020393   anka  0.023705\n",
      "4       cfg  0.020216    led  0.023531\n",
      "5     Usage  0.020134     br  0.023208\n",
      "6       ▁\\$  0.019716     il  0.022890\n",
      "7    params  0.019612    ens  0.022735\n",
      "8       ▁*)  0.019489   ▁поч  0.022606\n",
      "9     oauth  0.019439  stone  0.022320\n",
      "10    ▁JSON  0.019408    ala  0.022311\n",
      "11  forEach  0.019250    ela  0.021703\n",
      "12   pragma  0.019240     tv  0.021584\n",
      "13       `;  0.019207    ber  0.021451\n",
      "14     ▁ptr  0.019168    ull  0.021328\n",
      "15       }`  0.019058    age  0.021272\n",
      "16      ▁);  0.018987     ad  0.021257\n",
      "17     uuid  0.018962    ron  0.021059\n",
      "18       ${  0.018772    ice  0.020779\n",
      "19   Bounds  0.018637      z  0.020761\n",
      "20   webkit  0.018513    rim  0.020749\n",
      "21      ssl  0.018464    ile  0.020173\n",
      "22     argv  0.018307    lik  0.020092\n",
      "23     pred  0.018306    ald  0.019943\n",
      "24   Output  0.018297    lib  0.019937\n",
      "25    Batch  0.018258    mon  0.019875\n",
      "26   ToList  0.018199    ear  0.019838\n",
      "27  servlet  0.018127    ()\"  0.019493\n",
      "28    Debug  0.018095     ve  0.019426\n",
      "29      xls  0.018093     ze  0.019426\n",
      "30     cref  0.018067   ston  0.019338\n",
      "31    Modal  0.018051     Br  0.019144\n",
      "32      ]=\"  0.018025    ial  0.019031\n",
      "33     ()))  0.017979   ured  0.018984\n",
      "34   Socket  0.017845     sh  0.018879\n",
      "35     Http  0.017812   stat  0.018860\n",
      "36     Spec  0.017781    ace  0.018819\n",
      "37      msg  0.017661    rag  0.018817\n",
      "38    Autow  0.017652    ays  0.018816\n",
      "39      ().  0.017602   bury  0.018758\n",
      "6 S 34.559043756676445\n",
      "6 POS\n",
      "           token     logit       token     logit\n",
      "0     ▁Александр  0.018887         ham  0.024773\n",
      "1     ▁Alexandre  0.018732        alth  0.024495\n",
      "2      Microsoft  0.018502         iti  0.022831\n",
      "3         Pierre  0.018282       stadt  0.022226\n",
      "4       ▁Wilhelm  0.018083         bos  0.021963\n",
      "5          Marie  0.017908    Workbook  0.021602\n",
      "6         ▁Nikol  0.017787        edes  0.021345\n",
      "7   ▁Deutschland  0.017682       ▁Orig  0.021236\n",
      "8      ▁Giuseppe  0.017181       terre  0.020928\n",
      "9        Michael  0.017079  DateFormat  0.020766\n",
      "10     ▁Berliner  0.017016        ains  0.020605\n",
      "11     ▁Владимир  0.016990   ungsseite  0.020443\n",
      "12  ▁Christopher  0.016930        baut  0.020441\n",
      "13        France  0.016924          GV  0.020042\n",
      "14    ▁Stanisław  0.016903         ält  0.019781\n",
      "15        ▁Johan  0.016680        ting  0.019479\n",
      "16     ▁Budapest  0.016598       enson  0.019456\n",
      "17         David  0.016564           종  0.019438\n",
      "18   ▁Österreich  0.016501           ⊙  0.019304\n",
      "19    ▁Friedrich  0.016462        ▁Års  0.019197\n",
      "20     ▁Jonathan  0.016450         UID  0.019099\n",
      "21      ▁України  0.016347      ▁Label  0.019032\n",
      "22        ▁Михай  0.016343         nor  0.018939\n",
      "23    ▁Deutschen  0.016339       fault  0.018869\n",
      "24    ▁Regierung  0.016220         vas  0.018848\n",
      "25     ▁Deutsche  0.016068         bru  0.018798\n",
      "26        Daniel  0.015864        esen  0.018793\n",
      "27        ▁Tommy  0.015795          áv  0.018675\n",
      "28     ▁González  0.015784         ari  0.018601\n",
      "29      ▁Kenneth  0.015732       ▁Aven  0.018589\n",
      "30    ▁Landkreis  0.015725        ▁Wie  0.018423\n",
      "31      ▁Galerie  0.015724        fade  0.018342\n",
      "32      ▁Georges  0.015716  ▁Challenge  0.018159\n",
      "33  ▁Brandenburg  0.015691        keit  0.018142\n",
      "34          Mich  0.015637         ese  0.018042\n",
      "35    Foundation  0.015599      ▁fault  0.018018\n",
      "36        ▁Jason  0.015596    setState  0.018014\n",
      "37     ▁Benjamin  0.015547       ▁Emil  0.017891\n",
      "38       ▁Justin  0.015524         iki  0.017862\n",
      "39    ▁Frederick  0.015523         ity  0.017825\n",
      "6 NEG\n",
      "       token     logit       token     logit\n",
      "0     ▁dimin  0.015388      ferrer  0.030208\n",
      "1      ▁dedu  0.014883    compiler  0.024164\n",
      "2      ▁stra  0.014561        agna  0.022847\n",
      "3   ▁extract  0.014469         sak  0.022645\n",
      "4       ▁rag  0.014452         wor  0.022615\n",
      "5    ▁excess  0.014441       ▁vere  0.022538\n",
      "6       ▁rig  0.014428      ientos  0.022466\n",
      "7      ▁caus  0.014312          TD  0.022261\n",
      "8       ▁enc  0.014312       ▁Herm  0.022195\n",
      "9       ▁mud  0.014241          mx  0.022141\n",
      "10     ▁requ  0.014090          ом  0.021470\n",
      "11     ▁cond  0.013985       onato  0.021393\n",
      "12     ▁circ  0.013938      ▁Volks  0.021336\n",
      "13     ▁thin  0.013931      ▁saint  0.021282\n",
      "14      ▁dro  0.013832        ▁vol  0.020960\n",
      "15      ▁dim  0.013811   direction  0.020784\n",
      "16      ▁cav  0.013806        ilis  0.020558\n",
      "17    ▁corre  0.013744         omp  0.019800\n",
      "18     ▁reck  0.013702          юр  0.019696\n",
      "19     ▁peak  0.013695       xygen  0.019612\n",
      "20      ▁rid  0.013659    irection  0.019481\n",
      "21    ▁absor  0.013642        atge  0.019221\n",
      "22      ▁sug  0.013612      ▁Baden  0.019211\n",
      "23      ▁dig  0.013568       ockey  0.019167\n",
      "24      ▁fed  0.013549      plorer  0.019145\n",
      "25     ▁disp  0.013497        enst  0.019117\n",
      "26    ▁burst  0.013444         смо  0.018858\n",
      "27    ▁capac  0.013430        ▁man  0.018851\n",
      "28    ▁accum  0.013401  ▁direction  0.018808\n",
      "29     ▁gain  0.013379       ▁crow  0.018801\n",
      "30      ▁tin  0.013248        anha  0.018796\n",
      "31      ▁exp  0.013201   ▁Marshall  0.018731\n",
      "32     ▁temp  0.013174       ogram  0.018709\n",
      "33     ▁pare  0.013135      ischof  0.018676\n",
      "34     ▁trac  0.013120        ▁Cad  0.018661\n",
      "35   ▁height  0.013103        nier  0.018475\n",
      "36     ▁indu  0.013083          :@  0.018345\n",
      "37     ▁grad  0.013070        tick  0.018314\n",
      "38         必  0.013058        ixen  0.018268\n",
      "39     ▁prim  0.013049           符  0.018110\n",
      "7 S 30.982167421525304\n",
      "7 POS\n",
      "              token     logit token     logit\n",
      "0             alous  0.020952    em  0.031549\n",
      "1             imper  0.020802   dem  0.024600\n",
      "2                 ṭ  0.020370    fe  0.023613\n",
      "3         wordpress  0.019835  fore  0.023436\n",
      "4           predict  0.019827    DS  0.023407\n",
      "5                öv  0.019617  eros  0.023396\n",
      "6                іс  0.019244    gr  0.023015\n",
      "7              onel  0.018986  iana  0.022816\n",
      "8             ynast  0.018591   emp  0.022487\n",
      "9             comot  0.018443    ac  0.022465\n",
      "10              гре  0.018404    je  0.022369\n",
      "11              тай  0.018378    an  0.022315\n",
      "12             olan  0.018368    cs  0.022252\n",
      "13            gress  0.018364    OF  0.022120\n",
      "14             avid  0.018290   ero  0.022064\n",
      "15                香  0.018279    vo  0.022020\n",
      "16            colog  0.018224    ap  0.021893\n",
      "17     ▁энциклопеди  0.017911   old  0.021810\n",
      "18           ▁milit  0.017686   ron  0.021646\n",
      "19            polit  0.017634   jes  0.021437\n",
      "20           histor  0.017586   ens  0.021402\n",
      "21                Ṭ  0.017505    ls  0.021384\n",
      "22            ellan  0.017461    ux  0.021209\n",
      "23             olit  0.017436   ive  0.021198\n",
      "24           ▁polit  0.017406   que  0.021160\n",
      "25            tagon  0.017270   cle  0.020996\n",
      "26            ocrat  0.017086   ana  0.020972\n",
      "27       ographical  0.017083   ise  0.020845\n",
      "28             oman  0.017055    it  0.020773\n",
      "29             hely  0.016993    le  0.020752\n",
      "30                ட  0.016791   cur  0.020681\n",
      "31            okrat  0.016746   ian  0.020578\n",
      "32             adel  0.016658    ma  0.020481\n",
      "33              atr  0.016616    ng  0.020420\n",
      "34  ▁administration  0.016509     p  0.020397\n",
      "35            ▁prez  0.016490  ante  0.020386\n",
      "36            onial  0.016416   igo  0.020242\n",
      "37             oust  0.016412   ado  0.020209\n",
      "38         ▁organiz  0.016409  itis  0.020175\n",
      "39             chev  0.016375    ci  0.020149\n",
      "7 NEG\n",
      "       token     logit          token     logit\n",
      "0     ▁xmlns  0.023643           $}}%  0.028751\n",
      "1        ▁/*  0.022438           ugno  0.024078\n",
      "2         }>  0.020945         undial  0.023626\n",
      "3      ▁bool  0.020596          stadt  0.022441\n",
      "4        ▁=>  0.020249            égr  0.021094\n",
      "5         #,  0.020220      ▁Kontrola  0.021001\n",
      "6        ▁+=  0.020151   ▁Хронологија  0.020780\n",
      "7        ));  0.020025             èg  0.020713\n",
      "8         )+  0.020011         stract  0.020301\n",
      "9         :=  0.019878         ührung  0.020279\n",
      "10       ▁||  0.019831          bolds  0.020110\n",
      "11       >';  0.019811      ▁Мексичка  0.020048\n",
      "12       ▁!=  0.019662         onneur  0.020024\n",
      "13        &&  0.019655           odio  0.019833\n",
      "14      ▁///  0.019625          brary  0.019808\n",
      "15        ▁=  0.019437           ulté  0.019801\n",
      "16       ▁&&  0.019382        perties  0.019480\n",
      "17       ▁.=  0.019162           libs  0.019398\n",
      "18       ▁:=  0.019098              ỳ  0.019390\n",
      "19        _+  0.019061            ião  0.019334\n",
      "20      ▁\"\",  0.018930  EventListener  0.019249\n",
      "21   ▁String  0.018886           %%%%  0.019201\n",
      "22       ▁},  0.018842  ▁bezeichneter  0.019193\n",
      "23        >\"  0.018711      ictionary  0.018901\n",
      "24       ▁<<  0.018681           kazy  0.018884\n",
      "25        ▁{  0.018496          verso  0.018833\n",
      "26        /*  0.018233           reso  0.018680\n",
      "27        \"\"  0.018182        ionario  0.018669\n",
      "28    String  0.018144        TagName  0.018534\n",
      "29       >\",  0.018132           éral  0.018335\n",
      "30       \"/>  0.018044           egov  0.017985\n",
      "31        =>  0.018026          virti  0.017962\n",
      "32     ▁Size  0.018017         <0xF3>  0.017960\n",
      "33     xmlns  0.017975          perty  0.017883\n",
      "34      ▁[],  0.017974           uset  0.017796\n",
      "35       ▁-=  0.017871             żs  0.017786\n",
      "36      igte  0.017817             ië  0.017736\n",
      "37  ▁Package  0.017749            tei  0.017553\n",
      "38       )))  0.017742            <s>  0.017551\n",
      "39     ▁void  0.017717          ython  0.017533\n",
      "8 S 30.11751565538969\n",
      "8 POS\n",
      "       token     logit    token     logit\n",
      "0     ▁icons  0.019709     onna  0.023971\n",
      "1     ▁boats  0.019494       ne  0.023600\n",
      "2     ▁items  0.019469      ris  0.022832\n",
      "3      ▁menu  0.019286     tera  0.022348\n",
      "4   ▁canción  0.018930     ande  0.021621\n",
      "5     ▁magic  0.018696    ▁Werk  0.021490\n",
      "6      ▁punk  0.018639     esti  0.021448\n",
      "7       ▁bag  0.018570      yal  0.021321\n",
      "8     ▁ships  0.018445       ot  0.021122\n",
      "9     ▁album  0.018266     loss  0.020880\n",
      "10     ▁legs  0.018062       te  0.020805\n",
      "11     ▁song  0.018035       de  0.020711\n",
      "12       ▁bs  0.018031  ▁Jahres  0.020534\n",
      "13    ▁rooms  0.017746      wan  0.020392\n",
      "14     ▁apps  0.017674     ward  0.020224\n",
      "15    ▁bases  0.017624     ings  0.020130\n",
      "16         葉  0.017072      bas  0.020094\n",
      "17     ▁muse  0.017016     rest  0.020084\n",
      "18         校  0.016979      anz  0.020025\n",
      "19       ▁DJ  0.016933      ogo  0.019976\n",
      "20         屋  0.016853   ▁Bitte  0.019837\n",
      "21    ▁movie  0.016841       we  0.019812\n",
      "22  ▁gallery  0.016799      esc  0.019772\n",
      "23     ▁logo  0.016791      gun  0.019608\n",
      "24     ▁Navy  0.016790      ona  0.019577\n",
      "25         张  0.016668    inner  0.019573\n",
      "26     ▁cube  0.016653       öl  0.019500\n",
      "27   ▁voices  0.016581        ‏  0.019453\n",
      "28  ▁cookies  0.016552  respons  0.019371\n",
      "29     ▁room  0.016542        ж  0.019298\n",
      "30      ▁pie  0.016531       vd  0.019279\n",
      "31     ▁casa  0.016517      res  0.019162\n",
      "32     ▁Menu  0.016493       ge  0.019082\n",
      "33    ▁stats  0.016391       id  0.019076\n",
      "34      ▁bid  0.016279      ega  0.019061\n",
      "35         ी  0.016227  unächst  0.019045\n",
      "36   ▁folder  0.016125       CH  0.019035\n",
      "37     ▁agua  0.016120      tta  0.019007\n",
      "38    ▁Silva  0.016101     ells  0.018970\n",
      "39  ▁dataset  0.016032      inn  0.018961\n",
      "8 NEG\n",
      "               token     logit         token     logit\n",
      "0           ▁compreh  0.023425       ▁beskre  0.025394\n",
      "1              subst  0.022444  ▁Хронологија  0.024201\n",
      "2              oslov  0.021701           DEX  0.023847\n",
      "3             Recogn  0.021405          zeit  0.022170\n",
      "4             Intern  0.021373       ▁preced  0.022023\n",
      "5               Infl  0.021255         IMARY  0.020980\n",
      "6         ▁acknowled  0.020963          ▁too  0.020921\n",
      "7             Integr  0.020842      ViewById  0.020912\n",
      "8          ▁intellig  0.020147          eros  0.020653\n",
      "9          ▁metropol  0.020019          eerd  0.020583\n",
      "10         ▁distingu  0.019970    entication  0.019872\n",
      "11             trans  0.019960       pmatrix  0.019757\n",
      "12          ▁Mediter  0.019519         kunft  0.019422\n",
      "13         construct  0.019495             堂  0.019361\n",
      "14             ствен  0.019438           SBN  0.018729\n",
      "15        ▁Contempor  0.019366     abgerufen  0.018501\n",
      "16           ▁contin  0.019143           тку  0.018489\n",
      "17         ▁appropri  0.018944        ▁olymp  0.018184\n",
      "18  ________________  0.018867  ▁információk  0.018135\n",
      "19            ▁dimin  0.018867         CLARE  0.018127\n",
      "20          ▁Statist  0.018689    scriptsize  0.018101\n",
      "21           ▁satisf  0.018669           zug  0.018034\n",
      "22      ▁substantial  0.018653         ètres  0.017891\n",
      "23          ▁consequ  0.018578         Agent  0.017784\n",
      "24           ▁transm  0.018557            kö  0.017629\n",
      "25         ▁Metropol  0.018522     itionally  0.017565\n",
      "26               syn  0.018175          clud  0.017544\n",
      "27              ▁пер  0.018173             ظ  0.017411\n",
      "28              urop  0.018153      consulté  0.017398\n",
      "29            frastr  0.018042        indent  0.017336\n",
      "30         ▁propriet  0.017930          igny  0.017330\n",
      "31             Trans  0.017906         ▁viss  0.017305\n",
      "32           ▁conver  0.017879          aped  0.017241\n",
      "33          ▁constit  0.017862           ägt  0.017238\n",
      "34       ▁differenti  0.017793          luss  0.017233\n",
      "35            Expand  0.017750         datei  0.017195\n",
      "36            ▁inher  0.017637          ▁Kir  0.017181\n",
      "37            aggreg  0.017616          acle  0.017180\n",
      "38                 研  0.017585      estanden  0.017177\n",
      "39          ▁reconst  0.017557          zten  0.017159\n",
      "9 S 29.169443713285812\n",
      "9 POS\n",
      "        token     logit    token     logit\n",
      "0        ▁Une  0.022209        i  0.032119\n",
      "1        вшие  0.018583        a  0.025684\n",
      "2         ные  0.018510       añ  0.024953\n",
      "3        ются  0.018357        k  0.023542\n",
      "4        ▁Att  0.018197      ira  0.022913\n",
      "5       ▁zijn  0.018093      kow  0.022643\n",
      "6        ▁Auf  0.018026     enza  0.022399\n",
      "7       ▁Cont  0.017868      act  0.021380\n",
      "8         ная  0.017716       ct  0.021152\n",
      "9         Des  0.017708        o  0.021007\n",
      "10    ▁Comple  0.017531      way  0.020827\n",
      "11      ность  0.017417        t  0.020788\n",
      "12      ▁были  0.017277     ▁Мак  0.020746\n",
      "13      ▁пере  0.017251      ang  0.020546\n",
      "14  uellement  0.017123     ▁Fur  0.020440\n",
      "15      ▁Pres  0.017052        l  0.020348\n",
      "16        Att  0.017037      how  0.020298\n",
      "17      ▁была  0.017027      val  0.020182\n",
      "18        ным  0.016947      une  0.019682\n",
      "19       ▁Com  0.016885   ▁Außer  0.019561\n",
      "20       ется  0.016818      lay  0.019539\n",
      "21         ▁Э  0.016765      ave  0.019281\n",
      "22       ▁Res  0.016746    where  0.019163\n",
      "23    iennent  0.016661       on  0.019045\n",
      "24      ▁avec  0.016585       kt  0.019036\n",
      "25       ▁est  0.016561     Bean  0.018984\n",
      "26        ▁вы  0.016510        f  0.018851\n",
      "27      ▁Пере  0.016470      van  0.018807\n",
      "28     elijke  0.016465  ▁Palace  0.018788\n",
      "29      ▁Rece  0.016426     ▁Kil  0.018759\n",
      "30       ▁Des  0.016335      idi  0.018750\n",
      "31         ▁É  0.016225     iate  0.018726\n",
      "32        Acc  0.016210      ext  0.018515\n",
      "33        ють  0.016137      ▁lé  0.018327\n",
      "34      ación  0.016028       ex  0.018239\n",
      "35       Cons  0.016011    ▁Tier  0.018130\n",
      "36       лась  0.015891      Gen  0.018101\n",
      "37        ное  0.015880      uki  0.018002\n",
      "38      ações  0.015866      moz  0.017930\n",
      "39        ▁Вы  0.015733     wehr  0.017842\n",
      "9 NEG\n",
      "          token     logit        token     logit\n",
      "0        amarin  0.020622         eign  0.026715\n",
      "1           rgb  0.019951        ettes  0.024871\n",
      "2       ▁javafx  0.019752         otte  0.023077\n",
      "3         ▁curv  0.019198        quant  0.023074\n",
      "4    ▁Edinburgh  0.019021        ciale  0.022587\n",
      "5            jl  0.018676      ▁висини  0.022564\n",
      "6     ▁Maryland  0.018540        porte  0.022102\n",
      "7         ▁perl  0.018528       metros  0.021341\n",
      "8        ▁katol  0.018512         nder  0.020988\n",
      "9          ▁svg  0.018493         inho  0.020970\n",
      "10        ▁ruby  0.018275         dern  0.020557\n",
      "11       ▁canad  0.018205          ahu  0.020381\n",
      "12       ▁Scala  0.018131       ▁quant  0.020257\n",
      "13     uclidean  0.018035         odox  0.020236\n",
      "14          vim  0.017920        indre  0.020204\n",
      "15    ▁selenium  0.017909        elles  0.020086\n",
      "16         ▁JVM  0.017696        istre  0.019793\n",
      "17            唐  0.017677      refresh  0.019787\n",
      "18        ▁cycl  0.017564       uclide  0.019783\n",
      "19         ruby  0.017520         aupt  0.019450\n",
      "20     ▁генерал  0.017457         ▁beh  0.019300\n",
      "21       ▁Bruno  0.017451        erten  0.019299\n",
      "22     ▁italien  0.017420           ês  0.019269\n",
      "23          tcp  0.017400         sche  0.019209\n",
      "24        junit  0.017395        adesh  0.019203\n",
      "25           pb  0.017395        textt  0.019072\n",
      "26       ▁franc  0.017391        Quant  0.018903\n",
      "27       cuador  0.017378  ▁eredetiből  0.018886\n",
      "28       navbar  0.017373          ▁~[  0.018737\n",
      "29           gz  0.017370        achiv  0.018627\n",
      "30     jsfiddle  0.017364        apsed  0.018542\n",
      "31          oki  0.017229      ewnętrz  0.018498\n",
      "32       ▁Johan  0.017192        aires  0.018328\n",
      "33      ▁bitmap  0.017184         bour  0.018267\n",
      "34          trl  0.017183       consin  0.018220\n",
      "35       ▁rugby  0.017057      archivi  0.018208\n",
      "36       ▁flask  0.017041          ciu  0.018170\n",
      "37  ▁matplotlib  0.016918       <0x83>  0.018115\n",
      "38    ▁cardinal  0.016850       ▁Quant  0.018072\n",
      "39           _+  0.016778        plots  0.018059\n"
     ]
    }
   ],
   "source": [
    "for i in range(10):\n",
    "    print(i, 'S', S[i])\n",
    "    print(i, 'POS')\n",
    "    print(pd.concat([\n",
    "        topk(V[i,:32000]),\n",
    "        #topk(V[i,32000:]), \n",
    "        topk(U[:,i]),\n",
    "    ], axis=1))\n",
    "    print(i, 'NEG')\n",
    "    print(pd.concat([\n",
    "        topk(-V[i,:32000]),\n",
    "        #topk(-V[i,32000:]), \n",
    "        topk(-U[:,i]),\n",
    "    ], axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8d45288-730c-4b01-aeb4-a592ba7a01b2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
