{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Set CUDA_VISIBLE_DEVICES to an empty string (intended to hide every GPU from\n",
    "# CUDA-aware libraries). NOTE(review): presumably this has to run before torch\n",
    "# is imported/initialized -- keep this cell ahead of the import cell.\n",
    "os.environ.update(CUDA_VISIBLE_DEVICES=\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from pathlib import Path\n",
    "import numpy as np\n",
    "import torch\n",
    "from sklearn.metrics import average_precision_score\n",
    "import torch.nn.functional as F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{0: 2, 1: 2, 2: 3, 3: 3, 4: 2, 5: 2, 6: 2}\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from collections import Counter\n",
    "\n",
    "# Toy data: small non-negative integer labels with repeats.\n",
    "array = np.array([0, 1, 2, 3, 4, 5, 6, 1, 2, 3, 0, 5, 6, 4, 3, 2])\n",
    "\n",
    "# Count over plain Python ints. Iterating the ndarray directly would key the\n",
    "# Counter by NumPy scalars, which print as `np.int64(0)` under NumPy >= 2 and\n",
    "# are rejected by json.dumps when used as dict keys.\n",
    "count_dict = Counter(array.tolist())\n",
    "\n",
    "# Rebuild the mapping with keys in ascending order (dicts keep insertion order).\n",
    "sorted_count_dict = dict(sorted(count_dict.items()))\n",
    "\n",
    "print(sorted_count_dict)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([10.0000,  5.0000,  3.3333,  2.5000,  2.0000,  1.6667])"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scalar / tensor broadcasts: element i of the result is 10 / (i + 1).\n",
    "10 / torch.arange(1, 7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch check: `is` compares identity, so None compared with itself is True.\n",
    "None is None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ground truth: 4096 samples, all negative except three positive spans.\n",
    "y_true = np.zeros(4096)\n",
    "y_true[np.r_[10:350, 800:806, 3450:3460]] = 1.\n",
    "\n",
    "# Scores start as a copy of the labels, then every positive span is overwritten\n",
    "# with a sub-1.0 confidence.\n",
    "y_pred = y_true.copy()\n",
    "y_pred[np.r_[10:350, 800:806]] = 0.6\n",
    "y_pred[3450:3460] = 0.7\n",
    "# A span of negatives scored 0.5: still below every positive score (0.6 / 0.7),\n",
    "# so the ranking stays perfect. TODO: try other values here to see the metric move.\n",
    "y_pred[1000:1050] = 0.5\n",
    "\n",
    "average_precision_score(y_true, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7746052145957947\n",
      "[0.7746052  0.22539477]\n"
     ]
    }
   ],
   "source": [
    "# Compare sigmoid(x) with a softmax over the two logits [x, 0]:\n",
    "# the first softmax entry comes out equal to the sigmoid value.\n",
    "x = torch.tensor(1.2345)\n",
    "sigmoid_output = x.sigmoid()\n",
    "logits = torch.stack((x, torch.zeros_like(x)))\n",
    "softmax_output = logits.softmax(dim=0)\n",
    "print(sigmoid_output.item())\n",
    "print(softmax_output.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shuffle 0..9 in place with a fixed-seed Generator so that Restart & Run All\n",
    "# reproduces the same permutation (no seed is set for the global np.random\n",
    "# state in the cells above).\n",
    "arr = np.arange(10)\n",
    "np.random.default_rng(0).shuffle(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([7, 2, 0, 9, 5, 8, 4, 3, 6, 1])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the current contents of `arr` as the cell result.\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'cls_token': tensor([1]),\n",
       " 'sep_token': tensor([2]),\n",
       " 'mem_token_ids': tensor([32000, 32001, 32002, 32003, 32004]),\n",
       " 'model.bert.embeddings.position_ids': tensor([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,\n",
       "           14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,\n",
       "           28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,\n",
       "           42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,\n",
       "           56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,\n",
       "           70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,\n",
       "           84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,\n",
       "           98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,\n",
       "          112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,\n",
       "          126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,\n",
       "          140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,\n",
       "          154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,\n",
       "          168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,\n",
       "          182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,\n",
       "          196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,\n",
       "          210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,\n",
       "          224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,\n",
       "          238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,\n",
       "          252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265,\n",
       "          266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279,\n",
       "          280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293,\n",
       "          294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307,\n",
       "          308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321,\n",
       "          322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335,\n",
       "          336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,\n",
       "          350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363,\n",
       "          364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377,\n",
       "          378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391,\n",
       "          392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405,\n",
       "          406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,\n",
       "          420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433,\n",
       "          434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447,\n",
       "          448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461,\n",
       "          462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475,\n",
       "          476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489,\n",
       "          490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503,\n",
       "          504, 505, 506, 507, 508, 509, 510, 511]]),\n",
       " 'model.bert.embeddings.word_embeddings.weight': tensor([[-0.0362, -0.0847, -0.0336,  ...,  0.0139,  0.0251,  0.0388],\n",
       "         [-0.0045, -0.0255,  0.0515,  ..., -0.0875,  0.0687, -0.0259],\n",
       "         [-0.0655, -0.0639, -0.0673,  ..., -0.0585,  0.0382, -0.0807],\n",
       "         ...,\n",
       "         [-0.0032, -0.0663,  0.0156,  ..., -0.0178,  0.0130,  0.0150],\n",
       "         [ 0.0516, -0.0339, -0.0159,  ...,  0.0027,  0.0146, -0.0402],\n",
       "         [-0.0356,  0.0133,  0.0057,  ..., -0.0138, -0.0130,  0.0074]]),\n",
       " 'model.bert.embeddings.position_embeddings.weight': tensor([[ 0.1154,  0.1284,  0.0018,  ..., -0.2988, -0.0591,  0.0517],\n",
       "         [ 0.0025, -0.0192,  0.0291,  ..., -0.0988, -0.0235,  0.0631],\n",
       "         [ 0.0046,  0.0080,  0.0459,  ..., -0.0607, -0.0074,  0.0376],\n",
       "         ...,\n",
       "         [ 0.0061, -0.0382, -0.0166,  ..., -0.0442,  0.0084, -0.0185],\n",
       "         [ 0.0482, -0.0250, -0.0362,  ..., -0.0838,  0.0024, -0.0298],\n",
       "         [ 0.1540,  0.0987, -0.0223,  ..., -0.1710, -0.0614, -0.0376]]),\n",
       " 'model.bert.embeddings.token_type_embeddings.weight': tensor([[-0.0120,  0.0175, -0.0138,  ...,  0.0600, -0.0118, -0.0157],\n",
       "         [ 0.0217,  0.0115,  0.0255,  ...,  0.0274, -0.0102, -0.0125]]),\n",
       " 'model.bert.embeddings.LayerNorm.weight': tensor([1.7861, 1.7295, 1.5833,  ..., 1.1677, 1.7702, 1.8007]),\n",
       " 'model.bert.embeddings.LayerNorm.bias': tensor([-0.0059, -0.1492,  0.0058,  ..., -0.3428,  0.0436, -0.0187]),\n",
       " 'model.bert.encoder.layer.0.pre_attention_ln.weight': tensor([0.3417, 0.3172, 0.3250,  ..., 0.2804, 0.2906, 0.2863]),\n",
       " 'model.bert.encoder.layer.0.pre_attention_ln.bias': tensor([ 0.0249, -0.0224,  0.0002,  ..., -0.0449, -0.0053,  0.0058]),\n",
       " 'model.bert.encoder.layer.0.post_attention_ln.weight': tensor([0.3135, 0.3574, 0.3598,  ..., 0.3748, 0.2904, 0.3056]),\n",
       " 'model.bert.encoder.layer.0.post_attention_ln.bias': tensor([-0.0055, -0.0034, -0.0053,  ..., -0.0202, -0.0066, -0.0140]),\n",
       " 'model.bert.encoder.layer.0.attention.self.query.weight': tensor([[ 0.0753, -0.0150, -0.0948,  ..., -0.0012,  0.0165, -0.0830],\n",
       "         [ 0.0331, -0.0387,  0.0050,  ...,  0.0303,  0.0614, -0.0011],\n",
       "         [-0.0361, -0.0051,  0.0295,  ...,  0.0082, -0.0254,  0.1117],\n",
       "         ...,\n",
       "         [-0.1230, -0.0825,  0.0131,  ...,  0.1401,  0.0599, -0.1253],\n",
       "         [-0.0255, -0.0588,  0.0969,  ...,  0.0235,  0.0669, -0.0094],\n",
       "         [ 0.0856, -0.0632, -0.0790,  ..., -0.0779, -0.0025, -0.0840]]),\n",
       " 'model.bert.encoder.layer.0.attention.self.query.bias': tensor([-0.0187,  0.0685, -0.0075,  ..., -0.2405,  0.2840,  0.1331]),\n",
       " 'model.bert.encoder.layer.0.attention.self.key.weight': tensor([[ 0.0937, -0.1700, -0.0503,  ...,  0.0085, -0.1168, -0.1603],\n",
       "         [-0.3080, -0.0514, -0.1063,  ..., -0.0693, -0.0498,  0.0952],\n",
       "         [-0.0275, -0.1185,  0.0051,  ..., -0.0874,  0.0339,  0.0665],\n",
       "         ...,\n",
       "         [-0.0678, -0.2121, -0.0354,  ...,  0.1201, -0.1212, -0.0303],\n",
       "         [ 0.0548,  0.0746, -0.1179,  ...,  0.0665, -0.1452,  0.0119],\n",
       "         [ 0.0062,  0.0712, -0.0437,  ...,  0.1240, -0.0401, -0.0766]]),\n",
       " 'model.bert.encoder.layer.0.attention.self.key.bias': tensor([-0.0630,  0.0720, -0.0345,  ...,  0.0763, -0.1298, -0.1729]),\n",
       " 'model.bert.encoder.layer.0.attention.self.value.weight': tensor([[ 0.0042,  0.0620, -0.0063,  ..., -0.0913,  0.0114,  0.0035],\n",
       "         [ 0.0518,  0.0660,  0.1229,  ..., -0.0440, -0.1614,  0.0573],\n",
       "         [ 0.0844, -0.2005, -0.0753,  ..., -0.0239,  0.1496,  0.0706],\n",
       "         ...,\n",
       "         [ 0.0221, -0.0961,  0.1568,  ...,  0.0378,  0.0222,  0.0636],\n",
       "         [-0.0110, -0.0336, -0.0536,  ...,  0.0152,  0.0073,  0.0039],\n",
       "         [-0.0805,  0.0620,  0.0041,  ..., -0.0610,  0.1412,  0.1814]]),\n",
       " 'model.bert.encoder.layer.0.attention.self.value.bias': tensor([ 0.0148,  0.0110,  0.0071,  ...,  0.0392, -0.0051, -0.0745]),\n",
       " 'model.bert.encoder.layer.0.attention.output.dense.weight': tensor([[-0.0726, -0.0139,  0.0455,  ...,  0.0299, -0.1149, -0.0325],\n",
       "         [ 0.0239, -0.0814, -0.0431,  ...,  0.0798, -0.0647,  0.0283],\n",
       "         [-0.0248, -0.0464, -0.0192,  ...,  0.0671, -0.0267, -0.0078],\n",
       "         ...,\n",
       "         [ 0.0983,  0.1226, -0.0381,  ..., -0.0302,  0.0397,  0.0592],\n",
       "         [ 0.0106,  0.0429,  0.0385,  ..., -0.0025,  0.0279,  0.0510],\n",
       "         [-0.0326,  0.0014,  0.1072,  ...,  0.0549,  0.0034,  0.0524]]),\n",
       " 'model.bert.encoder.layer.0.attention.output.dense.bias': tensor([-0.0852, -0.0640, -0.0271,  ..., -0.0551,  0.0955,  0.0812]),\n",
       " 'model.bert.encoder.layer.0.intermediate.dense.weight': tensor([[ 0.0861, -0.1032, -0.0892,  ..., -0.0742,  0.1937, -0.2328],\n",
       "         [-0.0685, -0.0063,  0.1523,  ...,  0.0988, -0.0740, -0.0335],\n",
       "         [-0.0039,  0.0360,  0.0631,  ...,  0.0281, -0.0029, -0.0426],\n",
       "         ...,\n",
       "         [ 0.0482,  0.0469,  0.0145,  ..., -0.1170, -0.0099, -0.1102],\n",
       "         [ 0.0050, -0.0463, -0.1123,  ..., -0.0915,  0.0727,  0.0634],\n",
       "         [ 0.1529,  0.1149, -0.1295,  ..., -0.0712, -0.0067,  0.0730]]),\n",
       " 'model.bert.encoder.layer.0.intermediate.dense.bias': tensor([-0.1178, -0.1324,  0.0349,  ..., -0.1221, -0.0178, -0.0804]),\n",
       " 'model.bert.encoder.layer.0.output.dense.weight': tensor([[ 0.1434,  0.0226,  0.0190,  ..., -0.1194, -0.0629,  0.0925],\n",
       "         [-0.0231, -0.0726,  0.0313,  ...,  0.1499, -0.1091, -0.0146],\n",
       "         [ 0.0385, -0.1021, -0.0533,  ..., -0.0895,  0.0049, -0.1039],\n",
       "         ...,\n",
       "         [-0.1835, -0.0123,  0.0192,  ...,  0.0735, -0.0100, -0.1357],\n",
       "         [ 0.1448, -0.0137, -0.0463,  ..., -0.0930, -0.0030,  0.0753],\n",
       "         [-0.1241,  0.0241,  0.0003,  ..., -0.0612, -0.0542, -0.0541]]),\n",
       " 'model.bert.encoder.layer.0.output.dense.bias': tensor([-0.0196, -0.1278,  0.0545,  ...,  0.0795,  0.0702, -0.0628]),\n",
       " 'model.bert.encoder.layer.1.pre_attention_ln.weight': tensor([0.6128, 0.6591, 0.6589,  ..., 0.5928, 0.5863, 0.6137]),\n",
       " 'model.bert.encoder.layer.1.pre_attention_ln.bias': tensor([ 0.0188, -0.0219, -0.0116,  ..., -0.0405, -0.0229, -0.0069]),\n",
       " 'model.bert.encoder.layer.1.post_attention_ln.weight': tensor([0.7682, 0.7645, 0.8055,  ..., 0.7597, 0.7265, 0.7667]),\n",
       " 'model.bert.encoder.layer.1.post_attention_ln.bias': tensor([ 0.0047, -0.0517, -0.0118,  ..., -0.0363, -0.0221, -0.0175]),\n",
       " 'model.bert.encoder.layer.1.attention.self.query.weight': tensor([[-8.1049e-02,  7.1460e-02,  2.0852e-01,  ..., -8.2115e-02,\n",
       "           1.1711e-02, -7.0536e-02],\n",
       "         [ 3.3337e-05, -1.1179e-03, -1.4706e-01,  ..., -6.4964e-02,\n",
       "           8.6415e-02,  1.9829e-02],\n",
       "         [-1.1788e-01, -5.3232e-02,  9.0858e-02,  ..., -6.3438e-02,\n",
       "          -1.0454e-01, -7.3442e-02],\n",
       "         ...,\n",
       "         [-6.7595e-03, -6.4330e-02,  1.2291e-01,  ..., -1.6400e-01,\n",
       "           5.4066e-02,  1.2841e-01],\n",
       "         [-1.4072e-01,  2.4364e-02,  1.6522e-01,  ..., -1.8356e-01,\n",
       "           1.9082e-01,  1.1009e-01],\n",
       "         [-1.0197e-02, -1.0517e-02, -5.7499e-02,  ..., -8.5976e-02,\n",
       "          -2.1156e-02, -6.2247e-02]]),\n",
       " 'model.bert.encoder.layer.1.attention.self.query.bias': tensor([-0.0446,  0.0748, -0.0908,  ...,  0.1023,  0.0706,  0.1657]),\n",
       " 'model.bert.encoder.layer.1.attention.self.key.weight': tensor([[ 0.0495,  0.1423,  0.0143,  ..., -0.0394,  0.0087, -0.0552],\n",
       "         [-0.0831,  0.0050, -0.0507,  ..., -0.0622, -0.0103,  0.0664],\n",
       "         [-0.0061, -0.0586, -0.2267,  ...,  0.0822,  0.1157,  0.1124],\n",
       "         ...,\n",
       "         [-0.0105,  0.0909, -0.0428,  ..., -0.0823,  0.0563, -0.0194],\n",
       "         [ 0.1031,  0.0808, -0.0427,  ..., -0.0842,  0.0130,  0.1444],\n",
       "         [-0.1448,  0.0950, -0.1137,  ...,  0.0567, -0.0029,  0.1063]]),\n",
       " 'model.bert.encoder.layer.1.attention.self.key.bias': tensor([ 0.0579, -0.1770,  0.0134,  ..., -0.1184,  0.0059,  0.0293]),\n",
       " 'model.bert.encoder.layer.1.attention.self.value.weight': tensor([[-0.0250, -0.0355, -0.0513,  ...,  0.0553,  0.0416, -0.0418],\n",
       "         [-0.0663,  0.0412, -0.0078,  ..., -0.0493,  0.1115, -0.0100],\n",
       "         [ 0.0109,  0.1110, -0.1020,  ..., -0.0344, -0.1300, -0.1115],\n",
       "         ...,\n",
       "         [ 0.0285,  0.0929,  0.0717,  ..., -0.0344, -0.2126, -0.0167],\n",
       "         [ 0.0724,  0.0301, -0.0295,  ...,  0.0170,  0.0691, -0.0368],\n",
       "         [ 0.0560,  0.0588, -0.0397,  ..., -0.0711, -0.0449, -0.0203]]),\n",
       " 'model.bert.encoder.layer.1.attention.self.value.bias': tensor([-0.0203,  0.0194,  0.0123,  ...,  0.0195,  0.0091,  0.0153]),\n",
       " 'model.bert.encoder.layer.1.attention.output.dense.weight': tensor([[ 0.0689,  0.1369, -0.0238,  ...,  0.0337,  0.0374, -0.0404],\n",
       "         [-0.1674, -0.1341, -0.1545,  ..., -0.0005,  0.0253,  0.0442],\n",
       "         [ 0.0214,  0.1251,  0.0241,  ..., -0.0066, -0.0213,  0.1392],\n",
       "         ...,\n",
       "         [-0.0155, -0.0467, -0.0031,  ...,  0.2090, -0.0182, -0.1428],\n",
       "         [-0.0096, -0.0610,  0.1769,  ...,  0.2109,  0.0036, -0.1086],\n",
       "         [ 0.0738, -0.0226,  0.0654,  ...,  0.0132, -0.0282, -0.0465]]),\n",
       " 'model.bert.encoder.layer.1.attention.output.dense.bias': tensor([-0.1688, -0.0647,  0.0040,  ..., -0.0131,  0.0440,  0.1155]),\n",
       " 'model.bert.encoder.layer.1.intermediate.dense.weight': tensor([[ 0.1232,  0.0352, -0.2054,  ...,  0.0257, -0.1679,  0.0064],\n",
       "         [-0.1218, -0.0346,  0.2220,  ..., -0.2946,  0.0553, -0.0602],\n",
       "         [-0.0386,  0.0518,  0.1587,  ..., -0.1026, -0.0651, -0.0648],\n",
       "         ...,\n",
       "         [ 0.0783, -0.0540,  0.1657,  ..., -0.0506, -0.1793, -0.0136],\n",
       "         [-0.0395, -0.0389, -0.0631,  ..., -0.0307, -0.1881,  0.0466],\n",
       "         [-0.1274,  0.1212, -0.0162,  ..., -0.0226, -0.0375, -0.0913]]),\n",
       " 'model.bert.encoder.layer.1.intermediate.dense.bias': tensor([-0.0469, -0.0511,  0.0248,  ..., -0.2276, -0.0314, -0.0121]),\n",
       " 'model.bert.encoder.layer.1.output.dense.weight': tensor([[-0.0587, -0.0439, -0.0147,  ...,  0.0698, -0.0050, -0.0846],\n",
       "         [ 0.0516,  0.0111,  0.0244,  ..., -0.0592,  0.0687, -0.0276],\n",
       "         [ 0.0175, -0.1574, -0.0962,  ...,  0.0756, -0.0779,  0.0718],\n",
       "         ...,\n",
       "         [-0.0081,  0.0142,  0.1576,  ..., -0.0066,  0.0364,  0.0364],\n",
       "         [-0.0533, -0.0371,  0.0025,  ..., -0.0340, -0.0315,  0.0463],\n",
       "         [ 0.0337, -0.0562,  0.0377,  ...,  0.0493,  0.0015, -0.0723]]),\n",
       " 'model.bert.encoder.layer.1.output.dense.bias': tensor([-0.0549, -0.0154, -0.0251,  ..., -0.0063,  0.0407, -0.0177]),\n",
       " 'model.bert.encoder.layer.2.pre_attention_ln.weight': tensor([0.5819, 0.5515, 0.5976,  ..., 0.5413, 0.6313, 0.5925]),\n",
       " 'model.bert.encoder.layer.2.pre_attention_ln.bias': tensor([ 0.0237, -0.0233, -0.0014,  ..., -0.0311, -0.0259, -0.0047]),\n",
       " 'model.bert.encoder.layer.2.post_attention_ln.weight': tensor([0.7642, 0.8524, 0.8182,  ..., 0.8095, 0.7804, 0.7992]),\n",
       " 'model.bert.encoder.layer.2.post_attention_ln.bias': tensor([ 0.0188, -0.0625, -0.0148,  ..., -0.0352, -0.0040,  0.0005]),\n",
       " 'model.bert.encoder.layer.2.attention.self.query.weight': tensor([[-0.0975, -0.1625,  0.0723,  ..., -0.0202,  0.1653,  0.0668],\n",
       "         [-0.1595, -0.0673, -0.0248,  ...,  0.1048,  0.1372,  0.0043],\n",
       "         [-0.0869, -0.3763, -0.1636,  ...,  0.1225, -0.0678, -0.2402],\n",
       "         ...,\n",
       "         [-0.1627, -0.1954,  0.3575,  ...,  0.2674, -0.1012,  0.2199],\n",
       "         [-0.3957, -0.2116, -0.0750,  ...,  0.2311, -0.1873, -0.0489],\n",
       "         [-0.2736, -0.0499,  0.1444,  ...,  0.2941, -0.1625,  0.0709]]),\n",
       " 'model.bert.encoder.layer.2.attention.self.query.bias': tensor([ 0.4688,  0.1668, -0.0809,  ...,  0.3721, -0.0549, -0.1205]),\n",
       " 'model.bert.encoder.layer.2.attention.self.key.weight': tensor([[-0.0787, -0.0620,  0.0243,  ...,  0.1805, -0.1564, -0.0746],\n",
       "         [-0.0857,  0.0293,  0.0187,  ...,  0.0706,  0.0644, -0.1314],\n",
       "         [-0.1094, -0.1306,  0.0026,  ...,  0.1001,  0.0429, -0.0012],\n",
       "         ...,\n",
       "         [ 0.0332,  0.2583,  0.3548,  ...,  0.0975, -0.1342, -0.0338],\n",
       "         [-0.2023, -0.2062, -0.1414,  ...,  0.2326,  0.1037, -0.2407],\n",
       "         [-0.3724,  0.0771, -0.1354,  ...,  0.0353, -0.0147, -0.4450]]),\n",
       " 'model.bert.encoder.layer.2.attention.self.key.bias': tensor([-1.5413, -0.4035,  0.0091,  ...,  0.0712,  0.0714, -0.2354]),\n",
       " 'model.bert.encoder.layer.2.attention.self.value.weight': tensor([[-0.0873, -0.0572, -0.0208,  ..., -0.0749, -0.0617,  0.0997],\n",
       "         [ 0.0762,  0.1828, -0.1196,  ..., -0.0209,  0.1962,  0.0286],\n",
       "         [-0.0623, -0.0802,  0.1323,  ..., -0.0309,  0.1264, -0.0748],\n",
       "         ...,\n",
       "         [-0.0123,  0.0488, -0.1402,  ..., -0.1001,  0.0710, -0.0152],\n",
       "         [-0.0630,  0.1678, -0.0034,  ..., -0.1309, -0.0301, -0.0551],\n",
       "         [ 0.0580, -0.0947, -0.0012,  ...,  0.0597,  0.0190,  0.0150]]),\n",
       " 'model.bert.encoder.layer.2.attention.self.value.bias': tensor([-0.0198,  0.0041,  0.0037,  ..., -0.0865,  0.0192,  0.0345]),\n",
       " 'model.bert.encoder.layer.2.attention.output.dense.weight': tensor([[ 0.1803, -0.0299, -0.0225,  ..., -0.1403,  0.0170, -0.0412],\n",
       "         [ 0.0008, -0.0979, -0.0946,  ..., -0.0015,  0.0343,  0.1628],\n",
       "         [ 0.0735,  0.0655, -0.1605,  ...,  0.0912,  0.0649,  0.0123],\n",
       "         ...,\n",
       "         [-0.1458, -0.0676,  0.1329,  ..., -0.1406,  0.0041, -0.0204],\n",
       "         [ 0.1515, -0.0044,  0.0338,  ..., -0.0143,  0.0765, -0.0213],\n",
       "         [ 0.0130,  0.1220,  0.0195,  ..., -0.1519,  0.0115, -0.0301]]),\n",
       " 'model.bert.encoder.layer.2.attention.output.dense.bias': tensor([-0.0973, -0.0202, -0.0912,  ..., -0.1561, -0.0136, -0.0085]),\n",
       " 'model.bert.encoder.layer.2.intermediate.dense.weight': tensor([[-0.0632, -0.0047, -0.0271,  ..., -0.1484,  0.0783,  0.1246],\n",
       "         [-0.0744, -0.0729,  0.0223,  ...,  0.1044, -0.0396,  0.0205],\n",
       "         [-0.1031,  0.0255,  0.3818,  ..., -0.2261,  0.0781, -0.2492],\n",
       "         ...,\n",
       "         [-0.1550,  0.0865, -0.0355,  ...,  0.1278, -0.0388,  0.0456],\n",
       "         [-0.1165,  0.1125,  0.0065,  ...,  0.1208,  0.0404, -0.0005],\n",
       "         [ 0.0045,  0.0242, -0.0467,  ..., -0.1620, -0.0013,  0.1803]]),\n",
       " 'model.bert.encoder.layer.2.intermediate.dense.bias': tensor([-0.0023, -0.0849, -0.0501,  ..., -0.0585, -0.0054, -0.0825]),\n",
       " 'model.bert.encoder.layer.2.output.dense.weight': tensor([[-0.0355, -0.0537,  0.0789,  ...,  0.0312,  0.1598,  0.1331],\n",
       "         [ 0.0432, -0.0434,  0.0010,  ...,  0.0770, -0.0366, -0.0133],\n",
       "         [-0.0264,  0.1151,  0.1649,  ..., -0.0292, -0.0678, -0.1315],\n",
       "         ...,\n",
       "         [ 0.0734, -0.0602,  0.0494,  ..., -0.0629, -0.1044, -0.0206],\n",
       "         [ 0.0094, -0.0110,  0.1416,  ..., -0.0641, -0.0601,  0.0373],\n",
       "         [ 0.0226, -0.0918, -0.1484,  ..., -0.0476, -0.0065,  0.1338]]),\n",
       " 'model.bert.encoder.layer.2.output.dense.bias': tensor([-0.0442, -0.0326, -0.0207,  ..., -0.0763,  0.0386,  0.0141]),\n",
       " 'model.bert.encoder.layer.3.pre_attention_ln.weight': tensor([0.5256, 0.4453, 0.5799,  ..., 0.4882, 0.5933, 0.5895]),\n",
       " 'model.bert.encoder.layer.3.pre_attention_ln.bias': tensor([ 1.8813e-02, -2.9305e-02, -5.6641e-04,  ..., -1.7212e-02,\n",
       "         -3.2072e-02,  4.3003e-07]),\n",
       " 'model.bert.encoder.layer.3.post_attention_ln.weight': tensor([0.7049, 0.7627, 0.7943,  ..., 0.7085, 0.7375, 0.7621]),\n",
       " 'model.bert.encoder.layer.3.post_attention_ln.bias': tensor([ 0.0288, -0.0774, -0.0064,  ..., -0.0787,  0.0163,  0.0130]),\n",
       " 'model.bert.encoder.layer.3.attention.self.query.weight': tensor([[-0.1239, -0.2307, -0.2000,  ...,  0.1364,  0.0120,  0.0873],\n",
       "         [-0.0226, -0.0962,  0.0510,  ...,  0.0800,  0.0127, -0.1822],\n",
       "         [-0.1594,  0.1911,  0.0266,  ...,  0.1027,  0.2036,  0.1344],\n",
       "         ...,\n",
       "         [ 0.0474, -0.2192, -0.1387,  ...,  0.1097,  0.2135, -0.0701],\n",
       "         [ 0.0064, -0.2405,  0.0829,  ...,  0.1437,  0.2306,  0.0786],\n",
       "         [ 0.1524,  0.0396, -0.0653,  ..., -0.2108, -0.1136, -0.2773]]),\n",
       " 'model.bert.encoder.layer.3.attention.self.query.bias': tensor([-0.0675,  0.0424, -0.0445,  ..., -0.0357,  0.1463, -0.5770]),\n",
       " 'model.bert.encoder.layer.3.attention.self.key.weight': tensor([[ 0.0722, -0.2031, -0.0473,  ..., -0.1007, -0.0052,  0.0203],\n",
       "         [-0.0406, -0.1920,  0.1232,  ..., -0.1560, -0.0648, -0.0481],\n",
       "         [ 0.1480,  0.0817,  0.0652,  ..., -0.1852,  0.0091, -0.0561],\n",
       "         ...,\n",
       "         [-0.2599,  0.0371,  0.1831,  ..., -0.0464, -0.1820,  0.1124],\n",
       "         [-0.1369,  0.2888, -0.0825,  ..., -0.0159, -0.0345, -0.1223],\n",
       "         [ 0.1342, -0.0266,  0.0710,  ..., -0.1190, -0.0543,  0.1823]]),\n",
       " 'model.bert.encoder.layer.3.attention.self.key.bias': tensor([-0.0348,  0.0420, -0.0027,  ..., -0.0573, -0.4738,  1.2903]),\n",
       " 'model.bert.encoder.layer.3.attention.self.value.weight': tensor([[-0.1663,  0.0702, -0.0671,  ..., -0.0081, -0.0821, -0.2938],\n",
       "         [ 0.0710,  0.0943, -0.2772,  ...,  0.1673,  0.0849, -0.0011],\n",
       "         [-0.1085,  0.1250,  0.1237,  ..., -0.0105,  0.0533,  0.0231],\n",
       "         ...,\n",
       "         [ 0.0282, -0.0794, -0.0340,  ...,  0.0314, -0.0249, -0.0212],\n",
       "         [-0.1740, -0.0679, -0.0353,  ..., -0.0377, -0.0893, -0.0392],\n",
       "         [ 0.0107, -0.2073, -0.0062,  ..., -0.1374, -0.0328,  0.0021]]),\n",
       " 'model.bert.encoder.layer.3.attention.self.value.bias': tensor([-0.0198,  0.0085, -0.0207,  ..., -0.0529, -0.0384, -0.0112]),\n",
       " 'model.bert.encoder.layer.3.attention.output.dense.weight': tensor([[ 0.0793,  0.0358,  0.0546,  ...,  0.0004,  0.0052, -0.0073],\n",
       "         [ 0.1028,  0.0288, -0.0539,  ...,  0.1428,  0.1484, -0.0352],\n",
       "         [ 0.0737,  0.2403, -0.2253,  ..., -0.0044,  0.1663, -0.0236],\n",
       "         ...,\n",
       "         [-0.0802, -0.0686,  0.1296,  ..., -0.1199, -0.1231, -0.0608],\n",
       "         [ 0.0959, -0.0639,  0.0847,  ..., -0.0351,  0.0571,  0.0411],\n",
       "         [ 0.2461, -0.0218, -0.0424,  ..., -0.0514,  0.0398, -0.0090]]),\n",
       " 'model.bert.encoder.layer.3.attention.output.dense.bias': tensor([-0.1304, -0.0269, -0.1235,  ..., -0.0123, -0.1283, -0.1627]),\n",
       " 'model.bert.encoder.layer.3.intermediate.dense.weight': tensor([[-0.2423,  0.2253, -0.2043,  ..., -0.0714,  0.1711, -0.0172],\n",
       "         [ 0.0069,  0.0587, -0.1764,  ..., -0.0205,  0.0748, -0.0657],\n",
       "         [-0.1073, -0.0185, -0.0983,  ..., -0.2271, -0.2794, -0.1342],\n",
       "         ...,\n",
       "         [-0.0911, -0.0761,  0.1108,  ..., -0.0270, -0.0398, -0.0304],\n",
       "         [ 0.0260,  0.0649,  0.2144,  ..., -0.0513, -0.1585, -0.0251],\n",
       "         [-0.1532,  0.0495,  0.1084,  ..., -0.0764, -0.1271,  0.1946]]),\n",
       " 'model.bert.encoder.layer.3.intermediate.dense.bias': tensor([-0.1527, -0.0549, -0.1002,  ..., -0.0566, -0.0908, -0.0443]),\n",
       " 'model.bert.encoder.layer.3.output.dense.weight': tensor([[-0.2884, -0.1597,  0.0615,  ..., -0.1021,  0.1021, -0.0670],\n",
       "         [ 0.0186, -0.0855,  0.0729,  ..., -0.1784,  0.0865,  0.0050],\n",
       "         [-0.0730,  0.0531,  0.1792,  ..., -0.1520,  0.0651, -0.0546],\n",
       "         ...,\n",
       "         [-0.1762,  0.0376, -0.0855,  ..., -0.0210, -0.1539,  0.0837],\n",
       "         [-0.0100, -0.0944, -0.0103,  ...,  0.0928, -0.1785,  0.0488],\n",
       "         [-0.0280, -0.1523, -0.0602,  ...,  0.0051, -0.0527,  0.0113]]),\n",
       " 'model.bert.encoder.layer.3.output.dense.bias': tensor([-0.0539, -0.1425, -0.0414,  ..., -0.0326,  0.0261,  0.0940]),\n",
       " 'model.bert.encoder.layer.4.pre_attention_ln.weight': tensor([0.5532, 0.5199, 0.6133,  ..., 0.5460, 0.6537, 0.6156]),\n",
       " 'model.bert.encoder.layer.4.pre_attention_ln.bias': tensor([ 0.0295, -0.0284,  0.0017,  ..., -0.0396, -0.0331,  0.0012]),\n",
       " 'model.bert.encoder.layer.4.post_attention_ln.weight': tensor([0.7470, 0.7907, 0.8159,  ..., 0.8074, 0.8382, 0.8620]),\n",
       " 'model.bert.encoder.layer.4.post_attention_ln.bias': tensor([ 0.0080, -0.1008, -0.0127,  ..., -0.0736,  0.0124,  0.0243]),\n",
       " 'model.bert.encoder.layer.4.attention.self.query.weight': tensor([[-0.0759,  0.0814,  0.2488,  ...,  0.2127,  0.1604, -0.1283],\n",
       "         [ 0.1376,  0.0729,  0.1798,  ...,  0.1050,  0.0357,  0.0034],\n",
       "         [-0.0427,  0.2965, -0.1710,  ...,  0.0889, -0.1507, -0.1526],\n",
       "         ...,\n",
       "         [ 0.2865,  0.0647,  0.2189,  ..., -0.1042,  0.1138,  0.1883],\n",
       "         [ 0.3203,  0.2204,  0.1756,  ..., -0.1508,  0.1404, -0.0681],\n",
       "         [-0.0965, -0.1296,  0.1763,  ..., -0.1775, -0.1010, -0.1368]]),\n",
       " 'model.bert.encoder.layer.4.attention.self.query.bias': tensor([ 0.0237,  0.2113, -0.0090,  ...,  0.0979, -0.0781, -0.1397]),\n",
       " 'model.bert.encoder.layer.4.attention.self.key.weight': tensor([[ 0.0918,  0.1195, -0.0597,  ..., -0.0650, -0.1616, -0.0135],\n",
       "         [ 0.1847, -0.0171, -0.0151,  ..., -0.0266, -0.1766, -0.0593],\n",
       "         [-0.1542,  0.1302, -0.2774,  ..., -0.0930, -0.2521, -0.1790],\n",
       "         ...,\n",
       "         [ 0.0029,  0.1191,  0.1219,  ...,  0.0006,  0.0563,  0.1654],\n",
       "         [ 0.1325,  0.1080,  0.1429,  ..., -0.0332, -0.1320,  0.0157],\n",
       "         [-0.0302, -0.0513, -0.0748,  ...,  0.1931,  0.1338,  0.1018]]),\n",
       " 'model.bert.encoder.layer.4.attention.self.key.bias': tensor([ 0.2724, -0.9825,  0.5103,  ...,  0.3399,  0.3918, -0.0093]),\n",
       " 'model.bert.encoder.layer.4.attention.self.value.weight': tensor([[-0.0309,  0.0054,  0.0854,  ...,  0.1183, -0.0280,  0.1175],\n",
       "         [-0.2001,  0.0776, -0.0802,  ..., -0.0040,  0.0024, -0.0837],\n",
       "         [-0.0906, -0.0300, -0.1130,  ..., -0.0222,  0.1876,  0.0152],\n",
       "         ...,\n",
       "         [ 0.1329,  0.0780, -0.0231,  ...,  0.0130,  0.0191, -0.0486],\n",
       "         [-0.0196, -0.0789,  0.1089,  ..., -0.0073,  0.0667,  0.0433],\n",
       "         [ 0.0767,  0.0623,  0.0676,  ..., -0.0052,  0.0756,  0.1013]]),\n",
       " 'model.bert.encoder.layer.4.attention.self.value.bias': tensor([-0.0320,  0.0266, -0.0510,  ...,  0.0101, -0.0018, -0.0458]),\n",
       " 'model.bert.encoder.layer.4.attention.output.dense.weight': tensor([[-0.1882,  0.1534, -0.0776,  ..., -0.0099,  0.0550,  0.0098],\n",
       "         [ 0.0860, -0.1273,  0.0466,  ..., -0.0640, -0.0086, -0.1173],\n",
       "         [ 0.0605, -0.0392, -0.0472,  ..., -0.1367, -0.1007, -0.0443],\n",
       "         ...,\n",
       "         [ 0.0642, -0.0202, -0.0083,  ..., -0.0466, -0.0678, -0.0973],\n",
       "         [ 0.0798, -0.0268, -0.1139,  ..., -0.0417, -0.0831,  0.0644],\n",
       "         [ 0.0888, -0.0285, -0.0179,  ..., -0.1315,  0.0185,  0.0343]]),\n",
       " 'model.bert.encoder.layer.4.attention.output.dense.bias': tensor([-0.1588,  0.0708,  0.0160,  ...,  0.0627, -0.0249, -0.0923]),\n",
       " 'model.bert.encoder.layer.4.intermediate.dense.weight': tensor([[ 0.1545, -0.0158,  0.0791,  ..., -0.0027, -0.0940, -0.0322],\n",
       "         [-0.0160, -0.1258,  0.1290,  ...,  0.1049,  0.1104, -0.0687],\n",
       "         [-0.0725,  0.0315, -0.0079,  ...,  0.2037, -0.0675,  0.1087],\n",
       "         ...,\n",
       "         [ 0.1409,  0.1756, -0.0597,  ..., -0.0870, -0.1969, -0.0515],\n",
       "         [ 0.0032,  0.0574,  0.0399,  ...,  0.1333,  0.0288,  0.1284],\n",
       "         [ 0.0187, -0.2074, -0.0356,  ..., -0.0278, -0.1638, -0.0208]]),\n",
       " 'model.bert.encoder.layer.4.intermediate.dense.bias': tensor([-0.0702, -0.0559, -0.0335,  ..., -0.0498, -0.0220, -0.0645]),\n",
       " 'model.bert.encoder.layer.4.output.dense.weight': tensor([[ 0.1245, -0.0743, -0.0079,  ...,  0.1441, -0.0754,  0.0418],\n",
       "         [ 0.0051, -0.0915,  0.0092,  ...,  0.0750, -0.0218, -0.0243],\n",
       "         [ 0.0456, -0.0132, -0.0550,  ..., -0.0780, -0.0535, -0.0979],\n",
       "         ...,\n",
       "         [-0.1361,  0.0295, -0.1360,  ...,  0.0306, -0.0232, -0.0041],\n",
       "         [-0.0679, -0.0764, -0.1166,  ..., -0.0727,  0.0098,  0.0880],\n",
       "         [ 0.1169, -0.0909,  0.0126,  ..., -0.1142, -0.0807,  0.0566]]),\n",
       " 'model.bert.encoder.layer.4.output.dense.bias': tensor([-0.0919, -0.2736,  0.0119,  ..., -0.0919,  0.1089,  0.2255]),\n",
       " 'model.bert.encoder.layer.5.pre_attention_ln.weight': tensor([0.6366, 0.6533, 0.7050,  ..., 0.6659, 0.7205, 0.7209]),\n",
       " 'model.bert.encoder.layer.5.pre_attention_ln.bias': tensor([ 0.0381, -0.0341,  0.0100,  ..., -0.0560, -0.0297,  0.0022]),\n",
       " 'model.bert.encoder.layer.5.post_attention_ln.weight': tensor([0.8735, 1.0210, 0.9676,  ..., 0.9563, 1.0088, 1.0375]),\n",
       " 'model.bert.encoder.layer.5.post_attention_ln.bias': tensor([ 0.0328, -0.0966,  0.0077,  ..., -0.1184,  0.0143,  0.0448]),\n",
       " 'model.bert.encoder.layer.5.attention.self.query.weight': tensor([[-0.1515,  0.0295,  0.1332,  ..., -0.0560,  0.0178,  0.0349],\n",
       "         [ 0.0486,  0.1025, -0.2087,  ...,  0.0328,  0.1917, -0.0991],\n",
       "         [-0.1466, -0.0378,  0.0703,  ..., -0.0123, -0.0056, -0.0452],\n",
       "         ...,\n",
       "         [ 0.0349,  0.0901,  0.0079,  ...,  0.2055, -0.0716,  0.0720],\n",
       "         [ 0.0037,  0.0926, -0.1109,  ...,  0.0155,  0.0934,  0.0153],\n",
       "         [ 0.1088,  0.1887, -0.0601,  ..., -0.1077, -0.0778,  0.1205]]),\n",
       " 'model.bert.encoder.layer.5.attention.self.query.bias': tensor([ 0.1843,  0.1103,  0.0918,  ...,  0.1560, -0.0991,  0.0418]),\n",
       " 'model.bert.encoder.layer.5.attention.self.key.weight': tensor([[-0.1045,  0.1933, -0.0976,  ...,  0.1943,  0.0559, -0.0573],\n",
       "         [-0.0315,  0.1227, -0.0530,  ...,  0.1543,  0.0498,  0.0910],\n",
       "         [-0.0045,  0.1801, -0.1044,  ..., -0.1075,  0.0038, -0.0511],\n",
       "         ...,\n",
       "         [ 0.0673,  0.1193,  0.1631,  ..., -0.1780,  0.0851,  0.1385],\n",
       "         [-0.0163,  0.0125, -0.0597,  ..., -0.1296,  0.0408, -0.0043],\n",
       "         [ 0.0664,  0.2334,  0.0850,  ..., -0.0186, -0.0312,  0.0536]]),\n",
       " 'model.bert.encoder.layer.5.attention.self.key.bias': tensor([-2.7710, -1.8611, -0.2295,  ..., -2.0738,  0.5439, -0.5056]),\n",
       " 'model.bert.encoder.layer.5.attention.self.value.weight': tensor([[-0.0366, -0.0459, -0.0574,  ..., -0.0388,  0.0262,  0.0194],\n",
       "         [-0.0589, -0.0027,  0.0474,  ..., -0.0266,  0.0723,  0.0191],\n",
       "         [-0.0730, -0.0071,  0.0737,  ..., -0.0146, -0.0922,  0.0095],\n",
       "         ...,\n",
       "         [ 0.0347, -0.1897,  0.0925,  ..., -0.0318,  0.1187,  0.0307],\n",
       "         [-0.1696, -0.0329, -0.0964,  ..., -0.1630, -0.0569,  0.0840],\n",
       "         [-0.0040, -0.0338,  0.0876,  ..., -0.0410, -0.0194, -0.0315]]),\n",
       " 'model.bert.encoder.layer.5.attention.self.value.bias': tensor([ 0.0208,  0.0508, -0.0231,  ..., -0.0569,  0.0028, -0.0121]),\n",
       " 'model.bert.encoder.layer.5.attention.output.dense.weight': tensor([[-0.0473, -0.0221, -0.0201,  ..., -0.1166, -0.0967,  0.0426],\n",
       "         [-0.1378, -0.0803,  0.0516,  ..., -0.1042,  0.0573, -0.0141],\n",
       "         [-0.0416,  0.2197, -0.0575,  ..., -0.0304,  0.1884, -0.0025],\n",
       "         ...,\n",
       "         [ 0.1922,  0.1154,  0.0023,  ...,  0.1343, -0.1895, -0.0077],\n",
       "         [ 0.0183,  0.0174, -0.1498,  ...,  0.1472,  0.0350,  0.0551],\n",
       "         [ 0.1080, -0.1015, -0.0807,  ...,  0.0846,  0.0824, -0.0201]]),\n",
       " 'model.bert.encoder.layer.5.attention.output.dense.bias': tensor([-0.1657,  0.1226,  0.0158,  ..., -0.0213,  0.0521, -0.0760]),\n",
       " 'model.bert.encoder.layer.5.intermediate.dense.weight': tensor([[-0.1559, -0.0029,  0.0370,  ...,  0.0255,  0.0955,  0.0737],\n",
       "         [-0.1366,  0.2162, -0.0093,  ..., -0.0103, -0.0603,  0.0025],\n",
       "         [ 0.1665,  0.0043, -0.0247,  ...,  0.0047,  0.1089,  0.0277],\n",
       "         ...,\n",
       "         [-0.0462, -0.0342,  0.0108,  ..., -0.1516, -0.0884, -0.1275],\n",
       "         [-0.0255, -0.0566,  0.0134,  ..., -0.1974,  0.0651,  0.0165],\n",
       "         [-0.0997,  0.0852, -0.0805,  ..., -0.0498, -0.0853, -0.0768]]),\n",
       " 'model.bert.encoder.layer.5.intermediate.dense.bias': tensor([-0.0573, -0.0957, -0.0515,  ..., -0.0589, -0.0746, -0.0253]),\n",
       " 'model.bert.encoder.layer.5.output.dense.weight': tensor([[-0.0912, -0.0038, -0.0980,  ...,  0.0110, -0.0376, -0.1864],\n",
       "         [-0.0559,  0.2467, -0.1277,  ..., -0.0871,  0.0173, -0.0254],\n",
       "         [ 0.0681, -0.0174, -0.1341,  ...,  0.0396, -0.0761,  0.0239],\n",
       "         ...,\n",
       "         [-0.2312,  0.0616,  0.0881,  ..., -0.0741,  0.1524,  0.0580],\n",
       "         [-0.0081, -0.0528,  0.0393,  ...,  0.1602,  0.0048, -0.0474],\n",
       "         [ 0.0052,  0.1188, -0.0249,  ...,  0.0478,  0.0201,  0.0031]]),\n",
       " 'model.bert.encoder.layer.5.output.dense.bias': tensor([-0.0755, -0.1407,  0.0577,  ...,  0.0366,  0.1165,  0.3046]),\n",
       " 'model.bert.encoder.layer.6.pre_attention_ln.weight': tensor([0.6047, 0.6897, 0.7350,  ..., 0.6446, 0.7009, 0.7260]),\n",
       " 'model.bert.encoder.layer.6.pre_attention_ln.bias': tensor([ 0.0497, -0.0423,  0.0136,  ..., -0.0392, -0.0262,  0.0004]),\n",
       " 'model.bert.encoder.layer.6.post_attention_ln.weight': tensor([0.8534, 0.9481, 0.8954,  ..., 0.8644, 0.9899, 0.9739]),\n",
       " 'model.bert.encoder.layer.6.post_attention_ln.bias': tensor([-0.0142, -0.0809,  0.0164,  ..., -0.0763,  0.0282,  0.0701]),\n",
       " 'model.bert.encoder.layer.6.attention.self.query.weight': tensor([[-0.1026,  0.0495,  0.0231,  ...,  0.0004,  0.1672, -0.0007],\n",
       "         [ 0.0336, -0.0526,  0.1484,  ...,  0.0432,  0.1536,  0.0838],\n",
       "         [-0.1210, -0.0876, -0.1007,  ..., -0.1033,  0.1353, -0.0873],\n",
       "         ...,\n",
       "         [-0.0112,  0.1009, -0.0193,  ...,  0.0067,  0.0449, -0.0482],\n",
       "         [ 0.0829,  0.0192,  0.0316,  ...,  0.0211, -0.0305, -0.1009],\n",
       "         [ 0.0569,  0.0544, -0.0342,  ..., -0.0253,  0.0439,  0.0034]]),\n",
       " 'model.bert.encoder.layer.6.attention.self.query.bias': tensor([ 0.2180,  0.2631, -0.1745,  ..., -0.0003,  0.0875,  0.0843]),\n",
       " 'model.bert.encoder.layer.6.attention.self.key.weight': tensor([[-0.0595,  0.0250, -0.0120,  ...,  0.0803,  0.1263,  0.0134],\n",
       "         [-0.0393, -0.0359, -0.0004,  ...,  0.0026, -0.0042,  0.1012],\n",
       "         [-0.0515,  0.0610, -0.0258,  ...,  0.0497,  0.1306, -0.0197],\n",
       "         ...,\n",
       "         [-0.1515,  0.0366, -0.0629,  ..., -0.1509, -0.0483, -0.0243],\n",
       "         [ 0.0285,  0.1623, -0.0268,  ...,  0.0789, -0.0410,  0.0789],\n",
       "         [ 0.0782,  0.0109, -0.0356,  ..., -0.0294, -0.0645, -0.0467]]),\n",
       " 'model.bert.encoder.layer.6.attention.self.key.bias': tensor([-0.6965, -1.1181,  0.2234,  ...,  0.0531,  0.0553, -0.0130]),\n",
       " 'model.bert.encoder.layer.6.attention.self.value.weight': tensor([[-0.0360,  0.0065,  0.0056,  ...,  0.0397,  0.0192,  0.0955],\n",
       "         [ 0.0697, -0.1787, -0.0813,  ...,  0.0419, -0.1092, -0.0769],\n",
       "         [ 0.0895,  0.0912,  0.0371,  ...,  0.0981, -0.0109,  0.0719],\n",
       "         ...,\n",
       "         [ 0.1220, -0.2177,  0.1180,  ...,  0.1122,  0.0125, -0.0425],\n",
       "         [ 0.0049,  0.0014, -0.2011,  ..., -0.0827, -0.0249, -0.1904],\n",
       "         [ 0.1610, -0.0102,  0.0015,  ..., -0.0288,  0.1110, -0.0658]]),\n",
       " 'model.bert.encoder.layer.6.attention.self.value.bias': tensor([ 0.0078, -0.0104,  0.0134,  ..., -0.0031, -0.0062, -0.0129]),\n",
       " 'model.bert.encoder.layer.6.attention.output.dense.weight': tensor([[ 0.0375, -0.0857,  0.1999,  ..., -0.2107,  0.0173, -0.0530],\n",
       "         [ 0.0431, -0.0764, -0.1106,  ...,  0.2341, -0.1783, -0.1233],\n",
       "         [ 0.0782,  0.0321,  0.0667,  ..., -0.1272,  0.1253, -0.1429],\n",
       "         ...,\n",
       "         [-0.0117,  0.1063,  0.1071,  ..., -0.0184,  0.1201, -0.0511],\n",
       "         [ 0.0618,  0.0178,  0.0333,  ..., -0.2094, -0.1734, -0.0243],\n",
       "         [ 0.1358,  0.0355,  0.0648,  ..., -0.0259,  0.0953, -0.1543]]),\n",
       " 'model.bert.encoder.layer.6.attention.output.dense.bias': tensor([-0.1305,  0.0972, -0.0408,  ..., -0.0472,  0.0615, -0.0457]),\n",
       " 'model.bert.encoder.layer.6.intermediate.dense.weight': tensor([[ 5.9439e-02, -5.3076e-02,  5.7359e-02,  ..., -2.2881e-04,\n",
       "           6.6810e-02, -5.5916e-02],\n",
       "         [ 2.5994e-01, -1.6219e-01,  1.0469e-02,  ...,  9.4994e-02,\n",
       "          -1.8266e-02, -4.4849e-01],\n",
       "         [ 1.2283e-01,  7.9409e-02, -2.1053e-01,  ...,  1.2846e-01,\n",
       "           9.9520e-02, -5.7137e-02],\n",
       "         ...,\n",
       "         [ 6.2237e-02, -1.4452e-01, -4.3541e-02,  ...,  8.3927e-02,\n",
       "          -1.0206e-01, -2.6601e-02],\n",
       "         [ 8.6262e-03, -8.3046e-02, -5.4579e-02,  ...,  7.8779e-02,\n",
       "           2.8975e-02,  2.9945e-02],\n",
       "         [-4.0388e-02,  2.9243e-02,  5.1823e-02,  ..., -3.5624e-02,\n",
       "           7.9093e-02, -1.2284e-01]]),\n",
       " 'model.bert.encoder.layer.6.intermediate.dense.bias': tensor([-0.1391, -0.1520, -0.1328,  ..., -0.0352,  0.0051, -0.0802]),\n",
       " 'model.bert.encoder.layer.6.output.dense.weight': tensor([[ 0.1027,  0.2477,  0.2255,  ..., -0.0182, -0.0592, -0.1880],\n",
       "         [ 0.0394,  0.0933,  0.0778,  ...,  0.1076,  0.0282, -0.0442],\n",
       "         [ 0.1658,  0.1229, -0.2210,  ..., -0.0985, -0.0118,  0.0510],\n",
       "         ...,\n",
       "         [ 0.0894, -0.0749,  0.0708,  ...,  0.0072, -0.1613, -0.0490],\n",
       "         [-0.0742,  0.0703,  0.2717,  ..., -0.0200, -0.0373, -0.0228],\n",
       "         [-0.0023, -0.0349, -0.4216,  ...,  0.1830,  0.0564, -0.0697]]),\n",
       " 'model.bert.encoder.layer.6.output.dense.bias': tensor([-0.0751, -0.1709,  0.0034,  ..., -0.0100,  0.1448,  0.1402]),\n",
       " 'model.bert.encoder.layer.7.pre_attention_ln.weight': tensor([0.6514, 0.7445, 0.7891,  ..., 0.6781, 0.7529, 0.7829]),\n",
       " 'model.bert.encoder.layer.7.pre_attention_ln.bias': tensor([ 0.0488, -0.0524,  0.0177,  ..., -0.0190, -0.0332, -0.0073]),\n",
       " 'model.bert.encoder.layer.7.post_attention_ln.weight': tensor([0.8060, 0.8697, 0.8013,  ..., 0.7894, 0.8913, 0.8849]),\n",
       " 'model.bert.encoder.layer.7.post_attention_ln.bias': tensor([-0.0429, -0.0480,  0.0291,  ..., -0.0944,  0.0528,  0.0412]),\n",
       " 'model.bert.encoder.layer.7.attention.self.query.weight': tensor([[-0.1614, -0.0160,  0.1108,  ...,  0.0380, -0.0202, -0.0036],\n",
       "         [-0.0166, -0.0787,  0.0407,  ..., -0.0406,  0.0162, -0.0211],\n",
       "         [-0.0629,  0.0046, -0.1359,  ...,  0.0246, -0.1258, -0.0207],\n",
       "         ...,\n",
       "         [ 0.1031, -0.1793, -0.1134,  ..., -0.1333, -0.0227, -0.0865],\n",
       "         [-0.0452,  0.0325,  0.0519,  ..., -0.0247, -0.0787,  0.1024],\n",
       "         [ 0.0857,  0.2256, -0.0740,  ...,  0.0846, -0.0177, -0.0623]]),\n",
       " 'model.bert.encoder.layer.7.attention.self.query.bias': tensor([ 0.0381, -0.0814,  0.0603,  ..., -0.0690, -0.0015,  0.0505]),\n",
       " 'model.bert.encoder.layer.7.attention.self.key.weight': tensor([[-0.0294, -0.1184, -0.0910,  ...,  0.0506, -0.0651, -0.0177],\n",
       "         [-0.1385,  0.0043,  0.0352,  ..., -0.0344, -0.0498, -0.1032],\n",
       "         [ 0.1177,  0.0084, -0.1242,  ...,  0.1043, -0.0886, -0.2122],\n",
       "         ...,\n",
       "         [ 0.0462, -0.0268,  0.0513,  ..., -0.2513,  0.1157, -0.0635],\n",
       "         [ 0.0730, -0.0151,  0.0400,  ...,  0.2448, -0.0671, -0.0240],\n",
       "         [-0.0419, -0.0156, -0.0579,  ...,  0.0112,  0.1213, -0.0076]]),\n",
       " 'model.bert.encoder.layer.7.attention.self.key.bias': tensor([-0.2989,  0.3844, -1.0770,  ...,  1.0892, -0.0871, -0.7142]),\n",
       " 'model.bert.encoder.layer.7.attention.self.value.weight': tensor([[ 0.2340,  0.1543, -0.0262,  ..., -0.0981, -0.2075, -0.1533],\n",
       "         [ 0.0438, -0.1483, -0.0594,  ..., -0.0404,  0.0023, -0.0080],\n",
       "         [-0.0697,  0.0415,  0.1121,  ..., -0.0773,  0.0185,  0.0379],\n",
       "         ...,\n",
       "         [ 0.0511,  0.0111, -0.0081,  ..., -0.0215, -0.1367, -0.0648],\n",
       "         [ 0.2538,  0.0402,  0.0508,  ...,  0.1039,  0.0495,  0.0175],\n",
       "         [ 0.0213, -0.1229,  0.0672,  ...,  0.0214,  0.0517, -0.1357]]),\n",
       " 'model.bert.encoder.layer.7.attention.self.value.bias': tensor([-0.0053, -0.0093, -0.0355,  ...,  0.0162, -0.0204, -0.0058]),\n",
       " 'model.bert.encoder.layer.7.attention.output.dense.weight': tensor([[ 0.0026,  0.0063, -0.0647,  ..., -0.0345, -0.0833, -0.1555],\n",
       "         [-0.0251,  0.0279,  0.0857,  ...,  0.0347, -0.0319, -0.0476],\n",
       "         [-0.0109, -0.0845, -0.0887,  ...,  0.0119,  0.0703, -0.0019],\n",
       "         ...,\n",
       "         [-0.1255,  0.0487,  0.0134,  ...,  0.0498,  0.0767, -0.0087],\n",
       "         [ 0.1683, -0.0801, -0.0687,  ...,  0.0346, -0.1288,  0.1105],\n",
       "         [ 0.0551,  0.1864,  0.1227,  ..., -0.0776,  0.1031, -0.0218]]),\n",
       " 'model.bert.encoder.layer.7.attention.output.dense.bias': tensor([-0.0860,  0.0896, -0.0022,  ...,  0.0011,  0.0457, -0.0495]),\n",
       " 'model.bert.encoder.layer.7.intermediate.dense.weight': tensor([[-0.0875, -0.0197, -0.1205,  ...,  0.0682,  0.1325, -0.0293],\n",
       "         [ 0.0075,  0.0585, -0.2231,  ...,  0.0715, -0.1063,  0.0545],\n",
       "         [ 0.0838,  0.0123,  0.0800,  ..., -0.0308, -0.1016, -0.1363],\n",
       "         ...,\n",
       "         [-0.0660,  0.0040, -0.0193,  ..., -0.0678, -0.1129,  0.2135],\n",
       "         [-0.1059, -0.0337, -0.0046,  ..., -0.0854, -0.1510, -0.2429],\n",
       "         [-0.0850, -0.1422,  0.0990,  ...,  0.2214,  0.0470,  0.0555]]),\n",
       " 'model.bert.encoder.layer.7.intermediate.dense.bias': tensor([-0.0419,  0.0126, -0.0917,  ..., -0.0149, -0.0359, -0.0315]),\n",
       " 'model.bert.encoder.layer.7.output.dense.weight': tensor([[ 1.3013e-02, -2.4782e-02,  2.3868e-02,  ..., -1.2514e-03,\n",
       "           9.8352e-02,  8.2910e-02],\n",
       "         [ 6.1359e-02, -2.2931e-02, -9.1799e-02,  ..., -9.8838e-02,\n",
       "           3.6750e-02,  4.8779e-02],\n",
       "         [ 7.5091e-02,  1.4632e-01, -2.1317e-03,  ...,  2.3678e-02,\n",
       "          -3.3198e-03, -2.4074e-02],\n",
       "         ...,\n",
       "         [-1.7079e-05, -6.5707e-02, -2.4108e-02,  ..., -2.2420e-01,\n",
       "           5.4174e-02, -1.3570e-01],\n",
       "         [-2.8799e-02,  9.3031e-02,  1.2750e-01,  ..., -1.1243e-02,\n",
       "           2.0597e-01,  1.5737e-02],\n",
       "         [ 1.3722e-02,  1.2178e-02,  1.3359e-01,  ...,  1.5808e-01,\n",
       "           1.9647e-01, -5.4147e-02]]),\n",
       " 'model.bert.encoder.layer.7.output.dense.bias': tensor([-0.1112, -0.1103, -0.1155,  ..., -0.0028,  0.0226,  0.0679]),\n",
       " 'model.bert.encoder.layer.8.pre_attention_ln.weight': tensor([0.6787, 0.7657, 0.7530,  ..., 0.7017, 0.7483, 0.7654]),\n",
       " 'model.bert.encoder.layer.8.pre_attention_ln.bias': tensor([ 0.0430, -0.0656, -0.0248,  ...,  0.0298, -0.0636, -0.0010]),\n",
       " 'model.bert.encoder.layer.8.post_attention_ln.weight': tensor([0.7883, 0.8011, 0.7382,  ..., 0.7376, 0.8016, 0.8016]),\n",
       " 'model.bert.encoder.layer.8.post_attention_ln.bias': tensor([-0.0396, -0.0200, -0.0059,  ..., -0.0905,  0.0207,  0.0351]),\n",
       " 'model.bert.encoder.layer.8.attention.self.query.weight': tensor([[ 0.0596, -0.1095, -0.0563,  ..., -0.1763,  0.0026,  0.0561],\n",
       "         [-0.0128,  0.0491,  0.1313,  ...,  0.0946, -0.1126,  0.0356],\n",
       "         [-0.0087,  0.0057, -0.0788,  ...,  0.0796,  0.0058,  0.0038],\n",
       "         ...,\n",
       "         [-0.0421,  0.2030,  0.2098,  ..., -0.0189, -0.0677,  0.0062],\n",
       "         [ 0.0962,  0.0797, -0.0577,  ..., -0.0193, -0.0976,  0.1711],\n",
       "         [-0.2013, -0.1216, -0.0470,  ...,  0.1434, -0.0943,  0.0462]]),\n",
       " 'model.bert.encoder.layer.8.attention.self.query.bias': tensor([-0.0347, -0.0358,  0.0667,  ...,  0.0189, -0.0387, -0.0422]),\n",
       " 'model.bert.encoder.layer.8.attention.self.key.weight': tensor([[ 0.0337, -0.0549, -0.0960,  ...,  0.0303, -0.0625, -0.0230],\n",
       "         [ 0.1526,  0.1620,  0.1382,  ..., -0.0805,  0.0310,  0.1796],\n",
       "         [ 0.0240,  0.1165,  0.0302,  ..., -0.1331, -0.0445,  0.1546],\n",
       "         ...,\n",
       "         [-0.0233, -0.1717,  0.1922,  ..., -0.0286, -0.1482,  0.1874],\n",
       "         [ 0.0991, -0.0323, -0.1469,  ..., -0.1123, -0.1623,  0.0657],\n",
       "         [-0.1714, -0.0598,  0.1370,  ...,  0.0963, -0.0208,  0.2402]]),\n",
       " 'model.bert.encoder.layer.8.attention.self.key.bias': tensor([ 2.9551, -1.1248, -3.5568,  ...,  1.5374, -2.5847, -0.8095]),\n",
       " 'model.bert.encoder.layer.8.attention.self.value.weight': tensor([[ 0.0809,  0.0361, -0.0201,  ..., -0.0110, -0.0551,  0.0019],\n",
       "         [ 0.0088,  0.0182,  0.0811,  ..., -0.0544, -0.2887, -0.2327],\n",
       "         [ 0.0900,  0.0281, -0.1068,  ..., -0.1430, -0.2143, -0.0739],\n",
       "         ...,\n",
       "         [ 0.1118, -0.0942, -0.0281,  ...,  0.0064,  0.0798, -0.1170],\n",
       "         [ 0.0520,  0.0734,  0.0112,  ..., -0.0137,  0.0538, -0.0390],\n",
       "         [-0.0625, -0.0246,  0.0936,  ...,  0.1234,  0.0180,  0.0999]]),\n",
       " 'model.bert.encoder.layer.8.attention.self.value.bias': tensor([-0.0249, -0.0741, -0.0231,  ...,  0.0842,  0.0190, -0.0694]),\n",
       " 'model.bert.encoder.layer.8.attention.output.dense.weight': tensor([[ 0.0172,  0.0447,  0.0894,  ..., -0.0633, -0.1023, -0.1716],\n",
       "         [ 0.1007,  0.1012, -0.1462,  ...,  0.0444, -0.0629,  0.0372],\n",
       "         [-0.0130,  0.0476,  0.0677,  ..., -0.1017,  0.0582,  0.0604],\n",
       "         ...,\n",
       "         [ 0.0383, -0.0494,  0.0109,  ...,  0.0134, -0.0987,  0.0377],\n",
       "         [-0.2338, -0.0101,  0.1016,  ...,  0.0217, -0.0694, -0.0051],\n",
       "         [-0.0478, -0.0076,  0.0081,  ..., -0.0439,  0.0503,  0.0294]]),\n",
       " 'model.bert.encoder.layer.8.attention.output.dense.bias': tensor([-0.1317,  0.1948,  0.0461,  ..., -0.0491,  0.0133, -0.0911]),\n",
       " 'model.bert.encoder.layer.8.intermediate.dense.weight': tensor([[ 0.1673, -0.0482, -0.0675,  ..., -0.1260, -0.0321,  0.0186],\n",
       "         [-0.0478,  0.1665, -0.0540,  ..., -0.0351, -0.0585, -0.1073],\n",
       "         [ 0.0831, -0.1788,  0.0957,  ...,  0.0611, -0.0784,  0.0541],\n",
       "         ...,\n",
       "         [-0.0390, -0.1953, -0.0197,  ...,  0.0740, -0.0839, -0.0580],\n",
       "         [ 0.1644, -0.0775, -0.1504,  ...,  0.0186,  0.1126,  0.1427],\n",
       "         [ 0.0384,  0.0825, -0.0364,  ...,  0.1140, -0.0619,  0.1610]]),\n",
       " 'model.bert.encoder.layer.8.intermediate.dense.bias': tensor([-0.0548, -0.0312, -0.0791,  ..., -0.0260, -0.0987, -0.0811]),\n",
       " 'model.bert.encoder.layer.8.output.dense.weight': tensor([[ 0.1199, -0.0042,  0.0514,  ...,  0.0471, -0.1062, -0.0761],\n",
       "         [-0.0221,  0.2026, -0.0288,  ...,  0.0759,  0.0580,  0.1172],\n",
       "         [-0.0230,  0.0596,  0.0166,  ...,  0.1269, -0.0828, -0.0732],\n",
       "         ...,\n",
       "         [-0.2890,  0.0264, -0.1762,  ..., -0.0860, -0.0926, -0.0023],\n",
       "         [ 0.0121,  0.0104, -0.0774,  ...,  0.0629,  0.0654, -0.0268],\n",
       "         [ 0.0303, -0.2136,  0.1655,  ..., -0.0378,  0.0021, -0.1862]]),\n",
       " 'model.bert.encoder.layer.8.output.dense.bias': tensor([-0.1833, -0.0889, -0.1378,  ..., -0.0487, -0.0247,  0.0789]),\n",
       " 'model.bert.encoder.layer.9.pre_attention_ln.weight': tensor([0.7087, 0.8282, 0.7802,  ..., 0.7344, 0.7812, 0.7903]),\n",
       " 'model.bert.encoder.layer.9.pre_attention_ln.bias': tensor([ 0.0534, -0.0357,  0.0169,  ..., -0.0124, -0.0379, -0.0042]),\n",
       " 'model.bert.encoder.layer.9.post_attention_ln.weight': tensor([0.8000, 0.8123, 0.7551,  ..., 0.7376, 0.7991, 0.8589]),\n",
       " 'model.bert.encoder.layer.9.post_attention_ln.bias': tensor([-0.0247, -0.0199,  0.0023,  ..., -0.0519,  0.0189,  0.0531]),\n",
       " 'model.bert.encoder.layer.9.attention.self.query.weight': tensor([[-4.6172e-02, -1.6275e-01,  9.7038e-02,  ..., -1.3540e-01,\n",
       "          -1.2690e-01, -8.8483e-03],\n",
       "         [ 3.0782e-02, -2.3218e-02,  1.0401e-01,  ...,  6.7769e-02,\n",
       "          -1.0131e-01, -3.5200e-02],\n",
       "         [ 4.2627e-02, -9.5891e-02,  9.4733e-02,  ..., -2.4640e-02,\n",
       "          -2.1079e-01,  7.5593e-05],\n",
       "         ...,\n",
       "         [-9.9984e-02,  3.4562e-02, -2.2078e-02,  ..., -4.7558e-02,\n",
       "          -3.5849e-02,  6.1772e-02],\n",
       "         [ 5.9187e-02, -9.5069e-02,  2.5891e-01,  ...,  6.4695e-02,\n",
       "           6.5676e-02,  5.2262e-02],\n",
       "         [-1.3363e-01,  4.1587e-02,  3.3819e-02,  ..., -4.4365e-02,\n",
       "           1.2973e-02, -6.7478e-02]]),\n",
       " 'model.bert.encoder.layer.9.attention.self.query.bias': tensor([-0.1837, -0.1080,  0.1328,  ..., -0.0278, -0.0704, -0.0645]),\n",
       " 'model.bert.encoder.layer.9.attention.self.key.weight': tensor([[-0.1236, -0.2465,  0.1371,  ...,  0.0017,  0.1778,  0.0908],\n",
       "         [-0.2361, -0.0372, -0.1294,  ...,  0.0384,  0.0098,  0.0926],\n",
       "         [ 0.1523, -0.0863,  0.0943,  ...,  0.0382,  0.0318, -0.1072],\n",
       "         ...,\n",
       "         [-0.1020,  0.0424, -0.0260,  ..., -0.1036, -0.1253, -0.0297],\n",
       "         [-0.0265, -0.0673,  0.0527,  ...,  0.0253,  0.0507,  0.1357],\n",
       "         [-0.3288,  0.0945, -0.0782,  ..., -0.0860,  0.1256,  0.0310]]),\n",
       " 'model.bert.encoder.layer.9.attention.self.key.bias': tensor([ 2.9780,  0.4457,  1.0111,  ..., -0.0195, -0.1159,  0.8443]),\n",
       " 'model.bert.encoder.layer.9.attention.self.value.weight': tensor([[-0.0589, -0.0903,  0.1681,  ..., -0.0389, -0.0243,  0.1329],\n",
       "         [ 0.1383, -0.0945, -0.0234,  ..., -0.0182,  0.0647,  0.0141],\n",
       "         [ 0.0943, -0.2865,  0.0863,  ...,  0.0852,  0.1259, -0.0415],\n",
       "         ...,\n",
       "         [ 0.0518, -0.0111,  0.0345,  ..., -0.0296, -0.0378,  0.0369],\n",
       "         [-0.0763,  0.0245, -0.0288,  ...,  0.0959, -0.0035, -0.0026],\n",
       "         [-0.1026,  0.0577, -0.1237,  ...,  0.0116,  0.0062,  0.0096]]),\n",
       " 'model.bert.encoder.layer.9.attention.self.value.bias': tensor([ 0.0389, -0.0279,  0.0036,  ..., -0.0136, -0.0135, -0.0568]),\n",
       " 'model.bert.encoder.layer.9.attention.output.dense.weight': tensor([[-0.1357,  0.1599,  0.1005,  ...,  0.1234, -0.0016,  0.1207],\n",
       "         [-0.1382, -0.0381, -0.1692,  ...,  0.1624,  0.0888,  0.0067],\n",
       "         [ 0.0275,  0.0925,  0.0561,  ..., -0.0069,  0.0452,  0.1370],\n",
       "         ...,\n",
       "         [ 0.0174,  0.0220,  0.2208,  ..., -0.1425, -0.0845, -0.0336],\n",
       "         [-0.0417, -0.0029,  0.2779,  ..., -0.0517,  0.0656,  0.0060],\n",
       "         [ 0.0824,  0.0081,  0.1008,  ...,  0.0264, -0.0491,  0.0040]]),\n",
       " 'model.bert.encoder.layer.9.attention.output.dense.bias': tensor([-0.1066,  0.2899,  0.0459,  ..., -0.0868,  0.1126,  0.0431]),\n",
       " 'model.bert.encoder.layer.9.intermediate.dense.weight': tensor([[ 0.0573, -0.0088, -0.1570,  ...,  0.0849, -0.0335, -0.0249],\n",
       "         [-0.0206, -0.0769, -0.1139,  ...,  0.1173, -0.0586, -0.0216],\n",
       "         [ 0.1858, -0.1097,  0.0425,  ...,  0.1064, -0.1572,  0.0120],\n",
       "         ...,\n",
       "         [ 0.1403, -0.0369,  0.0355,  ...,  0.0711,  0.0564, -0.0763],\n",
       "         [-0.0737, -0.1922,  0.1466,  ...,  0.1989,  0.0536, -0.1287],\n",
       "         [ 0.0559,  0.0609, -0.0549,  ..., -0.1989, -0.2542, -0.0130]]),\n",
       " 'model.bert.encoder.layer.9.intermediate.dense.bias': tensor([-0.0971, -0.1010, -0.0522,  ..., -0.0342, -0.0906, -0.0765]),\n",
       " 'model.bert.encoder.layer.9.output.dense.weight': tensor([[-0.0976, -0.1275, -0.1157,  ...,  0.1180, -0.1140, -0.1739],\n",
       "         [-0.1181, -0.1028,  0.0869,  ..., -0.0464, -0.1241,  0.1113],\n",
       "         [-0.0755, -0.0380, -0.0543,  ...,  0.1625, -0.0278, -0.1544],\n",
       "         ...,\n",
       "         [-0.2364, -0.2328, -0.1529,  ...,  0.0092, -0.2219, -0.1448],\n",
       "         [ 0.0256,  0.0213,  0.0916,  ..., -0.0171,  0.0376, -0.0812],\n",
       "         [-0.1195,  0.0532, -0.0196,  ...,  0.0616, -0.1387,  0.0719]]),\n",
       " 'model.bert.encoder.layer.9.output.dense.bias': tensor([-0.0855, -0.1170, -0.1114,  ..., -0.0369, -0.0051, -0.0242]),\n",
       " 'model.bert.encoder.layer.10.pre_attention_ln.weight': tensor([0.7441, 0.8168, 0.8178,  ..., 0.7860, 0.8193, 0.8108]),\n",
       " 'model.bert.encoder.layer.10.pre_attention_ln.bias': tensor([ 0.0456, -0.0337,  0.0123,  ..., -0.0031, -0.0235, -0.0161]),\n",
       " 'model.bert.encoder.layer.10.post_attention_ln.weight': tensor([0.9208, 0.8665, 0.8295,  ..., 0.7952, 0.8477, 0.8724]),\n",
       " 'model.bert.encoder.layer.10.post_attention_ln.bias': tensor([-0.0660,  0.0302, -0.0170,  ..., -0.0192,  0.0140,  0.0428]),\n",
       " 'model.bert.encoder.layer.10.attention.self.query.weight': tensor([[ 0.0449, -0.0070, -0.0008,  ..., -0.0125, -0.0558,  0.0760],\n",
       "         [ 0.0754,  0.1567,  0.0213,  ...,  0.0112,  0.1875,  0.0176],\n",
       "         [-0.0702, -0.0168, -0.0749,  ...,  0.0594,  0.0151, -0.0207],\n",
       "         ...,\n",
       "         [ 0.0276,  0.0004,  0.0454,  ..., -0.0703,  0.1394,  0.0598],\n",
       "         [ 0.0777, -0.1989,  0.1058,  ..., -0.0168, -0.0266,  0.0644],\n",
       "         [-0.0010,  0.0291,  0.0578,  ...,  0.0553, -0.1353, -0.1867]]),\n",
       " 'model.bert.encoder.layer.10.attention.self.query.bias': tensor([-0.0260, -0.0312,  0.0105,  ..., -0.0035, -0.0650, -0.0167]),\n",
       " 'model.bert.encoder.layer.10.attention.self.key.weight': tensor([[-0.0498, -0.0159, -0.1838,  ...,  0.1460, -0.0836,  0.0125],\n",
       "         [ 0.2208,  0.0855,  0.1315,  ..., -0.1576,  0.0355, -0.0401],\n",
       "         [ 0.1016,  0.0035, -0.2681,  ...,  0.1232, -0.2143, -0.0271],\n",
       "         ...,\n",
       "         [ 0.0314,  0.1286, -0.0910,  ...,  0.0955,  0.1141, -0.0875],\n",
       "         [ 0.0224,  0.0534, -0.0119,  ...,  0.2063,  0.2237,  0.1301],\n",
       "         [-0.1161, -0.1224, -0.0730,  ..., -0.1469,  0.0762, -0.0280]]),\n",
       " 'model.bert.encoder.layer.10.attention.self.key.bias': tensor([-0.2203,  0.0581, -0.1308,  ..., -0.2343, -0.1637, -0.9808]),\n",
       " 'model.bert.encoder.layer.10.attention.self.value.weight': tensor([[-0.0430,  0.1603, -0.0930,  ..., -0.0635, -0.0527,  0.1333],\n",
       "         [ 0.0665, -0.0636,  0.0212,  ...,  0.0054, -0.0148,  0.0216],\n",
       "         [ 0.1310,  0.0460,  0.0609,  ..., -0.2326,  0.0190,  0.0257],\n",
       "         ...,\n",
       "         [-0.0258,  0.0493,  0.1417,  ..., -0.1081,  0.1071,  0.0291],\n",
       "         [-0.1478, -0.1769,  0.0237,  ...,  0.1142, -0.0263, -0.0309],\n",
       "         [-0.2373, -0.0670, -0.0101,  ..., -0.0377, -0.0489, -0.1417]]),\n",
       " 'model.bert.encoder.layer.10.attention.self.value.bias': tensor([ 0.0603, -0.0306, -0.0330,  ..., -0.0020, -0.0133, -0.0054]),\n",
       " 'model.bert.encoder.layer.10.attention.output.dense.weight': tensor([[ 0.0441,  0.0328, -0.1336,  ...,  0.0415,  0.1234,  0.0385],\n",
       "         [ 0.0303, -0.0624, -0.0592,  ..., -0.0810,  0.0931, -0.0928],\n",
       "         [-0.0306,  0.0648, -0.0988,  ..., -0.0719,  0.1000, -0.1444],\n",
       "         ...,\n",
       "         [-0.0119,  0.0709,  0.0659,  ...,  0.1701, -0.0294,  0.0004],\n",
       "         [ 0.0217, -0.0319, -0.1585,  ..., -0.0672,  0.0134,  0.0763],\n",
       "         [-0.1330,  0.1117,  0.0229,  ..., -0.0473,  0.0092,  0.1051]]),\n",
       " 'model.bert.encoder.layer.10.attention.output.dense.bias': tensor([-0.0149,  0.1799,  0.0388,  ..., -0.1555,  0.0749,  0.1314]),\n",
       " 'model.bert.encoder.layer.10.intermediate.dense.weight': tensor([[ 0.1298,  0.0386, -0.0753,  ...,  0.0889,  0.0643, -0.0997],\n",
       "         [ 0.1193,  0.1186,  0.0699,  ...,  0.0371, -0.0117,  0.0310],\n",
       "         [ 0.0250, -0.0459, -0.0753,  ..., -0.2041,  0.1115,  0.1042],\n",
       "         ...,\n",
       "         [-0.0591,  0.0411, -0.0130,  ...,  0.1270, -0.1506,  0.1130],\n",
       "         [ 0.2572, -0.0453, -0.0923,  ...,  0.0730,  0.1949,  0.0571],\n",
       "         [ 0.2541,  0.1520,  0.0950,  ...,  0.0442,  0.0963, -0.0658]]),\n",
       " 'model.bert.encoder.layer.10.intermediate.dense.bias': tensor([-0.0345, -0.0466, -0.0751,  ..., -0.0951, -0.1077, -0.0673]),\n",
       " 'model.bert.encoder.layer.10.output.dense.weight': tensor([[ 0.0845, -0.0691, -0.0142,  ..., -0.0485,  0.0355,  0.2013],\n",
       "         [ 0.0032, -0.1479,  0.0073,  ..., -0.0218, -0.1746,  0.0610],\n",
       "         [ 0.0169, -0.1014, -0.1160,  ...,  0.1496, -0.0038, -0.1567],\n",
       "         ...,\n",
       "         [ 0.1189,  0.1308, -0.1806,  ...,  0.0089,  0.1907,  0.0187],\n",
       "         [ 0.1142,  0.2163,  0.0239,  ..., -0.1764,  0.1329, -0.0639],\n",
       "         [-0.1635, -0.0405,  0.0286,  ...,  0.1438,  0.0729, -0.0761]]),\n",
       " 'model.bert.encoder.layer.10.output.dense.bias': tensor([-0.2150,  0.0306, -0.0775,  ..., -0.1004, -0.0194,  0.0590]),\n",
       " 'model.bert.encoder.layer.11.pre_attention_ln.weight': tensor([0.8146, 0.8625, 0.8937,  ..., 0.8286, 0.8613, 0.9011]),\n",
       " 'model.bert.encoder.layer.11.pre_attention_ln.bias': tensor([ 0.0271, -0.0321,  0.0118,  ..., -0.0026, -0.0277, -0.0110]),\n",
       " 'model.bert.encoder.layer.11.post_attention_ln.weight': tensor([1.0010, 0.8814, 0.8806,  ..., 0.8310, 0.8832, 0.9029]),\n",
       " 'model.bert.encoder.layer.11.post_attention_ln.bias': tensor([-0.1427,  0.0456, -0.0315,  ..., -0.0420,  0.0449,  0.0437]),\n",
       " 'model.bert.encoder.layer.11.attention.self.query.weight': tensor([[-0.2482,  0.0684,  0.0117,  ...,  0.0452,  0.0860,  0.1068],\n",
       "         [-0.1681,  0.0865,  0.0246,  ..., -0.0420,  0.0634,  0.0490],\n",
       "         [ 0.0555, -0.0184,  0.0800,  ..., -0.0686, -0.0142,  0.1016],\n",
       "         ...,\n",
       "         [ 0.0822, -0.0164,  0.2318,  ...,  0.0068,  0.0565,  0.0465],\n",
       "         [-0.2154, -0.0661, -0.0332,  ..., -0.0412,  0.1142, -0.1487],\n",
       "         [ 0.0224, -0.0515, -0.0850,  ...,  0.0369, -0.2469, -0.1124]]),\n",
       " 'model.bert.encoder.layer.11.attention.self.query.bias': tensor([ 0.0680,  0.5834, -0.1620,  ..., -0.0065,  0.0481, -0.0193]),\n",
       " 'model.bert.encoder.layer.11.attention.self.key.weight': tensor([[ 0.0183,  0.1965,  0.0359,  ...,  0.1129,  0.0962,  0.1659],\n",
       "         [ 0.1673,  0.0160,  0.1574,  ...,  0.0336,  0.0060, -0.1349],\n",
       "         [ 0.1268, -0.1232,  0.0325,  ..., -0.0676, -0.1882, -0.1044],\n",
       "         ...,\n",
       "         [ 0.0074, -0.0796,  0.0776,  ...,  0.0883, -0.0209, -0.0846],\n",
       "         [-0.1545,  0.1221, -0.0550,  ...,  0.2054, -0.1238, -0.2743],\n",
       "         [ 0.0370, -0.0956,  0.0887,  ..., -0.1082, -0.1604,  0.0283]]),\n",
       " 'model.bert.encoder.layer.11.attention.self.key.bias': tensor([ 0.3142,  1.7389,  0.0309,  ..., -0.1190,  0.0050,  0.0085]),\n",
       " 'model.bert.encoder.layer.11.attention.self.value.weight': tensor([[-0.0337, -0.0222,  0.1306,  ...,  0.0650,  0.0606, -0.0170],\n",
       "         [ 0.1197, -0.0337, -0.0384,  ...,  0.0613, -0.0807, -0.0304],\n",
       "         [-0.0477, -0.0924, -0.2458,  ...,  0.0349, -0.0918, -0.0301],\n",
       "         ...,\n",
       "         [-0.0359, -0.0575, -0.1119,  ...,  0.0084, -0.1924,  0.0495],\n",
       "         [ 0.0965,  0.0993,  0.0188,  ...,  0.0371,  0.0555,  0.0848],\n",
       "         [-0.0038,  0.1052, -0.0944,  ...,  0.1565, -0.2138,  0.0043]]),\n",
       " 'model.bert.encoder.layer.11.attention.self.value.bias': tensor([ 0.0350,  0.0108,  0.0027,  ..., -0.0245,  0.0215, -0.0039]),\n",
       " 'model.bert.encoder.layer.11.attention.output.dense.weight': tensor([[ 0.0333, -0.1784,  0.1489,  ...,  0.0285, -0.0087,  0.0347],\n",
       "         [-0.1026, -0.0095,  0.1012,  ..., -0.0578, -0.0006, -0.1668],\n",
       "         [-0.0248, -0.0369, -0.0064,  ...,  0.0804,  0.0314,  0.1488],\n",
       "         ...,\n",
       "         [-0.1329, -0.1101,  0.0532,  ..., -0.0542,  0.0444, -0.0483],\n",
       "         [ 0.1567, -0.0040,  0.1073,  ...,  0.1220,  0.0232,  0.1610],\n",
       "         [ 0.0014, -0.0729, -0.1608,  ...,  0.0590, -0.1131,  0.1565]]),\n",
       " 'model.bert.encoder.layer.11.attention.output.dense.bias': tensor([-0.0093,  0.0747,  0.0057,  ..., -0.0078,  0.1264, -0.0196]),\n",
       " 'model.bert.encoder.layer.11.intermediate.dense.weight': tensor([[ 0.2183,  0.1550,  0.0366,  ...,  0.2082, -0.0131,  0.1853],\n",
       "         [-0.0029,  0.1555, -0.0534,  ...,  0.1083, -0.0428, -0.0063],\n",
       "         [-0.0267, -0.0631, -0.1796,  ..., -0.1147,  0.0588,  0.0089],\n",
       "         ...,\n",
       "         [ 0.2314,  0.1686,  0.1216,  ...,  0.0165, -0.0270,  0.0480],\n",
       "         [ 0.0084, -0.0223, -0.0189,  ..., -0.0520, -0.1120, -0.0773],\n",
       "         [-0.0683, -0.0242,  0.0827,  ..., -0.3004, -0.0970, -0.1602]]),\n",
       " 'model.bert.encoder.layer.11.intermediate.dense.bias': tensor([-0.0783,  0.0086,  0.0220,  ..., -0.0951,  0.0038, -0.1229]),\n",
       " 'model.bert.encoder.layer.11.output.dense.weight': tensor([[ 0.1291,  0.0895,  0.0346,  ...,  0.1097,  0.0639,  0.1355],\n",
       "         [ 0.0501, -0.1273,  0.0770,  ..., -0.0621,  0.0953,  0.0553],\n",
       "         [-0.0844, -0.0465,  0.1277,  ...,  0.1627, -0.0199, -0.0556],\n",
       "         ...,\n",
       "         [-0.0536, -0.0186,  0.0886,  ...,  0.1223, -0.0363, -0.0330],\n",
       "         [-0.1903, -0.0851, -0.1252,  ..., -0.2012,  0.0948, -0.0309],\n",
       "         [ 0.1673, -0.0622, -0.0184,  ..., -0.1392, -0.0398, -0.1173]]),\n",
       " 'model.bert.encoder.layer.11.output.dense.bias': tensor([-0.0765,  0.0149, -0.0804,  ..., -0.0804, -0.0194,  0.0201]),\n",
       " 'model.bert.encoder.layer.12.pre_attention_ln.weight': tensor([0.8113, 0.8457, 0.8469,  ..., 0.8748, 0.8905, 0.8829]),\n",
       " 'model.bert.encoder.layer.12.pre_attention_ln.bias': tensor([ 0.0492, -0.0497,  0.0179,  ...,  0.0060, -0.0291, -0.0137]),\n",
       " 'model.bert.encoder.layer.12.post_attention_ln.weight': tensor([1.0281, 0.8681, 0.8719,  ..., 0.8641, 0.9229, 0.8848]),\n",
       " 'model.bert.encoder.layer.12.post_attention_ln.bias': tensor([-0.1541,  0.0290, -0.0669,  ..., -0.0207,  0.0234,  0.0366]),\n",
       " 'model.bert.encoder.layer.12.attention.self.query.weight': tensor([[-0.1457, -0.1166,  0.1663,  ..., -0.2227, -0.0168, -0.0810],\n",
       "         [-0.0889,  0.0051,  0.1184,  ...,  0.0693, -0.0824,  0.1198],\n",
       "         [-0.0657, -0.0656,  0.0424,  ..., -0.0434, -0.2099, -0.0235],\n",
       "         ...,\n",
       "         [ 0.0175, -0.1529,  0.0975,  ...,  0.0741, -0.0778, -0.0853],\n",
       "         [-0.0217, -0.0179, -0.0134,  ..., -0.0742, -0.0417,  0.0497],\n",
       "         [-0.0188, -0.1419, -0.0273,  ...,  0.0327, -0.0276, -0.0942]]),\n",
       " 'model.bert.encoder.layer.12.attention.self.query.bias': tensor([ 0.0279, -0.0108, -0.0677,  ...,  0.2682, -0.1964, -0.4149]),\n",
       " 'model.bert.encoder.layer.12.attention.self.key.weight': tensor([[-0.1203,  0.0404,  0.1925,  ..., -0.2751, -0.0449, -0.1679],\n",
       "         [-0.1139, -0.2020,  0.1561,  ..., -0.0203, -0.1183,  0.2051],\n",
       "         [-0.1115,  0.0129,  0.1533,  ..., -0.0784, -0.0619, -0.0889],\n",
       "         ...,\n",
       "         [ 0.0610, -0.0141,  0.0698,  ...,  0.0599, -0.0585, -0.0696],\n",
       "         [-0.2020,  0.1352,  0.1414,  ..., -0.1526, -0.0480,  0.0093],\n",
       "         [-0.2335,  0.0116, -0.0833,  ..., -0.0898,  0.1133,  0.0138]]),\n",
       " 'model.bert.encoder.layer.12.attention.self.key.bias': tensor([-0.0446,  0.2117,  0.4948,  ...,  1.0281, -0.4460, -1.7175]),\n",
       " 'model.bert.encoder.layer.12.attention.self.value.weight': tensor([[ 0.0211,  0.0324,  0.1502,  ..., -0.0389,  0.0007, -0.0734],\n",
       "         [-0.0492,  0.0714,  0.0381,  ..., -0.0068,  0.0217, -0.0502],\n",
       "         [-0.0836, -0.0579,  0.1766,  ..., -0.1488,  0.1188, -0.0874],\n",
       "         ...,\n",
       "         [ 0.0790, -0.0494, -0.0506,  ..., -0.0275, -0.0311, -0.1112],\n",
       "         [-0.0282,  0.0737,  0.0416,  ..., -0.0181, -0.1288,  0.1329],\n",
       "         [-0.0478, -0.0430,  0.2635,  ...,  0.1759,  0.1561, -0.0424]]),\n",
       " 'model.bert.encoder.layer.12.attention.self.value.bias': tensor([ 0.0193,  0.0021, -0.0077,  ...,  0.0520,  0.0248,  0.0077]),\n",
       " 'model.bert.encoder.layer.12.attention.output.dense.weight': tensor([[-4.7768e-03, -1.3721e-02,  4.6485e-02,  ...,  8.1139e-02,\n",
       "           1.2363e-01, -3.5134e-02],\n",
       "         [ 3.4323e-02, -6.1689e-02,  1.2284e-02,  ..., -1.3429e-01,\n",
       "           1.2307e-02, -6.0366e-02],\n",
       "         [ 7.7121e-02,  5.9733e-02, -7.9360e-03,  ...,  3.1122e-02,\n",
       "          -1.0068e-01,  1.3582e-01],\n",
       "         ...,\n",
       "         [ 5.2998e-02,  8.2817e-02,  2.2877e-01,  ...,  5.7306e-02,\n",
       "           3.5715e-02,  1.0424e-01],\n",
       "         [-5.1574e-02,  1.0288e-01,  6.4839e-02,  ..., -4.0948e-02,\n",
       "           3.6301e-02,  9.2250e-02],\n",
       "         [-2.6445e-02,  5.3316e-02,  6.6939e-05,  ..., -8.9491e-03,\n",
       "          -5.5107e-02,  4.6305e-02]]),\n",
       " 'model.bert.encoder.layer.12.attention.output.dense.bias': tensor([ 0.0808,  0.2645,  0.0892,  ..., -0.0080,  0.1068,  0.0975]),\n",
       " 'model.bert.encoder.layer.12.intermediate.dense.weight': tensor([[-0.0106,  0.0656,  0.0302,  ...,  0.1007, -0.0440, -0.0848],\n",
       "         [-0.0526, -0.0850, -0.1515,  ...,  0.0023,  0.0091, -0.1402],\n",
       "         [-0.0242,  0.1533,  0.0445,  ..., -0.0076,  0.0079, -0.1248],\n",
       "         ...,\n",
       "         [-0.0731, -0.0722, -0.0428,  ..., -0.0970, -0.0732, -0.1285],\n",
       "         [-0.0961, -0.1555,  0.0893,  ..., -0.0465, -0.1399, -0.1841],\n",
       "         [ 0.0773, -0.0896,  0.0047,  ..., -0.0649,  0.0466, -0.0891]]),\n",
       " 'model.bert.encoder.layer.12.intermediate.dense.bias': tensor([-0.0909, -0.1060, -0.0259,  ..., -0.0882, -0.0688, -0.0462]),\n",
       " 'model.bert.encoder.layer.12.output.dense.weight': tensor([[ 1.3170e-03, -3.8467e-02,  6.2072e-02,  ..., -4.7453e-02,\n",
       "           1.1839e-03, -1.5600e-02],\n",
       "         [-3.1699e-02,  6.0405e-02, -6.9528e-02,  ..., -3.0969e-01,\n",
       "          -1.0542e-01,  1.5516e-02],\n",
       "         [ 6.3799e-02,  8.1330e-02, -1.0133e-01,  ...,  6.8739e-02,\n",
       "           1.2135e-01,  1.1305e-05],\n",
       "         ...,\n",
       "         [ 7.3308e-02,  3.2692e-02,  2.4972e-02,  ..., -8.5747e-02,\n",
       "           1.9207e-02,  1.0024e-01],\n",
       "         [-1.8679e-02, -8.1249e-02,  5.3596e-02,  ..., -7.0230e-02,\n",
       "           1.1362e-01,  1.3609e-02],\n",
       "         [ 1.4634e-01,  6.3207e-02,  2.9433e-02,  ..., -1.1235e-01,\n",
       "          -2.7144e-01,  7.5100e-02]]),\n",
       " 'model.bert.encoder.layer.12.output.dense.bias': tensor([-0.3146,  0.0115, -0.0796,  ..., -0.1222,  0.1317,  0.1203]),\n",
       " 'model.bert.encoder.layer.13.pre_attention_ln.weight': tensor([0.8385, 0.8403, 0.8627,  ..., 0.8683, 0.8941, 0.9083]),\n",
       " 'model.bert.encoder.layer.13.pre_attention_ln.bias': tensor([ 0.0306, -0.0398,  0.0005,  ..., -0.0050, -0.0286, -0.0015]),\n",
       " 'model.bert.encoder.layer.13.post_attention_ln.weight': tensor([1.0745, 0.9138, 0.9042,  ..., 0.8762, 0.9802, 0.9295]),\n",
       " 'model.bert.encoder.layer.13.post_attention_ln.bias': tensor([-0.1811,  0.0502, -0.0743,  ..., -0.0512,  0.0561,  0.0287]),\n",
       " 'model.bert.encoder.layer.13.attention.self.query.weight': tensor([[-0.0677, -0.0050,  0.0210,  ...,  0.1256,  0.0757, -0.0628],\n",
       "         [ 0.0107, -0.1124,  0.0955,  ...,  0.0250, -0.2457,  0.1428],\n",
       "         [-0.0415,  0.0987, -0.0657,  ..., -0.0388,  0.0503, -0.0418],\n",
       "         ...,\n",
       "         [ 0.2366,  0.0403,  0.0704,  ...,  0.0043,  0.0639, -0.0280],\n",
       "         [ 0.0726, -0.0102, -0.1339,  ...,  0.0327, -0.0961, -0.0302],\n",
       "         [ 0.0643,  0.0104,  0.0456,  ..., -0.0450,  0.1381,  0.0305]]),\n",
       " 'model.bert.encoder.layer.13.attention.self.query.bias': tensor([ 0.1719, -0.0599,  0.3137,  ..., -0.1917,  0.0722,  0.0316]),\n",
       " 'model.bert.encoder.layer.13.attention.self.key.weight': tensor([[ 9.7959e-02,  1.4952e-02,  8.1247e-02,  ..., -1.1870e-01,\n",
       "          -1.9339e-01,  1.2655e-01],\n",
       "         [ 1.6152e-01, -2.0892e-01, -7.8917e-02,  ..., -1.9837e-02,\n",
       "           6.8013e-02,  1.5942e-01],\n",
       "         [-1.2921e-02,  1.4213e-04, -2.7614e-02,  ...,  1.6764e-02,\n",
       "          -1.1254e-01, -1.7264e-01],\n",
       "         ...,\n",
       "         [-5.1602e-02,  1.0981e-01,  6.8771e-02,  ...,  7.2972e-02,\n",
       "          -1.2168e-01, -1.1237e-02],\n",
       "         [-2.3335e-03, -9.5785e-02, -1.5582e-01,  ...,  9.3368e-02,\n",
       "          -6.6878e-02,  8.5124e-02],\n",
       "         [ 2.7300e-01,  9.2883e-02,  1.5769e-01,  ..., -7.4772e-02,\n",
       "           1.2429e-01,  7.8705e-02]]),\n",
       " 'model.bert.encoder.layer.13.attention.self.key.bias': tensor([-3.6815,  0.5397, -3.6688,  ...,  3.1355, -1.7297, -1.4660]),\n",
       " 'model.bert.encoder.layer.13.attention.self.value.weight': tensor([[-0.0707,  0.1650,  0.0474,  ...,  0.0377,  0.0528,  0.0370],\n",
       "         [ 0.0703,  0.0552,  0.1374,  ...,  0.1028,  0.0922,  0.0206],\n",
       "         [-0.1031,  0.0070, -0.0321,  ...,  0.0039, -0.1998,  0.1060],\n",
       "         ...,\n",
       "         [ 0.1607, -0.1330, -0.0091,  ..., -0.0142, -0.0692, -0.1693],\n",
       "         [-0.1066,  0.2555,  0.1276,  ..., -0.0262,  0.0971,  0.1295],\n",
       "         [-0.0187,  0.0241, -0.1761,  ...,  0.0252, -0.0746, -0.0863]]),\n",
       " 'model.bert.encoder.layer.13.attention.self.value.bias': tensor([-0.0004, -0.0184,  0.0101,  ..., -0.0357, -0.0303,  0.0091]),\n",
       " 'model.bert.encoder.layer.13.attention.output.dense.weight': tensor([[ 0.0633,  0.1741, -0.0645,  ..., -0.2039, -0.0405, -0.0955],\n",
       "         [ 0.1012,  0.1446,  0.1104,  ..., -0.0285,  0.0884,  0.0521],\n",
       "         [ 0.0323, -0.1955, -0.0078,  ...,  0.0177, -0.0824,  0.0406],\n",
       "         ...,\n",
       "         [ 0.0559, -0.0714,  0.0401,  ..., -0.0965,  0.0029, -0.0012],\n",
       "         [-0.0629, -0.0406,  0.0541,  ..., -0.0093,  0.0893,  0.1599],\n",
       "         [ 0.0736,  0.1152,  0.0348,  ...,  0.0130,  0.0639,  0.0903]]),\n",
       " 'model.bert.encoder.layer.13.attention.output.dense.bias': tensor([-0.1016,  0.1501,  0.1882,  ..., -0.0097,  0.0808, -0.0177]),\n",
       " 'model.bert.encoder.layer.13.intermediate.dense.weight': tensor([[ 0.0977, -0.0544, -0.0668,  ..., -0.0008,  0.0661, -0.1320],\n",
       "         [ 0.0855, -0.0472,  0.1989,  ...,  0.0753, -0.1014, -0.0613],\n",
       "         [ 0.0634,  0.0190,  0.0528,  ...,  0.0029, -0.0853,  0.1998],\n",
       "         ...,\n",
       "         [ 0.0647,  0.0701, -0.1841,  ...,  0.1701, -0.0341,  0.0416],\n",
       "         [ 0.1218,  0.1222,  0.1691,  ...,  0.0015, -0.0247,  0.0043],\n",
       "         [ 0.3190, -0.0308,  0.2199,  ..., -0.0400, -0.1699, -0.0297]]),\n",
       " 'model.bert.encoder.layer.13.intermediate.dense.bias': tensor([-0.0902, -0.1197, -0.0840,  ..., -0.0163,  0.0174, -0.0874]),\n",
       " 'model.bert.encoder.layer.13.output.dense.weight': tensor([[-0.0134, -0.0775, -0.1294,  ...,  0.1092, -0.0654,  0.1060],\n",
       "         [-0.2160, -0.0523, -0.0442,  ...,  0.0014, -0.0312,  0.0074],\n",
       "         [-0.1060,  0.0842,  0.1284,  ...,  0.0942, -0.1065,  0.0031],\n",
       "         ...,\n",
       "         [ 0.0329, -0.0615,  0.1138,  ..., -0.1397, -0.0701, -0.0694],\n",
       "         [-0.0045,  0.0867,  0.0667,  ..., -0.0654, -0.0094,  0.0337],\n",
       "         [-0.1991, -0.1254, -0.0035,  ...,  0.0333, -0.0896, -0.0492]]),\n",
       " 'model.bert.encoder.layer.13.output.dense.bias': tensor([-0.1609,  0.0591, -0.0745,  ..., -0.0611,  0.0429, -0.0075]),\n",
       " 'model.bert.encoder.layer.14.pre_attention_ln.weight': tensor([0.8586, 0.8654, 0.8891,  ..., 0.9078, 0.9177, 0.9691]),\n",
       " 'model.bert.encoder.layer.14.pre_attention_ln.bias': tensor([ 0.0395, -0.0476,  0.0097,  ...,  0.0056, -0.0248, -0.0099]),\n",
       " 'model.bert.encoder.layer.14.post_attention_ln.weight': tensor([1.0426, 0.9197, 0.8998,  ..., 0.8910, 0.9803, 0.9513]),\n",
       " 'model.bert.encoder.layer.14.post_attention_ln.bias': tensor([-0.1448,  0.0696, -0.0605,  ..., -0.0231,  0.0438,  0.0278]),\n",
       " 'model.bert.encoder.layer.14.attention.self.query.weight': tensor([[-0.0693,  0.2241, -0.0559,  ...,  0.0085, -0.1803, -0.1319],\n",
       "         [ 0.1460,  0.0035,  0.0176,  ...,  0.0719, -0.0538,  0.0452],\n",
       "         [-0.1616, -0.0735,  0.1236,  ..., -0.0143, -0.0385,  0.0678],\n",
       "         ...,\n",
       "         [-0.0021, -0.0091,  0.0122,  ...,  0.0801,  0.0302,  0.0418],\n",
       "         [ 0.1764,  0.1538,  0.0277,  ..., -0.0566, -0.0866, -0.0713],\n",
       "         [ 0.0758, -0.1101,  0.0807,  ..., -0.0470,  0.0279,  0.0454]]),\n",
       " 'model.bert.encoder.layer.14.attention.self.query.bias': tensor([ 0.0060, -0.1143, -0.1751,  ...,  0.1203, -0.0047, -0.0212]),\n",
       " 'model.bert.encoder.layer.14.attention.self.key.weight': tensor([[ 0.1015, -0.0588, -0.0223,  ...,  0.1375, -0.0357, -0.1204],\n",
       "         [ 0.1741, -0.1254,  0.1417,  ..., -0.0883,  0.0775,  0.0597],\n",
       "         [ 0.0107, -0.0367, -0.1958,  ..., -0.0713, -0.1647, -0.0422],\n",
       "         ...,\n",
       "         [ 0.1378,  0.0063,  0.0691,  ...,  0.0028,  0.0205, -0.0896],\n",
       "         [ 0.1539,  0.0096,  0.0187,  ..., -0.1769, -0.0187, -0.2270],\n",
       "         [ 0.2025,  0.0250, -0.0097,  ...,  0.0222, -0.0840, -0.0774]]),\n",
       " 'model.bert.encoder.layer.14.attention.self.key.bias': tensor([-0.7638, -0.3483,  0.0561,  ..., -0.3098, -0.2340, -0.1439]),\n",
       " 'model.bert.encoder.layer.14.attention.self.value.weight': tensor([[ 0.0107, -0.0113,  0.0242,  ..., -0.1195,  0.0919,  0.0321],\n",
       "         [ 0.1140, -0.0472,  0.0122,  ...,  0.0182, -0.0569, -0.1398],\n",
       "         [-0.1949, -0.0136,  0.0838,  ...,  0.0483,  0.1225,  0.2804],\n",
       "         ...,\n",
       "         [-0.0538, -0.0197, -0.1091,  ..., -0.1316,  0.0899,  0.0402],\n",
       "         [ 0.0944,  0.2354, -0.0144,  ...,  0.1441,  0.0296, -0.0504],\n",
       "         [ 0.1046,  0.1255,  0.2269,  ..., -0.1181, -0.1531,  0.0507]]),\n",
       " 'model.bert.encoder.layer.14.attention.self.value.bias': tensor([-0.0399, -0.0128,  0.0057,  ..., -0.0029,  0.0090, -0.0223]),\n",
       " 'model.bert.encoder.layer.14.attention.output.dense.weight': tensor([[ 0.0343,  0.0062,  0.1249,  ...,  0.0782, -0.2055,  0.0022],\n",
       "         [-0.0621, -0.0575, -0.0080,  ..., -0.1298,  0.0583, -0.0434],\n",
       "         [-0.0039, -0.0467, -0.0048,  ..., -0.0325,  0.0025,  0.1087],\n",
       "         ...,\n",
       "         [ 0.0368,  0.0159, -0.1167,  ...,  0.0229, -0.1180,  0.0568],\n",
       "         [-0.0438,  0.0725, -0.3067,  ..., -0.0136, -0.0055, -0.1418],\n",
       "         [-0.0028, -0.0287,  0.0754,  ...,  0.0870, -0.1553, -0.0842]]),\n",
       " 'model.bert.encoder.layer.14.attention.output.dense.bias': tensor([-0.1012,  0.2324,  0.1146,  ..., -0.1385,  0.0489, -0.0716]),\n",
       " 'model.bert.encoder.layer.14.intermediate.dense.weight': tensor([[ 0.0785, -0.2940,  0.1177,  ...,  0.0907, -0.0735,  0.1565],\n",
       "         [-0.0329, -0.0293,  0.0821,  ...,  0.0145,  0.0086, -0.0693],\n",
       "         [ 0.1819, -0.2040,  0.0341,  ..., -0.0905, -0.0806,  0.0120],\n",
       "         ...,\n",
       "         [ 0.1322,  0.0342, -0.0471,  ...,  0.1141,  0.1074, -0.1470],\n",
       "         [ 0.0598,  0.0381, -0.1406,  ...,  0.2105,  0.1167, -0.0329],\n",
       "         [ 0.1439,  0.2103, -0.0251,  ...,  0.0487,  0.0700,  0.0702]]),\n",
       " 'model.bert.encoder.layer.14.intermediate.dense.bias': tensor([-0.0616, -0.0359, -0.0804,  ..., -0.0266, -0.0905, -0.0105]),\n",
       " 'model.bert.encoder.layer.14.output.dense.weight': tensor([[ 0.1384,  0.1124,  0.1543,  ...,  0.0310,  0.1782,  0.0586],\n",
       "         [ 0.3080,  0.0277, -0.0754,  ..., -0.0270,  0.3044, -0.1316],\n",
       "         [ 0.1900, -0.0733, -0.0662,  ...,  0.0038, -0.0125,  0.0238],\n",
       "         ...,\n",
       "         [-0.0418,  0.0730, -0.0203,  ...,  0.1287,  0.2174, -0.1071],\n",
       "         [ 0.1536,  0.1057, -0.0939,  ...,  0.0080, -0.0221,  0.0705],\n",
       "         [-0.2413,  0.0568,  0.0551,  ...,  0.0931,  0.1136, -0.0126]]),\n",
       " 'model.bert.encoder.layer.14.output.dense.bias': tensor([-0.1690,  0.0557, -0.1016,  ..., -0.0185,  0.1808,  0.1204]),\n",
       " 'model.bert.encoder.layer.15.pre_attention_ln.weight': tensor([0.8321, 0.8823, 0.8532,  ..., 0.9156, 0.8988, 0.9313]),\n",
       " 'model.bert.encoder.layer.15.pre_attention_ln.bias': tensor([ 4.0149e-02, -4.6244e-02,  8.7725e-03,  ...,  9.2416e-07,\n",
       "         -2.3694e-02, -1.0816e-02]),\n",
       " 'model.bert.encoder.layer.15.post_attention_ln.weight': tensor([1.0391, 0.9099, 0.9175,  ..., 0.8945, 0.9792, 0.9594]),\n",
       " 'model.bert.encoder.layer.15.post_attention_ln.bias': tensor([-0.1633,  0.0510, -0.0480,  ..., -0.0160,  0.0861,  0.0211]),\n",
       " 'model.bert.encoder.layer.15.attention.self.query.weight': tensor([[ 0.0851,  0.1692,  0.0767,  ..., -0.0482,  0.0055, -0.0968],\n",
       "         [-0.0340,  0.0557, -0.0782,  ..., -0.0254, -0.0444, -0.0909],\n",
       "         [ 0.1210, -0.2367, -0.0766,  ..., -0.0925,  0.1976,  0.1170],\n",
       "         ...,\n",
       "         [ 0.1756, -0.0014,  0.1056,  ...,  0.1477,  0.1040, -0.0674],\n",
       "         [ 0.2122, -0.0256, -0.0220,  ...,  0.2298, -0.0389, -0.0310],\n",
       "         [ 0.0304, -0.0590,  0.0736,  ..., -0.1472, -0.1027,  0.0401]]),\n",
       " 'model.bert.encoder.layer.15.attention.self.query.bias': tensor([ 0.0175, -0.2908,  0.1528,  ..., -0.1336, -0.0505,  0.1853]),\n",
       " 'model.bert.encoder.layer.15.attention.self.key.weight': tensor([[ 0.0151,  0.0009,  0.0117,  ..., -0.1319,  0.0457,  0.0271],\n",
       "         [-0.1504,  0.0915,  0.0489,  ..., -0.0170, -0.0353, -0.0691],\n",
       "         [ 0.0055, -0.1014,  0.0084,  ..., -0.0136,  0.0953,  0.0994],\n",
       "         ...,\n",
       "         [ 0.0944,  0.1105, -0.0309,  ..., -0.0931,  0.0359, -0.0397],\n",
       "         [ 0.0431,  0.0467,  0.0222,  ...,  0.1407, -0.2144,  0.1549],\n",
       "         [-0.1240, -0.0325, -0.0942,  ..., -0.2648,  0.0502,  0.0698]]),\n",
       " 'model.bert.encoder.layer.15.attention.self.key.bias': tensor([-1.3579,  7.9965, -1.5125,  ...,  0.4817,  0.6754, -0.6126]),\n",
       " 'model.bert.encoder.layer.15.attention.self.value.weight': tensor([[ 0.0725,  0.0412, -0.1069,  ...,  0.0468, -0.2403,  0.0172],\n",
       "         [ 0.2060, -0.1077,  0.1385,  ..., -0.0685, -0.1210, -0.0596],\n",
       "         [ 0.0920, -0.0265, -0.0653,  ...,  0.0835,  0.0181, -0.1092],\n",
       "         ...,\n",
       "         [-0.0197,  0.0628,  0.2879,  ..., -0.0296, -0.0302, -0.0058],\n",
       "         [-0.1079,  0.2359, -0.1775,  ..., -0.0227, -0.0384, -0.1305],\n",
       "         [-0.0221,  0.1612, -0.0220,  ...,  0.0115, -0.0545,  0.1529]]),\n",
       " 'model.bert.encoder.layer.15.attention.self.value.bias': tensor([ 0.0166, -0.0030, -0.0027,  ...,  0.0322,  0.0378, -0.0470]),\n",
       " 'model.bert.encoder.layer.15.attention.output.dense.weight': tensor([[-0.0811,  0.0269,  0.0163,  ...,  0.0556, -0.0546,  0.1045],\n",
       "         [-0.0099,  0.1703,  0.0039,  ...,  0.0184,  0.0093, -0.1094],\n",
       "         [-0.0534,  0.1229, -0.0134,  ..., -0.1882, -0.0775,  0.0193],\n",
       "         ...,\n",
       "         [ 0.1269, -0.1176,  0.0599,  ...,  0.1630,  0.0977, -0.0285],\n",
       "         [ 0.1199,  0.0288,  0.0286,  ...,  0.0545, -0.0087, -0.1066],\n",
       "         [-0.0992, -0.0752,  0.0876,  ..., -0.0053, -0.2189, -0.0274]]),\n",
       " 'model.bert.encoder.layer.15.attention.output.dense.bias': tensor([-0.1165,  0.2721,  0.1146,  ..., -0.0392, -0.0481, -0.1193]),\n",
       " 'model.bert.encoder.layer.15.intermediate.dense.weight': tensor([[ 0.1164, -0.0200,  0.1184,  ..., -0.0950, -0.1114, -0.0857],\n",
       "         [-0.0235,  0.0116, -0.0948,  ...,  0.0716, -0.1524,  0.0272],\n",
       "         [ 0.1593, -0.1016, -0.0357,  ..., -0.0233, -0.0468, -0.1470],\n",
       "         ...,\n",
       "         [ 0.1101,  0.0773,  0.0063,  ...,  0.1215, -0.1351, -0.2060],\n",
       "         [ 0.0348, -0.0166,  0.1117,  ..., -0.1433, -0.0550,  0.0226],\n",
       "         [ 0.1020, -0.0493,  0.1388,  ...,  0.1546, -0.0472, -0.0192]]),\n",
       " 'model.bert.encoder.layer.15.intermediate.dense.bias': tensor([-0.0863, -0.0937, -0.0946,  ..., -0.0456,  0.0748, -0.0044]),\n",
       " 'model.bert.encoder.layer.15.output.dense.weight': tensor([[ 0.0041,  0.0073, -0.1013,  ..., -0.1552,  0.0112, -0.0756],\n",
       "         [ 0.0243,  0.0189,  0.0959,  ..., -0.1031, -0.0050, -0.1240],\n",
       "         [-0.0348,  0.0978, -0.0979,  ...,  0.1275, -0.1252, -0.0287],\n",
       "         ...,\n",
       "         [ 0.0484,  0.0678, -0.0225,  ...,  0.0398,  0.2256, -0.1213],\n",
       "         [-0.1007, -0.1183,  0.2650,  ...,  0.0743, -0.0262,  0.1421],\n",
       "         [ 0.0192,  0.1467,  0.0662,  ...,  0.0650, -0.0098,  0.1323]]),\n",
       " 'model.bert.encoder.layer.15.output.dense.bias': tensor([-0.1670,  0.0664, -0.0565,  ..., -0.1340,  0.0859,  0.0037]),\n",
       " 'model.bert.encoder.layer.16.pre_attention_ln.weight': tensor([0.9119, 0.9411, 0.9602,  ..., 0.9973, 0.9904, 1.0130]),\n",
       " 'model.bert.encoder.layer.16.pre_attention_ln.bias': tensor([ 0.0374, -0.0476,  0.0030,  ...,  0.0037, -0.0293, -0.0114]),\n",
       " 'model.bert.encoder.layer.16.post_attention_ln.weight': tensor([1.0174, 0.9841, 0.9345,  ..., 0.9307, 1.0161, 1.0055]),\n",
       " 'model.bert.encoder.layer.16.post_attention_ln.bias': tensor([-0.1130,  0.1093, -0.0383,  ..., -0.0133,  0.0817,  0.0014]),\n",
       " 'model.bert.encoder.layer.16.attention.self.query.weight': tensor([[ 0.0029,  0.0500,  0.2076,  ..., -0.0959,  0.2065,  0.1442],\n",
       "         [ 0.1054,  0.0107, -0.1997,  ...,  0.0008,  0.0389, -0.1509],\n",
       "         [-0.2629,  0.1308, -0.0484,  ...,  0.0195, -0.1038,  0.0999],\n",
       "         ...,\n",
       "         [ 0.1676,  0.1247, -0.1249,  ...,  0.0793, -0.1134,  0.0549],\n",
       "         [-0.1261, -0.1860, -0.0122,  ..., -0.1729, -0.0401,  0.0133],\n",
       "         [ 0.0409,  0.0207,  0.0347,  ..., -0.0471, -0.0450,  0.0970]]),\n",
       " 'model.bert.encoder.layer.16.attention.self.query.bias': tensor([-0.2736, -0.0009, -0.0840,  ...,  0.0958,  0.0146,  0.1059]),\n",
       " 'model.bert.encoder.layer.16.attention.self.key.weight': tensor([[-0.1918,  0.0450,  0.0770,  ..., -0.0307,  0.1978,  0.0386],\n",
       "         [-0.0222,  0.0566, -0.1572,  ..., -0.0216, -0.0563, -0.1394],\n",
       "         [-0.1424,  0.0775, -0.0335,  ..., -0.0115,  0.0090,  0.1376],\n",
       "         ...,\n",
       "         [-0.0093, -0.0392, -0.0591,  ...,  0.0979, -0.0966,  0.1467],\n",
       "         [-0.3273,  0.0521,  0.0217,  ..., -0.0606,  0.1542, -0.0452],\n",
       "         [ 0.0194, -0.0274, -0.2024,  ..., -0.0449,  0.1177, -0.0202]]),\n",
       " 'model.bert.encoder.layer.16.attention.self.key.bias': tensor([-4.5236, -0.8743, -0.4762,  ..., -0.1451,  0.1551, -0.4072]),\n",
       " 'model.bert.encoder.layer.16.attention.self.value.weight': tensor([[ 0.0066,  0.1588,  0.1545,  ...,  0.0816, -0.0459,  0.0049],\n",
       "         [-0.0004, -0.0646,  0.1448,  ...,  0.1020, -0.1759, -0.0137],\n",
       "         [-0.0483,  0.1010, -0.1173,  ..., -0.2261,  0.1121, -0.1963],\n",
       "         ...,\n",
       "         [-0.0924,  0.0949,  0.1450,  ..., -0.0300,  0.0943,  0.0128],\n",
       "         [-0.0891,  0.1530, -0.1562,  ...,  0.0165,  0.0983,  0.0555],\n",
       "         [-0.0807, -0.0367, -0.0342,  ...,  0.0199, -0.1743,  0.0235]]),\n",
       " 'model.bert.encoder.layer.16.attention.self.value.bias': tensor([ 0.0184,  0.0129,  0.0112,  ..., -0.0246,  0.0108,  0.0167]),\n",
       " 'model.bert.encoder.layer.16.attention.output.dense.weight': tensor([[-0.0677, -0.0187, -0.1382,  ...,  0.2224,  0.0921,  0.2083],\n",
       "         [ 0.2232,  0.0865,  0.0407,  ..., -0.1549, -0.1217, -0.0538],\n",
       "         [ 0.1386,  0.0190, -0.0305,  ..., -0.1239,  0.1392,  0.1066],\n",
       "         ...,\n",
       "         [ 0.1749,  0.0293, -0.0863,  ...,  0.0273, -0.2320, -0.0362],\n",
       "         [-0.0172, -0.0157,  0.0195,  ...,  0.0089, -0.0786, -0.1040],\n",
       "         [ 0.0604, -0.1487, -0.2836,  ..., -0.0490,  0.0057, -0.0055]]),\n",
       " 'model.bert.encoder.layer.16.attention.output.dense.bias': tensor([-0.0011,  0.1938,  0.0967,  ..., -0.0081,  0.0330, -0.0623]),\n",
       " 'model.bert.encoder.layer.16.intermediate.dense.weight': tensor([[ 0.1191, -0.1167,  0.1181,  ...,  0.0171,  0.0564,  0.1427],\n",
       "         [ 0.1096, -0.0646, -0.0812,  ...,  0.0328,  0.0021,  0.0594],\n",
       "         [ 0.0405, -0.1326, -0.0394,  ...,  0.0109,  0.0183, -0.0572],\n",
       "         ...,\n",
       "         [ 0.0333, -0.0614,  0.0692,  ...,  0.1024,  0.0461,  0.0201],\n",
       "         [ 0.1870,  0.1129, -0.1483,  ...,  0.0615, -0.0263,  0.0062],\n",
       "         [ 0.0140, -0.0547, -0.2267,  ...,  0.1319, -0.0897, -0.0156]]),\n",
       " 'model.bert.encoder.layer.16.intermediate.dense.bias': tensor([-0.0892, -0.0714, -0.0048,  ..., -0.0621, -0.0406, -0.1063]),\n",
       " 'model.bert.encoder.layer.16.output.dense.weight': tensor([[-0.0981, -0.0973, -0.0759,  ...,  0.0394,  0.1147, -0.2120],\n",
       "         [-0.0261, -0.0902, -0.0061,  ...,  0.0386, -0.0927,  0.0107],\n",
       "         [-0.2998,  0.0049,  0.1262,  ..., -0.0327,  0.0441,  0.1207],\n",
       "         ...,\n",
       "         [-0.2312, -0.0409,  0.0156,  ...,  0.0065, -0.1331, -0.1583],\n",
       "         [-0.0496, -0.0133, -0.0617,  ..., -0.0984, -0.1879,  0.0962],\n",
       "         [ 0.0366,  0.1567,  0.0857,  ..., -0.0094, -0.0587,  0.0544]]),\n",
       " 'model.bert.encoder.layer.16.output.dense.bias': tensor([-0.1038,  0.0909,  0.0435,  ..., -0.0814,  0.1327,  0.1007]),\n",
       " 'model.bert.encoder.layer.17.pre_attention_ln.weight': tensor([0.8842, 0.9118, 0.9334,  ..., 0.9469, 0.9762, 0.9958]),\n",
       " 'model.bert.encoder.layer.17.pre_attention_ln.bias': tensor([ 0.0443, -0.0647,  0.0047,  ...,  0.0065, -0.0250, -0.0079]),\n",
       " 'model.bert.encoder.layer.17.post_attention_ln.weight': tensor([1.0831, 0.9526, 0.9574,  ..., 0.9461, 1.0274, 1.0437]),\n",
       " 'model.bert.encoder.layer.17.post_attention_ln.bias': tensor([-0.1235,  0.0496, -0.0516,  ..., -0.0197,  0.0746,  0.0046]),\n",
       " 'model.bert.encoder.layer.17.attention.self.query.weight': tensor([[-0.0360, -0.1460, -0.0407,  ..., -0.0046, -0.0342, -0.0537],\n",
       "         [-0.0866,  0.3488, -0.2468,  ..., -0.0918,  0.0158, -0.1000],\n",
       "         [ 0.0295, -0.0313, -0.0098,  ...,  0.0206,  0.1786,  0.1191],\n",
       "         ...,\n",
       "         [-0.0781,  0.0051,  0.0693,  ...,  0.1854, -0.0182, -0.1813],\n",
       "         [ 0.0717,  0.0251, -0.1037,  ..., -0.0444, -0.0227, -0.0740],\n",
       "         [ 0.0886, -0.0006, -0.1355,  ..., -0.0779,  0.2908,  0.0395]]),\n",
       " 'model.bert.encoder.layer.17.attention.self.query.bias': tensor([ 0.0409, -0.0501, -0.1045,  ...,  0.1132,  0.1695, -0.0838]),\n",
       " 'model.bert.encoder.layer.17.attention.self.key.weight': tensor([[-0.1308,  0.0421, -0.0456,  ..., -0.0987, -0.1293, -0.1438],\n",
       "         [-0.1248, -0.0605, -0.2501,  ...,  0.0319,  0.2053,  0.0208],\n",
       "         [ 0.1330, -0.0593, -0.0073,  ..., -0.0818,  0.1396, -0.0910],\n",
       "         ...,\n",
       "         [-0.1639,  0.0294,  0.0257,  ...,  0.0236, -0.0400, -0.2098],\n",
       "         [-0.0275,  0.2039,  0.1433,  ..., -0.0156, -0.1212,  0.1651],\n",
       "         [ 0.0122,  0.0173, -0.1421,  ..., -0.0675,  0.3125,  0.0631]]),\n",
       " 'model.bert.encoder.layer.17.attention.self.key.bias': tensor([-0.0024,  0.1377,  0.1118,  ..., -0.0501, -1.4617,  0.2796]),\n",
       " 'model.bert.encoder.layer.17.attention.self.value.weight': tensor([[ 0.0218, -0.0157, -0.1504,  ..., -0.0584,  0.0889, -0.0392],\n",
       "         [-0.0068,  0.0486, -0.0876,  ..., -0.0079, -0.1139,  0.0159],\n",
       "         [-0.0032, -0.2223,  0.0149,  ..., -0.1365, -0.1740, -0.0980],\n",
       "         ...,\n",
       "         [ 0.0847, -0.0688,  0.0126,  ...,  0.2354,  0.1119,  0.1343],\n",
       "         [ 0.0318,  0.0220, -0.1158,  ..., -0.0420, -0.0746, -0.1940],\n",
       "         [-0.1050, -0.0768, -0.1986,  ...,  0.1942,  0.1866, -0.0081]]),\n",
       " 'model.bert.encoder.layer.17.attention.self.value.bias': tensor([ 0.0023, -0.0111, -0.0390,  ..., -0.0042, -0.0105,  0.0061]),\n",
       " 'model.bert.encoder.layer.17.attention.output.dense.weight': tensor([[ 0.1134, -0.0163, -0.1080,  ...,  0.1440, -0.0524,  0.0932],\n",
       "         [-0.1251,  0.0937,  0.1119,  ..., -0.0333, -0.0197,  0.0135],\n",
       "         [-0.0565,  0.1604, -0.0103,  ...,  0.2115,  0.0844,  0.1197],\n",
       "         ...,\n",
       "         [ 0.0794,  0.0208,  0.2022,  ...,  0.0203, -0.1033, -0.0294],\n",
       "         [ 0.1264,  0.0473,  0.0926,  ...,  0.0289, -0.2210,  0.0255],\n",
       "         [ 0.0285,  0.0070,  0.1945,  ..., -0.0609,  0.0526, -0.0208]]),\n",
       " 'model.bert.encoder.layer.17.attention.output.dense.bias': tensor([-0.0426,  0.1396,  0.0122,  ..., -0.0034,  0.0635, -0.0869]),\n",
       " 'model.bert.encoder.layer.17.intermediate.dense.weight': tensor([[ 0.0601, -0.0740,  0.1437,  ..., -0.0076, -0.1572,  0.0632],\n",
       "         [ 0.1905, -0.1209, -0.1311,  ..., -0.0096, -0.0715,  0.1271],\n",
       "         [-0.0401, -0.0986, -0.0356,  ...,  0.1947, -0.1740, -0.0280],\n",
       "         ...,\n",
       "         [ 0.0216, -0.0521,  0.0796,  ...,  0.0869, -0.0454, -0.0077],\n",
       "         [-0.0114, -0.0222, -0.1031,  ...,  0.0917, -0.1405,  0.0294],\n",
       "         [ 0.2130,  0.1406,  0.1319,  ...,  0.0607,  0.0988,  0.0571]]),\n",
       " 'model.bert.encoder.layer.17.intermediate.dense.bias': tensor([-0.0559, -0.0500, -0.0869,  ..., -0.0799, -0.0252, -0.0619]),\n",
       " 'model.bert.encoder.layer.17.output.dense.weight': tensor([[-0.0021, -0.1021, -0.0140,  ..., -0.1568, -0.0940,  0.0394],\n",
       "         [ 0.0224,  0.1130,  0.1767,  ..., -0.1660,  0.1805, -0.0944],\n",
       "         [-0.2370, -0.0954,  0.1080,  ..., -0.0981, -0.0189, -0.0128],\n",
       "         ...,\n",
       "         [ 0.0136,  0.0732, -0.1087,  ...,  0.0803, -0.1904, -0.0474],\n",
       "         [-0.0870,  0.0896,  0.0942,  ...,  0.0753,  0.0627, -0.0511],\n",
       "         [ 0.0147, -0.0479,  0.0627,  ...,  0.0424, -0.0466, -0.0214]]),\n",
       " 'model.bert.encoder.layer.17.output.dense.bias': tensor([-0.0873,  0.1396,  0.0021,  ..., -0.0972,  0.1171,  0.0471]),\n",
       " 'model.bert.encoder.layer.18.pre_attention_ln.weight': tensor([0.9258, 0.9502, 0.9386,  ..., 0.9645, 0.9887, 0.9999]),\n",
       " 'model.bert.encoder.layer.18.pre_attention_ln.bias': tensor([ 0.0464, -0.0651,  0.0028,  ...,  0.0015, -0.0246, -0.0146]),\n",
       " 'model.bert.encoder.layer.18.post_attention_ln.weight': tensor([1.0205, 0.9975, 0.9411,  ..., 0.9320, 1.0141, 1.0284]),\n",
       " 'model.bert.encoder.layer.18.post_attention_ln.bias': tensor([-0.0863,  0.0709, -0.0310,  ...,  0.0127,  0.0565, -0.0049]),\n",
       " 'model.bert.encoder.layer.18.attention.self.query.weight': tensor([[ 0.0310, -0.0779, -0.0352,  ...,  0.0371, -0.0523, -0.1340],\n",
       "         [ 0.0012, -0.0035, -0.0479,  ...,  0.0548, -0.0641,  0.0086],\n",
       "         [-0.1419, -0.0306,  0.0009,  ..., -0.0045,  0.1187, -0.0744],\n",
       "         ...,\n",
       "         [ 0.0809,  0.1140, -0.2442,  ...,  0.0551, -0.0509,  0.1998],\n",
       "         [ 0.1183,  0.0650, -0.0190,  ..., -0.1669, -0.1469,  0.1058],\n",
       "         [-0.0318,  0.0959,  0.0694,  ...,  0.0744,  0.0289,  0.1352]]),\n",
       " 'model.bert.encoder.layer.18.attention.self.query.bias': tensor([ 0.2263,  0.1948,  0.3219,  ..., -0.0465, -0.0160, -0.0200]),\n",
       " 'model.bert.encoder.layer.18.attention.self.key.weight': tensor([[-0.0753,  0.0309,  0.0148,  ...,  0.0275, -0.0625, -0.2576],\n",
       "         [ 0.0259, -0.0438, -0.0591,  ...,  0.0123,  0.0401, -0.0646],\n",
       "         [-0.0322,  0.0450,  0.1566,  ..., -0.0004, -0.0176, -0.2571],\n",
       "         ...,\n",
       "         [-0.0452, -0.0533, -0.0531,  ...,  0.2321, -0.2226, -0.0136],\n",
       "         [ 0.0493, -0.0211, -0.0832,  ..., -0.2363,  0.0130,  0.1021],\n",
       "         [-0.2447, -0.0301,  0.0035,  ...,  0.0134,  0.1027,  0.0799]]),\n",
       " 'model.bert.encoder.layer.18.attention.self.key.bias': tensor([8.3722e-01, 2.7167e+00, 1.7189e+00,  ..., 6.6905e-01, 3.5783e-01,\n",
       "         1.7904e-03]),\n",
       " 'model.bert.encoder.layer.18.attention.self.value.weight': tensor([[ 0.3113,  0.2134,  0.0081,  ...,  0.0109, -0.0280, -0.2294],\n",
       "         [ 0.1399,  0.1444,  0.1256,  ...,  0.0299, -0.2062, -0.0864],\n",
       "         [ 0.0817,  0.0243, -0.0379,  ...,  0.1685, -0.0769, -0.1682],\n",
       "         ...,\n",
       "         [ 0.1547,  0.0346,  0.0196,  ..., -0.0265, -0.0377, -0.1769],\n",
       "         [ 0.0243, -0.0695,  0.0994,  ..., -0.1511, -0.0389,  0.0800],\n",
       "         [ 0.1547,  0.0127, -0.2160,  ..., -0.0354,  0.1194, -0.1357]]),\n",
       " 'model.bert.encoder.layer.18.attention.self.value.bias': tensor([-0.0078, -0.0196,  0.0208,  ..., -0.0178, -0.0114, -0.0347]),\n",
       " 'model.bert.encoder.layer.18.attention.output.dense.weight': tensor([[ 0.1558,  0.0042, -0.1177,  ...,  0.0751,  0.0220, -0.0324],\n",
       "         [ 0.2265,  0.0445, -0.2203,  ..., -0.0582, -0.1743,  0.0144],\n",
       "         [-0.1002,  0.0277, -0.1724,  ...,  0.0067, -0.2346, -0.0182],\n",
       "         ...,\n",
       "         [-0.0595, -0.0922,  0.1356,  ..., -0.0409, -0.0542,  0.0543],\n",
       "         [-0.0277,  0.0638, -0.1285,  ...,  0.0163, -0.0570, -0.1055],\n",
       "         [-0.0111, -0.0647,  0.0355,  ...,  0.0864,  0.0906,  0.1019]]),\n",
       " 'model.bert.encoder.layer.18.attention.output.dense.bias': tensor([-0.0271,  0.1389,  0.1743,  ..., -0.0165,  0.0525, -0.0627]),\n",
       " 'model.bert.encoder.layer.18.intermediate.dense.weight': tensor([[ 0.0105, -0.1214,  0.0308,  ..., -0.0294,  0.0325,  0.0439],\n",
       "         [ 0.0947, -0.1191,  0.1357,  ...,  0.1673, -0.0043,  0.0434],\n",
       "         [ 0.0448, -0.0937,  0.1270,  ..., -0.0989, -0.1403, -0.0258],\n",
       "         ...,\n",
       "         [-0.0349, -0.2065, -0.0460,  ...,  0.1417, -0.0834, -0.0319],\n",
       "         [ 0.1176,  0.0196, -0.1495,  ..., -0.0910,  0.0083,  0.0055],\n",
       "         [-0.0170, -0.1171,  0.0962,  ...,  0.0511, -0.0429, -0.0582]]),\n",
       " 'model.bert.encoder.layer.18.intermediate.dense.bias': tensor([-0.0753, -0.1310, -0.0154,  ..., -0.0612,  0.0445, -0.0197]),\n",
       " 'model.bert.encoder.layer.18.output.dense.weight': tensor([[-0.1160, -0.0410,  0.0811,  ..., -0.0712,  0.0033,  0.0464],\n",
       "         [ 0.1045,  0.1970,  0.1048,  ...,  0.0451, -0.1182, -0.0263],\n",
       "         [-0.2994,  0.0536, -0.0563,  ...,  0.0928,  0.2251, -0.1077],\n",
       "         ...,\n",
       "         [ 0.0562,  0.0543,  0.1313,  ...,  0.1363,  0.0421, -0.0723],\n",
       "         [-0.0148, -0.1793,  0.0252,  ...,  0.0402, -0.1364,  0.0153],\n",
       "         [ 0.0940, -0.2147,  0.0618,  ...,  0.2170, -0.1821, -0.0499]]),\n",
       " 'model.bert.encoder.layer.18.output.dense.bias': tensor([-0.2466,  0.0944, -0.1031,  ..., -0.0028,  0.1107,  0.0799]),\n",
       " 'model.bert.encoder.layer.19.pre_attention_ln.weight': tensor([0.9222, 0.9607, 0.9559,  ..., 0.9649, 1.0100, 1.0513]),\n",
       " 'model.bert.encoder.layer.19.pre_attention_ln.bias': tensor([ 0.0595, -0.0774, -0.0026,  ...,  0.0163, -0.0215, -0.0093]),\n",
       " 'model.bert.encoder.layer.19.post_attention_ln.weight': tensor([0.9705, 0.9800, 0.9418,  ..., 0.9120, 1.0005, 1.0366]),\n",
       " 'model.bert.encoder.layer.19.post_attention_ln.bias': tensor([-0.0173,  0.0889, -0.0629,  ...,  0.0001,  0.0272, -0.0202]),\n",
       " 'model.bert.encoder.layer.19.attention.self.query.weight': tensor([[ 0.1296, -0.1457, -0.1811,  ..., -0.0680, -0.1391,  0.0493],\n",
       "         [ 0.2067, -0.0231, -0.0188,  ...,  0.0930, -0.0957, -0.0753],\n",
       "         [ 0.1653,  0.0030, -0.0096,  ..., -0.1372,  0.0925,  0.0247],\n",
       "         ...,\n",
       "         [-0.1045, -0.0491,  0.0837,  ...,  0.0992,  0.0205, -0.0191],\n",
       "         [-0.0018,  0.0162, -0.1774,  ...,  0.0747, -0.1377, -0.0185],\n",
       "         [-0.2395, -0.1130,  0.1708,  ..., -0.1186,  0.0615,  0.1327]]),\n",
       " 'model.bert.encoder.layer.19.attention.self.query.bias': tensor([ 0.1516,  0.0819,  0.0241,  ...,  0.0433, -0.0884, -0.0151]),\n",
       " 'model.bert.encoder.layer.19.attention.self.key.weight': tensor([[-0.0235,  0.0049, -0.1676,  ...,  0.1123, -0.1197,  0.1305],\n",
       "         [ 0.0204,  0.0206,  0.0869,  ..., -0.0395,  0.0174, -0.0591],\n",
       "         [ 0.1564,  0.0015,  0.0534,  ...,  0.0650,  0.0422, -0.1409],\n",
       "         ...,\n",
       "         [-0.0168,  0.1096, -0.0270,  ..., -0.1257,  0.0871, -0.1046],\n",
       "         [ 0.0325,  0.0740, -0.1429,  ...,  0.0502, -0.0341, -0.2555],\n",
       "         [-0.0456, -0.0446,  0.0471,  ..., -0.0559, -0.0061, -0.0852]]),\n",
       " 'model.bert.encoder.layer.19.attention.self.key.bias': tensor([-5.8558, 14.0717,  1.6930,  ...,  0.6271, -0.4204, -0.1756]),\n",
       " 'model.bert.encoder.layer.19.attention.self.value.weight': tensor([[ 0.0927, -0.1787,  0.0924,  ..., -0.0874,  0.1762, -0.0123],\n",
       "         [ 0.0030,  0.0032,  0.0278,  ...,  0.0343, -0.1458,  0.0525],\n",
       "         [-0.2039,  0.0106,  0.0660,  ..., -0.0542, -0.1410,  0.1535],\n",
       "         ...,\n",
       "         [-0.0023, -0.1544,  0.0792,  ..., -0.0751,  0.1240,  0.1044],\n",
       "         [-0.2390,  0.1287,  0.1271,  ...,  0.0667,  0.1291,  0.0634],\n",
       "         [-0.1050,  0.0193, -0.1307,  ...,  0.0209, -0.1912,  0.1186]]),\n",
       " 'model.bert.encoder.layer.19.attention.self.value.bias': tensor([ 0.0026,  0.0007, -0.0149,  ...,  0.0126,  0.0080,  0.0023]),\n",
       " 'model.bert.encoder.layer.19.attention.output.dense.weight': tensor([[-0.0303,  0.0291,  0.1839,  ...,  0.1199,  0.2144,  0.0015],\n",
       "         [ 0.0923, -0.1332, -0.1495,  ...,  0.1309, -0.2300, -0.0670],\n",
       "         [-0.0100, -0.0692, -0.1686,  ...,  0.0028, -0.1281,  0.1880],\n",
       "         ...,\n",
       "         [-0.0161, -0.2129,  0.0112,  ..., -0.0290,  0.0446,  0.0942],\n",
       "         [-0.0218,  0.0124,  0.0351,  ..., -0.1810, -0.0916,  0.1753],\n",
       "         [ 0.0108, -0.1831, -0.1358,  ..., -0.0644, -0.1280, -0.0825]]),\n",
       " 'model.bert.encoder.layer.19.attention.output.dense.bias': tensor([-0.1362,  0.1693,  0.0874,  ..., -0.0554,  0.0113,  0.0606]),\n",
       " 'model.bert.encoder.layer.19.intermediate.dense.weight': tensor([[ 0.0696,  0.1456, -0.0191,  ..., -0.0359,  0.0609,  0.0867],\n",
       "         [ 0.1247, -0.0281,  0.0474,  ...,  0.0024,  0.0255,  0.0853],\n",
       "         [ 0.0371, -0.1927,  0.1456,  ..., -0.0733,  0.0835,  0.1014],\n",
       "         ...,\n",
       "         [ 0.1061, -0.1040,  0.1008,  ...,  0.0646, -0.0417, -0.1476],\n",
       "         [ 0.0478, -0.1147,  0.0163,  ...,  0.0383,  0.0962, -0.2046],\n",
       "         [ 0.0983, -0.0235,  0.0798,  ...,  0.0034, -0.1474,  0.0007]]),\n",
       " 'model.bert.encoder.layer.19.intermediate.dense.bias': tensor([-0.1202, -0.0257, -0.1196,  ..., -0.0106, -0.0764, -0.0489]),\n",
       " 'model.bert.encoder.layer.19.output.dense.weight': tensor([[ 0.1744, -0.0241,  0.1611,  ..., -0.0050,  0.0615, -0.1749],\n",
       "         [ 0.0025, -0.0848,  0.1428,  ..., -0.0715, -0.0729, -0.1070],\n",
       "         [ 0.0827,  0.0151, -0.2155,  ..., -0.0477,  0.0400, -0.1112],\n",
       "         ...,\n",
       "         [ 0.0540, -0.0027, -0.1195,  ...,  0.0172,  0.0291,  0.0158],\n",
       "         [-0.0102, -0.0317, -0.1422,  ...,  0.0816,  0.0397, -0.1602],\n",
       "         [-0.0261, -0.0713, -0.0713,  ...,  0.0505,  0.2718, -0.1143]]),\n",
       " 'model.bert.encoder.layer.19.output.dense.bias': tensor([-0.1072,  0.0085, -0.1485,  ...,  0.0113,  0.0805,  0.1625]),\n",
       " 'model.bert.encoder.layer.20.pre_attention_ln.weight': tensor([0.9542, 0.9532, 0.9768,  ..., 0.9796, 1.0282, 1.0441]),\n",
       " 'model.bert.encoder.layer.20.pre_attention_ln.bias': tensor([ 0.0675, -0.0845,  0.0124,  ...,  0.0178, -0.0179,  0.0045]),\n",
       " 'model.bert.encoder.layer.20.post_attention_ln.weight': tensor([0.9136, 0.8873, 0.8749,  ..., 0.8742, 0.9326, 0.9687]),\n",
       " 'model.bert.encoder.layer.20.post_attention_ln.bias': tensor([ 0.0121,  0.0041, -0.0522,  ...,  0.0468,  0.0138, -0.0182]),\n",
       " 'model.bert.encoder.layer.20.attention.self.query.weight': tensor([[ 0.1588, -0.0459, -0.0351,  ..., -0.0210, -0.0505, -0.0436],\n",
       "         [-0.1826, -0.0866,  0.0928,  ...,  0.1794,  0.0473, -0.1753],\n",
       "         [-0.0585,  0.1462, -0.0312,  ..., -0.1244,  0.0190,  0.1130],\n",
       "         ...,\n",
       "         [ 0.0570, -0.0828,  0.0210,  ...,  0.0059,  0.0472,  0.1538],\n",
       "         [-0.1574, -0.0393,  0.0774,  ...,  0.0752, -0.0724,  0.0550],\n",
       "         [ 0.0373,  0.0292,  0.1869,  ...,  0.1230, -0.1303,  0.0610]]),\n",
       " 'model.bert.encoder.layer.20.attention.self.query.bias': tensor([-0.0770,  0.0311,  0.0328,  ..., -0.0282, -0.0783, -0.0670]),\n",
       " 'model.bert.encoder.layer.20.attention.self.key.weight': tensor([[ 0.1128,  0.1020, -0.0547,  ..., -0.0322,  0.0951,  0.0853],\n",
       "         [-0.0682, -0.0408,  0.0007,  ...,  0.1334,  0.0133, -0.0422],\n",
       "         [-0.0240,  0.0269,  0.0981,  ..., -0.0533,  0.0129,  0.0024],\n",
       "         ...,\n",
       "         [-0.0690, -0.1056,  0.0034,  ..., -0.0924,  0.0599,  0.1749],\n",
       "         [-0.0669,  0.0030,  0.0436,  ..., -0.0824,  0.0150, -0.0379],\n",
       "         [ 0.1120, -0.1410,  0.1919,  ...,  0.0406, -0.1202,  0.0602]]),\n",
       " 'model.bert.encoder.layer.20.attention.self.key.bias': tensor([-0.4072,  0.2565, -0.1480,  ..., -0.4832,  0.1389,  0.4233]),\n",
       " 'model.bert.encoder.layer.20.attention.self.value.weight': tensor([[-0.0156, -0.1053, -0.0825,  ...,  0.0575, -0.0340, -0.1101],\n",
       "         [ 0.1972,  0.0494,  0.1378,  ...,  0.0425, -0.2191,  0.0828],\n",
       "         [ 0.1075, -0.0289, -0.0981,  ...,  0.0451,  0.0424, -0.0674],\n",
       "         ...,\n",
       "         [ 0.0305,  0.1049,  0.0711,  ...,  0.0582, -0.0258, -0.1212],\n",
       "         [ 0.0234,  0.0325,  0.0784,  ...,  0.1539,  0.0046,  0.1412],\n",
       "         [-0.0348, -0.1195, -0.1599,  ...,  0.1088,  0.1170, -0.0298]]),\n",
       " 'model.bert.encoder.layer.20.attention.self.value.bias': tensor([-0.0286,  0.0146, -0.0049,  ..., -0.0257, -0.0102, -0.0272]),\n",
       " 'model.bert.encoder.layer.20.attention.output.dense.weight': tensor([[-0.1267, -0.2236, -0.1877,  ...,  0.1167, -0.0913, -0.0459],\n",
       "         [ 0.0461,  0.0152, -0.0667,  ..., -0.1143, -0.0347, -0.0091],\n",
       "         [ 0.0590, -0.1437,  0.0922,  ...,  0.0155, -0.1419, -0.0576],\n",
       "         ...,\n",
       "         [-0.0646,  0.1091, -0.0509,  ...,  0.1334, -0.1191, -0.1449],\n",
       "         [ 0.0356,  0.2162, -0.1444,  ...,  0.0979, -0.1244,  0.0904],\n",
       "         [ 0.0885, -0.1380,  0.0358,  ...,  0.0221, -0.1639,  0.0799]]),\n",
       " 'model.bert.encoder.layer.20.attention.output.dense.bias': tensor([-0.0797,  0.1355,  0.0021,  ..., -0.0875,  0.0458,  0.0380]),\n",
       " 'model.bert.encoder.layer.20.intermediate.dense.weight': tensor([[ 0.1176,  0.1128,  0.0431,  ..., -0.0858,  0.0563, -0.1345],\n",
       "         [ 0.0835,  0.0560,  0.0457,  ...,  0.0685,  0.1070, -0.0862],\n",
       "         [-0.0626,  0.0497, -0.0779,  ...,  0.0895, -0.0911,  0.0819],\n",
       "         ...,\n",
       "         [ 0.1513, -0.0498,  0.1424,  ..., -0.0048, -0.1296,  0.0146],\n",
       "         [ 0.0032,  0.0877, -0.0359,  ...,  0.0366, -0.1069, -0.0432],\n",
       "         [ 0.0139, -0.0091, -0.0409,  ..., -0.0083, -0.1231,  0.0326]]),\n",
       " 'model.bert.encoder.layer.20.intermediate.dense.bias': tensor([-0.1006, -0.1000, -0.0817,  ..., -0.1323, -0.1130,  0.0005]),\n",
       " 'model.bert.encoder.layer.20.output.dense.weight': tensor([[-0.0112, -0.0831,  0.0153,  ...,  0.0081, -0.0845, -0.0905],\n",
       "         [ 0.1048,  0.1173, -0.0190,  ..., -0.1202, -0.0724,  0.0226],\n",
       "         [-0.0501, -0.2921,  0.1153,  ...,  0.1920,  0.0677,  0.0623],\n",
       "         ...,\n",
       "         [-0.0200,  0.2755,  0.0760,  ..., -0.0281,  0.0431, -0.2040],\n",
       "         [ 0.0325,  0.2158,  0.0962,  ..., -0.1282, -0.1906,  0.1387],\n",
       "         [ 0.0762,  0.0336,  0.1600,  ..., -0.1623, -0.0773,  0.0188]]),\n",
       " 'model.bert.encoder.layer.20.output.dense.bias': tensor([ 0.0120, -0.1140, -0.1050,  ..., -0.0203,  0.0763,  0.0463]),\n",
       " 'model.bert.encoder.layer.21.pre_attention_ln.weight': tensor([0.8812, 0.8669, 0.8742,  ..., 0.9330, 0.9186, 0.9726]),\n",
       " 'model.bert.encoder.layer.21.pre_attention_ln.bias': tensor([ 0.0836, -0.1088, -0.0012,  ...,  0.0403, -0.0408, -0.0037]),\n",
       " 'model.bert.encoder.layer.21.post_attention_ln.weight': tensor([0.9393, 0.9213, 0.8716,  ..., 0.9195, 0.9246, 0.9744]),\n",
       " 'model.bert.encoder.layer.21.post_attention_ln.bias': tensor([ 0.0107,  0.0079, -0.0377,  ...,  0.0501,  0.0199,  0.0081]),\n",
       " 'model.bert.encoder.layer.21.attention.self.query.weight': tensor([[-0.0026, -0.1054, -0.0414,  ...,  0.0254,  0.0955,  0.0585],\n",
       "         [-0.0883,  0.0731, -0.0466,  ..., -0.0087,  0.0679,  0.0717],\n",
       "         [-0.1112, -0.1256, -0.0808,  ...,  0.0839,  0.0699,  0.0943],\n",
       "         ...,\n",
       "         [ 0.0792,  0.0951,  0.0202,  ...,  0.0815,  0.0412, -0.0976],\n",
       "         [-0.0123, -0.2483, -0.0125,  ...,  0.1368, -0.0758, -0.0952],\n",
       "         [-0.1445,  0.1322,  0.0583,  ..., -0.2434, -0.0522, -0.0762]]),\n",
       " 'model.bert.encoder.layer.21.attention.self.query.bias': tensor([ 0.0398,  0.0617,  0.0516,  ...,  0.0046, -0.0119,  0.0003]),\n",
       " 'model.bert.encoder.layer.21.attention.self.key.weight': tensor([[ 0.3063, -0.1770, -0.0376,  ..., -0.0324, -0.0230, -0.0332],\n",
       "         [-0.0286,  0.0445, -0.0747,  ..., -0.0096, -0.0287, -0.0235],\n",
       "         [-0.1605, -0.2365,  0.1035,  ...,  0.0322,  0.0889, -0.0903],\n",
       "         ...,\n",
       "         [-0.0073, -0.1153,  0.2424,  ..., -0.0860,  0.0233, -0.0166],\n",
       "         [-0.0380, -0.0161, -0.0484,  ...,  0.0202, -0.1764, -0.1278],\n",
       "         [ 0.0298,  0.0383,  0.2407,  ..., -0.1216,  0.0355,  0.0985]]),\n",
       " 'model.bert.encoder.layer.21.attention.self.key.bias': tensor([ 0.3848, -0.4975,  0.4590,  ...,  2.8623,  2.6003, -2.1227]),\n",
       " 'model.bert.encoder.layer.21.attention.self.value.weight': tensor([[-0.0394, -0.0972, -0.0043,  ...,  0.0898,  0.0539, -0.1057],\n",
       "         [ 0.2298,  0.1070,  0.1473,  ...,  0.0421,  0.0233, -0.1378],\n",
       "         [-0.0168,  0.1139, -0.0073,  ..., -0.0632,  0.0314,  0.1140],\n",
       "         ...,\n",
       "         [-0.0930, -0.0303, -0.1533,  ...,  0.0281,  0.0022, -0.0727],\n",
       "         [-0.0617,  0.0478, -0.1739,  ...,  0.1034,  0.1697, -0.1924],\n",
       "         [-0.1796,  0.0721,  0.0378,  ..., -0.0436, -0.0554, -0.0792]]),\n",
       " 'model.bert.encoder.layer.21.attention.self.value.bias': tensor([-0.0321, -0.0375, -0.0003,  ..., -0.0135,  0.0576, -0.0069]),\n",
       " 'model.bert.encoder.layer.21.attention.output.dense.weight': tensor([[ 0.0985, -0.0151, -0.0449,  ..., -0.0815, -0.0138, -0.0059],\n",
       "         [ 0.1126,  0.1026, -0.0362,  ..., -0.0640,  0.1363, -0.0140],\n",
       "         [ 0.1375, -0.1375,  0.0051,  ...,  0.1450, -0.0450,  0.0575],\n",
       "         ...,\n",
       "         [-0.0235, -0.0317, -0.0675,  ...,  0.1381,  0.0339,  0.0151],\n",
       "         [-0.0134, -0.0395, -0.0747,  ...,  0.2041, -0.0760, -0.1242],\n",
       "         [ 0.0137,  0.0702, -0.0323,  ...,  0.1167, -0.0135,  0.0656]]),\n",
       " 'model.bert.encoder.layer.21.attention.output.dense.bias': tensor([-0.1852,  0.0723, -0.0684,  ...,  0.0695, -0.0023, -0.0688]),\n",
       " 'model.bert.encoder.layer.21.intermediate.dense.weight': tensor([[-0.0401, -0.0710, -0.1336,  ..., -0.0649,  0.0544, -0.1574],\n",
       "         [-0.0216, -0.2315, -0.1395,  ..., -0.0457,  0.0480,  0.1398],\n",
       "         [-0.1781, -0.0828,  0.0740,  ...,  0.0777,  0.0200, -0.0132],\n",
       "         ...,\n",
       "         [-0.1165,  0.0519, -0.0331,  ...,  0.0148, -0.1068, -0.1049],\n",
       "         [ 0.1359, -0.1141, -0.0813,  ...,  0.0946,  0.0102,  0.0400],\n",
       "         [ 0.0367, -0.0374,  0.1286,  ..., -0.0171, -0.1622, -0.1124]]),\n",
       " 'model.bert.encoder.layer.21.intermediate.dense.bias': tensor([-0.0357, -0.0774, -0.1054,  ..., -0.0372, -0.0660, -0.1180]),\n",
       " 'model.bert.encoder.layer.21.output.dense.weight': tensor([[ 0.1201,  0.0722, -0.0361,  ...,  0.0445, -0.0418, -0.0998],\n",
       "         [ 0.1663,  0.0523,  0.0471,  ..., -0.1245,  0.0916,  0.0856],\n",
       "         [ 0.1397, -0.2378,  0.2557,  ...,  0.0574, -0.0369, -0.0264],\n",
       "         ...,\n",
       "         [ 0.0026,  0.0986,  0.1259,  ...,  0.0751, -0.0404,  0.0548],\n",
       "         [-0.0239, -0.1345,  0.1003,  ...,  0.2430, -0.1644, -0.0214],\n",
       "         [-0.2829,  0.1240, -0.0768,  ...,  0.0815, -0.0251, -0.0079]]),\n",
       " 'model.bert.encoder.layer.21.output.dense.bias': tensor([ 0.0810,  0.0042,  0.0331,  ..., -0.0214, -0.0133,  0.0769]),\n",
       " 'model.bert.encoder.layer.22.pre_attention_ln.weight': tensor([0.9292, 0.9178, 0.8916,  ..., 0.9342, 0.9455, 0.9752]),\n",
       " 'model.bert.encoder.layer.22.pre_attention_ln.bias': tensor([ 0.0961, -0.1360,  0.0050,  ...,  0.0257, -0.0156,  0.0049]),\n",
       " 'model.bert.encoder.layer.22.post_attention_ln.weight': tensor([0.9644, 1.0162, 0.8954,  ..., 0.9520, 0.9533, 1.0060]),\n",
       " 'model.bert.encoder.layer.22.post_attention_ln.bias': tensor([ 0.0027,  0.0522, -0.0647,  ...,  0.0497,  0.0009,  0.0237]),\n",
       " 'model.bert.encoder.layer.22.attention.self.query.weight': tensor([[ 0.0637, -0.1133, -0.0211,  ..., -0.0707,  0.0388,  0.0160],\n",
       "         [-0.1057, -0.0853, -0.0109,  ..., -0.1318, -0.0213, -0.0546],\n",
       "         [-0.1337, -0.0061, -0.1130,  ...,  0.0734, -0.0908,  0.0126],\n",
       "         ...,\n",
       "         [-0.1171, -0.0225, -0.0470,  ..., -0.0161,  0.0329,  0.0848],\n",
       "         [-0.1172,  0.0468, -0.0086,  ..., -0.0075, -0.0690,  0.0211],\n",
       "         [ 0.0406,  0.0162, -0.1616,  ...,  0.1483, -0.1252,  0.0858]]),\n",
       " 'model.bert.encoder.layer.22.attention.self.query.bias': tensor([-0.0049,  0.2530,  0.0539,  ...,  0.0333,  0.0235,  0.0053]),\n",
       " 'model.bert.encoder.layer.22.attention.self.key.weight': tensor([[ 0.0844,  0.0801,  0.0334,  ...,  0.1039, -0.0253, -0.0110],\n",
       "         [ 0.1007,  0.0773,  0.0098,  ..., -0.1510, -0.2028, -0.0333],\n",
       "         [-0.0375, -0.1190,  0.0509,  ..., -0.0437, -0.0565,  0.0592],\n",
       "         ...,\n",
       "         [ 0.0377, -0.1859,  0.0439,  ..., -0.0540, -0.0427, -0.0319],\n",
       "         [-0.0166, -0.1337, -0.0832,  ..., -0.0624,  0.0184,  0.1346],\n",
       "         [-0.0628, -0.1374, -0.1448,  ...,  0.1528,  0.1296,  0.0671]]),\n",
       " 'model.bert.encoder.layer.22.attention.self.key.bias': tensor([ 2.2196,  6.3110,  4.1481,  ..., -3.6723, -1.4307, -1.2906]),\n",
       " 'model.bert.encoder.layer.22.attention.self.value.weight': tensor([[-0.1000,  0.0107,  0.0259,  ...,  0.0877, -0.0878, -0.0612],\n",
       "         [ 0.1827, -0.0796, -0.0798,  ...,  0.0785, -0.1062, -0.0559],\n",
       "         [-0.0650, -0.0625, -0.1636,  ...,  0.0314,  0.0615,  0.0203],\n",
       "         ...,\n",
       "         [ 0.1303,  0.0771,  0.0842,  ...,  0.3261, -0.0059, -0.1172],\n",
       "         [-0.1980,  0.0759,  0.0723,  ..., -0.0707, -0.1063,  0.0461],\n",
       "         [-0.1645,  0.1115,  0.0437,  ...,  0.1804, -0.1767,  0.0348]]),\n",
       " 'model.bert.encoder.layer.22.attention.self.value.bias': tensor([ 0.0286,  0.0244, -0.1006,  ...,  0.0155, -0.0388, -0.0477]),\n",
       " 'model.bert.encoder.layer.22.attention.output.dense.weight': tensor([[ 0.0480, -0.1655,  0.0042,  ...,  0.0433,  0.0051, -0.0883],\n",
       "         [-0.0412,  0.0206, -0.0685,  ..., -0.1302,  0.0206, -0.0029],\n",
       "         [ 0.0257, -0.1129, -0.0205,  ...,  0.0234, -0.0290,  0.0953],\n",
       "         ...,\n",
       "         [-0.0384, -0.0370, -0.0580,  ..., -0.2175, -0.0057, -0.2215],\n",
       "         [ 0.0011, -0.1294,  0.0284,  ..., -0.1049, -0.0957,  0.2735],\n",
       "         [ 0.0127, -0.1518,  0.1412,  ...,  0.0820, -0.1339, -0.0075]]),\n",
       " 'model.bert.encoder.layer.22.attention.output.dense.bias': tensor([-0.2980, -0.1472, -0.1705,  ...,  0.0417,  0.1167,  0.0661]),\n",
       " 'model.bert.encoder.layer.22.intermediate.dense.weight': tensor([[-0.0016, -0.0401,  0.0776,  ...,  0.0135, -0.0126,  0.0165],\n",
       "         [ 0.0859, -0.0392, -0.0279,  ...,  0.0378, -0.0494, -0.0287],\n",
       "         [ 0.0387,  0.0074,  0.1904,  ..., -0.0410,  0.1613, -0.0370],\n",
       "         ...,\n",
       "         [-0.1406,  0.0333,  0.0723,  ...,  0.0545,  0.0397, -0.0537],\n",
       "         [-0.1436, -0.0274, -0.0701,  ...,  0.0985, -0.1168,  0.0352],\n",
       "         [ 0.1184, -0.1259, -0.0236,  ..., -0.0692, -0.0148,  0.1800]]),\n",
       " 'model.bert.encoder.layer.22.intermediate.dense.bias': tensor([-0.0059, -0.0503, -0.0491,  ..., -0.0362, -0.0011, -0.0767]),\n",
       " 'model.bert.encoder.layer.22.output.dense.weight': tensor([[-0.0253, -0.1004, -0.0740,  ..., -0.1517,  0.0510, -0.0080],\n",
       "         [-0.0569,  0.0141,  0.0769,  ...,  0.1033,  0.0339, -0.0996],\n",
       "         [ 0.0081, -0.0768,  0.0629,  ...,  0.0043, -0.1325, -0.1430],\n",
       "         ...,\n",
       "         [-0.0501, -0.0423, -0.0698,  ..., -0.0502, -0.0934,  0.2077],\n",
       "         [-0.2494,  0.0485,  0.0556,  ..., -0.1258,  0.0197,  0.0647],\n",
       "         [ 0.0670,  0.1750, -0.0983,  ..., -0.0642, -0.0293,  0.0764]]),\n",
       " 'model.bert.encoder.layer.22.output.dense.bias': tensor([ 0.2054, -0.3271,  0.1156,  ..., -0.0606,  0.2299,  0.0637]),\n",
       " 'model.bert.encoder.layer.23.pre_attention_ln.weight': tensor([0.9072, 0.9073, 0.8221,  ..., 0.9056, 0.9025, 0.9508]),\n",
       " 'model.bert.encoder.layer.23.pre_attention_ln.bias': tensor([ 0.0920, -0.1163,  0.0009,  ...,  0.0301, -0.0042, -0.0021]),\n",
       " 'model.bert.encoder.layer.23.post_attention_ln.weight': tensor([0.8746, 0.9118, 0.8500,  ..., 0.8388, 0.8318, 0.8322]),\n",
       " 'model.bert.encoder.layer.23.post_attention_ln.bias': tensor([-0.0003,  0.0230, -0.0314,  ...,  0.0400,  0.0229,  0.0142]),\n",
       " 'model.bert.encoder.layer.23.attention.self.query.weight': tensor([[ 0.0362,  0.2030, -0.0971,  ..., -0.0232, -0.0550,  0.0575],\n",
       "         [-0.0194,  0.0876,  0.0319,  ...,  0.1780,  0.0343, -0.1450],\n",
       "         [ 0.1165,  0.0248, -0.0945,  ...,  0.1536,  0.0290, -0.2005],\n",
       "         ...,\n",
       "         [-0.1459,  0.0826,  0.0104,  ..., -0.1001, -0.0155, -0.0812],\n",
       "         [-0.0674, -0.1067,  0.0430,  ..., -0.0739,  0.0722, -0.0134],\n",
       "         [-0.0296, -0.0109, -0.0520,  ...,  0.1270,  0.2408, -0.0444]]),\n",
       " 'model.bert.encoder.layer.23.attention.self.query.bias': tensor([ 0.0047,  0.1091,  0.1500,  ..., -0.0588,  0.0983,  0.0737]),\n",
       " 'model.bert.encoder.layer.23.attention.self.key.weight': tensor([[-7.7934e-02, -1.8061e-02,  2.1750e-04,  ..., -6.4448e-02,\n",
       "           1.4468e-01,  4.8747e-02],\n",
       "         [ 7.3481e-02,  1.2397e-01,  1.1357e-01,  ...,  1.0466e-01,\n",
       "          -6.5239e-02,  1.5433e-02],\n",
       "         [ 6.0817e-02,  3.4247e-02,  1.4415e-03,  ...,  1.8159e-01,\n",
       "          -9.3216e-04, -5.9915e-02],\n",
       "         ...,\n",
       "         [-2.3002e-01, -1.1725e-02, -2.8192e-02,  ...,  1.1367e-03,\n",
       "           8.3726e-02,  4.2397e-02],\n",
       "         [-1.2169e-01, -2.0484e-01,  2.4764e-02,  ...,  1.5536e-02,\n",
       "           5.7480e-02,  1.0583e-02],\n",
       "         [ 7.4336e-02, -4.4622e-02, -2.1180e-03,  ...,  1.1695e-01,\n",
       "           2.5405e-01, -1.3235e-02]]),\n",
       " 'model.bert.encoder.layer.23.attention.self.key.bias': tensor([3.0478, 0.9880, 1.3059,  ..., 2.5361, 2.0092, 3.4780]),\n",
       " 'model.bert.encoder.layer.23.attention.self.value.weight': tensor([[ 0.1386, -0.1288, -0.0539,  ...,  0.0358,  0.0846, -0.0447],\n",
       "         [-0.2672, -0.0903,  0.1585,  ..., -0.1497, -0.0508,  0.1770],\n",
       "         [ 0.1942,  0.1406,  0.0422,  ..., -0.0648,  0.0405, -0.0907],\n",
       "         ...,\n",
       "         [-0.1596,  0.0460,  0.0089,  ...,  0.0343,  0.0635,  0.1187],\n",
       "         [ 0.0178,  0.0542, -0.1193,  ..., -0.1825, -0.0726, -0.0732],\n",
       "         [ 0.1222, -0.0951, -0.0567,  ..., -0.1025,  0.0526, -0.1090]]),\n",
       " 'model.bert.encoder.layer.23.attention.self.value.bias': tensor([ 0.0181,  0.0022, -0.0181,  ..., -0.0384, -0.0087, -0.0469]),\n",
       " 'model.bert.encoder.layer.23.attention.output.dense.weight': tensor([[ 2.4260e-03,  1.1235e-01,  6.3885e-02,  ..., -1.2492e-01,\n",
       "          -9.7251e-02,  1.5290e-01],\n",
       "         [-4.5288e-02, -9.6232e-02,  2.3793e-02,  ..., -1.2526e-01,\n",
       "          -7.3060e-02, -1.0749e-01],\n",
       "         [-3.5535e-02, -9.3212e-02,  1.0014e-01,  ...,  3.4465e-02,\n",
       "           5.9871e-02,  1.4335e-01],\n",
       "         ...,\n",
       "         [ 2.4584e-02, -2.5633e-01, -5.2617e-02,  ..., -1.7865e-04,\n",
       "          -6.1269e-02,  1.1545e-01],\n",
       "         [ 6.0681e-02,  1.1685e-01, -1.9229e-01,  ...,  7.3538e-02,\n",
       "           1.7913e-01, -2.5453e-02],\n",
       "         [-1.0232e-01, -1.9097e-01, -1.6524e-01,  ...,  2.2099e-01,\n",
       "          -6.6381e-02, -8.0127e-02]]),\n",
       " 'model.bert.encoder.layer.23.attention.output.dense.bias': tensor([-0.1244,  0.1061, -0.0676,  ..., -0.0304,  0.2324,  0.0240]),\n",
       " 'model.bert.encoder.layer.23.intermediate.dense.weight': tensor([[-0.0716,  0.0093,  0.0079,  ..., -0.0199, -0.0560,  0.1658],\n",
       "         [-0.1496, -0.1087,  0.0726,  ...,  0.0220, -0.0545,  0.1677],\n",
       "         [ 0.0204, -0.0421,  0.0590,  ...,  0.0540, -0.0488, -0.0479],\n",
       "         ...,\n",
       "         [ 0.1289, -0.0510, -0.0069,  ...,  0.0199, -0.0857,  0.0672],\n",
       "         [-0.0116, -0.0557, -0.1048,  ..., -0.0333, -0.0540, -0.0463],\n",
       "         [-0.0421,  0.0121,  0.0350,  ...,  0.0250, -0.0641,  0.0566]]),\n",
       " 'model.bert.encoder.layer.23.intermediate.dense.bias': tensor([-0.0435, -0.0379, -0.1245,  ..., -0.1019, -0.0547,  0.0042]),\n",
       " 'model.bert.encoder.layer.23.output.dense.weight': tensor([[-0.0463, -0.2196,  0.0113,  ..., -0.0299,  0.0054, -0.0730],\n",
       "         [ 0.1155, -0.0701, -0.1335,  ...,  0.0303,  0.1043,  0.0608],\n",
       "         [-0.0284,  0.2016, -0.2097,  ...,  0.1003, -0.1959, -0.0129],\n",
       "         ...,\n",
       "         [-0.0519,  0.0689, -0.1408,  ..., -0.2573, -0.0311,  0.0493],\n",
       "         [ 0.0020,  0.0805, -0.0569,  ..., -0.0177, -0.2052,  0.1375],\n",
       "         [-0.1435,  0.2560, -0.0015,  ...,  0.0176,  0.1204, -0.1146]]),\n",
       " 'model.bert.encoder.layer.23.output.dense.bias': tensor([ 0.3803, -0.6612,  0.3985,  ..., -0.0530,  0.3079,  0.2816]),\n",
       " 'model.bert.encoder.last_layer_ln.weight': tensor([1.0471, 1.0323, 0.9789,  ..., 0.9343, 0.9876, 0.9290]),\n",
       " 'model.bert.encoder.last_layer_ln.bias': tensor([-0.0748,  0.0614, -0.0084,  ..., -0.0172,  0.0196,  0.0119]),\n",
       " 'model.classifier.weight': tensor([[ 0.0390,  0.0144, -0.0021,  ..., -0.0287,  0.0296,  0.0084],\n",
       "         [ 0.0202, -0.0261,  0.0108,  ..., -0.0057,  0.0136,  0.0095],\n",
       "         [-0.0350, -0.0150, -0.0176,  ..., -0.0573,  0.0075,  0.0007],\n",
       "         [ 0.0464,  0.0144,  0.0073,  ..., -0.0126, -0.0011,  0.0280],\n",
       "         [ 0.0156,  0.0047, -0.0253,  ..., -0.0730, -0.0060, -0.0472],\n",
       "         [-0.0007,  0.0387,  0.0216,  ..., -0.0046,  0.0089, -0.0118]]),\n",
       " 'model.classifier.bias': tensor([-0.0009,  0.0084,  0.0308, -0.0145, -0.0247, -0.0196]),\n",
       " 'model.embeddings.weight': tensor([[-0.0362, -0.0847, -0.0336,  ...,  0.0139,  0.0251,  0.0388],\n",
       "         [-0.0045, -0.0255,  0.0515,  ..., -0.0875,  0.0687, -0.0259],\n",
       "         [-0.0655, -0.0639, -0.0673,  ..., -0.0585,  0.0382, -0.0807],\n",
       "         ...,\n",
       "         [-0.0032, -0.0663,  0.0156,  ..., -0.0178,  0.0130,  0.0150],\n",
       "         [ 0.0516, -0.0339, -0.0159,  ...,  0.0027,  0.0146, -0.0402],\n",
       "         [-0.0356,  0.0133,  0.0057,  ..., -0.0138, -0.0130,  0.0074]])}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.load('/home/jovyan/dnalm/runs/annotation_upweighted_edges_large_42k_flash_attention/bert_large_512_lastln_t2t_1000G_bs256_lr_1e-04_fp16/model_1750000/rmt_seglen_512_len42000_maxnsegm_10000_msz_5_bptt-1_lr5e-06_AdamW_constant_with_warmup_wd1e-04_p10000_bs64_it500000/run_1/model_best/pytorch_model.bin', map_location='cpu')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configure model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.chdir('/home/jovyan/dnalm/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "rmt_model_path = Path('/home/jovyan/dnalm/runs/annotation_bert_base_MANE_4k_bpe/bert_large_512_lastln_t2t_1000G_bs256_lr_1e-04_fp16/model_1750000/rmt_seglen_512_len4096_maxnsegm_10000_msz_5_bptt-1_lr2e-05_AdamW_constant_with_warmup_wd1e-04_p10000_bs64_it500000/run_1/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp_config = json.load((rmt_model_path / 'config.json').open('r')) # it should be config.json that I sent you"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input_seq_len: 4096\n",
      "model_cfg: ./data/configs/L12-H768-A12-V32k-preln-lastln.json\n",
      "model_cls: src.gena_lm.modeling_rmt:RMTEncoderForTokenClassification\n",
      "backbone_cls: src.gena_lm.modeling_bert:BertForTokenClassification\n",
      "input_size: 512\n",
      "num_mem_tokens: 5\n",
      "max_n_segments: 10000\n",
      "tokenizer: ./data/tokenizers/t2t_1000h_multi_32k/\n"
     ]
    }
   ],
   "source": [
    "for k in ['input_seq_len', 'model_cfg', 'model_cls', 'backbone_cls', 'input_size', 'num_mem_tokens', 'max_n_segments', 'tokenizer']:\n",
    "    print(f'{k}: {exp_config[k]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/jovyan/dnalm/my_saved_conda_envs/gena/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoTokenizer, AutoConfig\n",
    "tokenizer = AutoTokenizer.from_pretrained('./data/tokenizers/t2t_1000h_multi_32k/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from src.gena_lm.modeling_bert import BertForTokenClassification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_cfg = AutoConfig.from_pretrained('./data/configs/L12-H768-A12-V32k-preln-lastln.json') # here it soulbe config for backbone model, don't change it, you can change only path to it\n",
    "model_cfg.num_labels = 6\n",
    "model_cfg.problem_type = 'multi_label_classification'\n",
    "model_cls = BertForTokenClassification\n",
    "model = model_cls(config=model_cfg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ckpt = torch.load(str('/home/jovyan/dnalm/runs/annotation_bert_base_MANE_4k_bpe/bert_large_512_lastln_t2t_1000G_bs256_lr_1e-04_fp16/model_1750000/rmt_seglen_512_len4096_maxnsegm_10000_msz_5_bptt-1_lr2e-05_AdamW_constant_with_warmup_wd1e-04_p10000_bs64_it500000/run_1/model_best/pytorch_model.bin'), map_location='cpu')\n",
    "# model.load_state_dict(ckpt, strict=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.bert.base_model.embeddings.word_embeddings(torch.tensor([100]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "rmt_config = {\n",
    "            'num_mem_tokens': exp_config['num_mem_tokens'],\n",
    "            'max_n_segments': exp_config['max_n_segments'],\n",
    "            'input_size': exp_config['input_size'],\n",
    "            'bptt_depth': -1,\n",
    "            'sum_loss': True,\n",
    "            'tokenizer': tokenizer\n",
    "        }\n",
    "from src.gena_lm.modeling_rmt import RMTEncoderForTokenClassification\n",
    "rmt_cls = RMTEncoderForTokenClassification\n",
    "model = rmt_cls(model, **rmt_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "missing: []\n",
      "unexpected_k: []\n"
     ]
    }
   ],
   "source": [
    "# load pre-trained weights\n",
    "ckpt = torch.load(str('/home/jovyan/dnalm/runs/annotation_bert_base_MANE_4k_bpe/bert_large_512_lastln_t2t_1000G_bs256_lr_1e-04_fp16/model_1750000/rmt_seglen_512_len4096_maxnsegm_10000_msz_5_bptt-1_lr2e-05_AdamW_constant_with_warmup_wd1e-04_p10000_bs64_it500000/run_1/model_best/pytorch_model.bin'), map_location='cpu')\n",
    "missing_k, unexpected_k = model.load_state_dict(ckpt, strict=False)\n",
    "print(f'missing: {missing_k}') # if no missing tensors - that is correct, otherwise - no!\n",
    "print(f'unexpected_k: {unexpected_k}') # if no missing tensors - that is correct, otherwise - no!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = model.eval()\n",
    "# model.half()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.model.embeddings(torch.tensor([1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluation example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# seq = ''\n",
    "# with open('/home/jovyan/dnalm/downstream_tasks/annotation/sample.fasta', 'r') as f:\n",
    "#     for line in f:\n",
    "#         seq += line[:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# seq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "seq = 'ATGC'*1234\n",
    "input_features = tokenizer(seq, return_tensors='pt')\n",
    "\n",
    "input_features['labels_mask'] = input_features['attention_mask'] # dumb realization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check the length of the sequence in tokens with no PADDING (but with SEP and CLS)\n",
    "# tokenizer([seq], return_tensors='pt')['input_ids'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 619])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_features['input_ids'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'labels_mask'])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_features.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_features['labels'] = torch.randint(0, 6, (input_features['input_ids'].shape[1], 6)).unsqueeze(axis=0) # yeah, for now you must specify whatever labels, model won't work without them, it does not change the prediction\n",
    "# input_features['labels'] = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 619, 6])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_features['labels'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/jovyan/dnalm/my_saved_conda_envs/gena/lib/python3.9/site-packages/transformers/modeling_utils.py:1101: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# with torch.autocast(device_type='cuda', dtype=torch.float16):\n",
    "with torch.no_grad():\n",
    "    out = model(**input_features, output_hidden_states=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['loss', 'logits', 'hidden_states', 'loss_0', 'loss_1', 'logits_segm', 'labels_segm', 'rmt_logits_masks', 'rmt_logits_masks_segm'])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# New"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 512, 768])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out['hidden_states'][12].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 1024, 6])"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out['logits'].shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Old"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 3584])"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out.rmt_logits_masks.shape # REMEMBER, here is the predictions for ALL tokens, including SEP, PADDING, and CLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0., 0., 0.,  ..., 0., 0., 0.]])"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out.rmt_logits_masks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 3372, 6])"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# you can handle it like this\n",
    "out.logits[:, out.rmt_logits_masks[0, :].bool(), :].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([-0.2498, -0.2897, -0.0503, -0.7057, -0.7579,  0.2251])"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out.logits[0, 0, :] # use softmax to get probabilities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGdCAYAAADAAnMpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABDRUlEQVR4nO3deXxU9aH///dMVgIkbJIIBIPVigqiQuHGpcttrtR6ae3t4qVepbS1Py08qqZfa6kKt7UVu1FsS6XaUnu/1ULt13rbQvFqlKrXKGVTqYpaFBBJABEStiwzn98fyTkzZ5bMQjLzmZnX8/HIg+TMOck5Q84n7896fMYYIwAAgCzxZ/sEAABAYSOMAACArCKMAACArCKMAACArCKMAACArCKMAACArCKMAACArCKMAACArCrO9gkkIxgM6u2339bQoUPl8/myfToAACAJxhi1t7drzJgx8vvjt3/kRBh5++23VVtbm+3TAAAAadi1a5fGjRsX9/WcCCNDhw6V1HMxlZWVWT4bAACQjLa2NtXW1rp/x+PJiTDidM1UVlYSRgAAyDGJhlgwgBUAAGQVYQQAAGQVYQQAAGQVYQQAAGQVYQQAAGQVYQQAAGQVYQQAAGQVYQQAAGRVymHkySef1KxZszRmzBj5fD49/PDDCY9Zt26dzj//fJWVlem0007Tfffdl8apAgCAfJRyGDly5IimTJmiZcuWJbX/G2+8ocsuu0wf+tCHtGXLFt1www364he/qEceeSTlkwUAAPkn5eXgL730Ul166aVJ7798+XJNmDBBP/zhDyVJZ555pp5++mn96Ec/0syZM1P98QAAIM8M+JiR5uZmNTQ0eLbNnDlTzc3NcY/p6OhQW1ub5wMAAOSnAQ8jLS0tqq6u9myrrq5WW1ubjh07FvOYxYsXq6qqyv2ora0d6NMEAGBAtbYd14+bXtOz29/J9qlYx8qn9i5YsECNjY3u184jiIFY3nr3qC767hOaespw/f7a+oRPhwSATHp5T5suvespz7Y59afomx+flKUzss+Ah5Gamhq1trZ6trW2tqqyslKDBg2KeUxZWZnKysoG+tSQJy767hOSpI073tWRzoCGlFmZsQEUqLvX/SNq26adBzN/IhYb8G6a+vp6NTU1ebY9+uijqq+vH+gfjQKwaee72T4FAJAkdQeC+uXTb+jlPd5xjs7XZ4+p1LLPni9JMjIZPz+bpVyFPHz4sF5//XX36zfeeENbtmzRiBEjNH78eC1YsEC7d+/Wf/3Xf0mSrr32Wv30pz/V1772NX3+85/X448/rt/97ndavXp1/10FCta//eyZbJ8CgAIXCBr995bd2rDjXT3w3E5J0pt3XiZJOtrZre37j0iSfjnnfXqlpSeYGLKIR8phZMOGDfrQhz7kfu2M7ZgzZ47uu+8+7dmzRzt37nRfnzBhglavXq0bb7xRd911l8aNG6df/OIXTOvFCTPczQCyrCsQ1Om3/CXu6w9ueEuBoFHtiEGqqSrXttZ2SYSRSCmHkQ9+8IN9/hGItbrqBz/4QW3evDnVHwX0qaM7GLWNgAIgk7bsOtjn6w9v2S1Jmj19vCSJ4fWx8Wwa5KxjnYFsnwKAAvf2wdhLVDg6eytNZ4+p8myn2uRFGEHOOtpFGAGQXT/8n1f7fD3Ymzr8vU0izsoDtOJ6EUaQs451dkuSBpUUZflMABSq4wkqRU7o8PV20PjoqImJMIKcdbS3m2ZwWSiMUNcAkEltx7v6fN3EbRkZwJPKQawOhZwTCBp9e/VL7s1cUVosqTOr5wSg8HQFgjreFT2QPlzQKah8nn8QgTCCnLP6xT361f++6X5dUUo3DYDMazvWd6uIFGqt9Uc8poJFz7zopkHO2dfe4fl63PDQYwVo+gSQKYd6w0hfj6AIumNGetFNExNhBDmn2O+tYZwycnCWzgRAIXO6aMr7GkTvjBnxewewkkW8CCPIOf6IMPKJ88Zm6UwA
oGdQ6vnjh8V8LbJlhKm9sRFGkHPCs8jXPnKGzqgZmr2TAVCwwsd9/Opz02Pu46wz4vMxdLUvhBHknI6w0etDI/tqqWwAyDCfpKqKkpivOYHFHzGbhqLKizCCnHMsbJGhC04blcUzAVDIkulpCfbWnZyWEbeFhDTiQRhBTnm1tV2r/rZLkjS9boTec9IQ5u0DyKpkemCiFj0buNPJSawzgpzy6eXN7nS6900YnuWzAYC+BSOWg0dstIwgpxwKW2RozLBBUa+zkBAAm7hhJHLMCLNpPGgZQU4IBo3+v99s9GwbGyOMAECm9dXq4a4GTzdNn2gZQU54/q2DevSlVs+2s06ulMSUOQDZkdQAVvdBee5KI0kfW0gII8gJkfft5eeO0ejK8qycCwCE67s+FNFN47aMkEbCEUaQE0r83l/VW//1rJj7UdsAYJPolhHEQhhBTghEpIw+nwUBABmQTOuGM1A1atEzKk4ehBHkhK5A0PN1WXHoV5f6BoBs6qsMChrvXs4YN8KIF2EEOaGz2xtGSor41QWQXckNYI3dMgIvSnTkhM6IlpF4qGwAyDSnteMns8+TJE0Mf3hnnAflsc6IF2EEOaGrO7kwAgDZMqS8Z+mu8JbbqJYRmkZiIowgJzgtI1WDSvTsgg97XuPmBpANybRtOPs4C6M5/9Iu4kUYQU54rfWwJOmccVWqqWJ9EQD2Cp9lE7UcvLPOCGnEgzAC67Ud79JdTa9JSjxwlX5YAJkSWd7EaqSNXA7e3U7biAdhBNbb23bc/ZweGQC26aur2LDoWVIII7BeWXFogbPuYHRtgmfTALBVaACrs85Iz3Yacb0II7Ce3x8KG9y/AGzhDk6N7IIx8fdhAGtshBFYrztsjZESf9+tINzgALIlVistA1iTQxiB9T5/39/cz4sShBEAyJRkAoU7gDVqxBtpJBxhBNb7x74j7ucsAw/ANtFBo0f4bBsWPesbJTtyCi0jAOwRu3XDySDhLSdOF447ZoSGEQ/CCHLK1FOG9/k6NziATAsNTvUK9tEyQlHlRRhBTrlyxvhsnwIAJCV8JYLQcvA9WKDRizCCnFIcZ8wI/bAAMi1RnghfZdXHX9s+8fYAAHAC4s2TCQ8rUYueDfhZ5RbCCPIKz3sAkCmRpU1fi5/5Ij6jl8aLMAIAwAmI90gK7wDWyOXgSSPhCCPICwwZAWCb8LgR/dRehCOMAACQhtDqqpHbe14IbxmJN/0XPQgjyC9UNwBkSeRKrCYY/ZqPEawxEUYAAEhDonEf4QPq/REtI2QRL8II8kK8AWQAMODiFD/BWMvBM4A1JsIIrMYNCyBXxXpQnvtahs/FdoQRWK07mNotyw0OIFOc8sbJGZENtDFbRhjCGhNhBFbrDhAvAOQmZ8xIeEgJddNk4YQsRhiB1bqCwcQ7ielyADIv4bNpel/3xxjTxmrRXoQRWI2WEQC2ixxA74QQZ52RWJUlWka8CCOwWncguZYRBzc4gGyJXvys59/wlhEm/sVGGIHVulIcwAoAmeKOCYnzejDGEq1OKwolmxdhBFYLJNlNQ20DgC2ckBJruXhfaCeEIYzAaskOYAWAjEsyUMSqLDGA1YswAqst+Z9XU9qfGxxAprlhI4kWWqb2xkYYgdVWv7gn26cAADElmyfCFzpj0bPY0gojy5YtU11dncrLyzVjxgytX7++z/2XLl2qM844Q4MGDVJtba1uvPFGHT9+PK0TBmLhBgeQLVFP6+0jpfDQ3thSDiOrVq1SY2OjFi1apE2bNmnKlCmaOXOm9u7dG3P/Bx54QF//+te1aNEivfzyy/rlL3+pVatW6Rvf+MYJnzwAALZIJpS4T+2ln8Yj5TCyZMkSXXPNNZo7d67OOussLV++XBUVFVqxYkXM/Z955hldeOGF+uxnP6u6ujpdcsklmj17dsLWFCAd3N8AMsWdLZOgYTb2AFaESymMdHZ2auPGjWpoaAh9A79fDQ0Nam5ujnnMBRdcoI0bN7rhY/v2
7VqzZo0++tGPnsBpAwCQgxjAGlNxKjvv379fgUBA1dXVnu3V1dV65ZVXYh7z2c9+Vvv379dFF10kY4y6u7t17bXX9tlN09HRoY6ODvfrtra2VE4ThYghIwAyLN7sPdPH64xvi23AZ9OsW7dOd9xxh372s59p06ZNeuihh7R69WrdfvvtcY9ZvHixqqqq3I/a2tqBPk0AAE5IvO4aXxL7FLqUWkZGjRqloqIitba2era3traqpqYm5jG33XabrrrqKn3xi1+UJE2ePFlHjhzRl770Jd1yyy3y+6Pz0IIFC9TY2Oh+3dbWRiBBUmj5BJArjDFRD9krVCm1jJSWlmrq1KlqampytwWDQTU1Nam+vj7mMUePHo0KHEVFRZLijyYuKytTZWWl5wMAAJuEBrDGDhR9zaaJ93qhSqllRJIaGxs1Z84cTZs2TdOnT9fSpUt15MgRzZ07V5J09dVXa+zYsVq8eLEkadasWVqyZInOO+88zZgxQ6+//rpuu+02zZo1yw0lwImibgHAFpEVbZ/nqb2UVrGkHEauuOIK7du3TwsXLlRLS4vOPfdcrV271h3UunPnTk9LyK233iqfz6dbb71Vu3fv1kknnaRZs2bpO9/5Tv9dBQAAGRbZsJFMzPC0jPTjueS6lMOIJM2fP1/z58+P+dq6deu8P6C4WIsWLdKiRYvS+VEocINLi3SkM6BTRw3Wj2efl3B/FhICkGnxQkis0ii8YaSnvKKlREozjACZ4jRprvjc+1Q3anCWzwYAUpdKWClUPCgPVusKBCVJxUV91x7ohgWQaU5LbGT501fICF9nhIbcEMIIrNYd7LlbS4r4VQVgt8jBqW63sWehkcydTy6hhIe1jDEK9IaRYn9ydzA1DQCZ4hQ3qbTMesaM0FHjIozAWk6riCQVx1gcDwBslihqUHkKoYSHtQJhYSRRFuF5DwCsEREy6KVJjDCCnMBCQQCs4w4J6SmfkimmwssyWkZCCCOwVviNShQBkGsSLQePEMIIrBU+uIuGEQC2ccqoROWTdzn46ONBGIHFvC0jrDMCIDckGzHopgkhjMBa4fcpYQOA7aKLqei04Vn0bEDPJrcQRmCtdJ4zQ00DQKbEWtMslvDKFBWr2AgjsBYtIwDyGQ/2DCGMwFopjRkZ4HMBgEgmzhKsTshIlDWIIiGEEdgrPIyQNgBYLl455YuzDw0jIYQRWCsYdqf6k0wjTJUDkCnplDaeVl6KKxdhBNbyjBnJ2lkAQN/ilU+hXpzY64wghDACa4UP7kplUSEAyKa+GjzCSypackMII7CWdzYNYQOAXZwKU6h4Sq2cYsxICGEE1krnRuXmBmCLWOuQeB6Ul9nTsRphBNZK9rkPAJAN7piQFI7xdNNQe3IRRmCvJFc3THYfAMgEJ2PEGhNC5So2wgisFWskOgDYKu46I3G20y4SQhiBtZJ97oPnmAE5EwCI5pZRKVSYPGNGKLBchBFYizEjAHJZ4uXgSSMOwgisFWoZSSKNEFgAZFxvhSlqa2TI8O5BBSsaYQTWcm9nblwAOSDZosrdj4YRF2EE1nKmvflTCCNMlQNgi9CYkjivZ+5UrEcYgbVS6qYBgAxLFDbicQaxUncKIYzAWqnc6MQVALZIFDKc8ooBrCGEEVjLxBkcBgA2CK3A2lNKRU7xjVeGMYA1GmEE1kpnDj/1DAC5gm6aEMIIrJXOcx8AIONSHTPSewBZJIQwAmuZFJZgZcl4AJkWr2XDfTZNvHFvPud14oiDMAJr0TICIJekvM4IXIQRWCutMSNUNABkCIPs+w9hBBbj2TQAcl+8tZKoPIUQRmCtVJ7aS2ABkGnpL3rW/+eS6wgjsJY7ZoQ7F0AOiCyqaPlIHmEE1kqlZSTsqAE4EwBIH/WpxAgjsJY7OIw7GYCFIldgTRbP24pGGIG1gsGef3k2DYBc4qwfkmi2Dd04IYQRWItpcwBs5oQOp8KUbIsHjb3RCCOwVjoj1alpAEDuIYzAevSvAshF
iRZuNAy4dxFGYK1UWkYY5AogW6Km9Cbaf8DOJHcRRmAtxowAyCXUidJHGIG10no2zQCdCwBECq2FFK8bJrnjQRiBxbhPAeQjupWjEUZgrchpcwBgo3SXgafCFUIYgbVCz6ZJvC95BUCmJZoNE69CRXkVjTACayXqj+3rGABA7iCMwGJ00wCwV6LKT+IBrNSeHIQRWCvYe5/6SSMAckhk901UEUaRFoUwAmuFumkSI68AyBZndgzlUPoII7CWSSWNOMcwPh1AhiTspjnBdUgKCWEE1nJn02T1LACgf1GmRUsrjCxbtkx1dXUqLy/XjBkztH79+j73P3jwoObNm6eTTz5ZZWVleu9736s1a9akdcIoHOmswAoAmRKvwsS41NQVp3rAqlWr1NjYqOXLl2vGjBlaunSpZs6cqW3btmn06NFR+3d2dupf/uVfNHr0aP3+97/X2LFjtWPHDg0bNqw/zh95LLVn0xBYAGRXdHdMnHVGejcQWkJSDiNLlizRNddco7lz50qSli9frtWrV2vFihX6+te/HrX/ihUrdODAAT3zzDMqKSmRJNXV1Z3YWaMwpPDUXvcQbm4AGcIq0f0npW6azs5Obdy4UQ0NDaFv4PeroaFBzc3NMY/54x//qPr6es2bN0/V1dWaNGmS7rjjDgUCgbg/p6OjQ21tbZ4PFJ5QEyh3OoB8RO3JkVIY2b9/vwKBgKqrqz3bq6ur1dLSEvOY7du36/e//70CgYDWrFmj2267TT/84Q/17W9/O+7PWbx4saqqqtyP2traVE4TecKk0TICAJkWu3Mm/vIElGnRBnw2TTAY1OjRo3XPPfdo6tSpuuKKK3TLLbdo+fLlcY9ZsGCBDh065H7s2rVroE8TFkplmi43N4BMiyyhKIfSl9KYkVGjRqmoqEitra2e7a2traqpqYl5zMknn6ySkhIVFRW5284880y1tLSos7NTpaWlUceUlZWprKwslVNDHkpnNg1jRgDkCsqrkJRaRkpLSzV16lQ1NTW524LBoJqamlRfXx/zmAsvvFCvv/66gsGgu+3VV1/VySefHDOIAA7WGQFgtQQVptCTx72vU6ZFS7mbprGxUffee69+/etf6+WXX9Z1112nI0eOuLNrrr76ai1YsMDd/7rrrtOBAwd0/fXX69VXX9Xq1at1xx13aN68ef13FchLzkh1P0vzAcghtHikLuWpvVdccYX27dunhQsXqqWlReeee67Wrl3rDmrduXOn/GF/PWpra/XII4/oxhtv1DnnnKOxY8fq+uuv180339x/V4G8lGgp5XDUNABki1P+JNuj7K4zMjCnk5NSDiOSNH/+fM2fPz/ma+vWrYvaVl9fr2effTadH4UCZuIsGJTMMQAw0BKVN2k8Xqtg0QAOa3EjA8hndOeEEEZgLRMa/ZXV8wCAWOKvhWR6X49do6JEi0YYgbVSmU1DXgGQbawWnT7CCKyVznMfaPYEkHnphRDGuIUQRmAt1hkBYLNEUSJeGUZLbjTCCKyVzgqsAJBttNCmjjACi/V20ySxJ321ADItcgBr8vUmn+d4EEZgMZ7aCyCX0bqbPMIIrBXqb+VGBmCvdEsoWkZCCCOwlmEEKwCLxZsNkyhj0FASjTACazk3uj+JG5ebG0C2uGNGIrabFMa9FTrCCKwVTOFBeQ6aPQFkyomWN6wzEkIYgbXSWfQMAGxHkRaNMALrEUYA2Cyy9dZ9Jg0zApNGGIG1TArdNNzrADItspMl2dDh7Ee3cghhBNZyB3+l8mwa+mABZFiiMorlCRIjjMBa1BoAWC1BIUURljzCCKzF6oUAclHCdUZoKYlCGIG1UlnzjMACINPcMsotfiIHska+jngII7BWOlN76doBkCsor0III7AWq8EDyAWpdrvQUhKNMAJ7MWYEgMXitWyElhmh6SNZhBFYK5Vn0wBAtiW9zkjvv4SVEMIIrBV079MUnk0zIGcCANFMaGVGnCDCCKzFSHQAuYzlCZJHGIG1ePw2
gFwQWUaZBNNknHDCbJoQwgislUrLCBUPAJkW9WyarJxFfiCMwFqhqb0pjBmhqgEgw+J1wyRanoDSKoQwAnulsegZAGQKdZ/+QxiBtaKXWgYA+0V131CGJUQYgbVCs+YS38nc7ACyxSl+Irtr4nUbO7vRrRxCGIG10pnDz60NIFMob/oPYQTW4tk0AHJBVMus8fxDy20SCCOwFgsGAbBZut0sbjdNP55LriOMwFqptIyk+tRMAOhvlELpI4zAWk6tI5UH5TEeDECmxS2iEgzCp7wKIYzAWnTTAMhFiTIGLbnRCCOwFs+mAZALnAoT9ab0EUZgLZPCoBEKAQCZlqibxa1QRZRPoa/pp3EQRmClzu6g7n3qDUmpNmlycwPILOpCJ44wAiv9z0st2n+4QxKtHgDsZOJUfiKn/FKEJUYYgZUOH+92P2871pXFMwGA5ES24sbrxvEleL0QEUZgpapBJe7nnYFgwv2peQDIGgqgE0YYgZXCKwyp1B6oaQDIlKTLG/qaEyKMwEqECgC5Iqp7xvk3XjdNbzihmAshjMBK4QPDuGEB2CiybKIBJH2EEVgvmYdRsUorAFtFlk4MYI1GGIGV0r1JubcBZFq8uhDlUfIII7ASNzEA28WrNNHikTrCCKyUTNcMANggXiexU45FtZz4vK+DMIIckMz9yogRAJkWbwVWpI4wAiulPWaEsgEAcg5hBFbyTu0lYQCwV2Q3jFNmxXvwuDubZiBPKscQRmCl8BYOWjsA2CiybGKFgfQRRmCllAMIhQCADNtz6Jik6BVYI0Wug+SuwEpFy0UYgZXSfzYNdzeAzPjNszslSU+/vj/m6xRHyUsrjCxbtkx1dXUqLy/XjBkztH79+qSOW7lypXw+ny6//PJ0fiwKFGNGANhs98Fjnq8JIalLOYysWrVKjY2NWrRokTZt2qQpU6Zo5syZ2rt3b5/Hvfnmm/o//+f/6OKLL077ZFE4aOEAkGviPZYi/gBWyjlHymFkyZIluuaaazR37lydddZZWr58uSoqKrRixYq4xwQCAV155ZX65je/qVNPPfWEThiFIdVuGoaMALAPYSNZKYWRzs5Obdy4UQ0NDaFv4PeroaFBzc3NcY/71re+pdGjR+sLX/hC+meKwmJifprKYQCAHFGcys779+9XIBBQdXW1Z3t1dbVeeeWVmMc8/fTT+uUvf6ktW7Yk/XM6OjrU0dHhft3W1pbKaSIP0HwJIFdFll6RvTc+FhqJMqCzadrb23XVVVfp3nvv1ahRo5I+bvHixaqqqnI/amtrB/AsYSOGjADINZHdxZRjyUupZWTUqFEqKipSa2urZ3tra6tqamqi9v/HP/6hN998U7NmzXK3BYPBnh9cXKxt27bpPe95T9RxCxYsUGNjo/t1W1sbgaTAmLhfxBZv4BgA2MZZl4SsEpJSGCktLdXUqVPV1NTkTs8NBoNqamrS/Pnzo/afOHGiXnzxRc+2W2+9Ve3t7brrrrviBoyysjKVlZWlcmrIM54VWFO4ZamJALBNokXRkGIYkaTGxkbNmTNH06ZN0/Tp07V06VIdOXJEc+fOlSRdffXVGjt2rBYvXqzy8nJNmjTJc/ywYcMkKWo7EI4xIwBylvH8gySkHEauuOIK7du3TwsXLlRLS4vOPfdcrV271h3UunPnTvn9LOyKE8OzaQDkmmR7i539KNtCUg4jkjR//vyY3TKStG7duj6Pve+++9L5kSgwJs7n8dAICsA2btiggEqIJgxYL5XVWOneAYDcQxiBnWi/BJCjIitF8RpGqDyFEEZgpVS7aQAg2yJnzRA2kkcYgZVSHcDKMiMAcgXrIkUjjMBKaT+1l4oIAMvEyx70RocQRmAlumkA5ConZBA2kkcYgZW4iQHkih98eoqkFNYZ6f2XYi6EMAIrpXqTstwygEyrLO9Zquu88cP63I/yKTHCCKxk0lyClZoGgExxiqb4U3eRLMIIrMcNDcBGTtnk90VO6e1baDl4SjcHYQRW
4tk0AGwX7C2cnHARr4WEmbyJEUaQF7jZAWSaU1GKbBkJvR67JkV5FY0wAiuFr1yYyiqGtKIAyJTgCRY4FFchhBFYiW4aALZzx4z4I8aMRBRatIQkRhiBlcgfAKwXOZsm6XVGnBGs/X1CuYswAivRMgLAdk43TbwxIw7WGUmMMAIreceMpHccAAwkp7Th2TMnjjACK3ETA7Bd5NReR9LrjFB5chFGYD0WBgJgo9AKrD7Pv5EYwJoYYQRWSjWA+LjbAWRQeBnlj9dNE6flg9IqGmEEVkq3MYRGFACZEF7WpFsZorwKIYwAAJCiYB8tI4SM1BFGYKXwe3nc8IqsnQcAxBJeRrljRpINJb07ElpCCCOwUvhNescnJiXcnz5YAJkU3jLi4y/pCeMthJWcgV9Xzhiv0ZXlKRwHAAPPM2Ykwb4MsE+MMAIruVPmuIcBWC7+U3tj7+/sTeUphDACK7krG9IBA8BCnm6a3mKK0ip9hBHYKeV1RpzDqGsAGHjebppEz6aJ+JrUEoUwAisleuYDAGTT3vYO9/O4z6ZJ8D2oPIUQRmCl0DLLAGCfTy9/xv08VhgxMbpxEB9hBFZyZtMwCh2AjfYf7nQ/dwawJlteUapFI4zASqm2XoaeggkAmRUvXCTqhqG8CiGMwGo0jACwXbypvQ6KscQII7ASU3sB5IrYY0bit3z4WA4+CmEEVmLRMwC5wgkXFFfpI4zASu4A1iT3d1tQqGkAsEzkwFZCSzTCCOxEqACQ6xKWYxR0DsIIrMSiZwByGTEjNYQRWMmZEsc6IwByRbziiuXgEyOMwErprsBqqI8AsESi8ojZNCGEEVjJvUeTTCPUNAAgdxFGYKVQywgpA0Du6evZNE65RsNICGEEVgo9mybLJwIASYqsPNENkzzCCKyW8pgRbn4A1olqGkEEwgislOoKrNzbAHINlacQwgisxpgRADYaNaRUklQ3siLm60aMCUkFYQRWSvTobQDIppKinj+fP5l9fmhjvHVG6KVJiDACK6W7AisZBkAmBE3iQfaJyiPWRQohjMBKKS96xrQbABnEk8X7F2EEVnJrDNzpACzktt7GqTKFt4rEWw6eltwQwgislO5y8ACQCc64Nn/YX9HIuhPdMMkjjMBKaY8Z6fczAYBoJ7JKNLMEoxFGYKVUb3RubQCZ5Axg9SdR+MSrVFF5CiGMwFIsBw/AXolab40MY0JSQBiB1cgiAGwUDDoVplApFa+8imzhpZIVjTACK6U7bY7F0gBkQmg2zQl8D8orF2EEVkr1HqWmASCj3ApT/MKHqJG8tMLIsmXLVFdXp/Lycs2YMUPr16+Pu++9996riy++WMOHD9fw4cPV0NDQ5/6AFJoS19eNDgDZkmgAq2edkcjl4CnWoqQcRlatWqXGxkYtWrRImzZt0pQpUzRz5kzt3bs35v7r1q3T7Nmz9cQTT6i5uVm1tbW65JJLtHv37hM+eeQvWi8B2CzWomdUntKXchhZsmSJrrnmGs2dO1dnnXWWli9froqKCq1YsSLm/vfff7++/OUv69xzz9XEiRP1i1/8QsFgUE1NTSd88shfrDMCwGbJPJsmXq2KdUaipRRGOjs7tXHjRjU0NIS+gd+vhoYGNTc3J/U9jh49qq6uLo0YMSLuPh0dHWpra/N8oLCwzggAm6UyyD7u9F9qT66Uwsj+/fsVCARUXV3t2V5dXa2WlpakvsfNN9+sMWPGeAJNpMWLF6uqqsr9qK2tTeU0kQcM64wAsJgTJPwUUv0io7Np7rzzTq1cuVJ/+MMfVF5eHne/BQsW6NChQ+7Hrl27MniWsALPpgFgsVgVpsjyKl7DB/klWnEqO48aNUpFRUVqbW31bG9tbVVNTU2fx/7gBz/QnXfeqccee0znnHNOn/uWlZWprKwslVNDnkl7zAjNngAyIJhCV3LcJ/syys2VUstIaWmppk6d6hl86gxGra+vj3vc9773Pd1+++1au3atpk2blv7ZouAkPWaEqgaA
DDJJPJuGylHyUmoZkaTGxkbNmTNH06ZN0/Tp07V06VIdOXJEc+fOlSRdffXVGjt2rBYvXixJ+u53v6uFCxfqgQceUF1dnTu2ZMiQIRoyZEg/XgrySVIj1QEgS9ycweDUfpFyGLniiiu0b98+LVy4UC0tLTr33HO1du1ad1Drzp075feHGlzuvvtudXZ26lOf+pTn+yxatEj/+Z//eWJnj7wVCDq1DtIIAPvEGsAat7gisCSUchiRpPnz52v+/PkxX1u3bp3n6zfffDOdH4EC57SMFCXzfG4P7m4AAyv8mTJ9LzMSZ50RKllReDYNrOS2jCQZRri1AWRKMCxjJNN6G28PWkZCCCOwUiDY828RNQgAlvG0jMTrgqGVNiWEEVgp1E2T5RMBgAjhLSOeZ9NEtIHEXWdkAM4p11HUw0qhJ2KmdtvS7AlgoIW3eviS+Csab4wIxVUIYQRWcsaMJDuAld4cAJliPC0j6A+EEVgp/dk0ADCwTBIDWI2J31JL5SkaYQRWclpGmAIHwDaebprwZ9PEKa7iz6aho8ZBGIGVgmnOpuHWBjDQ4g1gTRZVrGiEEVgpkOJsmnQKBABIRzJTe6XElSMqTyGEEViJ5eAB2MrTMhJ3nZHE+yCEMAIrGQawArBViiuwRmIsXDTCCKzkdNMkuxy8g/FgAAZa8ASfTRPaoX/OJx8QRmCllJeDp6IBIEPCM0T8qb3JBRb0IIzASsEUFz0DgEwJJjmANR5KtWiEEVgpkOZy8AAw0DwrsIaVUZHFVaLHWvAwvRDCCKwUdGfTpHYcNzeAgeaUM4nqSm53c0RBRh0rGmEEVgqkOJvGxnv7ncMd+szyZj24YVe2TwVAP3JaRvocvCop0Lt6I93NiRFGYKVgmrNpbPKjx17V+jcP6Kbfv5DtUwHQj5wwkqgbOV7LSOT3AWEElkp3OXibHDzale1TADAAnMpSZPEUuRJ0/JaR3C3XBgphBFYKpDmbxqaahkWnAqAfhcJIgpaRhANY4SCMwEqpzqaxsQGFJ3KiEHQHgvrdhl3adeBotk8lY5JpuTVG6u6tVBXncHdzphRn+wSAWPJhnRGnwALy2Yr/fUN3rHlFZcV+bfv2pdk+nYxwKkuRISNqam+ccszGylO20TICK6X61F4bhS+MtHX3IS197FUd7wpk8YyA/vf4K3slSR3dhZO+3Qd5JqgsMYA1ebSMwErpPrXXpns7/Fz+9SdPS+oZ4HZ9w+nZOSFgABzrLLyAneyYtngDWGkYiZbD9U7ks2SnzjkiR7HbIFat5+U9bZk/EWAAHS3gMNJn+WRSXy+pkBFGYKV0Z9PY5LGXW6O20VeMfFOIYSQYb8xIxH5uOcZy8AkRRmAl5yYuLsrNv97OwLVIhBHkm0IcB5V8N03ssSWUA9EII7BSV5rLKNsynba9ozvmdhu7k4ATUUgDVx3dbshIbr94U3stKa6sQBiBdQJB496kJYnu9l621TTajrH6KgpDV6DwwojTTdPnOiMyebFEQaYQRmCd7rAFOopytJvmEGEEBSIQp0syn8XrpolckbU73n60kEYhjMA63YFQ4ZZsy4ht4j6XhjIIeaa7AMNIsi0ewQSzaQrvnYsvN0t65LXwMJKrA1jjDerLzasBEK47yXWQ4regDMx55TLCCKwT3k2T7DMdbLu5g4xMA/KWuxx8H5UlYxJP7UUIYQTWCe9nTfRUTFvFCyO5ej0AQoJxQkbcdUbiVaqotLgII7COMzo/l0egx5tgkLtXBMCR7LNp4g5gpSCIQhiBdZwbvSSNMGJLRSMQt2UkwycCoN8l2/3CANbkEUZgna6A0x+b/K+nbVPl4q3ACuQTWxYZzLRknjljFBqMn8utvJlCGIF1nGe65PJaHQxgRSGIXH21UMJJsrNk4i2OZlvlyQaEEVjnzr+8ku1TOGHxFoKiCEI+6ejyhpFCWQAtUfeL40hHzxT/8tKimK8XSHZLCmEEVuk+waWlbXkKJrNpUAg6ur3r6RTK
AmhO90tf64y0HDqul/a0SZKGV5R6X6QYiEIYgVWef+tQWsfZ9jee2TQoBMcLvGWkr3WQPvrjp9zPh1eUDPg55TrCCKxy6Fhntk+hX8SbTQPkk+MF2jLiVDYip/bGa/kcFtky0qtQxtgkgzACqzh9rLku7mwamkaQRyLHjJxoN2uuCCTx1N5wQ8uKPV9TDEQjjMAqRzu7T+h4WyoaTjPulHFVnu2Mokc+iWwZKZhuGmc2TZLPzkq0OBoII7DM0c5Q4fbgtfVZPJMT4xTKZSXeUfS2jW0BTkTkAyFztZumte24fvHUdrUcOp7U/t399MyZ3Hy3BkZx4l2AzHHCyBXTavW+uhFZPpv0OS0j5SWxp/QB+SAfBrB2B4KacUeTJGnZE69r88JLkjpGkkpSWJgxHLPqotEyAqs43TSD4szL7y8d3YEB7d92vnV5MbcY8lfk1N6uHBwz8trew+7n7x5NbqHFzt7F3kqLT7BlJPey24ChpIRVnAGsg8vSCyPJ3NzHuwJ637cf07/+5Om0fkYynJaRyFBFfQj5JB9aRrbvO+L5OpkZLl0n2jKS1lH5jTACqxw40jO1N2qRoARSafbcuvuQ2o5365WW9gGbWucUyhWlRZ5BrLTOIp/kw5iRyEHz+9o7Eh7T2bvoWbphBNF4J2EVJ4yMHJJaGEmF8yA+aeBqck7LiN/n039+7Gx3O7NpkE8in02Tiy0jxyIC1da3Ey+82FfLSCoVjtx7twYOYQRWeac3jIwYXDZgP6M7GCpAOweojzsY9iCtUsaNIE9Ftozk4piRY50RYWR3W8JjnOssTXJqbyRaSKNRSsIqe9t6ptaNSrNlJFFN43hXwLOwWmf3wBSegbCWkVKacpGnOrpyf52RyJaRl95OIYwkUdG4oeH09E6swFBKwhoHj3a6LSN1IwendGwyFY3uQFAzlz6pa3+z0d02UC0jzrct8vtUU1XubvdzxyGPRHbT5OKYESeM1I4YJEla+/cW/d9nd/R5TGd3/DEj4WXRrZedqRsa3hu1j3Nc5GykQkbRCGvseOeoJKmmslyDy9JfAqe17bgOHYueorfn0HH3ZzgGqmUk/BHjQ8tL9NHJNb2v0D6L/BHZTZOTLSO93TThFaDbHt6qtVtb4h7T15iRqkGhh+JVV5ZHvS6FHpz3vbXb0h5Ef7wroINH8+NZXhJhBBZpO94TIIYPTn/w6qFjXfrnH6zTP/9gXdSNelfTa1H7hw9m7U/OmBGnb/jsMVWe7UA+iJzam8tjRsYNr/Bsv/Y3G3XHmpdjBiw3jMTophk1JDTeLTyYhAsPMdv3H4m5T1/uf26HJt62Vud+61E1/m5LXjxwjzACaxw+3jPFLvKhUslwHv1w+59f0pHOgN450qkNb77r2ef3G9+KOm6gx4w4y0U7oSSYB4UG7GaM0Zfv36iPLH1SrW3JLW+eriMR02JzsWXEWfX59NFDdNFpozyv3fPkdq1+cU/UMe6iZzEGsJ40NBRGKuOEkfCW210HjsbcJ56O7oBu+cNW9+uHNu3W3yLKulyUVhhZtmyZ6urqVF5erhkzZmj9+vV97v/ggw9q4sSJKi8v1+TJk7VmzZq0Thb5rb2jp2BLZ8GzCaOGRG072HvDG2N0z5P/iHncQNXkwmfTSKFQkoNlNXJMa1uH1rzYolda2vXXbfsG9GdFPsslF8eMOC2ywypK9Jsvzoh6/Su/3axDESuzdvbRTRM+Rixey0j4oy7eTLFl5LXWw1Hb3s2D7pqUw8iqVavU2NioRYsWadOmTZoyZYpmzpypvXv3xtz/mWee0ezZs/WFL3xBmzdv1uWXX67LL79cW7dujbk/CpfTMjKkPPYN3JfzTxkWte35XQe15H+2acKCNbpjzSsxj4scgNdfwmfThP+bD82psFdnd1DPvfGO+/Xe9oFtGdnTG0aKe0N3LraMtPVWWir7KHfW/n2PPr7sf3X6LWu0852j6uiKH0beWz3U/XxYnDAya8oYVVf2tKC8
+U5qLSP7DkcvyvbHLW/r3SO5HUhSDiNLlizRNddco7lz5+qss87S8uXLVVFRoRUrVsTc/6677tJHPvIR3XTTTTrzzDN1++236/zzz9dPf/rTEz555JfDvS0jQ9Lopjlv/PCobf/32R368eOv93ncgHXThM2mkULdNAHCyIDoCgS159AxHe3s1rf+9JI++P0n9OJbh9zw19kdzIsg+M7hDjX+bosu/t7j+sVT292p8I5bH35R16/c4n798Ja3B+xcOroD2nPomCRp/Mie8RaJWhq7A9n5fzh0tEuL//KytrW0R7/WG0aqegeVPvW1D0Xtc/P/e1HP7zqoroDR+7//hNa/eUCSdOpJ0bP+/nniaA0uLdInzx8Xd/xbkd+nxn/pmWWzdXfiRdbC7Y+xQuzqF/fomv/akNL3cQSCJmqtlWxIqdTv7OzUxo0btWDBAneb3+9XQ0ODmpubYx7T3NysxsZGz7aZM2fq4YcfjvtzOjo61NEResPb2hLP+07Hz//6D+16tyeVOitj2roYjaWnFcWo5/kwRsZ9ToyzTeHbYuyzaUdPv+dJaawxMnbYIH3vU+eotMivHe8c1Y8eezXuvov/bbIWPPSiJGneA5s0Y8IIlRb7VVbsV3lJkXtuofMMf+ZN39fgfP7s9p7aqTOWxWkZ2bzzoBY89IIkn3y+nv/Xnn99nt+98P/vvpa6T6Vgj7dnX9+iOxjU8a6gysIG6kXubyK+c5HfL7+vp0vKEwbU8z4U+3061hXQgSM9U7lLi/167+gh8vt87vvpfMdQzbNne9AYFfn9nvfKGGnV33ZGdYHN+unTKvL7PLX1M6qHauLJQ1VW7Jff1/t/4PMpEDDqDpqodSP8vlCgjCfdv61GRvvaO9QVMBo2qETFRT73e3V0B3WsK6Bhg0rUFQjq3aNd2n+4Q38PWwPj26tf1rdXv6y6kRU6a0ylWg4d16adBz0/4/W9hzXv/k0aOaQ07PfURPxeh/2uh12Lz9fzh8rnk8qKi2Rk1NkdlE8+dXQHtLe9Q0EjDS0v1thhg7R93xHd+vBWbdzxroLGuP//x7uC2nngqHa8c0T7D3fq1JMG65yxVb3vv09+n/cecP5Pwu8Nf+825/cpspyJdy1GPefhjBf7+V+367LJJ2tQaZFKinzq6A66rTtOl0rtiAotmnWWvvmnl/r8/7v49FHuwPRw760equcXXaLiBOsLXXz6SfL7pA073tX8BzZpeEWppzzx+eQpW5zr+tPzPQHzk+eP09hh5W6Fa8OOdzX5Px9RZXmJRleWqaTIr0DQqKaqXEPLit37x6j3397/n3Wv7tPh492aMGqw/t+XL+izhWggpRRG9u/fr0AgoOrqas/26upqvfJK7GbwlpaWmPu3tMSfNrV48WJ985vfTOXU0rL27y3aHHHzIruGlhXrszNOSevYz0yrldQzIOylPYdUUVqsEYNLNbFmqCaNrdKDG97SoFK/Zk8fr7VbW/TXV/fpwJFO/aWPKXwnwu+Tzh7bU1gNH9xzg+88cFQ716fWLFsInt91cEC+b2S3wbbWdm1rja4d57I33znaZ1N/rAGY/WnqKcPdP+Ttx7v1X819r9Gxfd+RqIfTZVKs96OkyKfRYQNP5144QXMvnKC71/1D313r/dtWWuzXdR94jz53QV3cn5EoiEjSmGGD9JFJNVrzYov+/ELq/0cXnjZSl046Wa1tHVq1YZeknve//Xi3dh88lvL3e23vYW3eeVAfeO9JKR/bH9JfzGEALViwwNOa0tbWptra2n7/Of/+vlp94L0nRaVPGWNXE0kfVS8jC1tNwmo1UliNJ3xbxCwTZ5+LThvlGQCWjtoRFfr5VdOiti+cdZb7+Z2fnKxHX2pVZ3dQpcV+dXYH1dEd7FlRMs7593weqrmFXa6nZc2nngf9XXDaSHe64KWTTtaxzqAOHOlwa3Ju7USK+j+O9T8e79cy2f//Pn9XYn3j3nMqKynytMDEaqlxNhkjdQec8TKh
7SVFfhX5feoOGgWCRqVFfo0cUqpBJUV6691j6ugOeH4nfOppJXG+l0+Sv7eFojsQ+V4ZjRhcqqmnDFdHd1BHOro1vKJU/9h3WO8e6ZSRNGlslfa1d6jl0HF1dAd733untaW3BaSop4UkXNBIgWBwYMoD03Pe5SVFOnisy22FkHrGYJQW+XW0K6Biv0/DKkolI501plKnjR6iIn9PK8rOA0e1cccBHesMaGh5icYNH6TqynK92tquU08arCde2af2jm4ZY3r+7533OOx9dn5n3fc+YnxTINjzHvS0YvS8VlFapKpBJaqqKFH9e0bqeGdAk8ZUqe14l9va4e+9j/x+nyrLi1VTNUilxX79Y+9h93c/aIwCJry10WlRCbVqKKw2H3kvht+rfV1LWbFfg0qLdPBol8qK/eoKGHUFelr8yor9mjS2quc9jnDdB9+j//in8Xr8lb16/+knndCyA7F8/1NT9LEpY7XzwBF3Zejweym8DAp/rbqyTJ84b6x8Pp+++6lz9K3Lz9Z/b3lbf3vjgKbVDdeQshIdONopv0/q6ArqaGd3T2tTWEuT0zLYFTDaffCopp0yQpPGVPbr9aUipTAyatQoFRUVqbW11bO9tbVVNTU1MY+pqalJaX9JKisrU1nZwD2bxHHF+8YP+M+AnU6uGqSr6+sy9vPKS4r02Rn8vmXSpLHRTej55rTRQ3Ta6OiZZHWjBve+PjTqtYFQWV6ia95/alL7Zqvmna6h5SX6+LljB+R7Dy4r1kcmxf9bmKyy4iJ9Zlqt2zqci1IawFpaWqqpU6eqqanJ3RYMBtXU1KT6+vqYx9TX13v2l6RHH3007v4AAKCwpNxN09jYqDlz5mjatGmaPn26li5dqiNHjmju3LmSpKuvvlpjx47V4sWLJUnXX3+9PvCBD+iHP/yhLrvsMq1cuVIbNmzQPffc079XAgAAclLKYeSKK67Qvn37tHDhQrW0tOjcc8/V2rVr3UGqO3fulD/saWAXXHCBHnjgAd166636xje+odNPP10PP/ywJk2a1H9XAQAAcpbP5MDk+7a2NlVVVenQoUOqrMzeABsAAJC8ZP9+82waAACQVYQRAACQVYQRAACQVYQRAACQVYQRAACQVYQRAACQVYQRAACQVYQRAACQVYQRAACQVSkvB58NziKxbW1tWT4TAACQLOfvdqLF3nMijLS3t0uSamtz9/HIAAAUqvb2dlVVVcV9PSeeTRMMBvX2229r6NCh8vl8/fZ929raVFtbq127dhXsM28K/T0o9OuXeA8K/fol3gOuf+Cu3xij9vZ2jRkzxvMQ3Ug50TLi9/s1bty4Afv+lZWVBfkLGK7Q34NCv36J96DQr1/iPeD6B+b6+2oRcTCAFQAAZBVhBAAAZFVBh5GysjItWrRIZWVl2T6VrCn096DQr1/iPSj065d4D7j+7F9/TgxgBQAA+augW0YAAED2EUYAAEBWEUYAAEBWEUYAAEBWFXQYWbZsmerq6lReXq4ZM2Zo/fr12T6lfrF48WK9733v09ChQzV69Ghdfvnl2rZtm2ef48ePa968eRo5cqSGDBmiT37yk2ptbfXss3PnTl122WWqqKjQ6NGjddNNN6m7uzuTl9Iv7rzzTvl8Pt1www3utny//t27d+s//uM/NHLkSA0aNEiTJ0/Whg0b3NeNMVq4cKFOPvlkDRo0SA0NDXrttdc83+PAgQO68sorVVlZqWHDhukLX/iCDh8+nOlLSUsgENBtt92mCRMmaNCgQXrPe96j22+/3fN8jHx7D5588knNmjVLY8aMkc/n08MPP+x5vb+u94UXXtDFF1+s8vJy1dbW6nvf+95AX1pS+rr+rq4u3XzzzZo8ebIGDx6sMWPG6Oqrr9bbb7/t+R75ev2Rrr32Wvl8Pi1dutSzPavXbwrUypUrTWlpqVmxYoX5+9//bq655hozbNgw09ramu1TO2EzZ840v/rVr8zWrVvNli1bzEc/+lEzfvx4c/jwYXef
a6+91tTW1pqmpiazYcMG80//9E/mggsucF/v7u42kyZNMg0NDWbz5s1mzZo1ZtSoUWbBggXZuKS0rV+/3tTV1ZlzzjnHXH/99e72fL7+AwcOmFNOOcV87nOfM88995zZvn27eeSRR8zrr7/u7nPnnXeaqqoq8/DDD5vnn3/efOxjHzMTJkwwx44dc/f5yEc+YqZMmWKeffZZ89RTT5nTTjvNzJ49OxuXlLLvfOc7ZuTIkebPf/6zeeONN8yDDz5ohgwZYu666y53n3x7D9asWWNuueUW89BDDxlJ5g9/+IPn9f643kOHDpnq6mpz5ZVXmq1bt5rf/va3ZtCgQebnP/95pi4zrr6u/+DBg6ahocGsWrXKvPLKK6a5udlMnz7dTJ061fM98vX6wz300ENmypQpZsyYMeZHP/qR57VsXn/BhpHp06ebefPmuV8HAgEzZswYs3jx4iye1cDYu3evkWT++te/GmN6bsySkhLz4IMPuvu8/PLLRpJpbm42xvT8Yvv9ftPS0uLuc/fdd5vKykrT0dGR2QtIU3t7uzn99NPNo48+aj7wgQ+4YSTfr//mm282F110UdzXg8GgqampMd///vfdbQcPHjRlZWXmt7/9rTHGmJdeeslIMn/729/cff7yl78Yn89ndu/ePXAn308uu+wy8/nPf96z7d/+7d/MlVdeaYzJ//cg8o9Rf13vz372MzN8+HDPPXDzzTebM844Y4CvKDV9/TF2rF+/3kgyO3bsMMYUxvW/9dZbZuzYsWbr1q3mlFNO8YSRbF9/QXbTdHZ2auPGjWpoaHC3+f1+NTQ0qLm5OYtnNjAOHTokSRoxYoQkaePGjerq6vJc/8SJEzV+/Hj3+pubmzV58mRVV1e7+8ycOVNtbW36+9//nsGzT9+8efN02WWXea5Tyv/r/+Mf/6hp06bp05/+tEaPHq3zzjtP9957r/v6G2+8oZaWFs/1V1VVacaMGZ7rHzZsmKZNm+bu09DQIL/fr+eeey5zF5OmCy64QE1NTXr11VclSc8//7yefvppXXrppZIK4z0I11/X29zcrPe///0qLS1195k5c6a2bdumd999N0NX0z8OHTokn8+nYcOGScr/6w8Gg7rqqqt000036eyzz456PdvXX5BhZP/+/QoEAp4/NJJUXV2tlpaWLJ3VwAgGg7rhhht04YUXatKkSZKklpYWlZaWujehI/z6W1paYr4/zmu2W7lypTZt2qTFixdHvZbv1799+3bdfffdOv300/XII4/ouuuu01e+8hX9+te/lhQ6/75+/1taWjR69GjP68XFxRoxYoT11y9JX//61/Xv//7vmjhxokpKSnTeeefphhtu0JVXXimpMN6DcP11vbl8X4Q7fvy4br75Zs2ePdt9MFy+X/93v/tdFRcX6ytf+UrM17N9/Tnx1F6kb968edq6dauefvrpbJ9KxuzatUvXX3+9Hn30UZWXl2f7dDIuGAxq2rRpuuOOOyRJ5513nrZu3arly5drzpw5WT67zPjd736n+++/Xw888IDOPvtsbdmyRTfccIPGjBlTMO8BYuvq6tJnPvMZGWN09913Z/t0MmLjxo266667tGnTJvl8vmyfTkwF2TIyatQoFRUVRc2eaG1tVU1NTZbOqv/Nnz9ff/7zn/XEE09o3Lhx7vaamhp1dnbq4MGDnv3Dr7+mpibm++O8ZrONGzdq7969Ov/881VcXKzi4mL99a9/1Y9//GMVFxeruro6r6//5JNP1llnneXZduaZZ2rnzp2SQuff1+9/TU2N9u7d63m9u7tbBw4csP76Jemmm25yW0cmT56sq666SjfeeKPbUlYI70G4/rreXL4vpFAQ2bFjhx599FG3VUTK7+t/6qmntHfvXo0fP94tE3fs2KGvfvWrqqurk5T96y/IMFJaWqqpU6eqqanJ3RYMBtXU1KT6+vosnln/MMZo/vz5+sMf/qDHH39cEyZM8Lw+depUlZSUeK5/27Zt2rlzp3v9
9fX1evHFFz2/nM7NG/mHzjYf/vCH9eKLL2rLli3ux7Rp03TllVe6n+fz9V944YVRU7lfffVVnXLKKZKkCRMmqKamxnP9bW1teu655zzXf/DgQW3cuNHd5/HHH1cwGNSMGTMycBUn5ujRo/L7vcVbUVGRgsGgpMJ4D8L11/XW19frySefVFdXl7vPo48+qjPOOEPDhw/P0NWkxwkir732mh577DGNHDnS83o+X/9VV12lF154wVMmjhkzRjfddJMeeeQRSRZc/wkPgc1RK1euNGVlZea+++4zL730kvnSl75khg0b5pk9kauuu+46U1VVZdatW2f27Nnjfhw9etTd59prrzXjx483jz/+uNmwYYOpr6839fX17uvO1NZLLrnEbNmyxaxdu9acdNJJOTG1NZbw2TTG5Pf1r1+/3hQXF5vvfOc75rXXXjP333+/qaioML/5zW/cfe68804zbNgw89///d/mhRdeMB//+MdjTvM877zzzHPPPWeefvppc/rpp1s7rTXSnDlzzNixY92pvQ899JAZNWqU+drXvubuk2/vQXt7u9m8ebPZvHmzkWSWLFliNm/e7M4W6Y/rPXjwoKmurjZXXXWV2bp1q1m5cqWpqKiwYmprX9ff2dlpPvaxj5lx48aZLVu2eMrF8Jkh+Xr9sUTOpjEmu9dfsGHEGGN+8pOfmPHjx5vS0lIzffp08+yzz2b7lPqFpJgfv/rVr9x9jh07Zr785S+b4cOHm4qKCvOJT3zC7Nmzx/N93nzzTXPppZeaQYMGmVGjRpmvfvWrpqurK8NX0z8iw0i+X/+f/vQnM2nSJFNWVmYmTpxo7rnnHs/rwWDQ3Hbbbaa6utqUlZWZD3/4w2bbtm2efd555x0ze/ZsM2TIEFNZWWnmzp1r2tvbM3kZaWtrazPXX3+9GT9+vCkvLzennnqqueWWWzx/ePLtPXjiiSdi3vdz5swxxvTf9T7//PPmoosuMmVlZWbs2LHmzjvvzNQl9qmv63/jjTfilotPPPGE+z3y9fpjiRVGsnn9PmPCliQEAADIsIIcMwIAAOxBGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFlFGAEAAFn1/wO8qr64QVpnQQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from torch.nn.functional import sigmoid\n",
    "\n",
    "plt.clf()\n",
    "plt.plot(sigmoid(out.logits[:, out.rmt_logits_masks[0, :].bool(), :][0, :, 4][:1400]))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([6.8392e-05, 4.8174e-05, 6.2841e-05, 3.9234e-05, 4.7705e-05, 4.8060e-05,\n",
       "        4.7575e-05, 4.1978e-05, 4.6392e-05, 3.9365e-05])"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sigmoid(out.logits[:, out.rmt_logits_masks[0, :].bool(), :][0, :, 4][:10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([-0.7579, -0.4074, -0.3354,  ...,  0.5541,  0.1620, -0.6756])"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out.logits[0, :, 4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(19564)"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer([seq[1:]], return_tensors='pt')['input_ids'][0, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CTTG'"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.decode(19564)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "import h5py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_file = h5py.File('/home/jovyan/shares/SR003.nfs2/mane/main_transcript_train_dataset.hdf5', \"r\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "32283"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(list(dataset_file.keys()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_ids = np.array(dataset_file[f'transcript_{0}'][\"input_ids\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([    1,  3948,   194, ...,   753, 10660,     2])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20906,)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_ids.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "token_type_ids = np.zeros(input_ids.shape[0])\n",
    "attention_mask = (input_ids != 3).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_features = {\"input_ids\": torch.tensor(input_ids.reshape(1, -1)),\n",
    " \"token_type_ids\": torch.tensor(token_type_ids.reshape(1, -1)),\n",
    " \"attention_mask\": torch.tensor(attention_mask.reshape(1, -1))}\n",
    "input_features['labels'] = torch.randint(0, 6, (input_features['input_ids'].shape[1], 6)).unsqueeze(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "ename": "UnboundLocalError",
     "evalue": "local variable 'out' referenced before assignment",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mUnboundLocalError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minput_features\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/dnalm/my_saved_conda_envs/gena/lib/python3.9/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/dnalm/my_saved_conda_envs/gena/lib/python3.9/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[0;32m~/dnalm/src/gena_lm/modeling_rmt.py:230\u001b[0m, in \u001b[0;36mRMTEncoderForTokenClassification.forward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, labels_mask, pos_weight, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m    228\u001b[0m \u001b[38;5;66;03m# drop unnecessary hiddens to save memory\u001b[39;00m\n\u001b[1;32m    229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124moutput_hidden_states\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m--> 230\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m \u001b[43mout\u001b[49m\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m    231\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhidden_state\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m key:\n\u001b[1;32m    232\u001b[0m             out[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'out' referenced before assignment"
     ]
    }
   ],
   "source": [
    "out = model(**input_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gena_ipynb",
   "language": "python",
   "name": "gena_ipynb"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
