{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# Pinned to the versions recorded in this notebook's last run (see the pip log below)\n# so a fresh kernel resolves the same APIs, e.g. `datasets.load_metric`.\n%pip install datasets==2.1.0 transformers==4.18.0 seqeval==1.2.2\nimport os\nimport math\nimport random\nimport csv\nimport sys\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn import metrics\nfrom sklearn.metrics import f1_score, precision_score, recall_score\nfrom sklearn.metrics import classification_report\nimport statistics as stats\nfrom datasets import load_dataset, load_metric\n\nfrom transformers import AutoModelForTokenClassification, TrainingArguments, Trainer\nfrom transformers import AutoTokenizer\n\nimport random \nimport numpy as np\nimport torch\n\ndef random_seed(seed_value):\n    \"\"\"Seed NumPy, PyTorch and Python's `random` module with `seed_value`.\"\"\"\n    np.random.seed(seed_value)\n    torch.manual_seed(seed_value)\n    random.seed(seed_value)\n\n\n# Seed once, before any model weights are initialised.\nrandom_seed(42)","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2022-06-15T19:58:22.849733Z","iopub.execute_input":"2022-06-15T19:58:22.850162Z","iopub.status.idle":"2022-06-15T19:58:47.681975Z","shell.execute_reply.started":"2022-06-15T19:58:22.850083Z","shell.execute_reply":"2022-06-15T19:58:47.681169Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Requirement already satisfied: datasets in /opt/conda/lib/python3.7/site-packages (2.1.0)\nRequirement already satisfied: transformers in /opt/conda/lib/python3.7/site-packages (4.18.0)\nCollecting seqeval\n  Downloading seqeval-1.2.2.tar.gz (43 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m524.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m 
\u001b[36m0:00:01\u001b[0m\n\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: pyarrow>=5.0.0 in /opt/conda/lib/python3.7/site-packages (from datasets) (5.0.0)\nRequirement already satisfied: dill in /opt/conda/lib/python3.7/site-packages (from datasets) (0.3.5.1)\nRequirement already satisfied: aiohttp in /opt/conda/lib/python3.7/site-packages (from datasets) (3.8.1)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.7/site-packages (from datasets) (21.3)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from datasets) (1.3.5)\nRequirement already satisfied: importlib-metadata in /opt/conda/lib/python3.7/site-packages (from datasets) (4.11.4)\nRequirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.7/site-packages (from datasets) (4.64.0)\nRequirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.7/site-packages (from datasets) (2.27.1)\nRequirement already satisfied: fsspec[http]>=2021.05.0 in /opt/conda/lib/python3.7/site-packages (from datasets) (2022.5.0)\nRequirement already satisfied: responses<0.19 in /opt/conda/lib/python3.7/site-packages (from datasets) (0.18.0)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.7/site-packages (from datasets) (3.0.0)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.7/site-packages (from datasets) (0.70.13)\nRequirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /opt/conda/lib/python3.7/site-packages (from datasets) (0.5.1)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.7/site-packages (from datasets) (1.21.6)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers) (2021.11.10)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from transformers) (3.6.0)\nRequirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in 
/opt/conda/lib/python3.7/site-packages (from transformers) (0.12.1)\nRequirement already satisfied: sacremoses in /opt/conda/lib/python3.7/site-packages (from transformers) (0.0.53)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.7/site-packages (from transformers) (6.0)\nRequirement already satisfied: scikit-learn>=0.21.3 in /opt/conda/lib/python3.7/site-packages (from seqeval) (1.0.2)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (4.2.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging->datasets) (3.0.9)\nRequirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.7/site-packages (from requests>=2.19.0->datasets) (2.0.12)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests>=2.19.0->datasets) (2022.5.18.1)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests>=2.19.0->datasets) (1.26.9)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests>=2.19.0->datasets) (3.3)\nRequirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.21.3->seqeval) (1.1.0)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.21.3->seqeval) (3.1.0)\nRequirement already satisfied: scipy>=1.1.0 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.21.3->seqeval) (1.7.3)\nRequirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets) (21.4.0)\nRequirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets) (4.0.2)\nRequirement already satisfied: aiosignal>=1.1.2 in 
/opt/conda/lib/python3.7/site-packages (from aiohttp->datasets) (1.2.0)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets) (1.7.2)\nRequirement already satisfied: asynctest==0.13.0 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets) (0.13.0)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets) (1.3.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets) (6.0.2)\nRequirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata->datasets) (3.8.0)\nRequirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.7/site-packages (from pandas->datasets) (2.8.2)\nRequirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.7/site-packages (from pandas->datasets) (2022.1)\nRequirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (1.16.0)\nRequirement already satisfied: click in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (8.0.4)\nBuilding wheels for collected packages: seqeval\n  Building wheel for seqeval (setup.py) ... \u001b[?25ldone\n\u001b[?25h  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16180 sha256=5091b97fbfa49e1e7f56046e98082b9552fe7a2b4d9849be379247d12a4a6c19\n  Stored in directory: /root/.cache/pip/wheels/05/96/ee/7cac4e74f3b19e3158dce26a20a1c86b3533c43ec72a549fd7\nSuccessfully built seqeval\nInstalling collected packages: seqeval\nSuccessfully installed seqeval-1.2.2\n\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n\u001b[0m","output_type":"stream"}]},{"cell_type":"code","source":"os.environ[\"WANDB_DISABLED\"] = \"true\"","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:58:47.683835Z","iopub.execute_input":"2022-06-15T19:58:47.684521Z","iopub.status.idle":"2022-06-15T19:58:47.690879Z","shell.execute_reply.started":"2022-06-15T19:58:47.684484Z","shell.execute_reply":"2022-06-15T19:58:47.690030Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"ncbi = load_dataset(\"jnlpba\")","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:58:47.692225Z","iopub.execute_input":"2022-06-15T19:58:47.692591Z","iopub.status.idle":"2022-06-15T19:59:08.825200Z","shell.execute_reply.started":"2022-06-15T19:58:47.692545Z","shell.execute_reply":"2022-06-15T19:59:08.824401Z"},"trusted":true},"execution_count":3,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading builder script:   0%|          | 0.00/2.08k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2e637e7e09094187a2e85fd9a37d79bb"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading metadata:   0%|          | 0.00/1.20k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"fa3569f5b6e5446fac83b02e8bd1b27c"}},"metadata":{}},{"name":"stdout","text":"Downloading and preparing dataset jnlpba/jnlpba (download: 3.02 MiB, generated: 20.19 MiB, post-processed: Unknown size, total: 23.21 MiB) to /root/.cache/huggingface/datasets/jnlpba/jnlpba/1.0.0/3062f220823930cffde7976b694aa67bac3b06c322a02ced92d3761519810ce4...\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/2.31M [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"bc8f41da05964229866bf420b5fbb872"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/863k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a2150082d3a84a29b6e42151bd0e6a11"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split:   0%|          | 0/37094 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating validation split:   0%|          | 0/7714 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"name":"stdout","text":"Dataset jnlpba downloaded and prepared to /root/.cache/huggingface/datasets/jnlpba/jnlpba/1.0.0/3062f220823930cffde7976b694aa67bac3b06c322a02ced92d3761519810ce4. 
Subsequent calls will reuse this data.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"  0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"eef01641cbae470ea9b8967494fff430"}},"metadata":{}}]},{"cell_type":"code","source":"ncbi[\"train\"][0]","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:08.827373Z","iopub.execute_input":"2022-06-15T19:59:08.827938Z","iopub.status.idle":"2022-06-15T19:59:08.838379Z","shell.execute_reply.started":"2022-06-15T19:59:08.827901Z","shell.execute_reply":"2022-06-15T19:59:08.837299Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"{'id': '1',\n 'tokens': ['IL-2',\n  'gene',\n  'expression',\n  'and',\n  'NF-kappa',\n  'B',\n  'activation',\n  'through',\n  'CD28',\n  'requires',\n  'reactive',\n  'oxygen',\n  'production',\n  'by',\n  '5-lipoxygenase',\n  '.'],\n 'ner_tags': [1, 2, 0, 0, 9, 10, 0, 0, 9, 0, 0, 0, 0, 0, 9, 0]}"},"metadata":{}}]},{"cell_type":"code","source":"label_list = ncbi[\"train\"].features[f\"ner_tags\"].feature.names\nprint(len(label_list))\nlabel_list","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:08.839957Z","iopub.execute_input":"2022-06-15T19:59:08.840561Z","iopub.status.idle":"2022-06-15T19:59:08.848762Z","shell.execute_reply.started":"2022-06-15T19:59:08.840525Z","shell.execute_reply":"2022-06-15T19:59:08.847917Z"},"trusted":true},"execution_count":5,"outputs":[{"name":"stdout","text":"11\n","output_type":"stream"},{"execution_count":5,"output_type":"execute_result","data":{"text/plain":"['O',\n 'B-DNA',\n 'I-DNA',\n 'B-RNA',\n 'I-RNA',\n 'B-cell_line',\n 'I-cell_line',\n 'B-cell_type',\n 'I-cell_type',\n 'B-protein',\n 'I-protein']"},"metadata":{}}]},{"cell_type":"code","source":"MODEL_NAME = 
\"AnonymousSub/fpdm_models_scibert_hybrid_epochs_4\"","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:08.850259Z","iopub.execute_input":"2022-06-15T19:59:08.851020Z","iopub.status.idle":"2022-06-15T19:59:08.855724Z","shell.execute_reply.started":"2022-06-15T19:59:08.850985Z","shell.execute_reply":"2022-06-15T19:59:08.853929Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:08.857153Z","iopub.execute_input":"2022-06-15T19:59:08.857651Z","iopub.status.idle":"2022-06-15T19:59:13.332276Z","shell.execute_reply.started":"2022-06-15T19:59:08.857614Z","shell.execute_reply":"2022-06-15T19:59:13.331405Z"},"trusted":true},"execution_count":7,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/427 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6481700a0245468fa16d105b278aef16"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/223k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b16aeb7872234fa1ace27cfa7e3615fc"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d655fe82cef8430f8c5135e78de7b2ea"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"def0f04828304e05b722e66a61ab4b89"}},"metadata":{}}]},{"cell_type":"code","source":"label_all_tokens = 
True","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:13.333645Z","iopub.execute_input":"2022-06-15T19:59:13.334013Z","iopub.status.idle":"2022-06-15T19:59:13.338742Z","shell.execute_reply.started":"2022-06-15T19:59:13.333976Z","shell.execute_reply":"2022-06-15T19:59:13.337219Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"def tokenize_and_align_labels(examples, max_length=512):\n    \"\"\"Tokenize pre-split words and align word-level NER tags to sub-word tokens.\n\n    Args:\n        examples: batch mapping with \"tokens\" (one list of words per example)\n            and \"ner_tags\" (one list of integer tag ids per example).\n        max_length: truncation limit passed to the tokenizer. Without it the\n            tokenizer reported that no maximum length is known and truncated\n            nothing. Defaults to 512, presumably the encoder's position limit -\n            confirm against the checkpoint config.\n\n    Returns:\n        The tokenizer output with an added \"labels\" entry: one list per example\n        holding -100 for special tokens and a tag id for every other token\n        (or -100 for continuation pieces when `label_all_tokens` is false).\n    \"\"\"\n    tokenized_inputs = tokenizer(\n        examples[\"tokens\"],\n        truncation=True,\n        max_length=max_length,\n        is_split_into_words=True,\n    )\n\n    # A continuation piece must not repeat its word's B- tag, otherwise every\n    # piece of a split word opens a new entity. Map each B-X id to the I-X id;\n    # ids without an I- counterpart (and \"O\") map to themselves.\n    continuation_id = list(range(len(label_list)))\n    for idx, name in enumerate(label_list):\n        if name.startswith(\"B-\") and \"I-\" + name[2:] in label_list:\n            continuation_id[idx] = label_list.index(\"I-\" + name[2:])\n\n    labels = []\n    for i, label in enumerate(examples[\"ner_tags\"]):\n        word_ids = tokenized_inputs.word_ids(batch_index=i)\n        previous_word_idx = None\n        label_ids = []\n        for word_idx in word_ids:\n            if word_idx is None:\n                # Special tokens have no source word; -100 is ignored by the loss.\n                label_ids.append(-100)\n            elif word_idx != previous_word_idx:\n                # First piece of a word carries the word's own tag.\n                label_ids.append(label[word_idx])\n            elif label_all_tokens:\n                # Later pieces of the same word continue the entity.\n                label_ids.append(continuation_id[label[word_idx]])\n            else:\n                label_ids.append(-100)\n            previous_word_idx = word_idx\n\n        labels.append(label_ids)\n\n    tokenized_inputs[\"labels\"] = labels\n    return 
tokenized_inputs","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:13.340101Z","iopub.execute_input":"2022-06-15T19:59:13.342682Z","iopub.status.idle":"2022-06-15T19:59:13.368104Z","shell.execute_reply.started":"2022-06-15T19:59:13.342643Z","shell.execute_reply":"2022-06-15T19:59:13.367289Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"tokenize_and_align_labels(ncbi['train'][:5])","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:13.370149Z","iopub.execute_input":"2022-06-15T19:59:13.372711Z","iopub.status.idle":"2022-06-15T19:59:13.390674Z","shell.execute_reply.started":"2022-06-15T19:59:13.372683Z","shell.execute_reply":"2022-06-15T19:59:13.389957Z"},"trusted":true},"execution_count":10,"outputs":[{"name":"stderr","text":"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n","output_type":"stream"},{"execution_count":10,"output_type":"execute_result","data":{"text/plain":"{'input_ids': [[102, 1902, 579, 170, 983, 940, 137, 5245, 579, 19685, 132, 2239, 833, 1389, 2557, 2978, 6608, 3858, 1865, 214, 305, 579, 2266, 18368, 205, 103], [102, 2239, 131, 111, 1389, 2557, 1437, 2629, 2315, 106, 1626, 1729, 163, 7655, 1261, 168, 105, 377, 2239, 2429, 121, 4006, 1865, 131, 12577, 579, 170, 145, 1902, 579, 170, 546, 137, 377, 4498, 205, 103], [102, 121, 1916, 105, 9242, 185, 405, 198, 1389, 2557, 16823, 3412, 147, 111, 2857, 5609, 2256, 131, 6608, 3858, 17753, 145, 25726, 546, 334, 220, 1761, 168, 1389, 2557, 579, 6224, 2239, 131, 111, 5245, 579, 19685, 132, 1352, 1389, 2557, 579, 14346, 1127, 137, 1902, 579, 170, 940, 205, 103], [102, 14316, 150, 131, 111, 1389, 2557, 3354, 11448, 241, 797, 147, 3862, 787, 9925, 4655, 1071, 422, 2594, 214, 111, 2239, 131, 26920, 106, 30132, 137, 305, 579, 2266, 18368, 205, 103], [102, 580, 453, 1739, 198, 2266, 18368, 9125, 9808, 16887, 2256, 334, 666, 5073, 1902, 579, 170, 940, 2168, 5245, 
579, 19685, 132, 2239, 205, 103]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[-100, 1, 1, 1, 2, 0, 0, 9, 9, 9, 10, 0, 0, 9, 9, 0, 0, 0, 0, 0, 9, 9, 9, 9, 0, -100], [-100, 0, 0, 0, 9, 9, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 0, 9, 9, 9, 0, 0, 0, 0, 0, -100], [-100, 0, 7, 8, 8, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 9, 9, 9, 10, 0, 9, 9, 9, 9, 10, 0, 9, 9, 9, 0, 0, -100], [-100, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 9, 10, 10, 0, 0, 0, 0, 0, 0, 0, 9, 10, 10, 0, 9, 9, 9, 9, 0, -100], [-100, 0, 0, 0, 0, 9, 9, 10, 0, 0, 0, 0, 0, 0, 9, 9, 9, 0, 0, 9, 9, 9, 10, 0, 0, -100]]}"},"metadata":{}}]},{"cell_type":"code","source":"tokenized_datasets = ncbi.map(tokenize_and_align_labels, 
batched=True)","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:13.391513Z","iopub.execute_input":"2022-06-15T19:59:13.391747Z","iopub.status.idle":"2022-06-15T19:59:26.201420Z","shell.execute_reply.started":"2022-06-15T19:59:13.391725Z","shell.execute_reply":"2022-06-15T19:59:26.200656Z"},"trusted":true},"execution_count":11,"outputs":[{"output_type":"display_data","data":{"text/plain":"  0%|          | 0/38 [00:00<?, ?ba/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e3383bb930164e4e8995204443ed78f2"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"  0%|          | 0/8 [00:00<?, ?ba/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"df905b6a32eb40e986e8ace8611f2cd3"}},"metadata":{}}]},{"cell_type":"code","source":"model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME, num_labels=len(label_list))","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:26.202782Z","iopub.execute_input":"2022-06-15T19:59:26.203156Z","iopub.status.idle":"2022-06-15T19:59:48.247225Z","shell.execute_reply.started":"2022-06-15T19:59:26.203119Z","shell.execute_reply":"2022-06-15T19:59:48.246279Z"},"trusted":true},"execution_count":12,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/706 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"efe35c02b0e44443b88079a4005ad76d"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/419M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"aab85f248f5541d9a4e31a69c6c977d4"}},"metadata":{}},{"name":"stderr","text":"Some weights of BertForTokenClassification were not initialized from the model checkpoint at AnonymousSub/fpdm_models_scibert_hybrid_epochs_4 and are newly initialized: 
['classifier.bias', 'classifier.weight']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"}]},{"cell_type":"code","source":"args = TrainingArguments(\n    \"SCIBERT-VOCAB-UNCASED-finetuned-NER\",\n    evaluation_strategy=\"epoch\",\n    # Checkpoint once per epoch instead of every 500 steps, so each checkpoint\n    # has evaluation metrics and the best-scoring one is restored after training.\n    save_strategy=\"epoch\",\n    load_best_model_at_end=True,\n    metric_for_best_model=\"f1\",\n    learning_rate=2e-5,\n    per_device_train_batch_size=16,\n    per_device_eval_batch_size=32,\n    # Two accumulation steps x batch 16 gives an effective train batch of 32 per device.\n    gradient_accumulation_steps=2,\n    num_train_epochs=4,\n    weight_decay=0.01,\n    seed=42,\n    # Explicit replacement for the deprecated WANDB_DISABLED environment variable.\n    report_to=\"none\",\n)","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:48.248677Z","iopub.execute_input":"2022-06-15T19:59:48.249057Z","iopub.status.idle":"2022-06-15T19:59:48.330307Z","shell.execute_reply.started":"2022-06-15T19:59:48.249019Z","shell.execute_reply":"2022-06-15T19:59:48.329513Z"},"trusted":true},"execution_count":13,"outputs":[{"name":"stderr","text":"Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n","output_type":"stream"}]},{"cell_type":"code","source":"from transformers import DataCollatorForTokenClassification\n\ndata_collator = DataCollatorForTokenClassification(tokenizer)","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:48.331634Z","iopub.execute_input":"2022-06-15T19:59:48.332037Z","iopub.status.idle":"2022-06-15T19:59:48.336188Z","shell.execute_reply.started":"2022-06-15T19:59:48.331996Z","shell.execute_reply":"2022-06-15T19:59:48.335322Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"metric = 
load_metric(\"seqeval\")","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:48.337988Z","iopub.execute_input":"2022-06-15T19:59:48.338629Z","iopub.status.idle":"2022-06-15T19:59:49.157432Z","shell.execute_reply.started":"2022-06-15T19:59:48.338592Z","shell.execute_reply":"2022-06-15T19:59:49.156690Z"},"trusted":true},"execution_count":15,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0ba4d1673c73402d99952d83b96341da"}},"metadata":{}}]},{"cell_type":"code","source":"import numpy as np\n\ndef compute_metrics(p):\n    \"\"\"Score one evaluation pass with the seqeval metric.\n\n    `p` unpacks into (predictions, labels). Predictions are arg-maxed over\n    axis 2 to get one label id per token; positions whose gold label is -100\n    are dropped from both sequences before scoring.\n\n    Returns a dict with the overall precision, recall, f1 and accuracy.\n    \"\"\"\n    scores, gold_ids = p\n    pred_ids = np.argmax(scores, axis=2)\n\n    true_predictions, true_labels = [], []\n    for pred_row, gold_row in zip(pred_ids, gold_ids):\n        # Keep only the positions that carry a real gold label.\n        kept = [(pred, gold) for pred, gold in zip(pred_row, gold_row) if gold != -100]\n        true_predictions.append([label_list[pred] for pred, _ in kept])\n        true_labels.append([label_list[gold] for _, gold in kept])\n\n    results = metric.compute(predictions=true_predictions, references=true_labels)\n    return {\n        name: results[\"overall_\" + name]\n        for name in (\"precision\", \"recall\", \"f1\", \"accuracy\")\n    }","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:49.158544Z","iopub.execute_input":"2022-06-15T19:59:49.158886Z","iopub.status.idle":"2022-06-15T19:59:49.166728Z","shell.execute_reply.started":"2022-06-15T19:59:49.158848Z","shell.execute_reply":"2022-06-15T19:59:49.166003Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"code","source":"trainer = Trainer(\n    model,\n    args,\n    train_dataset=tokenized_datasets[\"train\"],\n    eval_dataset=tokenized_datasets[\"validation\"],\n    
data_collator=data_collator,\n    tokenizer=tokenizer,\n    compute_metrics=compute_metrics\n)","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:49.167932Z","iopub.execute_input":"2022-06-15T19:59:49.168444Z","iopub.status.idle":"2022-06-15T19:59:53.980150Z","shell.execute_reply.started":"2022-06-15T19:59:49.168409Z","shell.execute_reply":"2022-06-15T19:59:53.979394Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"trainer.train()","metadata":{"execution":{"iopub.status.busy":"2022-06-15T19:59:53.981532Z","iopub.execute_input":"2022-06-15T19:59:53.981906Z","iopub.status.idle":"2022-06-15T20:21:46.654326Z","shell.execute_reply.started":"2022-06-15T19:59:53.981867Z","shell.execute_reply":"2022-06-15T20:21:46.651811Z"},"trusted":true},"execution_count":18,"outputs":[{"name":"stderr","text":"The following columns in the training set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.\n/opt/conda/lib/python3.7/site-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n  FutureWarning,\n***** Running training *****\n  Num examples = 37094\n  Num Epochs = 4\n  Instantaneous batch size per device = 16\n  Total train batch size (w. 
parallel, distributed & accumulation) = 32\n  Gradient Accumulation steps = 2\n  Total optimization steps = 4636\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n    <div>\n      \n      <progress value='4636' max='4636' style='width:300px; height:20px; vertical-align: middle;'></progress>\n      [4636/4636 21:51, Epoch 3/4]\n    </div>\n    <table border=\"1\" class=\"dataframe\">\n  <thead>\n <tr style=\"text-align: left;\">\n      <th>Epoch</th>\n      <th>Training Loss</th>\n      <th>Validation Loss</th>\n      <th>Precision</th>\n      <th>Recall</th>\n      <th>F1</th>\n      <th>Accuracy</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>0.185200</td>\n      <td>0.264621</td>\n      <td>0.705324</td>\n      <td>0.818843</td>\n      <td>0.757856</td>\n      <td>0.919291</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>0.131300</td>\n      <td>0.297781</td>\n      <td>0.703232</td>\n      <td>0.816100</td>\n      <td>0.755474</td>\n      <td>0.917926</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>0.095900</td>\n      <td>0.326993</td>\n      <td>0.707896</td>\n      <td>0.818619</td>\n      <td>0.759242</td>\n      <td>0.917990</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>0.069800</td>\n      <td>0.352605</td>\n      <td>0.713525</td>\n      <td>0.817500</td>\n      <td>0.761982</td>\n      <td>0.919115</td>\n    </tr>\n  </tbody>\n</table><p>"},"metadata":{}},{"name":"stderr","text":"Saving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-500\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-500/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-500/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-500/tokenizer_config.json\nSpecial tokens file saved in 
SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-500/special_tokens_map.json\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1000\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1000/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1000/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1000/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1000/special_tokens_map.json\nThe following columns in the evaluation set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.\n***** Running Evaluation *****\n  Num examples = 7714\n  Batch size = 32\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1500\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1500/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1500/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1500/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-1500/special_tokens_map.json\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2000\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2000/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2000/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2000/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2000/special_tokens_map.json\nThe following columns in the evaluation set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been 
ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.\n***** Running Evaluation *****\n  Num examples = 7714\n  Batch size = 32\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2500\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2500/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2500/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2500/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-2500/special_tokens_map.json\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3000\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3000/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3000/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3000/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3000/special_tokens_map.json\nThe following columns in the evaluation set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. 
If id, ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.\n***** Running Evaluation *****\n  Num examples = 7714\n  Batch size = 32\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3500\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3500/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3500/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3500/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-3500/special_tokens_map.json\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4000\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4000/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4000/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4000/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4000/special_tokens_map.json\nSaving model checkpoint to SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4500\nConfiguration saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4500/config.json\nModel weights saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4500/pytorch_model.bin\ntokenizer config file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4500/tokenizer_config.json\nSpecial tokens file saved in SCIBERT-VOCAB-UNCASED-finetuned-NER/checkpoint-4500/special_tokens_map.json\nThe following columns in the evaluation set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.\n***** Running Evaluation *****\n  Num examples = 7714\n  Batch size = 32\n\n\nTraining completed. 
Do not forget to share your model on huggingface.co/models =)\n\n\n","output_type":"stream"},{"execution_count":18,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=4636, training_loss=0.13221461532237305, metrics={'train_runtime': 1312.6229, 'train_samples_per_second': 113.038, 'train_steps_per_second': 3.532, 'total_flos': 5249115444000336.0, 'train_loss': 0.13221461532237305, 'epoch': 4.0})"},"metadata":{}}]},{"cell_type":"code","source":"trainer.evaluate()","metadata":{"execution":{"iopub.status.busy":"2022-06-15T20:21:46.656241Z","iopub.execute_input":"2022-06-15T20:21:46.656903Z","iopub.status.idle":"2022-06-15T20:22:16.450277Z","shell.execute_reply.started":"2022-06-15T20:21:46.656859Z","shell.execute_reply":"2022-06-15T20:22:16.449497Z"},"trusted":true},"execution_count":19,"outputs":[{"name":"stderr","text":"The following columns in the evaluation set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. 
If id, ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.\n***** Running Evaluation *****\n  Num examples = 7714\n  Batch size = 32\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n    <div>\n      \n      <progress value='242' max='242' style='width:300px; height:20px; vertical-align: middle;'></progress>\n      [242/242 00:20]\n    </div>\n    "},"metadata":{}},{"execution_count":19,"output_type":"execute_result","data":{"text/plain":"{'eval_loss': 0.35260501503944397,\n 'eval_precision': 0.7135248705169549,\n 'eval_recall': 0.8174998600459049,\n 'eval_f1': 0.7619817892457407,\n 'eval_accuracy': 0.9191150061058831,\n 'eval_runtime': 29.7643,\n 'eval_samples_per_second': 259.17,\n 'eval_steps_per_second': 8.131,\n 'epoch': 4.0}"},"metadata":{}}]},{"cell_type":"code","source":"predictions, labels, _ = trainer.predict(tokenized_datasets[\"test\"])\npredictions = np.argmax(predictions, axis=2)\n\n# Remove ignored index (special tokens)\ntrue_predictions = [\n    [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n    for prediction, label in zip(predictions, labels)\n]\ntrue_labels = [\n    [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n    for prediction, label in zip(predictions, labels)\n]\n\nresults = metric.compute(predictions=true_predictions, references=true_labels)\nresults","metadata":{"execution":{"iopub.status.busy":"2022-06-15T20:22:16.451538Z","iopub.execute_input":"2022-06-15T20:22:16.451878Z","iopub.status.idle":"2022-06-15T20:22:16.771375Z","shell.execute_reply.started":"2022-06-15T20:22:16.451842Z","shell.execute_reply":"2022-06-15T20:22:16.770256Z"},"trusted":true},"execution_count":20,"outputs":[{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyError\u001b[0m                                  
Traceback (most recent call last)","\u001b[0;32m/tmp/ipykernel_33/929968631.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredictions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtokenized_datasets\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"test\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpredictions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m# Remove ignored index (special tokens)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m true_predictions = [\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/datasets/dataset_dict.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, k)\u001b[0m\n\u001b[1;32m     39\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mDataset\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     40\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNamedSplit\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m 
\u001b[0;32mor\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     42\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     43\u001b[0m             available_suggested_splits = [\n","\u001b[0;31mKeyError\u001b[0m: 'test'"],"ename":"KeyError","evalue":"'test'","output_type":"error"}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}