{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"%pip install fasthugs==0.0.1","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.status.busy":"2022-06-01T21:24:44.399401Z","iopub.execute_input":"2022-06-01T21:24:44.400173Z","iopub.status.idle":"2022-06-01T21:25:04.262429Z","shell.execute_reply.started":"2022-06-01T21:24:44.400077Z","shell.execute_reply":"2022-06-01T21:25:04.261432Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Collecting fasthugs\n  Downloading fasthugs-0.0.1-py3-none-any.whl (15 kB)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.7/site-packages (from fasthugs) (21.3)\nRequirement already satisfied: torch>=1.7.1 in /opt/conda/lib/python3.7/site-packages (from fasthugs) (1.11.0)\nRequirement already satisfied: transformers in /opt/conda/lib/python3.7/site-packages (from fasthugs) (4.18.0)\nRequirement already satisfied: datasets in /opt/conda/lib/python3.7/site-packages (from fasthugs) (2.1.0)\nRequirement already satisfied: fastai>=2.2.2 in /opt/conda/lib/python3.7/site-packages (from fasthugs) (2.6.3)\nRequirement already satisfied: pip in /opt/conda/lib/python3.7/site-packages (from fasthugs) (22.1)\nRequirement already satisfied: fastcore in /opt/conda/lib/python3.7/site-packages (from fasthugs) (1.4.3)\nRequirement already satisfied: torchvision>=0.8.2 in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (0.12.0)\nRequirement already satisfied: matplotlib in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (3.5.2)\nRequirement already satisfied: 
fastdownload<2,>=0.0.5 in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (0.0.6)\nRequirement already satisfied: pillow>6.0.0 in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (9.1.0)\nRequirement already satisfied: scipy in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (1.7.3)\nRequirement already satisfied: spacy<4 in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (3.2.4)\nRequirement already satisfied: fastprogress>=0.2.4 in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (1.0.2)\nRequirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (6.0)\nRequirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (2.27.1)\nRequirement already satisfied: scikit-learn in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (1.0.2)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from fastai>=2.2.2->fasthugs) (1.3.5)\nRequirement already satisfied: typing-extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7.1->fasthugs) (4.2.0)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (0.70.13)\nRequirement already satisfied: responses<0.19 in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (0.18.0)\nRequirement already satisfied: pyarrow>=5.0.0 in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (5.0.0)\nRequirement already satisfied: aiohttp in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (3.8.1)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (3.0.0)\nRequirement already satisfied: importlib-metadata in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (4.11.4)\nRequirement already satisfied: 
fsspec[http]>=2021.05.0 in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (2022.5.0)\nRequirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (0.5.1)\nRequirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (4.64.0)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (1.21.6)\nRequirement already satisfied: dill in /opt/conda/lib/python3.7/site-packages (from datasets->fasthugs) (0.3.5.1)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging->fasthugs) (3.0.9)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers->fasthugs) (2021.11.10)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from transformers->fasthugs) (3.6.0)\nRequirement already satisfied: sacremoses in /opt/conda/lib/python3.7/site-packages (from transformers->fasthugs) (0.0.53)\nRequirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in /opt/conda/lib/python3.7/site-packages (from transformers->fasthugs) (0.12.1)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->fastai>=2.2.2->fasthugs) (1.26.9)\nRequirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.7/site-packages (from requests->fastai>=2.2.2->fasthugs) (2.0.12)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->fastai>=2.2.2->fasthugs) (3.3)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->fastai>=2.2.2->fasthugs) (2022.5.18.1)\nRequirement already satisfied: preshed<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (3.0.6)\nRequirement 
already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (1.8.2)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (3.1.2)\nRequirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (1.0.7)\nRequirement already satisfied: click<8.1.0 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (8.0.4)\nRequirement already satisfied: langcodes<4.0.0,>=3.2.0 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (3.3.0)\nRequirement already satisfied: blis<0.8.0,>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (0.7.7)\nCollecting typing-extensions\n  Downloading typing_extensions-3.10.0.2-py3-none-any.whl (26 kB)\nRequirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (1.0.2)\nRequirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (59.8.0)\nRequirement already satisfied: pathy>=0.3.5 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (0.6.1)\nRequirement already satisfied: wasabi<1.1.0,>=0.8.1 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (0.9.1)\nRequirement already satisfied: srsly<3.0.0,>=2.4.1 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (2.4.3)\nRequirement already satisfied: catalogue<2.1.0,>=2.0.6 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (2.0.7)\nRequirement already satisfied: cymem<2.1.0,>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (2.0.6)\nRequirement already satisfied: typer<0.5.0,>=0.3.0 in 
/opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (0.4.1)\nRequirement already satisfied: spacy-legacy<3.1.0,>=3.0.8 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (3.0.9)\nRequirement already satisfied: thinc<8.1.0,>=8.0.12 in /opt/conda/lib/python3.7/site-packages (from spacy<4->fastai>=2.2.2->fasthugs) (8.0.16)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets->fasthugs) (1.7.2)\nRequirement already satisfied: asynctest==0.13.0 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets->fasthugs) (0.13.0)\nRequirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets->fasthugs) (21.4.0)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets->fasthugs) (1.2.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets->fasthugs) (6.0.2)\nRequirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets->fasthugs) (4.0.2)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.7/site-packages (from aiohttp->datasets->fasthugs) (1.3.0)\nRequirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata->datasets->fasthugs) (3.8.0)\nRequirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.7/site-packages (from matplotlib->fastai>=2.2.2->fasthugs) (0.11.0)\nRequirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib->fastai>=2.2.2->fasthugs) (4.33.3)\nRequirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.7/site-packages (from matplotlib->fastai>=2.2.2->fasthugs) (1.4.2)\nRequirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.7/site-packages (from 
matplotlib->fastai>=2.2.2->fasthugs) (2.8.2)\nRequirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.7/site-packages (from pandas->fastai>=2.2.2->fasthugs) (2022.1)\nRequirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers->fasthugs) (1.16.0)\nRequirement already satisfied: joblib in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers->fasthugs) (1.1.0)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from scikit-learn->fastai>=2.2.2->fasthugs) (3.1.0)\nRequirement already satisfied: smart-open<6.0.0,>=5.0.0 in /opt/conda/lib/python3.7/site-packages (from pathy>=0.3.5->spacy<4->fastai>=2.2.2->fasthugs) (5.2.1)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.7/site-packages (from jinja2->spacy<4->fastai>=2.2.2->fasthugs) (2.0.1)\nInstalling collected packages: typing-extensions, fasthugs\n  Attempting uninstall: typing-extensions\n    Found existing installation: typing_extensions 4.2.0\n    Uninstalling typing_extensions-4.2.0:\n      Successfully uninstalled typing_extensions-4.2.0\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\ntensorflow-io 0.21.0 requires tensorflow-io-gcs-filesystem==0.21.0, which is not installed.\ntensorflow 2.6.4 requires absl-py~=0.10, but you have absl-py 1.0.0 which is incompatible.\ntensorflow 2.6.4 requires numpy~=1.19.2, but you have numpy 1.21.6 which is incompatible.\ntensorflow 2.6.4 requires six~=1.15.0, but you have six 1.16.0 which is incompatible.\ntensorflow 2.6.4 requires wrapt~=1.12.1, but you have wrapt 1.14.1 which is incompatible.\ntensorflow-transform 1.8.0 requires tensorflow!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<2.9,>=1.15.5, but you have tensorflow 2.6.4 which is incompatible.\ntensorflow-serving-api 2.8.0 requires tensorflow<3,>=2.8.0, but you have tensorflow 2.6.4 which is incompatible.\nrich 12.4.4 requires typing-extensions<5.0,>=4.0.0; python_version < \"3.9\", but you have typing-extensions 3.10.0.2 which is incompatible.\npytorch-lightning 1.6.3 requires typing-extensions>=4.0.0, but you have typing-extensions 3.10.0.2 which is incompatible.\npytools 2022.1.9 requires typing-extensions>=4.0; python_version < \"3.11\", but you have typing-extensions 3.10.0.2 which is incompatible.\nflax 0.5.0 requires typing-extensions>=4.1.1, but you have typing-extensions 3.10.0.2 which is incompatible.\nflake8 4.0.1 requires importlib-metadata<4.3; python_version < \"3.8\", but you have importlib-metadata 4.11.4 which is incompatible.\napache-beam 2.38.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.5.1 which is incompatible.\napache-beam 2.38.0 requires httplib2<0.20.0,>=0.8, but you have httplib2 0.20.4 which is incompatible.\naioitertools 0.10.0 requires typing_extensions>=4.0; python_version < \"3.10\", but you have typing-extensions 3.10.0.2 which is incompatible.\naiobotocore 2.3.2 requires botocore<1.24.22,>=1.24.21, but you have botocore 1.26.7 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed fasthugs-0.0.1 
typing-extensions-3.10.0.2\n\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n\u001b[0m","output_type":"stream"}]},{"cell_type":"code","source":"from transformers import AutoModelForSequenceClassification\nfrom fastai.text.all import *\nfrom fastai.callback.wandb import *\n\nfrom fasthugs.learner import TransLearner\nfrom fasthugs.data import TransformersTextBlock, TextGetter, get_splits, PreprocCategoryBlock\n\nfrom datasets import load_dataset, concatenate_datasets\n\nimport random \nimport numpy as np\nimport torch\n\ndef random_seed(seed_value):\n    \"\"\"Seed NumPy, PyTorch and Python's `random` module with `seed_value`.\"\"\"\n    np.random.seed(seed_value)\n    torch.manual_seed(seed_value)\n    random.seed(seed_value)\n\n\nrandom_seed(42)","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:08.160105Z","iopub.execute_input":"2022-06-01T21:25:08.160477Z","iopub.status.idle":"2022-06-01T21:25:21.537594Z","shell.execute_reply.started":"2022-06-01T21:25:08.160445Z","shell.execute_reply":"2022-06-01T21:25:21.536387Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"ds_name = 'glue'\nmodel_name = \"AnonymousSub/rule_based_roberta_hier_triplet_epochs_1_shard_1\"\n\nmax_len = 512\nbs = 32\nval_bs = bs*2\n\nlr = 3e-5","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:21.542859Z","iopub.execute_input":"2022-06-01T21:25:21.544150Z","iopub.status.idle":"2022-06-01T21:25:21.552867Z","shell.execute_reply.started":"2022-06-01T21:25:21.544106Z","shell.execute_reply":"2022-06-01T21:25:21.552087Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"GLUE_TASKS = [\"cola\", \"mnli\", \"mnli-mm\", \"mrpc\", \"qnli\", \"qqp\", \"rte\", \"sst2\", \"stsb\", \"wnli\"]\ndef validate_task(task_name=None):\n    \"\"\"Assert that `task_name` is one of `GLUE_TASKS`.\n\n    When `task_name` is omitted, the notebook-level `task` variable is checked\n    instead, so existing `validate_task()` calls keep working.\n    Raises AssertionError for an unknown task.\n    \"\"\"\n    if task_name is None:\n        task_name = task  # fall back to the global assigned in a later cell\n    assert task_name in 
GLUE_TASKS","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:21.554378Z","iopub.execute_input":"2022-06-01T21:25:21.555071Z","iopub.status.idle":"2022-06-01T21:25:21.563322Z","shell.execute_reply.started":"2022-06-01T21:25:21.555033Z","shell.execute_reply":"2022-06-01T21:25:21.562520Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"from fastai.metrics import MatthewsCorrCoef, F1Score, PearsonCorrCoef, SpearmanCorrCoef","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:21.565477Z","iopub.execute_input":"2022-06-01T21:25:21.566184Z","iopub.status.idle":"2022-06-01T21:25:21.571624Z","shell.execute_reply.started":"2022-06-01T21:25:21.566146Z","shell.execute_reply":"2022-06-01T21:25:21.570709Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"glue_metrics = {\n    'cola':[MatthewsCorrCoef()],\n    'sst2':[accuracy],\n    'mrpc':[F1Score(), accuracy],\n    'stsb':[PearsonCorrCoef(), SpearmanCorrCoef()],\n    'qqp' :[F1Score(), accuracy],\n    'mnli':[accuracy],\n    'qnli':[accuracy],\n    'rte' :[accuracy],\n    'wnli':[accuracy],\n}","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:21.573094Z","iopub.execute_input":"2022-06-01T21:25:21.573642Z","iopub.status.idle":"2022-06-01T21:25:21.580649Z","shell.execute_reply.started":"2022-06-01T21:25:21.573593Z","shell.execute_reply":"2022-06-01T21:25:21.579808Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"task = 'stsb'\nvalidate_task()","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:21.582021Z","iopub.execute_input":"2022-06-01T21:25:21.582649Z","iopub.status.idle":"2022-06-01T21:25:21.589481Z","shell.execute_reply.started":"2022-06-01T21:25:21.582481Z","shell.execute_reply":"2022-06-01T21:25:21.587947Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"ds = load_dataset(ds_name, 
task)","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:21.590849Z","iopub.execute_input":"2022-06-01T21:25:21.592800Z","iopub.status.idle":"2022-06-01T21:25:25.077980Z","shell.execute_reply.started":"2022-06-01T21:25:21.592762Z","shell.execute_reply":"2022-06-01T21:25:25.077183Z"},"trusted":true},"execution_count":8,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading builder script:   0%|          | 0.00/7.78k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a6bbcbc00ac9413eba8f4c03e0c82eb2"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading metadata:   0%|          | 0.00/4.47k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"12cc310d7b074aef960fc688f8d6291d"}},"metadata":{}},{"name":"stdout","text":"Downloading and preparing dataset glue/stsb (download: 784.05 KiB, generated: 1.09 MiB, post-processed: Unknown size, total: 1.86 MiB) to /root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/803k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5f5bf85427de4ceb87a2d999f69dda2e"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split:   0%|          | 0/5749 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating validation split:   0%|          | 0/1500 [00:00<?, ? 
examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating test split:   0%|          | 0/1379 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"name":"stdout","text":"Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"  0%|          | 0/3 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7b1bda8ca3ff49e58c75754810959ee9"}},"metadata":{}}]},{"cell_type":"code","source":"ds.keys()","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:25.081984Z","iopub.execute_input":"2022-06-01T21:25:25.084144Z","iopub.status.idle":"2022-06-01T21:25:25.095076Z","shell.execute_reply.started":"2022-06-01T21:25:25.084104Z","shell.execute_reply":"2022-06-01T21:25:25.094328Z"},"trusted":true},"execution_count":9,"outputs":[{"execution_count":9,"output_type":"execute_result","data":{"text/plain":"dict_keys(['train', 'validation', 'test'])"},"metadata":{}}]},{"cell_type":"code","source":"len(ds['train']), len(ds['validation'])","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:25.099592Z","iopub.execute_input":"2022-06-01T21:25:25.101963Z","iopub.status.idle":"2022-06-01T21:25:25.111623Z","shell.execute_reply.started":"2022-06-01T21:25:25.101910Z","shell.execute_reply":"2022-06-01T21:25:25.110732Z"},"trusted":true},"execution_count":10,"outputs":[{"execution_count":10,"output_type":"execute_result","data":{"text/plain":"(5749, 1500)"},"metadata":{}}]},{"cell_type":"code","source":"train_idx, valid_idx = 
get_splits(ds)\nvalid_idx","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:25.117716Z","iopub.execute_input":"2022-06-01T21:25:25.119927Z","iopub.status.idle":"2022-06-01T21:25:25.129768Z","shell.execute_reply.started":"2022-06-01T21:25:25.119892Z","shell.execute_reply":"2022-06-01T21:25:25.128874Z"},"trusted":true},"execution_count":11,"outputs":[{"execution_count":11,"output_type":"execute_result","data":{"text/plain":"(#1500) [5749,5750,5751,5752,5753,5754,5755,5756,5757,5758...]"},"metadata":{}}]},{"cell_type":"code","source":"train_ds = concatenate_datasets([ds['train'], ds['validation']])","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:25.131537Z","iopub.execute_input":"2022-06-01T21:25:25.136140Z","iopub.status.idle":"2022-06-01T21:25:25.147777Z","shell.execute_reply.started":"2022-06-01T21:25:25.136100Z","shell.execute_reply":"2022-06-01T21:25:25.147083Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"train_ds[0]","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:25.151655Z","iopub.execute_input":"2022-06-01T21:25:25.153163Z","iopub.status.idle":"2022-06-01T21:25:25.163321Z","shell.execute_reply.started":"2022-06-01T21:25:25.153128Z","shell.execute_reply":"2022-06-01T21:25:25.162446Z"},"trusted":true},"execution_count":13,"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":"{'sentence1': 'A plane is taking off.',\n 'sentence2': 'An air plane is taking off.',\n 'label': 5.0,\n 'idx': 0}"},"metadata":{}}]},{"cell_type":"code","source":"dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), RegressionBlock(1)],\n                   get_x=TextGetter('sentence1', 'sentence2'),\n                   get_y=ItemGetter('label'),\n                   
splitter=IndexSplitter(valid_idx))","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:25.164779Z","iopub.execute_input":"2022-06-01T21:25:25.168065Z","iopub.status.idle":"2022-06-01T21:25:31.049376Z","shell.execute_reply.started":"2022-06-01T21:25:25.168030Z","shell.execute_reply":"2022-06-01T21:25:31.048504Z"},"trusted":true},"execution_count":14,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/384 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a3659067b3c542c0864874bc7c4cc1dd"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/780k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"46e520e456b5488cb1b8813eb02f16dc"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a3dfd0883f424657a286cffc53682aa9"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e3561d77044e42d9b3ea74e49702c0df"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5d62790b064c49d089af8ccdf0821422"}},"metadata":{}}]},{"cell_type":"code","source":"%%time\ndls = dblock.dataloaders(train_ds, bs=bs, 
val_bs=val_bs)","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:31.050781Z","iopub.execute_input":"2022-06-01T21:25:31.052455Z","iopub.status.idle":"2022-06-01T21:25:43.129211Z","shell.execute_reply.started":"2022-06-01T21:25:31.052413Z","shell.execute_reply":"2022-06-01T21:25:43.128281Z"},"trusted":true},"execution_count":15,"outputs":[{"name":"stdout","text":"CPU times: user 6.91 s, sys: 1.43 s, total: 8.35 s\nWall time: 12.1 s\n","output_type":"stream"}]},{"cell_type":"code","source":"dls.show_batch(max_n=5)","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:43.130526Z","iopub.execute_input":"2022-06-01T21:25:43.131407Z","iopub.status.idle":"2022-06-01T21:25:43.196629Z","shell.execute_reply.started":"2022-06-01T21:25:43.131368Z","shell.execute_reply":"2022-06-01T21:25:43.195808Z"},"trusted":true},"execution_count":16,"outputs":[{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>text</th>\n      <th>text_</th>\n      <th>text__</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>A plane is taking off.</td>\n      <td>An air plane is taking off.</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Sony Xperia Z2 and Nokia‚Äôs X series unveiled</td>\n      <td>Court orders political ban on Italy‚Äôs Berlusconi</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>He's not wealthy because he's successful.</td>\n      <td>Why hate people because they are successful?</td>\n      <td>0.800000011920929</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Three more US soldiers killed in Afghanistan</td>\n      <td>NATO Soldier Killed in Afghanistan</td>\n      <td>1.7999999523162842</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>The songs are on offer for 99 cents each, or $9.99 for an album.</td>\n 
     <td>The company will offer songs for 99 cents and albums for $9.95.</td>\n      <td>3.3329999446868896</td>\n    </tr>\n  </tbody>\n</table>"},"metadata":{}}]},{"cell_type":"code","source":"import wandb\n\nWANDB_NAME = f'{ds_name}-{task}-{model_name}'\nGROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'\nNOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'\nCONFIG = {}\nTAGS =[model_name, ds_name, 'radam']","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:43.199483Z","iopub.execute_input":"2022-06-01T21:25:43.199763Z","iopub.status.idle":"2022-06-01T21:25:43.204463Z","shell.execute_reply.started":"2022-06-01T21:25:43.199737Z","shell.execute_reply":"2022-06-01T21:25:43.203569Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"wandb.init(reinit=True, project=\"fasthugs\", entity=\"fastai_community\",\n           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:25:43.206085Z","iopub.execute_input":"2022-06-01T21:25:43.206697Z","iopub.status.idle":"2022-06-01T21:26:15.035556Z","shell.execute_reply.started":"2022-06-01T21:25:43.206662Z","shell.execute_reply":"2022-06-01T21:26:15.034781Z"},"trusted":true},"execution_count":18,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n","output_type":"stream"},{"output_type":"stream","name":"stdin","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:  ········································\n"},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"name":"stdout","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"wandb version 0.12.17 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.12.16"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20220601_212610-11cbvz5r</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href=\"https://wandb.ai/fastai_community/fasthugs/runs/11cbvz5r\" target=\"_blank\">glue-stsb-AnonymousSub/rule_based_roberta_hier_triplet_epochs_1_shard_1</a></strong> to <a href=\"https://wandb.ai/fastai_community/fasthugs\" target=\"_blank\">Weights & Biases</a> (<a href=\"https://wandb.me/run\" target=\"_blank\">docs</a>)<br/>"},"metadata":{}}]},{"cell_type":"code","source":"model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)\nmetrics = glue_metrics[task]\nlearn = TransLearner(dls, model, 
metrics=metrics).to_fp16()","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:26:15.039117Z","iopub.execute_input":"2022-06-01T21:26:15.039388Z","iopub.status.idle":"2022-06-01T21:26:42.806555Z","shell.execute_reply.started":"2022-06-01T21:26:15.039361Z","shell.execute_reply":"2022-06-01T21:26:42.805784Z"},"trusted":true},"execution_count":19,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/723 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"27da967e3f044324ad9ca66de057735d"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading:   0%|          | 0.00/476M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3d1917373a864169b6f57211eaa2faf5"}},"metadata":{}},{"name":"stderr","text":"Some weights of the model checkpoint at AnonymousSub/rule_based_roberta_hier_triplet_epochs_1_shard_1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']\n- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\nSome weights of RobertaForSequenceClassification were not initialized from the model checkpoint at AnonymousSub/rule_based_roberta_hier_triplet_epochs_1_shard_1 and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"}]},{"cell_type":"code","source":"# Attach the W&B callback so the run opened by wandb.init() actually receives losses and metrics.\n# NOTE(review): the original passed an empty list; confirm that was not deliberate.\ncbs = [WandbCallback(log_preds=False, log_model=False)]\nlearn.fit_one_cycle(10, lr, cbs=cbs)","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:26:42.807691Z","iopub.execute_input":"2022-06-01T21:26:42.808497Z","iopub.status.idle":"2022-06-01T21:35:19.993434Z","shell.execute_reply.started":"2022-06-01T21:26:42.808460Z","shell.execute_reply":"2022-06-01T21:35:19.992129Z"},"trusted":true},"execution_count":20,"outputs":[{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n<style>\n    /* Turns off some styling */\n    progress {\n        /* gets rid of default border in Firefox and Opera. */\n        border: none;\n        /* Needs to be in here for Safari polyfill so background images work as expected. 
*/\n        background-size: auto;\n    }\n    .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n        background: #F44336;\n    }\n</style>\n"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: left;\">\n      <th>epoch</th>\n      <th>train_loss</th>\n      <th>valid_loss</th>\n      <th>pearsonr</th>\n      <th>spearmanr</th>\n      <th>time</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>1.818572</td>\n      <td>0.679656</td>\n      <td>0.850310</td>\n      <td>0.847685</td>\n      <td>00:49</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>0.568844</td>\n      <td>0.464313</td>\n      <td>0.894114</td>\n      <td>0.894649</td>\n      <td>00:52</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>0.389603</td>\n      <td>0.453627</td>\n      <td>0.904373</td>\n      <td>0.903994</td>\n      <td>00:52</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>0.279905</td>\n      <td>0.492652</td>\n      <td>0.905581</td>\n      <td>0.904163</td>\n      <td>00:51</td>\n    </tr>\n    <tr>\n      <td>4</td>\n      <td>0.205784</td>\n      <td>0.505553</td>\n      <td>0.901399</td>\n      <td>0.898220</td>\n      <td>00:52</td>\n    </tr>\n    <tr>\n      <td>5</td>\n      <td>0.173306</td>\n      <td>0.473948</td>\n      <td>0.909177</td>\n      <td>0.906299</td>\n      <td>00:51</td>\n    </tr>\n    <tr>\n      <td>6</td>\n      <td>0.126259</td>\n      <td>0.422385</td>\n      <td>0.906482</td>\n      <td>0.904919</td>\n      <td>00:52</td>\n    </tr>\n    <tr>\n      <td>7</td>\n      <td>0.105504</td>\n      <td>0.439509</td>\n      <td>0.909816</td>\n      <td>0.906624</td>\n      <td>00:51</td>\n    </tr>\n    <tr>\n      <td>8</td>\n      <td>0.083578</td>\n      <td>0.419202</td>\n      <td>0.909292</td>\n      <td>0.906407</td>\n      
<td>00:52</td>\n    </tr>\n    <tr>\n      <td>9</td>\n      <td>0.082968</td>\n      <td>0.426244</td>\n      <td>0.909239</td>\n      <td>0.906327</td>\n      <td>00:52</td>\n    </tr>\n  </tbody>\n</table>"},"metadata":{}},{"name":"stdout","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\nhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"}]},{"cell_type":"code","source":"learn.show_results()","metadata":{"execution":{"iopub.status.busy":"2022-06-01T21:35:19.998370Z","iopub.execute_input":"2022-06-01T21:35:20.001339Z","iopub.status.idle":"2022-06-01T21:35:20.364138Z","shell.execute_reply.started":"2022-06-01T21:35:20.001288Z","shell.execute_reply":"2022-06-01T21:35:20.363096Z"},"trusted":true},"execution_count":21,"outputs":[{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n<style>\n    /* Turns off some styling */\n    progress {\n        /* gets rid of default border in Firefox and Opera. */\n        border: none;\n        /* Needs to be in here for Safari polyfill so background images work as expected. */\n        background-size: auto;\n    }\n    .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n        background: #F44336;\n    }\n</style>\n"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>text</th>\n      <th>text_</th>\n      <th>text__</th>\n      <th>text___</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>A man with a hard hat is dancing.</td>\n      <td>A man wearing a hard hat is dancing.</td>\n      <td>5.0</td>\n      <td>(5.20703125,)</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>The woman felt threatened and went to the magistrate's office, police said.</td>\n      <td>The woman reported that she felt threatened and 
obtained a warrant for Stackhouse's arrest from the local magistrate's office.</td>\n      <td>3.25</td>\n      <td>(3.505859375,)</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>We always put it crushed up in a bit of butter or on a sardine.</td>\n      <td>We had a cat that required daily medication, and it was a struggle to give the cat a pill.</td>\n      <td>1.399999976158142</td>\n      <td>(1.61328125,)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Aristotle didn't put the world at the center of the universe per se.</td>\n      <td>Why did Aristotle place the earth at the centre of an infinite universe?</td>\n      <td>2.799999952316284</td>\n      <td>(2.1796875,)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Webster's New World Rhyming Dictionary: Clement Wood's Updated This is the rhyming dictionary I turn to first.</td>\n      <td>http://www.rhymezone.com/ You just type in a word, then select one of the following.</td>\n      <td>1.2000000476837158</td>\n      <td>(2.625,)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Indian police round up all five suspects in Mumbai rape case</td>\n      <td>Mumbai police arrest fifth suspect in gang-rape case</td>\n      <td>3.799999952316284</td>\n      <td>(4.52734375,)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>The puppy is outdoor.</td>\n      <td>A man in printed board shorts is doing a yoga pose on the beach.</td>\n      <td>0.0</td>\n      <td>(0.0799560546875,)</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>A person is scaling a rock wall.</td>\n      <td>A person and a horse are above a fence.</td>\n      <td>0.20000000298023224</td>\n      <td>(0.625,)</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>My experience with Danios has that they always have been \"bossy\" fish.</td>\n      <td>Breed Zebra Danios are extremely hardy fish, they are almost difficult to kill.</td>\n      <td>3.0</td>\n      <td>(2.65234375,)</td>\n    </tr>\n  
</tbody>\n</table>"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}