{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "flag = torch.cuda.is_available()\n",
    "print(flag)\n",
    "\n",
    "ngpu = 1\n",
    "# Use the first GPU only when CUDA is usable and at least one GPU is requested; otherwise use the CPU.\n",
    "device = torch.device(\"cuda:0\" if (flag and ngpu > 0) else \"cpu\")\n",
    "# torch.cuda.set_device fails on a machine without CUDA, so only call it when a GPU was selected.\n",
    "if device.type == \"cuda\":\n",
    "    torch.cuda.set_device(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch \n",
    "from torch.utils import data # 获取迭代数据\n",
    "from torch.autograd import Variable # 获取变量\n",
    "import torchvision\n",
    "from torchvision.datasets import mnist # 获取数据集\n",
    "import matplotlib.pyplot as plt\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "from torch.autograd import Variable\n",
    "from torch import optim\n",
    "from torch import nn\n",
    "import json\n",
    "import numpy as np\n",
    "import string\n",
    "import os\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "class qa_train_context_dataset(Dataset):\n",
    "    \n",
    "        \n",
    "    def graph_refine_data(self, case):\n",
    "        \"\"\"Convert one raw article from the JSON file into tensors.\n",
    "\n",
    "        Args:\n",
    "            case: dict for one article. Reads ``_id``, ``type``, ``level``,\n",
    "                ``srl_question_to_graph`` (whitespace-separated integers) and\n",
    "                ``context`` (paragraphs, each a sequence of sentence records\n",
    "                with at least six fields).\n",
    "\n",
    "        Returns:\n",
    "            dict with keys ``_id``, ``qu_graph``, ``context``, ``type`` and\n",
    "            ``level``. Each sentence record becomes a list of five int tensors\n",
    "            (fields 0-4) followed by the 0/1 label tensor built from field 5.\n",
    "        \"\"\"\n",
    "        # The question graph does not depend on the paragraphs: build it once,\n",
    "        # which also keeps it defined when case[\"context\"] is empty.\n",
    "        qu_graph = self.float_to_int(self.str_to_int(case['srl_question_to_graph']))\n",
    "\n",
    "        graph_context = []\n",
    "        for paragh in case[\"context\"]:\n",
    "            sub_context = []\n",
    "            for sent_value in paragh:\n",
    "                sub_context.append([\n",
    "                    # fields 0, 3 and 4 are whitespace-separated integers\n",
    "                    self.float_to_int(self.str_to_int(sent_value[0])),\n",
    "                    # fields 1 and 2 are bracketed, comma-separated integers\n",
    "                    self.float_to_int(self.str_ar_to_int(sent_value[1])),\n",
    "                    self.float_to_int(self.str_ar_to_int(sent_value[2])),\n",
    "                    self.float_to_int(self.str_to_int(sent_value[3])),\n",
    "                    self.float_to_int(self.str_to_int(sent_value[4])),\n",
    "                    # field 5 is the boolean label\n",
    "                    self.true_1_false_0(sent_value[5]),\n",
    "                ])\n",
    "            graph_context.append(sub_context)\n",
    "\n",
    "        return {\n",
    "            \"_id\": case[\"_id\"],\n",
    "            \"qu_graph\": qu_graph,\n",
    "            \"context\": graph_context,\n",
    "            \"type\": case[\"type\"],\n",
    "            \"level\": case[\"level\"],\n",
    "        }\n",
    "    \n",
    "    \n",
    "    def example_to_sentlist(self,case,example_id):\n",
    "        \"\"\"Expand one refined article into one sample per sentence.\n",
    "\n",
    "        Args:\n",
    "            case: dict produced by ``graph_refine_data``. Reads ``qu_graph``,\n",
    "                ``_id`` and the first two paragraphs of ``context``.\n",
    "            example_id: value stored unchanged in every sample of this article.\n",
    "\n",
    "        Returns:\n",
    "            list with one 12-item list per sentence, in the positions read by\n",
    "            ``__getitem__``: core_article_infor, core_article_power,\n",
    "            core_qu_infor, core_qu_power, qu_graph, core_graph, label,\n",
    "            example_id, article_sum, qu_article_info_sum,\n",
    "            qu_article_power_sum, art_id.\n",
    "\n",
    "        Raises:\n",
    "            IndexError: if the two paragraphs hold more than ``sent_num``\n",
    "                sentences in total.\n",
    "\n",
    "        ``case`` is not modified, so the call can be repeated on the same\n",
    "        article. The disabled \"flow power\" features are not produced here.\n",
    "        \"\"\"\n",
    "        verb_num = 4\n",
    "        srl_label_num = 25\n",
    "        sent_size = srl_label_num*verb_num\n",
    "        sent_num = 5\n",
    "\n",
    "        art_id = case[\"_id\"]\n",
    "        qu_graph = case['qu_graph'].float().reshape(1,1,sent_size)\n",
    "\n",
    "        # Merge the paragraphs into a NEW list. Extending case['context'][0] in\n",
    "        # place would duplicate the second paragraph on every repeated call.\n",
    "        sentences = [sent for paragraph in case['context'][:2] for sent in paragraph]\n",
    "        if len(sentences) > sent_num:\n",
    "            raise IndexError(\n",
    "                \"article %r has %d sentences, more than sent_num=%d\" % (art_id, len(sentences), sent_num))\n",
    "\n",
    "        # The article-level graphs depend only on the article, not on which\n",
    "        # sentence is the core one, so build them once and share them.\n",
    "        # NOTE(review): field 3 feeds core_qu_power but qu_article_info_sum, and\n",
    "        # field 4 feeds core_qu_infor but qu_article_power_sum; the info/power\n",
    "        # naming looks crossed. Kept as found - confirm which is intended.\n",
    "        article_rows = [sent[0].float().reshape(1,sent_size) for sent in sentences]\n",
    "        qu_info_rows = [sent[3].float().reshape(1,sent_size) for sent in sentences]\n",
    "        qu_power_rows = [sent[4].float().reshape(1,sent_size) for sent in sentences]\n",
    "        article_sum = self.merge_list_to_graph(self.padding_sum_graph(sent_num,article_rows,sent_size))\n",
    "        qu_article_info_sum = self.merge_list_to_graph(self.padding_sum_graph(sent_num,qu_info_rows,sent_size))\n",
    "        qu_article_power_sum = self.merge_list_to_graph(self.padding_sum_graph(sent_num,qu_power_rows,sent_size))\n",
    "\n",
    "        sent_graph_list = []\n",
    "        for con_value in sentences:\n",
    "            sent_graph_list.append([\n",
    "                con_value[1].float().reshape(1,sent_num,sent_size),  # core_article_infor\n",
    "                con_value[2].float().reshape(1,sent_num,sent_size),  # core_article_power\n",
    "                con_value[4].float().reshape(1,1,sent_size),         # core_qu_infor\n",
    "                con_value[3].float().reshape(1,1,sent_size),         # core_qu_power\n",
    "                qu_graph,\n",
    "                con_value[0].float().reshape(1,1,sent_size),         # core_graph\n",
    "                con_value[5].float(),                                # label (stored copy is left as is)\n",
    "                example_id,\n",
    "                article_sum,\n",
    "                qu_article_info_sum,\n",
    "                qu_article_power_sum,\n",
    "                art_id,\n",
    "            ])\n",
    "        return sent_graph_list\n",
    "        \n",
    "        \n",
    "    \n",
    "                \n",
    "        \n",
    "#flow_power_tools_start==============================================================================       \n",
    "#查看core_article_info哪一行有值,有值的一行的值置为1，没有出现core_sent元素的句子的值置为0\n",
    "    def article_value(self,core_article_info,sent_num,sent_size):\n",
    "        \"\"\"Build a flat 0/1 mask marking which sentence rows carry values.\n",
    "\n",
    "        Args:\n",
    "            core_article_info: tensor with ``sent_num * sent_size`` elements.\n",
    "            sent_num: number of sentence rows.\n",
    "            sent_size: number of entries per sentence row.\n",
    "\n",
    "        Returns:\n",
    "            int32 tensor of length ``sent_num * sent_size``. The slice of a row\n",
    "            is all ones when that row's (int-truncated) entries sum to at least\n",
    "            1 and all zeros otherwise, so the length is always the same, also\n",
    "            for rows with a negative sum and for ``sent_num == 0``.\n",
    "        \"\"\"\n",
    "        rows = core_article_info.reshape(sent_num,sent_size).int()\n",
    "        # One flag per sentence row, computed for all rows at once.\n",
    "        has_value = (rows.sum(dim=1) >= 1).int()\n",
    "        # Stretch every flag over its whole row, then flatten.\n",
    "        return has_value.reshape(sent_num,1).expand(sent_num,sent_size).reshape(sent_num*sent_size)\n",
    "\n",
    "    #将core_qu_info填充为图片大小  相对于qu_core_power\n",
    "    def pad_core_qu_info(self,core_qu_info,sent_num,sent_size):\n",
    "        \"\"\"Tile one sentence-sized vector ``sent_num`` times into a flat int tensor.\n",
    "\n",
    "        ``core_qu_info`` is flattened to ``sent_size`` elements first. At least\n",
    "        one copy is always returned, even when ``sent_num`` is below 1.\n",
    "        \"\"\"\n",
    "        flat = core_qu_info.reshape(sent_size)\n",
    "        extra_copies = max(sent_num - 1, 0)\n",
    "        if extra_copies:\n",
    "            flat = torch.cat([flat] * (extra_copies + 1), 0)\n",
    "        return flat.int()\n",
    " \n",
    "\n",
    " #    def core_article_qu_power_graph_list():\n",
    "#         core_article_qu_power_graph_list = []\n",
    "\n",
    "#         for case in data:\n",
    "#             core_article_qu_power_graph_list.append(article_value(case['']).mual(pad_core_qu_power(case[''])))\n",
    "\n",
    "\n",
    "\n",
    "#         return core_article_qu_info_graph_list\n",
    "#累加每个句子流动后的qu_power值\n",
    "\n",
    "    def sum_flow_article_qu_power(self,core_article_qu_power_list):\n",
    "        \"\"\"Add up the given tensors and cap every entry that reaches 1 at 1.\n",
    "\n",
    "        Args:\n",
    "            core_article_qu_power_list: non-empty sequence of same-shaped tensors.\n",
    "\n",
    "        Returns:\n",
    "            A new tensor holding the element-wise sum, with all entries >= 1\n",
    "            replaced by 1. Entries below 1 keep their summed value. The input\n",
    "            tensors are never modified.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if the sequence is empty.\n",
    "        \"\"\"\n",
    "        terms = list(core_article_qu_power_list)\n",
    "        if not terms:\n",
    "            raise ValueError(\"core_article_qu_power_list must contain at least one tensor\")\n",
    "        # Clone the first term so the in-place cap below cannot write into the\n",
    "        # caller's tensor when the list has a single element.\n",
    "        total = terms[0].clone()\n",
    "        for term in terms[1:]:\n",
    "            total = total + term\n",
    "        total[total >= 1] = 1\n",
    "        return total\n",
    "\n",
    "#将flow_power_qu_article_power,分配给对应的句子\n",
    "    def divide_article_to_sent(self,sum_article_qu_power_graph,sent_num,sent_size):\n",
    "        \"\"\"Split a flat article graph into one slice per sentence.\n",
    "\n",
    "        Args:\n",
    "            sum_article_qu_power_graph: tensor with ``sent_num * sent_size`` elements.\n",
    "            sent_num: number of sentences to split into.\n",
    "            sent_size: number of entries per sentence.\n",
    "\n",
    "        Returns:\n",
    "            list of ``sent_num`` int32 tensors of length ``sent_size``; every\n",
    "            entry >= 1 is capped at 1. The input tensor is not modified.\n",
    "        \"\"\"\n",
    "        flat = sum_article_qu_power_graph.reshape(sent_num*sent_size).int().clone()\n",
    "        # Cap by POSITION. Indexing with the element's value (a[value] = 1)\n",
    "        # would overwrite unrelated cells instead.\n",
    "        flat[flat >= 1] = 1\n",
    "        return [flat[i*sent_size:(i+1)*sent_size] for i in range(sent_num)]\n",
    "\n",
    "\n",
    "#flow_power_tools_end==============================================================================                    \n",
    "                \n",
    "    \n",
    "    \n",
    "    def __init__(self,file_name,transform = None):\n",
    "        \"\"\"Load the JSON data file and precompute every per-sentence sample.\n",
    "\n",
    "        Args:\n",
    "            file_name: path of the JSON file; its content is iterated as a\n",
    "                sequence of articles.\n",
    "            transform: optional callable applied to each sample dict in\n",
    "                ``__getitem__``.\n",
    "        \"\"\"\n",
    "        self.file_name = file_name\n",
    "        self.transform = transform\n",
    "        with open(file_name, \"r\", encoding='utf-8') as reader:\n",
    "            orig_data = json.load(reader)\n",
    "            print(\"Load ok\")\n",
    "        self.orig_data = orig_data  # raw articles exactly as read from the file\n",
    "        self.srl_data = []\n",
    "        self.sum_sent_graph_list = []\n",
    "\n",
    "        # Pass 1: convert every raw article into tensors.\n",
    "        self.srl_data.extend(self.graph_refine_data(article) for article in tqdm(orig_data))\n",
    "\n",
    "        # Pass 2: flatten the articles into per-sentence samples (extend, not\n",
    "        # append, because each article yields a list of samples).\n",
    "        for example_id, case in enumerate(tqdm(self.srl_data)):\n",
    "            self.sum_sent_graph_list.extend(self.example_to_sentlist(case,example_id))\n",
    "            \n",
    "            \n",
    "        #print(\"s:\",self.srl_data[0]['context'][0])\n",
    "        #print(\"sum:\",self.sum_sent_graph_list[0][6])\n",
    "        #print(\"sum:\",self.sum_sent_graph_list[0][7])\n",
    "        #print(\"sum:\",len(self.sum_sent_graph_list))\n",
    "    \n",
    "        \n",
    "    def __len__(self):\n",
    "        \"\"\"Number of per-sentence training samples, not the number of articles.\"\"\"\n",
    "        samples = self.sum_sent_graph_list\n",
    "        return len(samples)\n",
    "    \n",
    "    \n",
    "    def str_to_int(self,data_str):\n",
    "        \"\"\"Parse a whitespace-separated string of integers into a list of ints.\"\"\"\n",
    "        return list(map(int, data_str.split()))\n",
    "    \n",
    "    def str_ar_to_int(self,data_str):\n",
    "        \"\"\"Parse a bracketed, comma-separated string of integers into a flat list.\n",
    "\n",
    "        Square brackets and commas are treated as separators, so any nesting\n",
    "        in the text is flattened.\n",
    "        \"\"\"\n",
    "        separators_to_spaces = str.maketrans(\"[],\", \"   \")\n",
    "        tokens = data_str.translate(separators_to_spaces).split()\n",
    "        return list(map(int, tokens))\n",
    "    \n",
    "    def float_to_int(self,data):\n",
    "        \"\"\"Return ``data`` as an int32 tensor.\n",
    "\n",
    "        The tensor is built directly as int32. Going through the float32\n",
    "        ``torch.Tensor`` constructor first cannot represent every integer above\n",
    "        2**24 exactly, which would silently change large values.\n",
    "        \"\"\"\n",
    "        return torch.tensor(data, dtype=torch.int32)\n",
    "    \n",
    "    def true_1_false_0(self,data):\n",
    "        \"\"\"Map a boolean-like label to a scalar tensor: True -> 1, False -> 0.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if ``data`` equals neither True nor False. Failing here\n",
    "                names the bad label instead of returning None and breaking\n",
    "                later when the label is used as a tensor.\n",
    "        \"\"\"\n",
    "        # == (not `is`) on purpose, so that 1 and 0 are accepted as well.\n",
    "        if data == True :\n",
    "            return torch.tensor(1)\n",
    "        if data == False :\n",
    "            return torch.tensor(0)\n",
    "        raise ValueError(\"label must be True or False, got %r\" % (data,))\n",
    "    def merge_list_to_graph(self,merge_sum_sent_list):\n",
    "        \"\"\"Concatenate a sequence of tensors along dimension 0.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if the sequence is empty.\n",
    "        \"\"\"\n",
    "        pieces = list(merge_sum_sent_list)\n",
    "        if not pieces:\n",
    "            raise ValueError(\"merge_sum_sent_list must contain at least one tensor\")\n",
    "        # One torch.cat call instead of a pairwise loop that re-copies the\n",
    "        # growing result on every step.\n",
    "        return torch.cat(pieces,0)\n",
    "    \n",
    "    def padding_sum_graph(self,sent_num,sum_graph_list,sent_size):\n",
    "        \"\"\"Return the rows padded with zero rows up to ``sent_num`` entries.\n",
    "\n",
    "        Args:\n",
    "            sent_num: minimum number of rows in the result.\n",
    "            sum_graph_list: list of row tensors; it is copied, not modified.\n",
    "            sent_size: width of each zero padding row.\n",
    "\n",
    "        Returns:\n",
    "            A new list with the input rows followed by ``(1, sent_size)`` zero\n",
    "            tensors. Nothing is added when the input already has ``sent_num``\n",
    "            rows or more.\n",
    "        \"\"\"\n",
    "        padded = list(sum_graph_list)\n",
    "        zero_row = torch.zeros(1,sent_size)\n",
    "        # A negative count multiplies to an empty list, so long inputs pass through.\n",
    "        padded.extend([zero_row] * (sent_num - len(padded)))\n",
    "        return padded\n",
    "            \n",
    "        \n",
    "        \n",
    "    \n",
    "    def __getitem__(self,index):\n",
    "        \"\"\"Return the sample stored at ``index`` as a dict.\n",
    "\n",
    "        The stored entry is the 12-item list built by ``example_to_sentlist``;\n",
    "        this maps its positions to named keys and applies ``self.transform``\n",
    "        when one was given.\n",
    "        \"\"\"\n",
    "        entry = self.sum_sent_graph_list[index]\n",
    "\n",
    "        # (key, position in the stored entry). The tuple order is the key order\n",
    "        # of the returned dict, which differs from the storage order.\n",
    "        layout = (\n",
    "            ('core_article_infor', 0),\n",
    "            ('core_article_power', 1),\n",
    "            ('core_qu_infor', 2),\n",
    "            ('core_qu_power', 3),\n",
    "            ('qu_graph', 4),\n",
    "            ('core_graph', 5),\n",
    "            ('article_sum', 8),\n",
    "            ('qu_article_info_sum', 9),\n",
    "            ('qu_article_power_sum', 10),\n",
    "            ('label', 6),\n",
    "            ('example_id', 7),\n",
    "            ('art_id', 11),\n",
    "        )\n",
    "        sample = {key: entry[position] for key, position in layout}\n",
    "\n",
    "        if self.transform:\n",
    "            sample = self.transform(sample)\n",
    "        return sample\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# JSON data files loaded by qa_train_context_dataset; relative paths, resolved against the working directory.\n",
    "train_file_name = \"train_sent_5_verb_4_data_v1.json\"\n",
    "dev_file_name = \"dev_sent_5_verb_4_data_v1.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 60/28213 [00:00<00:47, 594.14it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Load ok\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 28213/28213 [00:49<00:00, 565.25it/s]\n",
      "100%|██████████| 28213/28213 [00:52<00:00, 537.07it/s]\n",
      "  3%|▎         | 63/2285 [00:00<00:03, 628.28it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Load ok\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2285/2285 [00:03<00:00, 603.97it/s]\n",
      "100%|██████████| 2285/2285 [00:03<00:00, 668.70it/s]\n"
     ]
    }
   ],
   "source": [
    "# Each constructor call reads its whole file and precomputes every per-sentence sample.\n",
    "train_data = qa_train_context_dataset(train_file_name,transform=None)# initialise the class: set the dataset path and the transform\n",
    "dev_data = qa_train_context_dataset(dev_file_name,transform=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "#sent 5,6,7,8\n",
    "train_loader = DataLoader(train_data,batch_size=64,drop_last=True,shuffle=False)\n",
    "dev_loader = DataLoader(dev_data,batch_size=64,drop_last=True,shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# #sent 9  and sent 10\n",
    "# train_loader = DataLoader(train_data,batch_size=128,drop_last=True,shuffle=False)\n",
    "# dev_loader = DataLoader(dev_data,batch_size=128,drop_last=True,shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def conv_article_Batch(in_planes, out_planes, kernel_size,stride):\n",
    "    \"\"\"Return an unpadded ``Conv2d`` followed by a ``Sigmoid``.\n",
    "\n",
    "    Despite the name, no batch normalisation is applied.\n",
    "\n",
    "    Args:\n",
    "        in_planes: number of input channels.\n",
    "        out_planes: number of output channels.\n",
    "        kernel_size: convolution kernel size (int or tuple).\n",
    "        stride: convolution stride.\n",
    "    \"\"\"\n",
    "    convolution = torch.nn.Conv2d(in_planes, out_planes, kernel_size, stride)\n",
    "    activation = torch.nn.Sigmoid()\n",
    "    return torch.nn.Sequential(convolution, activation)\n",
    "def conv_sent_Batch(in_planes, out_planes, kernel_size,stride):\n",
    "    \"\"\"Return an unpadded ``Conv2d`` followed by a ``Sigmoid``.\n",
    "\n",
    "    Despite the name, no batch normalisation is applied.\n",
    "\n",
    "    Args:\n",
    "        in_planes: number of input channels.\n",
    "        out_planes: number of output channels.\n",
    "        kernel_size: convolution kernel size (int or tuple).\n",
    "        stride: convolution stride.\n",
    "    \"\"\"\n",
    "    layers = [\n",
    "        torch.nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride),\n",
    "        torch.nn.Sigmoid(),\n",
    "    ]\n",
    "    return torch.nn.Sequential(*layers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "class conv_one_sent_layer(torch.nn.Module):\n",
    "    \"\"\"Two parallel conv+sigmoid views of a single sentence graph.\n",
    "\n",
    "    ``sent_power_conv`` slides a ``(1, graph_size)`` kernel over the graph laid\n",
    "    out as one row; ``pos_power_conv`` slides a ``(verb_num, 1)`` kernel over\n",
    "    the same values laid out as ``verb_num`` rows of ``srl_len`` entries.\n",
    "\n",
    "    Args:\n",
    "        batch_size: stored for compatibility; ``forward`` infers the batch\n",
    "            dimension from its input, so partial batches work too.\n",
    "        graph_size: number of values in one sentence graph; must be a multiple\n",
    "            of ``verb_num``.\n",
    "        in_channels: input channels of both convolutions.\n",
    "        out_channels: output channels of both convolutions.\n",
    "        verb_num: number of rows in the per-verb layout.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``graph_size`` is not a multiple of ``verb_num``.\n",
    "    \"\"\"\n",
    "    def __init__(self,batch_size,graph_size,in_channels,out_channels,verb_num):\n",
    "        super(conv_one_sent_layer,self).__init__()\n",
    "        if graph_size % verb_num != 0:\n",
    "            raise ValueError(\"graph_size (%d) must be a multiple of verb_num (%d)\" % (graph_size, verb_num))\n",
    "        self.batch_size = batch_size\n",
    "        self.in_channels = in_channels\n",
    "        self.out_channels = out_channels\n",
    "        self.sent_kernel_size = (1,graph_size)\n",
    "        self.pos_kernel_size = (verb_num,1)\n",
    "        self.sent_stride = 1\n",
    "        self.pos_stride = 1\n",
    "        self.graph_size = graph_size\n",
    "        self.verb_num = verb_num\n",
    "        # Row width of the per-verb layout, derived instead of a literal 25.\n",
    "        self.srl_len = graph_size // verb_num\n",
    "\n",
    "        self.sent_power_conv = conv_sent_Batch(self.in_channels,self.out_channels,self.sent_kernel_size,self.sent_stride)\n",
    "        self.pos_power_conv = conv_sent_Batch(self.in_channels,self.out_channels,self.pos_kernel_size,self.pos_stride)\n",
    "        \n",
    "    def forward(self,   \n",
    "                sent_graph,):\n",
    "        \"\"\"Return ``(sent_power, pos_i_power)`` for a batch of sentence graphs.\n",
    "\n",
    "        ``sent_graph`` may have any shape holding ``graph_size`` values per\n",
    "        sample; -1 lets reshape infer the batch dimension.\n",
    "        \"\"\"\n",
    "        sent_power = self.sent_power_conv(sent_graph.reshape(-1,1,1,self.graph_size))\n",
    "        pos_i_power = self.pos_power_conv(sent_graph.reshape(-1,1,self.verb_num,self.srl_len))\n",
    "        \n",
    "        return sent_power,pos_i_power\n",
    "\n",
    "    \n",
    "class conv_article_layer(torch.nn.Module):\n",
    "    \"\"\"Two parallel conv+sigmoid views of a whole-article graph.\n",
    "\n",
    "    ``article_power_conv`` uses a ``(sent_num, sent_size)`` kernel over the\n",
    "    article laid out as one row per sentence; ``pos_power_conv`` uses a\n",
    "    ``(sent_num * verb_num, 1)`` kernel over the same values laid out as rows\n",
    "    of ``srl_len`` entries. ``sent_size`` is ``verb_num * srl_len`` with\n",
    "    ``srl_len`` fixed at 25.\n",
    "\n",
    "    Args:\n",
    "        batch_size: stored for compatibility; ``forward`` infers the batch\n",
    "            dimension from its input, so partial batches work too.\n",
    "        graph_size: stored but not used by this layer.\n",
    "        sent_num: number of sentences per article.\n",
    "        in_channels: input channels of both convolutions.\n",
    "        out_channels: output channels of both convolutions.\n",
    "        verb_num: number of verb rows per sentence.\n",
    "    \"\"\"\n",
    "    def __init__(self,batch_size,graph_size,sent_num,in_channels,out_channels,verb_num):\n",
    "        super(conv_article_layer,self).__init__()\n",
    "        self.batch_size = batch_size\n",
    "        self.in_channels = in_channels\n",
    "        self.out_channels = out_channels\n",
    "        self.verb_num = verb_num\n",
    "        self.sent_num = sent_num\n",
    "        self.srl_len = 25\n",
    "        self.sent_size = self.verb_num*self.srl_len\n",
    "        self.sent_kernel_size = (self.sent_num,self.sent_size)\n",
    "        self.pos_kernel_size = (self.sent_num*self.verb_num,1)\n",
    "        self.sent_stride = 1\n",
    "        self.pos_stride = 1\n",
    "        self.graph_size = graph_size\n",
    "        \n",
    "\n",
    "        self.article_power_conv = conv_article_Batch(self.in_channels,self.out_channels,self.sent_kernel_size,self.sent_stride)\n",
    "        self.pos_power_conv = conv_article_Batch(self.in_channels,self.out_channels,self.pos_kernel_size,self.pos_stride)\n",
    "        \n",
    "        \n",
    "    def forward(self,   \n",
    "                sent_graph,):\n",
    "        \"\"\"Return ``(article_power, a_pos_i_power)`` for a batch of article graphs.\n",
    "\n",
    "        ``sent_graph`` may have any shape holding ``sent_num * sent_size``\n",
    "        values per sample; -1 lets reshape infer the batch dimension.\n",
    "        \"\"\"\n",
    "        article_power = self.article_power_conv(sent_graph.reshape(-1,1,self.sent_num,self.sent_size))\n",
    "        \n",
    "        # self.srl_len replaces the literal 25 so the layout follows the constructor.\n",
    "        a_pos_i_power = self.pos_power_conv(sent_graph.reshape(-1,1,self.sent_num*self.verb_num,self.srl_len))\n",
    "        \n",
    "        \n",
    "        return article_power,a_pos_i_power\n",
    "            \n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "conv_pos_power_net(\n",
      "  (core_power_conv): conv_one_sent_layer(\n",
      "    (sent_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(1, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(4, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (qu_power_conv): conv_one_sent_layer(\n",
      "    (sent_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(1, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(4, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (article_sum_power_conv): conv_article_layer(\n",
      "    (article_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(5, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(20, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (qu_article_power_conv): conv_article_layer(\n",
      "    (article_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(5, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(20, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (core_article_power_conv): conv_article_layer(\n",
      "    (article_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(5, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(20, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (cqi_info_conv): conv_one_sent_layer(\n",
      "    (sent_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(1, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(4, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (cqp_power_conv): conv_one_sent_layer(\n",
      "    (sent_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(1, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(4, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (core_article_info_conv): conv_article_layer(\n",
      "    (article_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(5, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(20, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (qu_article_info_conv): conv_article_layer(\n",
      "    (article_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(5, 100), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "    (pos_power_conv): Sequential(\n",
      "      (0): Conv2d(1, 1, kernel_size=(20, 1), stride=(1, 1))\n",
      "      (1): Sigmoid()\n",
      "    )\n",
      "  )\n",
      "  (mlp1): Linear(in_features=234, out_features=200, bias=True)\n",
      "  (mlp2): Linear(in_features=200, out_features=100, bias=True)\n",
      "  (mlp3): Linear(in_features=100, out_features=1, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "class conv_pos_power_net(torch.nn.Module):\n",
    "    \"\"\"Binary scorer built from nine convolutional feature branches and a linear head.\n",
    "\n",
    "    Every forward input goes through its own sub-layer (`conv_one_sent_layer` or\n",
    "    `conv_article_layer`, both defined elsewhere in this notebook). Each sub-layer\n",
    "    returns two tensors; all eighteen are concatenated along dim 3, flattened and fed\n",
    "    to three linear layers followed by a sigmoid.\n",
    "\n",
    "    Args:\n",
    "        batch_size: nominal batch size; stored and forwarded to every sub-layer.\n",
    "        in_channels: forwarded to every sub-layer.\n",
    "        out_channels: forwarded to every sub-layer; also scales the hidden widths of\n",
    "            the linear head (200*out_channels and 100*out_channels).\n",
    "    \"\"\"\n",
    "    def __init__(self,batch_size,in_channels,out_channels):\n",
    "        super(conv_pos_power_net,self).__init__()\n",
    "        self.batch_size = batch_size\n",
    "        self.in_channels = in_channels\n",
    "        self.out_channels = out_channels\n",
    "        self.verb_num = 4\n",
    "        self.srl_len = 25\n",
    "        self.graph_size = self.verb_num*self.srl_len\n",
    "        self.sent_num = 5\n",
    "\n",
    "        # Constructor arguments shared by all branches of the same kind.\n",
    "        sent_args = (self.batch_size,self.graph_size,self.in_channels,self.out_channels,self.verb_num)\n",
    "        article_args = (self.batch_size,self.graph_size,self.sent_num,self.in_channels,self.out_channels,self.verb_num)\n",
    "\n",
    "        # Creation order is kept exactly as before: it fixes the parameter order, the\n",
    "        # printed module order and the order in which random initial weights are drawn.\n",
    "        self.core_power_conv = conv_one_sent_layer(*sent_args)\n",
    "        self.qu_power_conv = conv_one_sent_layer(*sent_args)\n",
    "        self.article_sum_power_conv = conv_article_layer(*article_args)\n",
    "        self.qu_article_power_conv = conv_article_layer(*article_args)\n",
    "        self.core_article_power_conv = conv_article_layer(*article_args)\n",
    "        self.cqi_info_conv = conv_one_sent_layer(*sent_args)\n",
    "        self.cqp_power_conv = conv_one_sent_layer(*sent_args)\n",
    "        self.core_article_info_conv = conv_article_layer(*article_args)\n",
    "        self.qu_article_info_conv = conv_article_layer(*article_args)\n",
    "        # The flow_once/flow_twice branches are disabled in this version of the network.\n",
    "\n",
    "        # The head consumes 9 + 25*9 = 234 flattened features.\n",
    "        # NOTE(review): presumably one scalar plus srl_len (25) positional values for each\n",
    "        # of the nine branches - confirm against the sub-layer outputs.\n",
    "        self.mlp1 = torch.nn.Linear((9+25*9),200*self.out_channels)\n",
    "        self.mlp2 = torch.nn.Linear(200*self.out_channels,100*self.out_channels)\n",
    "        self.mlp3 = torch.nn.Linear(100*self.out_channels,1)\n",
    "\n",
    "    def forward(self,\n",
    "                core_graph,\n",
    "                qu_graph,\n",
    "                core_article_power,\n",
    "                core_qu_info,\n",
    "                core_qu_power,\n",
    "                article_sum,\n",
    "                qu_article_power_sum,\n",
    "                core_article_info,\n",
    "                qu_article_info_sum,\n",
    "               ):\n",
    "        \"\"\"Run all nine branches and return sigmoid scores of shape (rows, 1).\n",
    "\n",
    "        Each argument is handed unchanged to the branch named after it; the expected\n",
    "        tensor layout is whatever the corresponding sub-layer requires.\n",
    "        \"\"\"\n",
    "        core_power,core_pos_i_power = self.core_power_conv(core_graph)\n",
    "        qu_power,qu_pos_i_power = self.qu_power_conv(qu_graph)\n",
    "        core_sent_article_power,core_sent_article_pos_i_power = self.core_article_power_conv(core_article_power)\n",
    "        context_power,context_pos_i_power = self.article_sum_power_conv(article_sum)\n",
    "        qu_article_power,qu_article_pos_i_power = self.qu_article_power_conv(qu_article_power_sum)\n",
    "        core_in_qu_power,core_in_qu_pos_i_power = self.cqp_power_conv(core_qu_power)\n",
    "        qu_in_core_power,qu_in_core_pos_i_power = self.cqi_info_conv(core_qu_info)\n",
    "        core_sent_article_info,core_sent_article_pos_i_info = self.core_article_info_conv(core_article_info)\n",
    "        qu_article_info,qu_article_pos_i_info = self.qu_article_info_conv(qu_article_info_sum)\n",
    "\n",
    "        # Join every branch output along the last dimension.\n",
    "        x = torch.cat((\n",
    "            core_power,core_pos_i_power,\n",
    "            qu_power,qu_pos_i_power,\n",
    "            core_sent_article_power,core_sent_article_pos_i_power,\n",
    "            context_power,context_pos_i_power,\n",
    "            qu_article_power,qu_article_pos_i_power,\n",
    "            core_in_qu_power,core_in_qu_pos_i_power,\n",
    "            qu_in_core_power,qu_in_core_pos_i_power,\n",
    "            core_sent_article_info,core_sent_article_pos_i_info,\n",
    "            qu_article_info,qu_article_pos_i_info,\n",
    "        ),3)\n",
    "\n",
    "        # Flatten into rows of mlp1.in_features values. For a batch of the nominal size\n",
    "        # this yields exactly what `x.view(self.batch_size, -1)` produced, but a batch of\n",
    "        # a different size (e.g. a short final batch) is no longer rejected here.\n",
    "        x = x.view(-1, self.mlp1.in_features)\n",
    "        # NOTE(review): there is no activation between the three linear layers, so the\n",
    "        # head is equivalent to a single affine map - confirm this is intended.\n",
    "        x = self.mlp1(x)\n",
    "        x = self.mlp2(x)\n",
    "        x = self.mlp3(x)\n",
    "        x = torch.sigmoid(x)\n",
    "\n",
    "        return x\n",
    "        \n",
    "# Instantiate the network: batch size 64, one input channel, one output channel.\n",
    "model = conv_pos_power_net(batch_size=64, in_channels=1, out_channels=1)\n",
    "\n",
    "print(model)\n",
    "# Run on the GPU whenever one is available.\n",
    "if torch.cuda.is_available():\n",
    "    model = model.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optimisation setup: binary cross-entropy on the sigmoid output, Adam with step size 0.01.\n",
    "learning_rate = 0.01\n",
    "criterion = torch.nn.BCELoss()\n",
    "optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the model\n",
    "def train_code_confing(train_loader,dev_loader,epoch_num):\n",
    "    \"\"\"Train the notebook-level `model` for `epoch_num` passes over `train_loader`.\n",
    "\n",
    "    Uses the notebook-level `model`, `criterion` and `optimizer`. Every 500 training\n",
    "    steps, and once more at the end of each pass, `eval_code_confing(dev_loader)` is\n",
    "    run. After each pass the whole model object is saved to\n",
    "    ./flow_srl_feature_v2.0_5.pkl, overwriting the previous save.\n",
    "\n",
    "    Args:\n",
    "        train_loader: iterable of batches; each batch is a dict of tensors holding the\n",
    "            keys listed in `feature_keys` below plus 'label'.\n",
    "        dev_loader: passed unchanged to eval_code_confing.\n",
    "        epoch_num: number of passes over train_loader.\n",
    "    \"\"\"\n",
    "    # Batch keys in the positional order expected by model.forward:\n",
    "    # core_graph, qu_graph, core_article_power, core_qu_info, core_qu_power,\n",
    "    # article_sum, qu_article_power_sum, core_article_info, qu_article_info_sum.\n",
    "    feature_keys = (\n",
    "        'core_graph',\n",
    "        'qu_graph',\n",
    "        'core_article_power',\n",
    "        'core_qu_infor',\n",
    "        'core_qu_power',\n",
    "        'article_sum',\n",
    "        'qu_article_power_sum',\n",
    "        'core_article_infor',\n",
    "        'qu_article_info_sum',\n",
    "    )\n",
    "    # Follow the device the model lives on, so the loop also runs on a CPU-only machine\n",
    "    # instead of printing an error for every batch.\n",
    "    device = next(model.parameters()).device\n",
    "\n",
    "    for num in range(epoch_num):\n",
    "        # eval_code_confing() switches the model to eval mode; make sure each pass trains.\n",
    "        model.train()\n",
    "        step = 0\n",
    "\n",
    "        for data in tqdm(train_loader):\n",
    "            features = [data[key].to(device) for key in feature_keys]\n",
    "            label = data['label'].to(device)\n",
    "\n",
    "            # The model returns one column per row; drop it to match the label shape.\n",
    "            out = model(*features).squeeze(1)\n",
    "            loss = criterion(out,label)\n",
    "\n",
    "            optimizer.zero_grad()\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "\n",
    "            step += 1\n",
    "            if step%500 == 0:\n",
    "                # The message keeps its original 'epoch' wording; the number printed is\n",
    "                # the step count within the current pass.\n",
    "                print('epoch: {}, loss: {:.4}'.format(step, loss.item()))\n",
    "                eval_code_confing(dev_loader)\n",
    "                # Evaluation left the model in eval mode; resume training mode.\n",
    "                model.train()\n",
    "\n",
    "        # End-of-pass evaluation and checkpoint.\n",
    "        eval_code_confing(dev_loader)\n",
    "        torch.save(model, './flow_srl_feature_v2.0_5.pkl')\n",
    "\n",
    "\n",
    "        \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def exact_example_num(predict_label_list,dev_label_list,example_id_list):\n",
    "    \"\"\"Compute example-level exact match (EM) and F1 from flat per-row predictions.\n",
    "\n",
    "    The three lists are parallel. Consecutive rows that share an example id form one\n",
    "    run, and rows inside a run are numbered 0, 1, 2, ... A row is gold when its label\n",
    "    truncates to 1 and predicted when its score is greater than 0.5. Both metrics are\n",
    "    averaged over max(example id) + 1 examples, so an id that never occurs counts as\n",
    "    an exact match with an F1 of 0.\n",
    "\n",
    "    Args:\n",
    "        predict_label_list: per-row scores (0-d tensors or plain numbers).\n",
    "        dev_label_list: per-row gold labels (0-d tensors or plain numbers).\n",
    "        example_id_list: per-row integer example ids (0-d tensors or plain ints).\n",
    "\n",
    "    Returns:\n",
    "        (em, f1) as floats; (0.0, 0.0) when the lists are empty.\n",
    "\n",
    "    Raises:\n",
    "        AssertionError: if the three lists differ in length.\n",
    "    \"\"\"\n",
    "    # Convert once to Python scalars: this works for tensors on any device and avoids\n",
    "    # indexing lists with tensors or reducing a list of tensors through numpy.\n",
    "    example_ids = [ int(i) for i in example_id_list ]\n",
    "    scores = [ float(i) for i in predict_label_list ]\n",
    "    labels = [ int(i) for i in dev_label_list ]\n",
    "\n",
    "    assert len(scores) == len(labels)\n",
    "    assert len(scores) == len(example_ids)\n",
    "\n",
    "    num = len(labels)\n",
    "    # max() of an empty list would raise, so an empty evaluation set scores zero.\n",
    "    n = max(example_ids) + 1 if num > 0 else 0\n",
    "    print(\"num:\",num)\n",
    "    print(\"n:\",n)\n",
    "    if n == 0:\n",
    "        return 0.0,0.0\n",
    "\n",
    "    gold_sp = [ [] for _ in range(n) ]\n",
    "    pred_sp = [ [] for _ in range(n) ]\n",
    "\n",
    "    # sd is the row's position inside the current run of identical example ids.\n",
    "    last_id,sd = -1,0\n",
    "    for row_id,label,score in zip(example_ids,labels,scores):\n",
    "        if row_id != last_id:\n",
    "            last_id,sd = row_id,0\n",
    "        if label == 1:\n",
    "            gold_sp[last_id].append(sd)\n",
    "        if score > 0.5:\n",
    "            pred_sp[last_id].append(sd)\n",
    "        sd += 1\n",
    "\n",
    "    em,f1 = 0.0,0.0\n",
    "    for cur_pred,cur_gold in zip(pred_sp,gold_sp):\n",
    "        # Sets are used only for membership tests; counting still walks the lists, so\n",
    "        # the totals match the list-based version exactly.\n",
    "        gold_set = set(cur_gold)\n",
    "        pred_set = set(cur_pred)\n",
    "        tp = sum(1 for e in cur_pred if e in gold_set)\n",
    "        fp = len(cur_pred) - tp\n",
    "        fn = sum(1 for e in cur_gold if e not in pred_set)\n",
    "\n",
    "        prec = 1.0 * tp / (tp + fp) if tp + fp > 0 else 0.0\n",
    "        recall = 1.0 * tp / (tp + fn) if tp + fn > 0 else 0.0\n",
    "        f1 += 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.0\n",
    "        em += 1.0 if fp + fn == 0 else 0.0\n",
    "    em /= n\n",
    "    f1 /= n\n",
    "    print(\"em:\",em)\n",
    "    print(\"f1:\",f1)\n",
    "    return em,f1\n",
    "                "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def eval_code_confing(dev_loader):\n",
    "    \"\"\"Evaluate the notebook-level `model` on `dev_loader` and print the metrics.\n",
    "\n",
    "    Prints the number of collected rows, the EM/F1 figures produced by\n",
    "    `exact_example_num`, and the per-row mean loss and accuracy at a 0.5 threshold.\n",
    "    Uses the notebook-level `model` and `criterion`. The model is put in eval mode for\n",
    "    the duration of the call and its previous mode is restored afterwards.\n",
    "\n",
    "    Args:\n",
    "        dev_loader: iterable of batches; each batch is a dict of tensors holding the\n",
    "            keys listed in `feature_keys` below plus 'label' and 'example_id'.\n",
    "    \"\"\"\n",
    "    # Model evaluation\n",
    "    print(\"load_function\")\n",
    "    # Batch keys in the positional order expected by model.forward (core_graph first,\n",
    "    # then qu_graph), matching the order used by the training loop.\n",
    "    feature_keys = (\n",
    "        'core_graph',\n",
    "        'qu_graph',\n",
    "        'core_article_power',\n",
    "        'core_qu_infor',\n",
    "        'core_qu_power',\n",
    "        'article_sum',\n",
    "        'qu_article_power_sum',\n",
    "        'core_article_infor',\n",
    "        'qu_article_info_sum',\n",
    "    )\n",
    "    device = next(model.parameters()).device\n",
    "    was_training = model.training\n",
    "    model.eval()\n",
    "\n",
    "    eval_loss = 0.0\n",
    "    eval_acc = 0\n",
    "    predict_label_list = []\n",
    "    dev_label_list = []\n",
    "    example_id_list = []\n",
    "\n",
    "    # Gradients are not needed for evaluation; disabling them also keeps the collected\n",
    "    # predictions from holding on to autograd graphs.\n",
    "    with torch.no_grad():\n",
    "        for data in tqdm(dev_loader):\n",
    "            features = [data[key].to(device) for key in feature_keys]\n",
    "            label = data['label'].to(device)\n",
    "            example_id = data['example_id']\n",
    "\n",
    "            out = model(*features).squeeze(1)\n",
    "            loss = criterion(out,label)\n",
    "\n",
    "            # extend() iterates over dim 0, appending one 0-d tensor per row.\n",
    "            predict_label_list.extend(out)\n",
    "            dev_label_list.extend(label)\n",
    "            example_id_list.extend(example_id)\n",
    "\n",
    "            # criterion (nn.BCELoss with default reduction) returns a batch mean, so it\n",
    "            # is weighted by the batch size to obtain a true per-row mean below.\n",
    "            eval_loss += loss.item() * label.size(0)\n",
    "            pred = out > 0.5\n",
    "            eval_acc += (pred.long() == label.long()).sum().item()\n",
    "\n",
    "    # Hand the model back in the mode it was in when this function was called.\n",
    "    model.train(was_training)\n",
    "\n",
    "    print(\"predict_label_list:\",len(predict_label_list),\"dev_label_list:\",len(dev_label_list),\"example_id_list:\",len(example_id_list))\n",
    "    # Count the rows actually evaluated instead of assuming 64 rows per batch.\n",
    "    sent_num = len(dev_label_list)\n",
    "    if sent_num == 0:\n",
    "        print(\"eval_error: dev_loader produced no rows\")\n",
    "        return\n",
    "\n",
    "    exact_example_num(predict_label_list,dev_label_list,example_id_list)\n",
    "    print('Test Loss: {:.6f}, Acc: {:.6f}'.format(\n",
    "        eval_loss / sent_num,\n",
    "        eval_acc / sent_num,\n",
    "    ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run label kept from the original notebook: sent7verb3\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader=train_loader, dev_loader=dev_loader, epoch_num=epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 27%|██▋       | 495/1821 [00:10<00:23, 57.56it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|█         | 15/147 [00:00<00:00, 143.25it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5819\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 20%|██        | 30/147 [00:00<00:00, 143.67it/s]\u001b[A\n",
      " 28%|██▊       | 41/147 [00:00<00:00, 128.82it/s]\u001b[A\n",
      " 35%|███▌      | 52/147 [00:00<00:00, 122.03it/s]\u001b[A\n",
      " 45%|████▍     | 66/147 [00:00<00:00, 125.74it/s]\u001b[A\n",
      " 54%|█████▍    | 80/147 [00:00<00:00, 128.71it/s]\u001b[A\n",
      " 64%|██████▍   | 94/147 [00:00<00:00, 130.05it/s]\u001b[A\n",
      " 74%|███████▍  | 109/147 [00:00<00:00, 133.35it/s]\u001b[A\n",
      " 83%|████████▎ | 122/147 [00:00<00:00, 126.12it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 126.91it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3571741992101799\n",
      "f1: 0.7917375345633318\n",
      "Test Loss: 0.008172, Acc: 0.752976\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 55%|█████▍    | 996/1821 [00:21<00:13, 59.51it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|█         | 15/147 [00:00<00:00, 148.85it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5267\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 20%|██        | 30/147 [00:00<00:00, 147.06it/s]\u001b[A\n",
      " 31%|███▏      | 46/147 [00:00<00:00, 148.23it/s]\u001b[A\n",
      " 38%|███▊      | 56/147 [00:00<00:00, 127.34it/s]\u001b[A\n",
      " 45%|████▍     | 66/147 [00:00<00:00, 112.43it/s]\u001b[A\n",
      " 52%|█████▏    | 76/147 [00:00<00:00, 108.17it/s]\u001b[A\n",
      " 61%|██████    | 89/147 [00:00<00:00, 111.87it/s]\u001b[A\n",
      " 69%|██████▉   | 102/147 [00:00<00:00, 115.78it/s]\u001b[A\n",
      " 78%|███████▊  | 115/147 [00:00<00:00, 119.46it/s]\u001b[A\n",
      " 86%|████████▋ | 127/147 [00:01<00:00, 116.55it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 117.97it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.38657305835892936\n",
      "f1: 0.7825226185252486\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 55%|█████▌    | 1008/1821 [00:23<01:07, 12.00it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Loss: 0.007960, Acc: 0.760098\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 1496/1821 [00:31<00:05, 55.86it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|█         | 15/147 [00:00<00:00, 142.15it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5246\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 20%|█▉        | 29/147 [00:00<00:00, 140.24it/s]\u001b[A\n",
      " 29%|██▉       | 43/147 [00:00<00:00, 139.43it/s]\u001b[A\n",
      " 39%|███▉      | 58/147 [00:00<00:00, 139.90it/s]\u001b[A\n",
      " 49%|████▉     | 72/147 [00:00<00:00, 139.20it/s]\u001b[A\n",
      " 59%|█████▊    | 86/147 [00:00<00:00, 138.88it/s]\u001b[A\n",
      " 69%|██████▊   | 101/147 [00:00<00:00, 140.65it/s]\u001b[A\n",
      " 78%|███████▊  | 115/147 [00:00<00:00, 138.46it/s]\u001b[A\n",
      " 88%|████████▊ | 130/147 [00:00<00:00, 139.38it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 139.14it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.37735849056603776\n",
      "f1: 0.7800312724182811\n",
      "Test Loss: 0.007990, Acc: 0.754996\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1821/1821 [00:39<00:00, 46.10it/s]\n",
      " 10%|█         | 15/147 [00:00<00:00, 149.58it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 147/147 [00:00<00:00, 152.11it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3203159280386134\n",
      "f1: 0.7960327907673201\n",
      "Test Loss: 0.008415, Acc: 0.743941\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 27%|██▋       | 493/1821 [00:07<00:19, 69.54it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 11%|█         | 16/147 [00:00<00:00, 155.10it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5881\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 22%|██▏       | 32/147 [00:00<00:00, 153.90it/s]\u001b[A\n",
      " 33%|███▎      | 48/147 [00:00<00:00, 154.20it/s]\u001b[A\n",
      " 44%|████▎     | 64/147 [00:00<00:00, 154.76it/s]\u001b[A\n",
      " 52%|█████▏    | 76/147 [00:00<00:00, 138.81it/s]\u001b[A\n",
      " 63%|██████▎   | 93/147 [00:00<00:00, 145.03it/s]\u001b[A\n",
      " 75%|███████▍  | 110/147 [00:00<00:00, 151.07it/s]\u001b[A\n",
      " 86%|████████▋ | 127/147 [00:00<00:00, 154.78it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:00<00:00, 152.54it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 27%|██▋       | 500/1821 [00:09<01:44, 12.69it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.37823606845107505\n",
      "f1: 0.7830470757851158\n",
      "Test Loss: 0.007937, Acc: 0.757228\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 55%|█████▍    | 994/1821 [00:17<00:14, 56.70it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|█         | 15/147 [00:00<00:00, 144.53it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5311\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 20%|██        | 30/147 [00:00<00:00, 144.26it/s]\u001b[A\n",
      " 31%|███       | 45/147 [00:00<00:00, 144.84it/s]\u001b[A\n",
      " 40%|████      | 59/147 [00:00<00:00, 141.68it/s]\u001b[A\n",
      " 50%|████▉     | 73/147 [00:00<00:00, 140.54it/s]\u001b[A\n",
      " 60%|█████▉    | 88/147 [00:00<00:00, 141.64it/s]\u001b[A\n",
      " 70%|███████   | 103/147 [00:00<00:00, 143.69it/s]\u001b[A\n",
      " 80%|████████  | 118/147 [00:00<00:00, 143.64it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 143.35it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.38174637999122424\n",
      "f1: 0.7742598048433923\n",
      "Test Loss: 0.008088, Acc: 0.755421\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 1494/1821 [00:28<00:05, 57.71it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|█         | 15/147 [00:00<00:00, 143.73it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5663\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 20%|██        | 30/147 [00:00<00:00, 144.81it/s]\u001b[A\n",
      " 31%|███       | 45/147 [00:00<00:00, 145.60it/s]\u001b[A\n",
      " 41%|████      | 60/147 [00:00<00:00, 145.24it/s]\u001b[A\n",
      " 51%|█████     | 75/147 [00:00<00:00, 144.59it/s]\u001b[A\n",
      " 62%|██████▏   | 91/147 [00:00<00:00, 147.15it/s]\u001b[A\n",
      " 73%|███████▎  | 107/147 [00:00<00:00, 148.29it/s]\u001b[A\n",
      " 84%|████████▎ | 123/147 [00:00<00:00, 149.20it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:00<00:00, 147.92it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 1500/1821 [00:29<00:30, 10.56it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3523475208424748\n",
      "f1: 0.7886071585281735\n",
      "Test Loss: 0.008056, Acc: 0.751063\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1821/1821 [00:35<00:00, 51.23it/s]\n",
      " 10%|█         | 15/147 [00:00<00:00, 144.88it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 147/147 [00:01<00:00, 137.81it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3229486616937253\n",
      "f1: 0.7966944566330226\n",
      "Test Loss: 0.008432, Acc: 0.744898\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 27%|██▋       | 496/1821 [00:09<00:29, 45.55it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/147 [00:00<00:01, 119.10it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5859\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 16%|█▋        | 24/147 [00:00<00:01, 118.56it/s]\u001b[A\n",
      " 21%|██        | 31/147 [00:00<00:01, 97.83it/s] \u001b[A\n",
      " 29%|██▉       | 43/147 [00:00<00:01, 103.40it/s]\u001b[A\n",
      " 38%|███▊      | 56/147 [00:00<00:00, 107.98it/s]\u001b[A\n",
      " 46%|████▋     | 68/147 [00:00<00:00, 110.43it/s]\u001b[A\n",
      " 55%|█████▌    | 81/147 [00:00<00:00, 115.11it/s]\u001b[A\n",
      " 66%|██████▌   | 97/147 [00:00<00:00, 123.94it/s]\u001b[A\n",
      " 76%|███████▌  | 112/147 [00:00<00:00, 130.57it/s]\u001b[A\n",
      " 87%|████████▋ | 128/147 [00:01<00:00, 136.59it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 127.74it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3734093900833699\n",
      "f1: 0.7822482013135779\n",
      "Test Loss: 0.007931, Acc: 0.755527\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 55%|█████▍    | 999/1821 [00:19<00:14, 57.61it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|█         | 15/147 [00:00<00:00, 145.63it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4986\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 20%|██        | 30/147 [00:00<00:00, 146.31it/s]\u001b[A\n",
      " 31%|███       | 45/147 [00:00<00:00, 146.52it/s]\u001b[A\n",
      " 41%|████      | 60/147 [00:00<00:00, 145.88it/s]\u001b[A\n",
      " 51%|█████     | 75/147 [00:00<00:00, 146.49it/s]\u001b[A\n",
      " 61%|██████    | 90/147 [00:00<00:00, 146.84it/s]\u001b[A\n",
      " 71%|███████▏  | 105/147 [00:00<00:00, 147.72it/s]\u001b[A\n",
      " 81%|████████  | 119/147 [00:00<00:00, 144.30it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 145.33it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3835015357612988\n",
      "f1: 0.774632775444533\n",
      "Test Loss: 0.007974, Acc: 0.757972\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 1493/1821 [00:30<00:05, 61.82it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 11%|█         | 16/147 [00:00<00:00, 159.76it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5264\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 22%|██▏       | 33/147 [00:00<00:00, 162.24it/s]\u001b[A\n",
      " 34%|███▍      | 50/147 [00:00<00:00, 164.45it/s]\u001b[A\n",
      " 46%|████▋     | 68/147 [00:00<00:00, 166.30it/s]\u001b[A\n",
      " 59%|█████▊    | 86/147 [00:00<00:00, 167.94it/s]\u001b[A\n",
      " 71%|███████   | 104/147 [00:00<00:00, 169.49it/s]\u001b[A\n",
      " 82%|████████▏ | 121/147 [00:00<00:00, 168.36it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:00<00:00, 162.60it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 1500/1821 [00:31<00:24, 13.00it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3646336112329969\n",
      "f1: 0.788155484513533\n",
      "Test Loss: 0.007982, Acc: 0.753827\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1821/1821 [00:36<00:00, 49.26it/s]\n",
      " 12%|█▏        | 17/147 [00:00<00:00, 169.05it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 147/147 [00:00<00:00, 157.65it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3321632294866169\n",
      "f1: 0.7980717663692609\n",
      "Test Loss: 0.008356, Acc: 0.748831\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 27%|██▋       | 493/1821 [00:09<00:22, 57.90it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 12%|█▏        | 18/147 [00:00<00:00, 170.43it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5787\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 24%|██▍       | 35/147 [00:00<00:00, 168.37it/s]\u001b[A\n",
      " 35%|███▍      | 51/147 [00:00<00:00, 165.46it/s]\u001b[A\n",
      " 46%|████▋     | 68/147 [00:00<00:00, 165.08it/s]\u001b[A\n",
      " 58%|█████▊    | 85/147 [00:00<00:00, 164.98it/s]\u001b[A\n",
      " 70%|███████   | 103/147 [00:00<00:00, 166.91it/s]\u001b[A\n",
      " 82%|████████▏ | 121/147 [00:00<00:00, 168.02it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:00<00:00, 165.85it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 27%|██▋       | 500/1821 [00:10<01:42, 12.87it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3681439227731461\n",
      "f1: 0.7930295242274144\n",
      "Test Loss: 0.008107, Acc: 0.757547\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 55%|█████▍    | 998/1821 [00:19<00:13, 60.56it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 10/147 [00:00<00:01, 94.87it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4926\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 18%|█▊        | 26/147 [00:00<00:01, 107.47it/s]\u001b[A\n",
      " 29%|██▊       | 42/147 [00:00<00:00, 117.96it/s]\u001b[A\n",
      " 39%|███▉      | 58/147 [00:00<00:00, 126.48it/s]\u001b[A\n",
      " 50%|█████     | 74/147 [00:00<00:00, 134.35it/s]\u001b[A\n",
      " 61%|██████    | 90/147 [00:00<00:00, 140.81it/s]\u001b[A\n",
      " 72%|███████▏  | 106/147 [00:00<00:00, 144.63it/s]\u001b[A\n",
      " 83%|████████▎ | 122/147 [00:00<00:00, 147.12it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:00<00:00, 148.28it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.38525669153137343\n",
      "f1: 0.7805250841012119\n",
      "Test Loss: 0.007948, Acc: 0.758503\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 1496/1821 [00:28<00:05, 57.63it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|█         | 15/147 [00:00<00:00, 149.90it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5462\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 21%|██        | 31/147 [00:00<00:00, 150.11it/s]\u001b[A\n",
      " 32%|███▏      | 47/147 [00:00<00:00, 150.42it/s]\u001b[A\n",
      " 42%|████▏     | 62/147 [00:00<00:00, 148.41it/s]\u001b[A\n",
      " 52%|█████▏    | 77/147 [00:00<00:00, 148.53it/s]\u001b[A\n",
      " 63%|██████▎   | 92/147 [00:00<00:00, 148.30it/s]\u001b[A\n",
      " 73%|███████▎  | 107/147 [00:00<00:00, 148.07it/s]\u001b[A\n",
      " 84%|████████▎ | 123/147 [00:00<00:00, 150.03it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:00<00:00, 150.21it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n",
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.3409390083369899\n",
      "f1: 0.790381816029026\n",
      "Test Loss: 0.008112, Acc: 0.746705\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1821/1821 [00:36<00:00, 49.77it/s]\n",
      "  9%|▉         | 13/147 [00:00<00:01, 124.34it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 147/147 [00:01<00:00, 146.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 9408 dev_label_list: 9408 example_id_list: 9408\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/1821 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 9408\n",
      "n: 2279\n",
      "em: 0.32514260640631854\n",
      "f1: 0.7967550512965127\n",
      "Test Loss: 0.008461, Acc: 0.746173\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 184/1821 [00:03<00:31, 51.48it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-33-8b7255c304f3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent5verb4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-30-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     77\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m                 \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m                 \u001b[0mepoch\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mepoch\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m500\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/optim/adam.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m     99\u001b[0m                     \u001b[0mdenom\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmax_exp_avg_sq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'eps'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    100\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m                     \u001b[0mdenom\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexp_avg_sq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'eps'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    103\u001b[0m                 \u001b[0mbias_correction1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mbeta1\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'step'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#sent5verb4\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 496/2883 [00:10<00:46, 51.88it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  2%|▏         | 4/242 [00:00<00:06, 37.51it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4697\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  5%|▍         | 11/242 [00:00<00:05, 43.22it/s]\u001b[A\n",
      "  7%|▋         | 18/242 [00:00<00:04, 48.74it/s]\u001b[A\n",
      " 10%|▉         | 24/242 [00:00<00:04, 51.52it/s]\u001b[A\n",
      " 13%|█▎        | 32/242 [00:00<00:03, 56.47it/s]\u001b[A\n",
      " 17%|█▋        | 41/242 [00:00<00:03, 62.20it/s]\u001b[A\n",
      " 21%|██        | 50/242 [00:00<00:02, 67.09it/s]\u001b[A\n",
      " 24%|██▍       | 59/242 [00:00<00:02, 70.90it/s]\u001b[A\n",
      " 28%|██▊       | 68/242 [00:00<00:02, 74.12it/s]\u001b[A\n",
      " 31%|███▏      | 76/242 [00:01<00:02, 75.26it/s]\u001b[A\n",
      " 35%|███▍      | 84/242 [00:01<00:02, 72.33it/s]\u001b[A\n",
      " 38%|███▊      | 92/242 [00:01<00:02, 71.87it/s]\u001b[A\n",
      " 42%|████▏     | 101/242 [00:01<00:01, 75.24it/s]\u001b[A\n",
      " 45%|████▌     | 109/242 [00:01<00:01, 72.97it/s]\u001b[A\n",
      " 49%|████▉     | 118/242 [00:01<00:01, 75.97it/s]\u001b[A\n",
      " 52%|█████▏    | 127/242 [00:01<00:01, 76.91it/s]\u001b[A\n",
      " 56%|█████▌    | 136/242 [00:01<00:01, 78.80it/s]\u001b[A\n",
      " 60%|█████▉    | 145/242 [00:01<00:01, 81.32it/s]\u001b[A\n",
      " 64%|██████▎   | 154/242 [00:02<00:01, 68.66it/s]\u001b[A\n",
      " 67%|██████▋   | 162/242 [00:02<00:01, 71.18it/s]\u001b[A\n",
      " 71%|███████   | 171/242 [00:02<00:00, 75.01it/s]\u001b[A\n",
      " 74%|███████▍  | 180/242 [00:02<00:00, 77.44it/s]\u001b[A\n",
      " 79%|███████▊  | 190/242 [00:02<00:00, 80.47it/s]\u001b[A\n",
      " 82%|████████▏ | 199/242 [00:02<00:00, 78.82it/s]\u001b[A\n",
      " 86%|████████▋ | 209/242 [00:02<00:00, 80.97it/s]\u001b[A\n",
      " 90%|█████████ | 218/242 [00:02<00:00, 80.21it/s]\u001b[A\n",
      " 94%|█████████▍| 227/242 [00:03<00:00, 82.65it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:03<00:00, 75.61it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 502/2883 [00:15<11:03,  3.59it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.24640735773136616\n",
      "f1: 0.6507185837517381\n",
      "Test Loss: 0.004007, Acc: 0.756909\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 998/2883 [00:26<00:38, 49.06it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 89.91it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4331\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 19/242 [00:00<00:02, 90.23it/s]\u001b[A\n",
      " 12%|█▏        | 29/242 [00:00<00:02, 91.06it/s]\u001b[A\n",
      " 16%|█▌        | 39/242 [00:00<00:02, 91.73it/s]\u001b[A\n",
      " 20%|██        | 49/242 [00:00<00:02, 92.51it/s]\u001b[A\n",
      " 24%|██▍       | 59/242 [00:00<00:01, 93.05it/s]\u001b[A\n",
      " 29%|██▊       | 69/242 [00:00<00:01, 93.68it/s]\u001b[A\n",
      " 33%|███▎      | 79/242 [00:00<00:01, 93.58it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 92.43it/s]\u001b[A\n",
      " 40%|████      | 98/242 [00:01<00:01, 92.99it/s]\u001b[A\n",
      " 45%|████▍     | 108/242 [00:01<00:01, 82.52it/s]\u001b[A\n",
      " 49%|████▉     | 118/242 [00:01<00:01, 86.36it/s]\u001b[A\n",
      " 53%|█████▎    | 128/242 [00:01<00:01, 87.95it/s]\u001b[A\n",
      " 57%|█████▋    | 138/242 [00:01<00:01, 90.72it/s]\u001b[A\n",
      " 61%|██████    | 148/242 [00:01<00:01, 92.96it/s]\u001b[A\n",
      " 65%|██████▌   | 158/242 [00:01<00:00, 94.66it/s]\u001b[A\n",
      " 69%|██████▉   | 168/242 [00:01<00:00, 95.12it/s]\u001b[A\n",
      " 74%|███████▎  | 178/242 [00:01<00:00, 95.36it/s]\u001b[A\n",
      " 78%|███████▊  | 188/242 [00:02<00:00, 92.90it/s]\u001b[A\n",
      " 82%|████████▏ | 198/242 [00:02<00:00, 93.60it/s]\u001b[A\n",
      " 86%|████████▌ | 208/242 [00:02<00:00, 94.15it/s]\u001b[A\n",
      " 90%|█████████ | 218/242 [00:02<00:00, 94.51it/s]\u001b[A\n",
      " 94%|█████████▍| 228/242 [00:02<00:00, 94.52it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 92.56it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.2563709522897107\n",
      "f1: 0.6628021241069781\n",
      "Test Loss: 0.003996, Acc: 0.760234\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1498/2883 [00:41<00:30, 45.82it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 107.37it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.51\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 105.67it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:01, 106.29it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 106.65it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:01, 107.06it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 106.63it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 107.41it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 107.07it/s]\u001b[A\n",
      " 41%|████▏     | 100/242 [00:00<00:01, 108.26it/s]\u001b[A\n",
      " 46%|████▌     | 111/242 [00:01<00:01, 107.48it/s]\u001b[A\n",
      " 51%|█████     | 123/242 [00:01<00:01, 108.49it/s]\u001b[A\n",
      " 55%|█████▌    | 134/242 [00:01<00:00, 108.59it/s]\u001b[A\n",
      " 60%|██████    | 146/242 [00:01<00:00, 109.28it/s]\u001b[A\n",
      " 65%|██████▍   | 157/242 [00:01<00:00, 108.69it/s]\u001b[A\n",
      " 70%|██████▉   | 169/242 [00:01<00:00, 109.33it/s]\u001b[A\n",
      " 74%|███████▍  | 180/242 [00:01<00:00, 109.11it/s]\u001b[A\n",
      " 79%|███████▉  | 191/242 [00:01<00:00, 108.00it/s]\u001b[A\n",
      " 83%|████████▎ | 202/242 [00:01<00:00, 98.70it/s] \u001b[A\n",
      " 88%|████████▊ | 213/242 [00:02<00:00, 99.77it/s]\u001b[A\n",
      " 93%|█████████▎| 224/242 [00:02<00:00, 100.97it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 105.71it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.2266717762023376\n",
      "f1: 0.7094747463687993\n",
      "Test Loss: 0.004053, Acc: 0.749774\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1997/2883 [00:56<00:21, 40.75it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 80.72it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4256\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 19/242 [00:00<00:02, 84.61it/s]\u001b[A\n",
      " 12%|█▏        | 29/242 [00:00<00:02, 88.05it/s]\u001b[A\n",
      " 16%|█▌        | 39/242 [00:00<00:02, 89.93it/s]\u001b[A\n",
      " 21%|██        | 50/242 [00:00<00:02, 92.96it/s]\u001b[A\n",
      " 25%|██▍       | 60/242 [00:00<00:01, 94.36it/s]\u001b[A\n",
      " 29%|██▉       | 70/242 [00:00<00:01, 95.73it/s]\u001b[A\n",
      " 33%|███▎      | 80/242 [00:00<00:01, 95.81it/s]\u001b[A\n",
      " 37%|███▋      | 90/242 [00:00<00:01, 96.83it/s]\u001b[A\n",
      " 41%|████▏     | 100/242 [00:01<00:01, 96.31it/s]\u001b[A\n",
      " 45%|████▌     | 110/242 [00:01<00:01, 97.13it/s]\u001b[A\n",
      " 50%|████▉     | 120/242 [00:01<00:01, 97.57it/s]\u001b[A\n",
      " 54%|█████▍    | 131/242 [00:01<00:01, 98.53it/s]\u001b[A\n",
      " 58%|█████▊    | 141/242 [00:01<00:01, 97.66it/s]\u001b[A\n",
      " 63%|██████▎   | 152/242 [00:01<00:00, 98.47it/s]\u001b[A\n",
      " 67%|██████▋   | 162/242 [00:01<00:00, 98.13it/s]\u001b[A\n",
      " 71%|███████   | 172/242 [00:01<00:00, 98.68it/s]\u001b[A\n",
      " 75%|███████▌  | 182/242 [00:01<00:00, 97.67it/s]\u001b[A\n",
      " 79%|███████▉  | 192/242 [00:01<00:00, 97.74it/s]\u001b[A\n",
      " 84%|████████▍ | 203/242 [00:02<00:00, 98.64it/s]\u001b[A\n",
      " 88%|████████▊ | 214/242 [00:02<00:00, 98.49it/s]\u001b[A\n",
      " 93%|█████████▎| 224/242 [00:02<00:00, 98.29it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 97.23it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 69%|██████▉   | 2002/2883 [01:00<04:14,  3.46it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26192757233186437\n",
      "f1: 0.6744927721360067\n",
      "Test Loss: 0.003933, Acc: 0.761299\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2498/2883 [01:10<00:07, 48.13it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 105.53it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4782\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 105.91it/s]\u001b[A\n",
      " 14%|█▍        | 34/242 [00:00<00:01, 107.32it/s]\u001b[A\n",
      " 19%|█▉        | 46/242 [00:00<00:01, 108.54it/s]\u001b[A\n",
      " 24%|██▎       | 57/242 [00:00<00:01, 107.71it/s]\u001b[A\n",
      " 28%|██▊       | 68/242 [00:00<00:01, 107.96it/s]\u001b[A\n",
      " 32%|███▏      | 78/242 [00:00<00:01, 100.43it/s]\u001b[A\n",
      " 37%|███▋      | 89/242 [00:00<00:01, 103.07it/s]\u001b[A\n",
      " 42%|████▏     | 101/242 [00:00<00:01, 105.34it/s]\u001b[A\n",
      " 46%|████▋     | 112/242 [00:01<00:01, 105.91it/s]\u001b[A\n",
      " 51%|█████     | 123/242 [00:01<00:01, 106.66it/s]\u001b[A\n",
      " 55%|█████▌    | 134/242 [00:01<00:01, 106.32it/s]\u001b[A\n",
      " 60%|█████▉    | 145/242 [00:01<00:00, 106.32it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 106.69it/s]\u001b[A\n",
      " 69%|██████▉   | 167/242 [00:01<00:00, 107.34it/s]\u001b[A\n",
      " 74%|███████▎  | 178/242 [00:01<00:00, 107.43it/s]\u001b[A\n",
      " 78%|███████▊  | 189/242 [00:01<00:00, 107.87it/s]\u001b[A\n",
      " 83%|████████▎ | 200/242 [00:01<00:00, 108.48it/s]\u001b[A\n",
      " 87%|████████▋ | 211/242 [00:01<00:00, 107.98it/s]\u001b[A\n",
      " 92%|█████████▏| 223/242 [00:02<00:00, 108.81it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 107.23it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.26058631921824105\n",
      "f1: 0.686214193504871\n",
      "Test Loss: 0.003902, Acc: 0.761525\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2883/2883 [01:23<00:00, 34.63it/s]\n",
      "  4%|▍         | 10/242 [00:00<00:02, 91.36it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 242/242 [00:02<00:00, 106.17it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.24506610461774286\n",
      "f1: 0.6465489344489196\n",
      "Test Loss: 0.003930, Acc: 0.758264\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/baiyq/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/serialization.py:256: UserWarning: Couldn't retrieve source code for container of type conv_pos_power_net. It won't be checked for correctness upon loading.\n",
      "  \"type \" + obj.__name__ + \". It won't be checked \"\n",
      "/home/baiyq/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/serialization.py:256: UserWarning: Couldn't retrieve source code for container of type conv_one_sent_layer. It won't be checked for correctness upon loading.\n",
      "  \"type \" + obj.__name__ + \". It won't be checked \"\n",
      "/home/baiyq/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/serialization.py:256: UserWarning: Couldn't retrieve source code for container of type conv_article_layer. It won't be checked for correctness upon loading.\n",
      "  \"type \" + obj.__name__ + \". It won't be checked \"\n",
      " 17%|█▋        | 498/2883 [00:10<00:46, 51.07it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/242 [00:00<00:02, 99.46it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.462\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 19/242 [00:00<00:02, 94.63it/s]\u001b[A\n",
      " 12%|█▏        | 29/242 [00:00<00:02, 93.87it/s]\u001b[A\n",
      " 17%|█▋        | 40/242 [00:00<00:02, 97.81it/s]\u001b[A\n",
      " 20%|██        | 49/242 [00:00<00:02, 95.29it/s]\u001b[A\n",
      " 25%|██▌       | 61/242 [00:00<00:01, 99.48it/s]\u001b[A\n",
      " 30%|██▉       | 72/242 [00:00<00:01, 101.76it/s]\u001b[A\n",
      " 34%|███▍      | 83/242 [00:00<00:01, 103.31it/s]\u001b[A\n",
      " 39%|███▉      | 94/242 [00:00<00:01, 104.36it/s]\u001b[A\n",
      " 43%|████▎     | 105/242 [00:01<00:01, 105.44it/s]\u001b[A\n",
      " 48%|████▊     | 116/242 [00:01<00:01, 106.27it/s]\u001b[A\n",
      " 52%|█████▏    | 127/242 [00:01<00:01, 106.95it/s]\u001b[A\n",
      " 57%|█████▋    | 138/242 [00:01<00:00, 106.87it/s]\u001b[A\n",
      " 62%|██████▏   | 149/242 [00:01<00:00, 107.48it/s]\u001b[A\n",
      " 66%|██████▌   | 160/242 [00:01<00:00, 94.59it/s] \u001b[A\n",
      " 71%|███████   | 172/242 [00:01<00:00, 98.97it/s]\u001b[A\n",
      " 76%|███████▌  | 183/242 [00:01<00:00, 101.59it/s]\u001b[A\n",
      " 80%|████████  | 194/242 [00:01<00:00, 103.76it/s]\u001b[A\n",
      " 85%|████████▌ | 206/242 [00:02<00:00, 105.77it/s]\u001b[A\n",
      " 90%|█████████ | 218/242 [00:02<00:00, 107.51it/s]\u001b[A\n",
      " 95%|█████████▌| 230/242 [00:02<00:00, 108.89it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 103.55it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 504/2883 [00:14<09:07,  4.34it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2454493197930638\n",
      "f1: 0.7137817697407772\n",
      "Test Loss: 0.004013, Acc: 0.754939\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 34%|███▍      | 994/2883 [00:24<00:37, 50.48it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 103.22it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4336\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 103.57it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:02, 103.40it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 104.30it/s]\u001b[A\n",
      " 22%|██▏       | 54/242 [00:00<00:01, 102.45it/s]\u001b[A\n",
      " 27%|██▋       | 65/242 [00:00<00:01, 103.29it/s]\u001b[A\n",
      " 31%|███▏      | 76/242 [00:00<00:01, 103.61it/s]\u001b[A\n",
      " 36%|███▌      | 87/242 [00:00<00:01, 104.88it/s]\u001b[A\n",
      " 40%|████      | 98/242 [00:00<00:01, 105.37it/s]\u001b[A\n",
      " 45%|████▌     | 109/242 [00:01<00:01, 105.95it/s]\u001b[A\n",
      " 50%|████▉     | 120/242 [00:01<00:01, 105.78it/s]\u001b[A\n",
      " 54%|█████▍    | 131/242 [00:01<00:01, 105.61it/s]\u001b[A\n",
      " 59%|█████▊    | 142/242 [00:01<00:00, 104.22it/s]\u001b[A\n",
      " 63%|██████▎   | 153/242 [00:01<00:00, 104.90it/s]\u001b[A\n",
      " 68%|██████▊   | 164/242 [00:01<00:00, 104.84it/s]\u001b[A\n",
      " 72%|███████▏  | 175/242 [00:01<00:00, 105.43it/s]\u001b[A\n",
      " 77%|███████▋  | 186/242 [00:01<00:00, 93.54it/s] \u001b[A\n",
      " 81%|████████▏ | 197/242 [00:01<00:00, 97.22it/s]\u001b[A\n",
      " 86%|████████▌ | 208/242 [00:02<00:00, 100.01it/s]\u001b[A\n",
      " 90%|█████████ | 219/242 [00:02<00:00, 102.37it/s]\u001b[A\n",
      " 95%|█████████▌| 230/242 [00:02<00:00, 100.58it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 101.47it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1000/2883 [00:34<17:15,  1.82it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25445487641310593\n",
      "f1: 0.6602636830075086\n",
      "Test Loss: 0.003986, Acc: 0.761267\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1495/2883 [00:46<00:31, 43.53it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 87.39it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5032\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 18/242 [00:00<00:02, 87.71it/s]\u001b[A\n",
      " 11%|█         | 27/242 [00:00<00:02, 86.34it/s]\u001b[A\n",
      " 15%|█▍        | 36/242 [00:00<00:02, 86.71it/s]\u001b[A\n",
      " 19%|█▉        | 46/242 [00:00<00:02, 89.94it/s]\u001b[A\n",
      " 24%|██▎       | 57/242 [00:00<00:01, 93.00it/s]\u001b[A\n",
      " 28%|██▊       | 68/242 [00:00<00:01, 96.52it/s]\u001b[A\n",
      " 33%|███▎      | 79/242 [00:00<00:01, 99.28it/s]\u001b[A\n",
      " 37%|███▋      | 90/242 [00:00<00:01, 99.88it/s]\u001b[A\n",
      " 42%|████▏     | 101/242 [00:01<00:01, 102.08it/s]\u001b[A\n",
      " 46%|████▋     | 112/242 [00:01<00:01, 103.76it/s]\u001b[A\n",
      " 51%|█████     | 123/242 [00:01<00:01, 105.14it/s]\u001b[A\n",
      " 55%|█████▌    | 134/242 [00:01<00:01, 104.49it/s]\u001b[A\n",
      " 60%|█████▉    | 145/242 [00:01<00:00, 105.56it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 106.41it/s]\u001b[A\n",
      " 69%|██████▉   | 167/242 [00:01<00:00, 104.92it/s]\u001b[A\n",
      " 74%|███████▎  | 178/242 [00:01<00:00, 104.94it/s]\u001b[A\n",
      " 78%|███████▊  | 189/242 [00:01<00:00, 105.69it/s]\u001b[A\n",
      " 83%|████████▎ | 200/242 [00:01<00:00, 105.40it/s]\u001b[A\n",
      " 88%|████████▊ | 212/242 [00:02<00:00, 107.06it/s]\u001b[A\n",
      " 92%|█████████▏| 223/242 [00:02<00:00, 103.09it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 101.38it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 52%|█████▏    | 1500/2883 [00:50<06:27,  3.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2511975474228779\n",
      "f1: 0.688460868504947\n",
      "Test Loss: 0.003921, Acc: 0.762203\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1999/2883 [01:00<00:19, 44.60it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 81.43it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4225\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 17/242 [00:00<00:02, 80.39it/s]\u001b[A\n",
      " 11%|█         | 26/242 [00:00<00:02, 81.57it/s]\u001b[A\n",
      " 14%|█▍        | 35/242 [00:00<00:02, 82.10it/s]\u001b[A\n",
      " 19%|█▊        | 45/242 [00:00<00:02, 85.69it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:02, 87.91it/s]\u001b[A\n",
      " 27%|██▋       | 65/242 [00:00<00:01, 91.12it/s]\u001b[A\n",
      " 31%|███▏      | 76/242 [00:00<00:01, 93.73it/s]\u001b[A\n",
      " 36%|███▌      | 87/242 [00:00<00:01, 95.73it/s]\u001b[A\n",
      " 40%|████      | 97/242 [00:01<00:01, 95.76it/s]\u001b[A\n",
      " 44%|████▍     | 107/242 [00:01<00:01, 96.81it/s]\u001b[A\n",
      " 49%|████▉     | 118/242 [00:01<00:01, 98.37it/s]\u001b[A\n",
      " 53%|█████▎    | 128/242 [00:01<00:01, 92.96it/s]\u001b[A\n",
      " 57%|█████▋    | 138/242 [00:01<00:01, 94.44it/s]\u001b[A\n",
      " 61%|██████    | 148/242 [00:01<00:00, 95.29it/s]\u001b[A\n",
      " 65%|██████▌   | 158/242 [00:01<00:00, 96.29it/s]\u001b[A\n",
      " 69%|██████▉   | 168/242 [00:01<00:00, 84.25it/s]\u001b[A\n",
      " 74%|███████▍  | 179/242 [00:01<00:00, 88.76it/s]\u001b[A\n",
      " 79%|███████▊  | 190/242 [00:02<00:00, 92.03it/s]\u001b[A\n",
      " 83%|████████▎ | 201/242 [00:02<00:00, 94.89it/s]\u001b[A\n",
      " 88%|████████▊ | 212/242 [00:02<00:00, 96.51it/s]\u001b[A\n",
      " 92%|█████████▏| 223/242 [00:02<00:00, 98.35it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 93.70it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.26384364820846906\n",
      "f1: 0.6778164587323502\n",
      "Test Loss: 0.003906, Acc: 0.762849\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2496/2883 [01:16<00:08, 48.25it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 103.19it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4652\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 103.38it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:02, 104.37it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 104.71it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:01, 104.83it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 104.86it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 105.55it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 105.89it/s]\u001b[A\n",
      " 41%|████      | 99/242 [00:00<00:01, 105.73it/s]\u001b[A\n",
      " 45%|████▌     | 110/242 [00:01<00:01, 105.95it/s]\u001b[A\n",
      " 50%|█████     | 121/242 [00:01<00:01, 106.19it/s]\u001b[A\n",
      " 55%|█████▍    | 132/242 [00:01<00:01, 106.11it/s]\u001b[A\n",
      " 59%|█████▉    | 143/242 [00:01<00:00, 106.13it/s]\u001b[A\n",
      " 64%|██████▎   | 154/242 [00:01<00:00, 106.12it/s]\u001b[A\n",
      " 68%|██████▊   | 165/242 [00:01<00:00, 106.00it/s]\u001b[A\n",
      " 73%|███████▎  | 176/242 [00:01<00:00, 106.31it/s]\u001b[A\n",
      " 77%|███████▋  | 187/242 [00:01<00:00, 106.11it/s]\u001b[A\n",
      " 82%|████████▏ | 198/242 [00:01<00:00, 106.21it/s]\u001b[A\n",
      " 86%|████████▋ | 209/242 [00:02<00:00, 91.62it/s] \u001b[A\n",
      " 91%|█████████ | 220/242 [00:02<00:00, 95.59it/s]\u001b[A\n",
      " 95%|█████████▌| 231/242 [00:02<00:00, 98.10it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 103.16it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2501/2883 [01:20<01:46,  3.60it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2621191799195248\n",
      "f1: 0.6940121661141111\n",
      "Test Loss: 0.003886, Acc: 0.763171\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2883/2883 [01:28<00:00, 32.59it/s]\n",
      "  4%|▎         | 9/242 [00:00<00:02, 89.90it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 242/242 [00:02<00:00, 97.83it/s] \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.2462157501437057\n",
      "f1: 0.6395912012026247\n",
      "Test Loss: 0.003962, Acc: 0.758458\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 499/2883 [00:11<00:50, 47.53it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 102.09it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4431\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 102.43it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:02, 103.34it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 103.43it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:01, 103.90it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 103.90it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 104.46it/s]\u001b[A\n",
      " 36%|███▌      | 87/242 [00:00<00:01, 97.74it/s] \u001b[A\n",
      " 40%|████      | 98/242 [00:00<00:01, 100.57it/s]\u001b[A\n",
      " 45%|████▌     | 109/242 [00:01<00:01, 101.39it/s]\u001b[A\n",
      " 50%|████▉     | 120/242 [00:01<00:01, 102.79it/s]\u001b[A\n",
      " 54%|█████▍    | 131/242 [00:01<00:01, 103.57it/s]\u001b[A\n",
      " 59%|█████▊    | 142/242 [00:01<00:00, 103.96it/s]\u001b[A\n",
      " 63%|██████▎   | 153/242 [00:01<00:00, 103.63it/s]\u001b[A\n",
      " 68%|██████▊   | 164/242 [00:01<00:00, 102.73it/s]\u001b[A\n",
      " 72%|███████▏  | 175/242 [00:01<00:00, 103.13it/s]\u001b[A\n",
      " 77%|███████▋  | 186/242 [00:01<00:00, 103.70it/s]\u001b[A\n",
      " 81%|████████▏ | 197/242 [00:01<00:00, 103.99it/s]\u001b[A\n",
      " 86%|████████▌ | 208/242 [00:02<00:00, 104.40it/s]\u001b[A\n",
      " 90%|█████████ | 219/242 [00:02<00:00, 104.53it/s]\u001b[A\n",
      " 95%|█████████▌| 230/242 [00:02<00:00, 104.96it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 100.99it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.2626940026825062\n",
      "f1: 0.6954936826176632\n",
      "Test Loss: 0.003889, Acc: 0.763527\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 998/2883 [00:25<00:43, 43.20it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 86.73it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4509\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 18/242 [00:00<00:02, 86.69it/s]\u001b[A\n",
      " 11%|█         | 27/242 [00:00<00:02, 87.27it/s]\u001b[A\n",
      " 15%|█▍        | 36/242 [00:00<00:02, 87.65it/s]\u001b[A\n",
      " 19%|█▉        | 46/242 [00:00<00:02, 89.13it/s]\u001b[A\n",
      " 23%|██▎       | 56/242 [00:00<00:02, 91.95it/s]\u001b[A\n",
      " 28%|██▊       | 67/242 [00:00<00:01, 96.60it/s]\u001b[A\n",
      " 32%|███▏      | 78/242 [00:00<00:01, 99.34it/s]\u001b[A\n",
      " 37%|███▋      | 90/242 [00:00<00:01, 102.67it/s]\u001b[A\n",
      " 42%|████▏     | 101/242 [00:01<00:01, 104.37it/s]\u001b[A\n",
      " 46%|████▋     | 112/242 [00:01<00:01, 105.25it/s]\u001b[A\n",
      " 51%|█████     | 123/242 [00:01<00:01, 106.05it/s]\u001b[A\n",
      " 55%|█████▌    | 134/242 [00:01<00:01, 106.64it/s]\u001b[A\n",
      " 60%|█████▉    | 145/242 [00:01<00:00, 107.46it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 107.73it/s]\u001b[A\n",
      " 69%|██████▉   | 167/242 [00:01<00:00, 107.54it/s]\u001b[A\n",
      " 74%|███████▎  | 178/242 [00:01<00:00, 94.06it/s] \u001b[A\n",
      " 78%|███████▊  | 189/242 [00:01<00:00, 97.97it/s]\u001b[A\n",
      " 83%|████████▎ | 201/242 [00:02<00:00, 101.51it/s]\u001b[A\n",
      " 88%|████████▊ | 212/242 [00:02<00:00, 103.77it/s]\u001b[A\n",
      " 93%|█████████▎| 224/242 [00:02<00:00, 106.47it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 102.05it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1003/2883 [00:30<08:46,  3.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2623107875071853\n",
      "f1: 0.6927463394794408\n",
      "Test Loss: 0.003994, Acc: 0.761525\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1497/2883 [00:40<00:27, 50.91it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 108.30it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4932\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 106.19it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:01, 106.76it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 107.09it/s]\u001b[A\n",
      " 23%|██▎       | 56/242 [00:00<00:01, 108.14it/s]\u001b[A\n",
      " 28%|██▊       | 67/242 [00:00<00:01, 108.19it/s]\u001b[A\n",
      " 32%|███▏      | 78/242 [00:00<00:01, 108.71it/s]\u001b[A\n",
      " 37%|███▋      | 89/242 [00:00<00:01, 108.87it/s]\u001b[A\n",
      " 41%|████▏     | 100/242 [00:00<00:01, 107.94it/s]\u001b[A\n",
      " 46%|████▌     | 111/242 [00:01<00:01, 108.12it/s]\u001b[A\n",
      " 50%|█████     | 122/242 [00:01<00:01, 101.78it/s]\u001b[A\n",
      " 55%|█████▍    | 133/242 [00:01<00:01, 91.28it/s] \u001b[A\n",
      " 60%|█████▉    | 144/242 [00:01<00:01, 95.15it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 99.83it/s]\u001b[A\n",
      " 69%|██████▉   | 168/242 [00:01<00:00, 103.35it/s]\u001b[A\n",
      " 74%|███████▍  | 179/242 [00:01<00:00, 104.92it/s]\u001b[A\n",
      " 79%|███████▉  | 191/242 [00:01<00:00, 107.46it/s]\u001b[A\n",
      " 84%|████████▍ | 203/242 [00:01<00:00, 109.14it/s]\u001b[A\n",
      " 89%|████████▉ | 215/242 [00:02<00:00, 110.55it/s]\u001b[A\n",
      " 94%|█████████▍| 227/242 [00:02<00:00, 111.56it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 105.01it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.254071661237785\n",
      "f1: 0.6726851771208993\n",
      "Test Loss: 0.003914, Acc: 0.760718\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1995/2883 [00:55<00:20, 43.10it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 8/242 [00:00<00:02, 79.19it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4175\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 16/242 [00:00<00:02, 77.58it/s]\u001b[A\n",
      " 10%|█         | 25/242 [00:00<00:02, 78.95it/s]\u001b[A\n",
      " 14%|█▍        | 34/242 [00:00<00:02, 81.86it/s]\u001b[A\n",
      " 19%|█▊        | 45/242 [00:00<00:02, 86.88it/s]\u001b[A\n",
      " 23%|██▎       | 56/242 [00:00<00:02, 90.59it/s]\u001b[A\n",
      " 28%|██▊       | 67/242 [00:00<00:01, 93.71it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 95.39it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 97.72it/s]\u001b[A\n",
      " 41%|████      | 99/242 [00:01<00:01, 99.01it/s]\u001b[A\n",
      " 45%|████▌     | 110/242 [00:01<00:01, 100.47it/s]\u001b[A\n",
      " 50%|█████     | 121/242 [00:01<00:01, 100.68it/s]\u001b[A\n",
      " 55%|█████▍    | 132/242 [00:01<00:01, 101.48it/s]\u001b[A\n",
      " 59%|█████▉    | 143/242 [00:01<00:00, 101.56it/s]\u001b[A\n",
      " 64%|██████▎   | 154/242 [00:01<00:00, 101.21it/s]\u001b[A\n",
      " 68%|██████▊   | 165/242 [00:01<00:00, 93.84it/s] \u001b[A\n",
      " 72%|███████▏  | 175/242 [00:01<00:00, 95.40it/s]\u001b[A\n",
      " 77%|███████▋  | 186/242 [00:01<00:00, 96.99it/s]\u001b[A\n",
      " 81%|████████▏ | 197/242 [00:02<00:00, 98.61it/s]\u001b[A\n",
      " 86%|████████▌ | 208/242 [00:02<00:00, 99.76it/s]\u001b[A\n",
      " 90%|█████████ | 219/242 [00:02<00:00, 100.48it/s]\u001b[A\n",
      " 95%|█████████▌| 230/242 [00:02<00:00, 88.20it/s] \u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 95.05it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 69%|██████▉   | 2000/2883 [00:59<04:15,  3.45it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2588618509292968\n",
      "f1: 0.6759916245641561\n",
      "Test Loss: 0.003908, Acc: 0.760976\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2495/2883 [01:10<00:09, 42.78it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 86.30it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4596\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 19/242 [00:00<00:02, 89.12it/s]\u001b[A\n",
      " 12%|█▏        | 30/242 [00:00<00:02, 93.89it/s]\u001b[A\n",
      " 17%|█▋        | 42/242 [00:00<00:02, 98.22it/s]\u001b[A\n",
      " 22%|██▏       | 53/242 [00:00<00:01, 99.68it/s]\u001b[A\n",
      " 26%|██▌       | 62/242 [00:00<00:01, 94.71it/s]\u001b[A\n",
      " 29%|██▉       | 71/242 [00:00<00:02, 79.00it/s]\u001b[A\n",
      " 33%|███▎      | 81/242 [00:00<00:01, 83.47it/s]\u001b[A\n",
      " 38%|███▊      | 92/242 [00:00<00:01, 89.57it/s]\u001b[A\n",
      " 43%|████▎     | 103/242 [00:01<00:01, 94.06it/s]\u001b[A\n",
      " 47%|████▋     | 114/242 [00:01<00:01, 97.57it/s]\u001b[A\n",
      " 52%|█████▏    | 125/242 [00:01<00:01, 100.64it/s]\u001b[A\n",
      " 56%|█████▌    | 136/242 [00:01<00:01, 102.65it/s]\u001b[A\n",
      " 61%|██████    | 147/242 [00:01<00:00, 104.14it/s]\u001b[A\n",
      " 65%|██████▌   | 158/242 [00:01<00:00, 104.99it/s]\u001b[A\n",
      " 70%|██████▉   | 169/242 [00:01<00:00, 106.20it/s]\u001b[A\n",
      " 74%|███████▍  | 180/242 [00:01<00:00, 106.57it/s]\u001b[A\n",
      " 79%|███████▉  | 192/242 [00:01<00:00, 107.70it/s]\u001b[A\n",
      " 84%|████████▍ | 203/242 [00:02<00:00, 107.21it/s]\u001b[A\n",
      " 88%|████████▊ | 214/242 [00:02<00:00, 107.96it/s]\u001b[A\n",
      " 93%|█████████▎| 225/242 [00:02<00:00, 107.93it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 101.30it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2500/2883 [01:14<01:47,  3.56it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2603947116305806\n",
      "f1: 0.6801749048252296\n",
      "Test Loss: 0.003898, Acc: 0.762461\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2883/2883 [01:23<00:00, 34.42it/s]\n",
      "  4%|▍         | 10/242 [00:00<00:02, 91.42it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 242/242 [00:02<00:00, 105.80it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.2603947116305806\n",
      "f1: 0.6949759826939459\n",
      "Test Loss: 0.003891, Acc: 0.763720\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 498/2883 [00:10<00:52, 45.20it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 83.95it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4622\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 18/242 [00:00<00:02, 83.73it/s]\u001b[A\n",
      " 12%|█▏        | 28/242 [00:00<00:02, 86.82it/s]\u001b[A\n",
      " 16%|█▌        | 39/242 [00:00<00:02, 90.76it/s]\u001b[A\n",
      " 20%|██        | 49/242 [00:00<00:02, 93.15it/s]\u001b[A\n",
      " 25%|██▍       | 60/242 [00:00<00:01, 96.14it/s]\u001b[A\n",
      " 29%|██▉       | 71/242 [00:00<00:01, 97.77it/s]\u001b[A\n",
      " 34%|███▍      | 82/242 [00:00<00:01, 99.08it/s]\u001b[A\n",
      " 38%|███▊      | 93/242 [00:00<00:01, 100.28it/s]\u001b[A\n",
      " 43%|████▎     | 104/242 [00:01<00:01, 101.57it/s]\u001b[A\n",
      " 48%|████▊     | 115/242 [00:01<00:01, 100.91it/s]\u001b[A\n",
      " 52%|█████▏    | 125/242 [00:01<00:01, 100.59it/s]\u001b[A\n",
      " 56%|█████▌    | 135/242 [00:01<00:01, 92.78it/s] \u001b[A\n",
      " 60%|██████    | 146/242 [00:01<00:00, 96.37it/s]\u001b[A\n",
      " 65%|██████▍   | 157/242 [00:01<00:00, 98.15it/s]\u001b[A\n",
      " 69%|██████▉   | 168/242 [00:01<00:00, 99.46it/s]\u001b[A\n",
      " 74%|███████▍  | 179/242 [00:01<00:00, 100.59it/s]\u001b[A\n",
      " 79%|███████▊  | 190/242 [00:01<00:00, 100.60it/s]\u001b[A\n",
      " 83%|████████▎ | 201/242 [00:02<00:00, 101.05it/s]\u001b[A\n",
      " 88%|████████▊ | 212/242 [00:02<00:00, 101.68it/s]\u001b[A\n",
      " 92%|█████████▏| 223/242 [00:02<00:00, 102.59it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 99.28it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 503/2883 [00:14<11:25,  3.47it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.24334163632879863\n",
      "f1: 0.7132052603886384\n",
      "Test Loss: 0.003959, Acc: 0.755940\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 995/2883 [00:24<00:36, 51.34it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 109.22it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4499\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 109.27it/s]\u001b[A\n",
      " 14%|█▍        | 34/242 [00:00<00:01, 111.13it/s]\u001b[A\n",
      " 19%|█▉        | 46/242 [00:00<00:01, 111.09it/s]\u001b[A\n",
      " 24%|██▎       | 57/242 [00:00<00:01, 109.90it/s]\u001b[A\n",
      " 28%|██▊       | 68/242 [00:00<00:01, 109.09it/s]\u001b[A\n",
      " 33%|███▎      | 79/242 [00:00<00:01, 109.16it/s]\u001b[A\n",
      " 38%|███▊      | 91/242 [00:00<00:01, 109.56it/s]\u001b[A\n",
      " 42%|████▏     | 102/242 [00:00<00:01, 109.59it/s]\u001b[A\n",
      " 47%|████▋     | 114/242 [00:01<00:01, 110.17it/s]\u001b[A\n",
      " 52%|█████▏    | 125/242 [00:01<00:01, 109.81it/s]\u001b[A\n",
      " 57%|█████▋    | 137/242 [00:01<00:00, 110.39it/s]\u001b[A\n",
      " 61%|██████    | 148/242 [00:01<00:00, 109.44it/s]\u001b[A\n",
      " 66%|██████▌   | 160/242 [00:01<00:00, 110.12it/s]\u001b[A\n",
      " 71%|███████   | 171/242 [00:01<00:00, 109.97it/s]\u001b[A\n",
      " 76%|███████▌  | 183/242 [00:01<00:00, 110.40it/s]\u001b[A\n",
      " 81%|████████  | 195/242 [00:01<00:00, 110.71it/s]\u001b[A\n",
      " 86%|████████▌ | 207/242 [00:01<00:00, 110.82it/s]\u001b[A\n",
      " 90%|█████████ | 219/242 [00:01<00:00, 110.79it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 110.25it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.2626940026825062\n",
      "f1: 0.6914546610273855\n",
      "Test Loss: 0.003902, Acc: 0.763107\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1496/2883 [00:37<00:23, 58.88it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 12/242 [00:00<00:02, 113.52it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4991\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|▉         | 24/242 [00:00<00:01, 113.62it/s]\u001b[A\n",
      " 15%|█▍        | 36/242 [00:00<00:01, 114.21it/s]\u001b[A\n",
      " 20%|█▉        | 48/242 [00:00<00:01, 115.27it/s]\u001b[A\n",
      " 25%|██▍       | 60/242 [00:00<00:01, 116.43it/s]\u001b[A\n",
      " 30%|██▉       | 72/242 [00:00<00:01, 117.15it/s]\u001b[A\n",
      " 35%|███▍      | 84/242 [00:00<00:01, 117.73it/s]\u001b[A\n",
      " 40%|███▉      | 96/242 [00:00<00:01, 117.07it/s]\u001b[A\n",
      " 45%|████▍     | 108/242 [00:00<00:01, 117.53it/s]\u001b[A\n",
      " 50%|████▉     | 120/242 [00:01<00:01, 117.66it/s]\u001b[A\n",
      " 55%|█████▍    | 132/242 [00:01<00:00, 117.95it/s]\u001b[A\n",
      " 60%|█████▉    | 144/242 [00:01<00:00, 118.28it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 117.36it/s]\u001b[A\n",
      " 70%|██████▉   | 169/242 [00:01<00:00, 118.25it/s]\u001b[A\n",
      " 75%|███████▍  | 181/242 [00:01<00:00, 117.50it/s]\u001b[A\n",
      " 80%|███████▉  | 193/242 [00:01<00:00, 117.92it/s]\u001b[A\n",
      " 85%|████████▍ | 205/242 [00:01<00:00, 112.85it/s]\u001b[A\n",
      " 90%|████████▉ | 217/242 [00:01<00:00, 112.06it/s]\u001b[A\n",
      " 95%|█████████▍| 229/242 [00:01<00:00, 112.21it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 115.55it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 52%|█████▏    | 1502/2883 [00:41<05:01,  4.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25502969917608737\n",
      "f1: 0.677649942614503\n",
      "Test Loss: 0.003924, Acc: 0.762784\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1998/2883 [00:50<00:19, 46.16it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/242 [00:00<00:02, 97.34it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.417\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 20/242 [00:00<00:02, 96.11it/s]\u001b[A\n",
      " 12%|█▏        | 30/242 [00:00<00:02, 97.09it/s]\u001b[A\n",
      " 17%|█▋        | 40/242 [00:00<00:02, 96.97it/s]\u001b[A\n",
      " 21%|██        | 50/242 [00:00<00:01, 97.49it/s]\u001b[A\n",
      " 25%|██▍       | 60/242 [00:00<00:01, 97.87it/s]\u001b[A\n",
      " 29%|██▉       | 70/242 [00:00<00:01, 98.22it/s]\u001b[A\n",
      " 33%|███▎      | 80/242 [00:00<00:01, 97.99it/s]\u001b[A\n",
      " 37%|███▋      | 90/242 [00:00<00:01, 85.38it/s]\u001b[A\n",
      " 41%|████▏     | 100/242 [00:01<00:01, 88.95it/s]\u001b[A\n",
      " 45%|████▌     | 110/242 [00:01<00:01, 91.68it/s]\u001b[A\n",
      " 50%|████▉     | 120/242 [00:01<00:01, 93.81it/s]\u001b[A\n",
      " 54%|█████▎    | 130/242 [00:01<00:01, 94.10it/s]\u001b[A\n",
      " 58%|█████▊    | 140/242 [00:01<00:01, 95.32it/s]\u001b[A\n",
      " 62%|██████▏   | 150/242 [00:01<00:00, 95.92it/s]\u001b[A\n",
      " 66%|██████▌   | 160/242 [00:01<00:00, 97.00it/s]\u001b[A\n",
      " 70%|███████   | 170/242 [00:01<00:00, 96.68it/s]\u001b[A\n",
      " 74%|███████▍  | 180/242 [00:01<00:00, 94.13it/s]\u001b[A\n",
      " 79%|███████▊  | 190/242 [00:02<00:00, 94.28it/s]\u001b[A\n",
      " 83%|████████▎ | 200/242 [00:02<00:00, 92.82it/s]\u001b[A\n",
      " 87%|████████▋ | 211/242 [00:02<00:00, 96.47it/s]\u001b[A\n",
      " 92%|█████████▏| 223/242 [00:02<00:00, 100.85it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 96.81it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 69%|██████▉   | 2003/2883 [00:55<04:07,  3.56it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2567541674650316\n",
      "f1: 0.6809221191190997\n",
      "Test Loss: 0.003910, Acc: 0.761719\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2498/2883 [01:05<00:07, 48.15it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 12/242 [00:00<00:02, 110.45it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4586\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|▉         | 23/242 [00:00<00:01, 109.65it/s]\u001b[A\n",
      " 14%|█▍        | 34/242 [00:00<00:01, 107.34it/s]\u001b[A\n",
      " 19%|█▊        | 45/242 [00:00<00:01, 107.66it/s]\u001b[A\n",
      " 24%|██▎       | 57/242 [00:00<00:01, 108.66it/s]\u001b[A\n",
      " 29%|██▊       | 69/242 [00:00<00:01, 109.91it/s]\u001b[A\n",
      " 33%|███▎      | 81/242 [00:00<00:01, 110.37it/s]\u001b[A\n",
      " 38%|███▊      | 93/242 [00:00<00:01, 110.77it/s]\u001b[A\n",
      " 43%|████▎     | 105/242 [00:00<00:01, 111.24it/s]\u001b[A\n",
      " 48%|████▊     | 117/242 [00:01<00:01, 112.16it/s]\u001b[A\n",
      " 53%|█████▎    | 128/242 [00:01<00:01, 110.26it/s]\u001b[A\n",
      " 57%|█████▋    | 139/242 [00:01<00:00, 109.91it/s]\u001b[A\n",
      " 62%|██████▏   | 151/242 [00:01<00:00, 110.67it/s]\u001b[A\n",
      " 67%|██████▋   | 163/242 [00:01<00:00, 111.03it/s]\u001b[A\n",
      " 72%|███████▏  | 175/242 [00:01<00:00, 110.67it/s]\u001b[A\n",
      " 77%|███████▋  | 187/242 [00:01<00:00, 111.50it/s]\u001b[A\n",
      " 82%|████████▏ | 199/242 [00:01<00:00, 112.11it/s]\u001b[A\n",
      " 87%|████████▋ | 211/242 [00:01<00:00, 108.85it/s]\u001b[A\n",
      " 92%|█████████▏| 222/242 [00:02<00:00, 95.98it/s] \u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 107.90it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2503/2883 [01:09<01:41,  3.73it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2623107875071853\n",
      "f1: 0.6777559350657718\n",
      "Test Loss: 0.003899, Acc: 0.763139\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2883/2883 [01:18<00:00, 36.84it/s]\n",
      "  4%|▍         | 10/242 [00:00<00:02, 90.63it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 242/242 [00:02<00:00, 95.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2883 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26135274956888294\n",
      "f1: 0.6840908468806627\n",
      "Test Loss: 0.003887, Acc: 0.763753\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 496/2883 [00:11<00:47, 49.76it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 104.55it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4487\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 105.34it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:01, 105.17it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 105.80it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:01, 105.63it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 106.40it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 106.52it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 106.81it/s]\u001b[A\n",
      " 41%|████      | 99/242 [00:00<00:01, 106.88it/s]\u001b[A\n",
      " 45%|████▌     | 110/242 [00:01<00:01, 106.44it/s]\u001b[A\n",
      " 50%|█████     | 121/242 [00:01<00:01, 106.79it/s]\u001b[A\n",
      " 55%|█████▍    | 133/242 [00:01<00:01, 108.21it/s]\u001b[A\n",
      " 60%|█████▉    | 144/242 [00:01<00:00, 107.89it/s]\u001b[A\n",
      " 64%|██████▍   | 155/242 [00:01<00:00, 108.41it/s]\u001b[A\n",
      " 69%|██████▊   | 166/242 [00:01<00:00, 108.08it/s]\u001b[A\n",
      " 73%|███████▎  | 177/242 [00:01<00:00, 108.58it/s]\u001b[A\n",
      " 78%|███████▊  | 188/242 [00:01<00:00, 108.25it/s]\u001b[A\n",
      " 82%|████████▏ | 199/242 [00:01<00:00, 108.53it/s]\u001b[A\n",
      " 87%|████████▋ | 210/242 [00:01<00:00, 108.47it/s]\u001b[A\n",
      " 92%|█████████▏| 222/242 [00:02<00:00, 109.23it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 107.75it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 501/2883 [00:15<10:48,  3.67it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26173596474420385\n",
      "f1: 0.6888126052908677\n",
      "Test Loss: 0.003885, Acc: 0.764398\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 995/2883 [00:24<00:41, 45.31it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 8/242 [00:00<00:03, 76.08it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4529\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 16/242 [00:00<00:02, 76.26it/s]\u001b[A\n",
      " 11%|█         | 26/242 [00:00<00:02, 80.45it/s]\u001b[A\n",
      " 15%|█▍        | 36/242 [00:00<00:02, 83.71it/s]\u001b[A\n",
      " 19%|█▉        | 46/242 [00:00<00:02, 86.83it/s]\u001b[A\n",
      " 23%|██▎       | 56/242 [00:00<00:02, 87.89it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 90.14it/s]\u001b[A\n",
      " 31%|███▏      | 76/242 [00:00<00:01, 91.62it/s]\u001b[A\n",
      " 36%|███▌      | 86/242 [00:00<00:01, 92.69it/s]\u001b[A\n",
      " 40%|███▉      | 96/242 [00:01<00:01, 93.89it/s]\u001b[A\n",
      " 44%|████▍     | 106/242 [00:01<00:01, 93.29it/s]\u001b[A\n",
      " 48%|████▊     | 116/242 [00:01<00:01, 94.01it/s]\u001b[A\n",
      " 52%|█████▏    | 126/242 [00:01<00:01, 94.58it/s]\u001b[A\n",
      " 56%|█████▌    | 136/242 [00:01<00:01, 94.94it/s]\u001b[A\n",
      " 60%|██████    | 146/242 [00:01<00:01, 94.87it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 94.24it/s]\u001b[A\n",
      " 69%|██████▊   | 166/242 [00:01<00:00, 95.00it/s]\u001b[A\n",
      " 73%|███████▎  | 176/242 [00:01<00:00, 95.49it/s]\u001b[A\n",
      " 77%|███████▋  | 186/242 [00:02<00:00, 95.28it/s]\u001b[A\n",
      " 81%|████████  | 196/242 [00:02<00:00, 94.78it/s]\u001b[A\n",
      " 85%|████████▌ | 206/242 [00:02<00:00, 95.17it/s]\u001b[A\n",
      " 89%|████████▉ | 216/242 [00:02<00:00, 95.15it/s]\u001b[A\n",
      " 93%|█████████▎| 226/242 [00:02<00:00, 95.76it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 93.11it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1000/2883 [00:29<09:18,  3.37it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2600114964552596\n",
      "f1: 0.693448596495167\n",
      "Test Loss: 0.003914, Acc: 0.761622\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1496/2883 [00:39<00:27, 49.89it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 101.60it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4986\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 101.75it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:02, 102.58it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 103.53it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:01, 103.52it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 103.78it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 103.95it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 104.71it/s]\u001b[A\n",
      " 41%|████      | 99/242 [00:00<00:01, 104.85it/s]\u001b[A\n",
      " 45%|████▌     | 110/242 [00:01<00:01, 103.72it/s]\u001b[A\n",
      " 50%|█████     | 121/242 [00:01<00:01, 104.44it/s]\u001b[A\n",
      " 55%|█████▍    | 132/242 [00:01<00:01, 104.04it/s]\u001b[A\n",
      " 59%|█████▉    | 143/242 [00:01<00:00, 104.47it/s]\u001b[A\n",
      " 64%|██████▎   | 154/242 [00:01<00:00, 103.80it/s]\u001b[A\n",
      " 68%|██████▊   | 165/242 [00:01<00:00, 104.28it/s]\u001b[A\n",
      " 73%|███████▎  | 176/242 [00:01<00:00, 93.22it/s] \u001b[A\n",
      " 77%|███████▋  | 186/242 [00:01<00:00, 91.13it/s]\u001b[A\n",
      " 81%|████████  | 196/242 [00:01<00:00, 91.63it/s]\u001b[A\n",
      " 86%|████████▌ | 207/242 [00:02<00:00, 95.88it/s]\u001b[A\n",
      " 90%|█████████ | 218/242 [00:02<00:00, 99.57it/s]\u001b[A\n",
      " 95%|█████████▍| 229/242 [00:02<00:00, 101.98it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 101.54it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 52%|█████▏    | 1501/2883 [00:44<06:26,  3.58it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2552213067637478\n",
      "f1: 0.6895271799372295\n",
      "Test Loss: 0.003926, Acc: 0.761945\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1998/2883 [00:55<00:23, 37.85it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 8/242 [00:00<00:03, 72.80it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4105\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 16/242 [00:00<00:03, 73.60it/s]\u001b[A\n",
      " 10%|▉         | 24/242 [00:00<00:02, 74.59it/s]\u001b[A\n",
      " 14%|█▍        | 34/242 [00:00<00:02, 79.08it/s]\u001b[A\n",
      " 18%|█▊        | 43/242 [00:00<00:02, 81.06it/s]\u001b[A\n",
      " 21%|██▏       | 52/242 [00:00<00:02, 82.34it/s]\u001b[A\n",
      " 25%|██▌       | 61/242 [00:00<00:02, 82.99it/s]\u001b[A\n",
      " 29%|██▉       | 70/242 [00:00<00:02, 84.06it/s]\u001b[A\n",
      " 33%|███▎      | 79/242 [00:00<00:01, 84.90it/s]\u001b[A\n",
      " 37%|███▋      | 89/242 [00:01<00:01, 85.67it/s]\u001b[A\n",
      " 40%|████      | 98/242 [00:01<00:01, 86.62it/s]\u001b[A\n",
      " 44%|████▍     | 107/242 [00:01<00:01, 86.52it/s]\u001b[A\n",
      " 48%|████▊     | 117/242 [00:01<00:01, 87.67it/s]\u001b[A\n",
      " 52%|█████▏    | 126/242 [00:01<00:01, 87.59it/s]\u001b[A\n",
      " 56%|█████▌    | 135/242 [00:01<00:01, 75.09it/s]\u001b[A\n",
      " 60%|█████▉    | 144/242 [00:01<00:01, 78.07it/s]\u001b[A\n",
      " 63%|██████▎   | 153/242 [00:01<00:01, 78.29it/s]\u001b[A\n",
      " 67%|██████▋   | 162/242 [00:01<00:00, 80.79it/s]\u001b[A\n",
      " 71%|███████   | 172/242 [00:02<00:00, 85.05it/s]\u001b[A\n",
      " 75%|███████▌  | 182/242 [00:02<00:00, 88.29it/s]\u001b[A\n",
      " 79%|███████▉  | 192/242 [00:02<00:00, 90.40it/s]\u001b[A\n",
      " 83%|████████▎ | 202/242 [00:02<00:00, 92.57it/s]\u001b[A\n",
      " 88%|████████▊ | 212/242 [00:02<00:00, 93.50it/s]\u001b[A\n",
      " 92%|█████████▏| 222/242 [00:02<00:00, 88.18it/s]\u001b[A\n",
      " 96%|█████████▌| 232/242 [00:02<00:00, 90.76it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 85.86it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 69%|██████▉   | 2002/2883 [01:05<11:30,  1.28it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2563709522897107\n",
      "f1: 0.6804331188600277\n",
      "Test Loss: 0.003915, Acc: 0.760008\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2499/2883 [01:16<00:08, 43.99it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 88.09it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4593\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 18/242 [00:00<00:02, 88.01it/s]\u001b[A\n",
      " 11%|█         | 27/242 [00:00<00:02, 87.76it/s]\u001b[A\n",
      " 15%|█▍        | 36/242 [00:00<00:02, 88.01it/s]\u001b[A\n",
      " 19%|█▉        | 46/242 [00:00<00:02, 88.85it/s]\u001b[A\n",
      " 24%|██▎       | 57/242 [00:00<00:01, 94.27it/s]\u001b[A\n",
      " 29%|██▊       | 69/242 [00:00<00:01, 98.98it/s]\u001b[A\n",
      " 33%|███▎      | 80/242 [00:00<00:01, 102.04it/s]\u001b[A\n",
      " 38%|███▊      | 91/242 [00:00<00:01, 103.90it/s]\u001b[A\n",
      " 43%|████▎     | 103/242 [00:01<00:01, 106.54it/s]\u001b[A\n",
      " 48%|████▊     | 115/242 [00:01<00:01, 108.46it/s]\u001b[A\n",
      " 52%|█████▏    | 127/242 [00:01<00:01, 109.72it/s]\u001b[A\n",
      " 57%|█████▋    | 139/242 [00:01<00:00, 110.27it/s]\u001b[A\n",
      " 62%|██████▏   | 151/242 [00:01<00:00, 111.12it/s]\u001b[A\n",
      " 67%|██████▋   | 163/242 [00:01<00:00, 94.96it/s] \u001b[A\n",
      " 72%|███████▏  | 175/242 [00:01<00:00, 99.07it/s]\u001b[A\n",
      " 77%|███████▋  | 187/242 [00:01<00:00, 102.35it/s]\u001b[A\n",
      " 82%|████████▏ | 199/242 [00:01<00:00, 104.91it/s]\u001b[A\n",
      " 87%|████████▋ | 211/242 [00:02<00:00, 106.80it/s]\u001b[A\n",
      " 92%|█████████▏| 222/242 [00:02<00:00, 99.49it/s] \u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 101.80it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2504/2883 [01:20<01:45,  3.59it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2571373826403526\n",
      "f1: 0.6711187090570186\n",
      "Test Loss: 0.003911, Acc: 0.761138\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2883/2883 [01:28<00:00, 32.43it/s]\n",
      "  4%|▍         | 10/242 [00:00<00:02, 98.39it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 242/242 [00:02<00:00, 98.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2883 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25905345851695727\n",
      "f1: 0.677312196223873\n",
      "Test Loss: 0.003903, Acc: 0.762332\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 495/2883 [00:11<00:52, 45.20it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/242 [00:00<00:02, 96.89it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4447\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 20/242 [00:00<00:02, 96.59it/s]\u001b[A\n",
      " 12%|█▏        | 30/242 [00:00<00:02, 95.67it/s]\u001b[A\n",
      " 17%|█▋        | 40/242 [00:00<00:02, 95.60it/s]\u001b[A\n",
      " 21%|██        | 50/242 [00:00<00:02, 95.44it/s]\u001b[A\n",
      " 25%|██▍       | 60/242 [00:00<00:01, 95.81it/s]\u001b[A\n",
      " 29%|██▉       | 70/242 [00:00<00:01, 95.77it/s]\u001b[A\n",
      " 33%|███▎      | 79/242 [00:00<00:01, 93.54it/s]\u001b[A\n",
      " 37%|███▋      | 89/242 [00:00<00:01, 94.58it/s]\u001b[A\n",
      " 41%|████      | 99/242 [00:01<00:01, 94.95it/s]\u001b[A\n",
      " 45%|████▌     | 109/242 [00:01<00:01, 95.41it/s]\u001b[A\n",
      " 49%|████▉     | 119/242 [00:01<00:01, 95.05it/s]\u001b[A\n",
      " 53%|█████▎    | 129/242 [00:01<00:01, 79.21it/s]\u001b[A\n",
      " 58%|█████▊    | 140/242 [00:01<00:01, 86.26it/s]\u001b[A\n",
      " 62%|██████▏   | 151/242 [00:01<00:00, 92.13it/s]\u001b[A\n",
      " 67%|██████▋   | 162/242 [00:01<00:00, 95.42it/s]\u001b[A\n",
      " 72%|███████▏  | 174/242 [00:01<00:00, 99.68it/s]\u001b[A\n",
      " 77%|███████▋  | 186/242 [00:01<00:00, 102.70it/s]\u001b[A\n",
      " 82%|████████▏ | 198/242 [00:02<00:00, 105.25it/s]\u001b[A\n",
      " 87%|████████▋ | 210/242 [00:02<00:00, 106.93it/s]\u001b[A\n",
      " 92%|█████████▏| 222/242 [00:02<00:00, 108.02it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 98.53it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 500/2883 [00:15<11:15,  3.53it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2621191799195248\n",
      "f1: 0.680303251495059\n",
      "Test Loss: 0.003885, Acc: 0.764108\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 34%|███▍      | 994/2883 [00:26<00:37, 50.45it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/242 [00:00<00:02, 92.76it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4521\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 20/242 [00:00<00:02, 92.55it/s]\u001b[A\n",
      " 12%|█▏        | 28/242 [00:00<00:02, 85.72it/s]\u001b[A\n",
      " 16%|█▌        | 38/242 [00:00<00:02, 88.43it/s]\u001b[A\n",
      " 20%|█▉        | 48/242 [00:00<00:02, 90.55it/s]\u001b[A\n",
      " 23%|██▎       | 56/242 [00:00<00:02, 78.23it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:02, 82.93it/s]\u001b[A\n",
      " 31%|███▏      | 76/242 [00:00<00:01, 87.02it/s]\u001b[A\n",
      " 36%|███▌      | 86/242 [00:00<00:01, 88.72it/s]\u001b[A\n",
      " 40%|███▉      | 96/242 [00:01<00:01, 90.99it/s]\u001b[A\n",
      " 44%|████▍     | 106/242 [00:01<00:01, 92.63it/s]\u001b[A\n",
      " 48%|████▊     | 116/242 [00:01<00:01, 93.73it/s]\u001b[A\n",
      " 52%|█████▏    | 126/242 [00:01<00:01, 94.53it/s]\u001b[A\n",
      " 56%|█████▌    | 136/242 [00:01<00:01, 95.05it/s]\u001b[A\n",
      " 60%|██████    | 146/242 [00:01<00:01, 94.44it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 94.91it/s]\u001b[A\n",
      " 69%|██████▊   | 166/242 [00:01<00:00, 94.01it/s]\u001b[A\n",
      " 73%|███████▎  | 176/242 [00:01<00:00, 94.30it/s]\u001b[A\n",
      " 77%|███████▋  | 186/242 [00:02<00:00, 94.37it/s]\u001b[A\n",
      " 81%|████████  | 196/242 [00:02<00:00, 94.24it/s]\u001b[A\n",
      " 85%|████████▌ | 206/242 [00:02<00:00, 93.53it/s]\u001b[A\n",
      " 89%|████████▉ | 216/242 [00:02<00:00, 93.46it/s]\u001b[A\n",
      " 93%|█████████▎| 226/242 [00:02<00:00, 94.50it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 91.93it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1000/2883 [00:30<07:50,  4.00it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26058631921824105\n",
      "f1: 0.6868808662543182\n",
      "Test Loss: 0.003925, Acc: 0.762397\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1497/2883 [00:41<00:32, 43.17it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 107.02it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4929\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 107.74it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:01, 108.04it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 108.53it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:01, 107.71it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 108.39it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 108.02it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 108.53it/s]\u001b[A\n",
      " 41%|████      | 99/242 [00:00<00:01, 108.32it/s]\u001b[A\n",
      " 46%|████▌     | 111/242 [00:01<00:01, 109.50it/s]\u001b[A\n",
      " 51%|█████     | 123/242 [00:01<00:01, 109.97it/s]\u001b[A\n",
      " 56%|█████▌    | 135/242 [00:01<00:00, 110.65it/s]\u001b[A\n",
      " 61%|██████    | 147/242 [00:01<00:00, 110.83it/s]\u001b[A\n",
      " 66%|██████▌   | 159/242 [00:01<00:00, 111.15it/s]\u001b[A\n",
      " 71%|███████   | 171/242 [00:01<00:00, 111.81it/s]\u001b[A\n",
      " 76%|███████▌  | 183/242 [00:01<00:00, 112.36it/s]\u001b[A\n",
      " 81%|████████  | 195/242 [00:01<00:00, 112.12it/s]\u001b[A\n",
      " 86%|████████▌ | 207/242 [00:01<00:00, 112.32it/s]\u001b[A\n",
      " 90%|█████████ | 219/242 [00:01<00:00, 111.40it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 110.15it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n",
      "em: 0.25790381299099446\n",
      "f1: 0.6879520439110485\n",
      "Test Loss: 0.003922, Acc: 0.762429\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1996/2883 [00:55<00:20, 43.88it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 9/242 [00:00<00:02, 81.14it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4134\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 18/242 [00:00<00:02, 81.64it/s]\u001b[A\n",
      " 11%|█         | 27/242 [00:00<00:02, 81.98it/s]\u001b[A\n",
      " 15%|█▍        | 36/242 [00:00<00:02, 82.51it/s]\u001b[A\n",
      " 19%|█▊        | 45/242 [00:00<00:02, 84.53it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:02, 88.07it/s]\u001b[A\n",
      " 27%|██▋       | 65/242 [00:00<00:01, 89.79it/s]\u001b[A\n",
      " 31%|███       | 75/242 [00:00<00:01, 91.96it/s]\u001b[A\n",
      " 35%|███▌      | 85/242 [00:00<00:01, 93.54it/s]\u001b[A\n",
      " 39%|███▉      | 95/242 [00:01<00:01, 94.98it/s]\u001b[A\n",
      " 43%|████▎     | 105/242 [00:01<00:01, 83.70it/s]\u001b[A\n",
      " 48%|████▊     | 116/242 [00:01<00:01, 88.37it/s]\u001b[A\n",
      " 52%|█████▏    | 126/242 [00:01<00:01, 90.59it/s]\u001b[A\n",
      " 56%|█████▌    | 136/242 [00:01<00:01, 93.19it/s]\u001b[A\n",
      " 60%|██████    | 146/242 [00:01<00:01, 94.32it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 95.23it/s]\u001b[A\n",
      " 69%|██████▊   | 166/242 [00:01<00:00, 96.47it/s]\u001b[A\n",
      " 73%|███████▎  | 177/242 [00:01<00:00, 97.61it/s]\u001b[A\n",
      " 78%|███████▊  | 188/242 [00:02<00:00, 97.68it/s]\u001b[A\n",
      " 82%|████████▏ | 198/242 [00:02<00:00, 92.34it/s]\u001b[A\n",
      " 86%|████████▋ | 209/242 [00:02<00:00, 94.56it/s]\u001b[A\n",
      " 91%|█████████ | 220/242 [00:02<00:00, 96.60it/s]\u001b[A\n",
      " 95%|█████████▌| 231/242 [00:02<00:00, 98.43it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 93.03it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 69%|██████▉   | 2001/2883 [01:00<04:20,  3.39it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.254071661237785\n",
      "f1: 0.6856271498827617\n",
      "Test Loss: 0.003912, Acc: 0.760363\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2495/2883 [01:10<00:08, 43.68it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/242 [00:00<00:02, 90.77it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4607\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 20/242 [00:00<00:02, 91.86it/s]\u001b[A\n",
      " 12%|█▏        | 29/242 [00:00<00:02, 91.12it/s]\u001b[A\n",
      " 16%|█▌        | 39/242 [00:00<00:02, 92.74it/s]\u001b[A\n",
      " 19%|█▉        | 47/242 [00:00<00:02, 85.64it/s]\u001b[A\n",
      " 24%|██▎       | 57/242 [00:00<00:02, 87.97it/s]\u001b[A\n",
      " 28%|██▊       | 68/242 [00:00<00:01, 93.57it/s]\u001b[A\n",
      " 33%|███▎      | 79/242 [00:00<00:01, 97.58it/s]\u001b[A\n",
      " 37%|███▋      | 90/242 [00:00<00:01, 100.76it/s]\u001b[A\n",
      " 42%|████▏     | 102/242 [00:01<00:01, 103.48it/s]\u001b[A\n",
      " 47%|████▋     | 114/242 [00:01<00:01, 105.52it/s]\u001b[A\n",
      " 52%|█████▏    | 125/242 [00:01<00:01, 106.83it/s]\u001b[A\n",
      " 57%|█████▋    | 137/242 [00:01<00:00, 108.06it/s]\u001b[A\n",
      " 62%|██████▏   | 149/242 [00:01<00:00, 109.17it/s]\u001b[A\n",
      " 67%|██████▋   | 161/242 [00:01<00:00, 110.37it/s]\u001b[A\n",
      " 71%|███████▏  | 173/242 [00:01<00:00, 110.74it/s]\u001b[A\n",
      " 76%|███████▋  | 185/242 [00:01<00:00, 110.95it/s]\u001b[A\n",
      " 81%|████████▏ | 197/242 [00:01<00:00, 111.31it/s]\u001b[A\n",
      " 86%|████████▋ | 209/242 [00:02<00:00, 110.86it/s]\u001b[A\n",
      " 91%|█████████▏| 221/242 [00:02<00:00, 110.67it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 104.97it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2500/2883 [01:14<01:45,  3.64it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2603947116305806\n",
      "f1: 0.6939843788665491\n",
      "Test Loss: 0.003907, Acc: 0.762106\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2883/2883 [01:23<00:00, 34.47it/s]\n",
      "  4%|▍         | 10/242 [00:00<00:02, 92.25it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 242/242 [00:02<00:00, 97.39it/s] \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2883 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2548380915884269\n",
      "f1: 0.6496484705816034\n",
      "Test Loss: 0.003966, Acc: 0.759524\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 495/2883 [00:09<00:46, 51.08it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/242 [00:00<00:02, 107.97it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4581\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 22/242 [00:00<00:02, 106.04it/s]\u001b[A\n",
      " 14%|█▎        | 33/242 [00:00<00:01, 105.33it/s]\u001b[A\n",
      " 18%|█▊        | 44/242 [00:00<00:01, 106.34it/s]\u001b[A\n",
      " 23%|██▎       | 55/242 [00:00<00:01, 107.31it/s]\u001b[A\n",
      " 27%|██▋       | 66/242 [00:00<00:01, 107.80it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 108.02it/s]\u001b[A\n",
      " 37%|███▋      | 89/242 [00:00<00:01, 108.69it/s]\u001b[A\n",
      " 41%|████▏     | 100/242 [00:00<00:01, 108.51it/s]\u001b[A\n",
      " 46%|████▌     | 111/242 [00:01<00:01, 108.81it/s]\u001b[A\n",
      " 50%|█████     | 122/242 [00:01<00:01, 108.26it/s]\u001b[A\n",
      " 55%|█████▍    | 133/242 [00:01<00:01, 108.55it/s]\u001b[A\n",
      " 60%|█████▉    | 144/242 [00:01<00:01, 93.41it/s] \u001b[A\n",
      " 64%|██████▍   | 155/242 [00:01<00:00, 97.20it/s]\u001b[A\n",
      " 69%|██████▊   | 166/242 [00:01<00:00, 100.40it/s]\u001b[A\n",
      " 73%|███████▎  | 177/242 [00:01<00:00, 102.70it/s]\u001b[A\n",
      " 78%|███████▊  | 188/242 [00:01<00:00, 95.17it/s] \u001b[A\n",
      " 82%|████████▏ | 198/242 [00:01<00:00, 95.50it/s]\u001b[A\n",
      " 86%|████████▌ | 208/242 [00:02<00:00, 96.09it/s]\u001b[A\n",
      " 90%|█████████ | 218/242 [00:02<00:00, 95.98it/s]\u001b[A\n",
      " 94%|█████████▍| 228/242 [00:02<00:00, 96.20it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 101.17it/s][A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 501/2883 [00:15<10:39,  3.73it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2588618509292968\n",
      "f1: 0.6909703749232489\n",
      "Test Loss: 0.003886, Acc: 0.763107\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 995/2883 [00:26<00:37, 50.90it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/242 [00:00<00:02, 94.11it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4545\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 20/242 [00:00<00:02, 94.43it/s]\u001b[A\n",
      " 12%|█▏        | 30/242 [00:00<00:02, 95.67it/s]\u001b[A\n",
      " 17%|█▋        | 40/242 [00:00<00:02, 95.66it/s]\u001b[A\n",
      " 21%|██        | 50/242 [00:00<00:01, 96.48it/s]\u001b[A\n",
      " 25%|██▍       | 60/242 [00:00<00:01, 96.46it/s]\u001b[A\n",
      " 29%|██▉       | 70/242 [00:00<00:01, 96.98it/s]\u001b[A\n",
      " 33%|███▎      | 80/242 [00:00<00:01, 96.55it/s]\u001b[A\n",
      " 37%|███▋      | 90/242 [00:00<00:01, 97.22it/s]\u001b[A\n",
      " 41%|████▏     | 100/242 [00:01<00:01, 97.95it/s]\u001b[A\n",
      " 46%|████▌     | 111/242 [00:01<00:01, 98.93it/s]\u001b[A\n",
      " 50%|█████     | 122/242 [00:01<00:01, 99.11it/s]\u001b[A\n",
      " 55%|█████▍    | 132/242 [00:01<00:01, 97.92it/s]\u001b[A\n",
      " 59%|█████▊    | 142/242 [00:01<00:01, 97.69it/s]\u001b[A\n",
      " 63%|██████▎   | 152/242 [00:01<00:00, 97.87it/s]\u001b[A\n",
      " 67%|██████▋   | 162/242 [00:01<00:00, 98.01it/s]\u001b[A\n",
      " 71%|███████   | 172/242 [00:01<00:00, 97.80it/s]\u001b[A\n",
      " 75%|███████▌  | 182/242 [00:01<00:00, 98.24it/s]\u001b[A\n",
      " 79%|███████▉  | 192/242 [00:01<00:00, 98.26it/s]\u001b[A\n",
      " 83%|████████▎ | 202/242 [00:02<00:00, 98.11it/s]\u001b[A\n",
      " 88%|████████▊ | 212/242 [00:02<00:00, 96.62it/s]\u001b[A\n",
      " 92%|█████████▏| 222/242 [00:02<00:00, 91.64it/s]\u001b[A\n",
      " 96%|█████████▌| 232/242 [00:02<00:00, 93.55it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 96.61it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1001/2883 [00:36<16:21,  1.92it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2567541674650316\n",
      "f1: 0.6926316237678655\n",
      "Test Loss: 0.003920, Acc: 0.761912\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1499/2883 [00:46<00:30, 45.11it/s]\n",
      "  0%|          | 0/242 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/242 [00:00<00:02, 94.34it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4901\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▊         | 21/242 [00:00<00:02, 97.42it/s]\u001b[A\n",
      " 13%|█▎        | 32/242 [00:00<00:02, 100.75it/s]\u001b[A\n",
      " 18%|█▊        | 43/242 [00:00<00:01, 103.09it/s]\u001b[A\n",
      " 22%|██▏       | 54/242 [00:00<00:01, 103.83it/s]\u001b[A\n",
      " 27%|██▋       | 65/242 [00:00<00:01, 104.30it/s]\u001b[A\n",
      " 32%|███▏      | 77/242 [00:00<00:01, 106.15it/s]\u001b[A\n",
      " 36%|███▋      | 88/242 [00:00<00:01, 107.23it/s]\u001b[A\n",
      " 41%|████▏     | 100/242 [00:00<00:01, 109.00it/s]\u001b[A\n",
      " 46%|████▌     | 111/242 [00:01<00:01, 108.79it/s]\u001b[A\n",
      " 50%|█████     | 122/242 [00:01<00:01, 106.92it/s]\u001b[A\n",
      " 55%|█████▌    | 134/242 [00:01<00:00, 108.16it/s]\u001b[A\n",
      " 60%|█████▉    | 145/242 [00:01<00:00, 107.95it/s]\u001b[A\n",
      " 64%|██████▍   | 156/242 [00:01<00:00, 107.66it/s]\u001b[A\n",
      " 69%|██████▉   | 167/242 [00:01<00:00, 108.09it/s]\u001b[A\n",
      " 74%|███████▎  | 178/242 [00:01<00:00, 107.63it/s]\u001b[A\n",
      " 78%|███████▊  | 189/242 [00:01<00:00, 107.25it/s]\u001b[A\n",
      " 83%|████████▎ | 201/242 [00:01<00:00, 108.40it/s]\u001b[A\n",
      " 88%|████████▊ | 213/242 [00:01<00:00, 109.24it/s]\u001b[A\n",
      " 93%|█████████▎| 224/242 [00:02<00:00, 109.36it/s]\u001b[A\n",
      "100%|██████████| 242/242 [00:02<00:00, 105.19it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 30976 dev_label_list: 30976 example_id_list: 30976\n",
      "num: 30976\n",
      "n: 5219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 52%|█████▏    | 1504/2883 [00:50<06:19,  3.63it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25330523088714313\n",
      "f1: 0.6926314716983514\n",
      "Test Loss: 0.003936, Acc: 0.760815\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 1527/2883 [00:50<00:45, 29.97it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-15-250ebbbd6b68>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent10verb4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-12-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m                 \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     79\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m                 \u001b[0mepoch\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m     91\u001b[0m     Variable._execution_engine.run_backward(\n\u001b[1;32m     92\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m         allow_unreachable=True)  # allow_unreachable flag\n\u001b[0m\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#sent10verb4\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 499/1782 [00:09<00:22, 56.48it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 10/157 [00:00<00:01, 92.90it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5318\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 22/157 [00:00<00:01, 97.86it/s]\u001b[A\n",
      " 21%|██        | 33/157 [00:00<00:01, 100.15it/s]\u001b[A\n",
      " 28%|██▊       | 44/157 [00:00<00:01, 102.85it/s]\u001b[A\n",
      " 35%|███▌      | 55/157 [00:00<00:00, 104.85it/s]\u001b[A\n",
      " 43%|████▎     | 67/157 [00:00<00:00, 106.67it/s]\u001b[A\n",
      " 50%|████▉     | 78/157 [00:00<00:00, 107.65it/s]\u001b[A\n",
      " 57%|█████▋    | 89/157 [00:00<00:00, 107.92it/s]\u001b[A\n",
      " 64%|██████▎   | 100/157 [00:00<00:00, 107.29it/s]\u001b[A\n",
      " 71%|███████▏  | 112/157 [00:01<00:00, 108.53it/s]\u001b[A\n",
      " 78%|███████▊  | 123/157 [00:01<00:00, 108.64it/s]\u001b[A\n",
      " 86%|████████▌ | 135/157 [00:01<00:00, 109.50it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 108.54it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 505/1782 [00:12<03:20,  6.36it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27634194831013914\n",
      "f1: 0.6993871239682142\n",
      "Test Loss: 0.003956, Acc: 0.761395\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 997/1782 [00:21<00:14, 54.23it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 116.26it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5245\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 16%|█▌        | 25/157 [00:00<00:01, 117.81it/s]\u001b[A\n",
      " 24%|██▍       | 38/157 [00:00<00:01, 118.80it/s]\u001b[A\n",
      " 32%|███▏      | 51/157 [00:00<00:00, 119.51it/s]\u001b[A\n",
      " 41%|████      | 64/157 [00:00<00:00, 121.03it/s]\u001b[A\n",
      " 48%|████▊     | 76/157 [00:00<00:00, 119.63it/s]\u001b[A\n",
      " 57%|█████▋    | 89/157 [00:00<00:00, 120.96it/s]\u001b[A\n",
      " 64%|██████▍   | 101/157 [00:00<00:00, 120.47it/s]\u001b[A\n",
      " 72%|███████▏  | 113/157 [00:00<00:00, 119.27it/s]\u001b[A\n",
      " 80%|███████▉  | 125/157 [00:01<00:00, 119.46it/s]\u001b[A\n",
      " 88%|████████▊ | 138/157 [00:01<00:00, 119.97it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 119.76it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▋    | 1003/1782 [00:24<01:53,  6.85it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25646123260437376\n",
      "f1: 0.7216016824224785\n",
      "Test Loss: 0.003989, Acc: 0.756021\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1495/1782 [00:33<00:05, 51.32it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 8/157 [00:00<00:02, 71.79it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4509\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 20/157 [00:00<00:01, 80.28it/s]\u001b[A\n",
      " 20%|██        | 32/157 [00:00<00:01, 88.35it/s]\u001b[A\n",
      " 28%|██▊       | 44/157 [00:00<00:01, 94.45it/s]\u001b[A\n",
      " 36%|███▌      | 56/157 [00:00<00:01, 100.47it/s]\u001b[A\n",
      " 43%|████▎     | 68/157 [00:00<00:00, 104.18it/s]\u001b[A\n",
      " 52%|█████▏    | 81/157 [00:00<00:00, 108.85it/s]\u001b[A\n",
      " 59%|█████▉    | 93/157 [00:00<00:00, 110.59it/s]\u001b[A\n",
      " 67%|██████▋   | 105/157 [00:00<00:00, 113.11it/s]\u001b[A\n",
      " 75%|███████▍  | 117/157 [00:01<00:00, 113.02it/s]\u001b[A\n",
      " 82%|████████▏ | 129/157 [00:01<00:00, 114.69it/s]\u001b[A\n",
      " 90%|████████▉ | 141/157 [00:01<00:00, 114.75it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 112.20it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 85%|████████▍ | 1507/1782 [00:36<00:30,  8.99it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.24424879295654645\n",
      "f1: 0.7266191963863139\n",
      "Test Loss: 0.004080, Acc: 0.748855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:41<00:00, 42.88it/s]\n",
      "  7%|▋         | 11/157 [00:00<00:01, 107.02it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 112.26it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 7/1782 [00:00<00:28, 62.61it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2760579380857711\n",
      "f1: 0.6980157603134082\n",
      "Test Loss: 0.003901, Acc: 0.762540\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 498/1782 [00:08<00:20, 61.21it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 114.01it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5071\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 114.81it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 115.08it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 115.94it/s]\u001b[A\n",
      " 38%|███▊      | 60/157 [00:00<00:00, 116.44it/s]\u001b[A\n",
      " 46%|████▌     | 72/157 [00:00<00:00, 116.05it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 117.10it/s]\u001b[A\n",
      " 61%|██████    | 96/157 [00:00<00:00, 117.67it/s]\u001b[A\n",
      " 69%|██████▉   | 108/157 [00:00<00:00, 117.74it/s]\u001b[A\n",
      " 76%|███████▋  | 120/157 [00:01<00:00, 118.15it/s]\u001b[A\n",
      " 84%|████████▍ | 132/157 [00:01<00:00, 118.15it/s]\u001b[A\n",
      " 92%|█████████▏| 144/157 [00:01<00:00, 116.94it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 117.02it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 505/1782 [00:11<02:44,  7.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2797500710025561\n",
      "f1: 0.7037135463860882\n",
      "Test Loss: 0.003893, Acc: 0.763137\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 997/1782 [00:19<00:14, 54.11it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 118.14it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4883\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 118.68it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 118.45it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 118.35it/s]\u001b[A\n",
      " 38%|███▊      | 60/157 [00:00<00:00, 118.82it/s]\u001b[A\n",
      " 46%|████▌     | 72/157 [00:00<00:00, 118.12it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 118.19it/s]\u001b[A\n",
      " 61%|██████    | 96/157 [00:00<00:00, 118.73it/s]\u001b[A\n",
      " 69%|██████▉   | 109/157 [00:00<00:00, 119.99it/s]\u001b[A\n",
      " 77%|███████▋  | 121/157 [00:01<00:00, 119.84it/s]\u001b[A\n",
      " 85%|████████▌ | 134/157 [00:01<00:00, 120.33it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 119.42it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▋    | 1003/1782 [00:22<01:53,  6.86it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2732178358420903\n",
      "f1: 0.6792512498703964\n",
      "Test Loss: 0.003907, Acc: 0.761943\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1495/1782 [00:31<00:05, 53.28it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 121.71it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4434\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 16%|█▌        | 25/157 [00:00<00:01, 120.83it/s]\u001b[A\n",
      " 24%|██▍       | 38/157 [00:00<00:00, 121.62it/s]\u001b[A\n",
      " 32%|███▏      | 50/157 [00:00<00:00, 120.30it/s]\u001b[A\n",
      " 40%|████      | 63/157 [00:00<00:00, 121.02it/s]\u001b[A\n",
      " 48%|████▊     | 76/157 [00:00<00:00, 122.30it/s]\u001b[A\n",
      " 57%|█████▋    | 89/157 [00:00<00:00, 123.10it/s]\u001b[A\n",
      " 65%|██████▍   | 102/157 [00:00<00:00, 122.48it/s]\u001b[A\n",
      " 73%|███████▎  | 115/157 [00:00<00:00, 122.73it/s]\u001b[A\n",
      " 82%|████████▏ | 128/157 [00:01<00:00, 122.67it/s]\u001b[A\n",
      " 90%|████████▉ | 141/157 [00:01<00:00, 122.35it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 121.71it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 85%|████████▍ | 1507/1782 [00:34<00:28,  9.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.272081794944618\n",
      "f1: 0.720919832479055\n",
      "Test Loss: 0.003940, Acc: 0.759853\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:39<00:00, 45.23it/s]\n",
      "  8%|▊         | 12/157 [00:00<00:01, 119.46it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 123.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 6/1782 [00:00<00:33, 52.95it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27378585629082647\n",
      "f1: 0.7215527695505041\n",
      "Test Loss: 0.003928, Acc: 0.759206\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 496/1782 [00:09<00:23, 55.90it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 112.54it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5068\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 112.28it/s]\u001b[A\n",
      " 22%|██▏       | 35/157 [00:00<00:01, 104.50it/s]\u001b[A\n",
      " 28%|██▊       | 44/157 [00:00<00:01, 98.56it/s] \u001b[A\n",
      " 36%|███▌      | 56/157 [00:00<00:00, 102.13it/s]\u001b[A\n",
      " 43%|████▎     | 68/157 [00:00<00:00, 105.15it/s]\u001b[A\n",
      " 50%|█████     | 79/157 [00:00<00:00, 106.15it/s]\u001b[A\n",
      " 57%|█████▋    | 90/157 [00:00<00:00, 106.05it/s]\u001b[A\n",
      " 65%|██████▍   | 102/157 [00:00<00:00, 107.38it/s]\u001b[A\n",
      " 73%|███████▎  | 114/157 [00:01<00:00, 108.54it/s]\u001b[A\n",
      " 80%|████████  | 126/157 [00:01<00:00, 109.39it/s]\u001b[A\n",
      " 88%|████████▊ | 138/157 [00:01<00:00, 110.21it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 107.39it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 502/1782 [00:11<03:15,  6.55it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2664015904572565\n",
      "f1: 0.7261301291406432\n",
      "Test Loss: 0.003956, Acc: 0.758708\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 994/1782 [00:20<00:14, 54.18it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 114.13it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4801\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 113.37it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 113.34it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 112.67it/s]\u001b[A\n",
      " 38%|███▊      | 60/157 [00:00<00:00, 112.74it/s]\u001b[A\n",
      " 46%|████▌     | 72/157 [00:00<00:00, 113.46it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 115.34it/s]\u001b[A\n",
      " 61%|██████    | 96/157 [00:00<00:00, 115.20it/s]\u001b[A\n",
      " 69%|██████▉   | 108/157 [00:00<00:00, 114.74it/s]\u001b[A\n",
      " 76%|███████▋  | 120/157 [00:01<00:00, 115.43it/s]\u001b[A\n",
      " 84%|████████▍ | 132/157 [00:01<00:00, 115.54it/s]\u001b[A\n",
      " 92%|█████████▏| 144/157 [00:01<00:00, 102.29it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 110.84it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▋    | 1006/1782 [00:23<01:25,  9.05it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28145413234876454\n",
      "f1: 0.7060054638157502\n",
      "Test Loss: 0.003890, Acc: 0.764082\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1499/1782 [00:32<00:04, 58.38it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 116.87it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4423\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 115.60it/s]\u001b[A\n",
      " 24%|██▎       | 37/157 [00:00<00:01, 117.36it/s]\u001b[A\n",
      " 31%|███       | 49/157 [00:00<00:00, 116.65it/s]\u001b[A\n",
      " 39%|███▉      | 61/157 [00:00<00:00, 117.13it/s]\u001b[A\n",
      " 46%|████▋     | 73/157 [00:00<00:00, 117.03it/s]\u001b[A\n",
      " 54%|█████▍    | 85/157 [00:00<00:00, 117.21it/s]\u001b[A\n",
      " 62%|██████▏   | 97/157 [00:00<00:00, 116.36it/s]\u001b[A\n",
      " 69%|██████▉   | 109/157 [00:00<00:00, 115.12it/s]\u001b[A\n",
      " 77%|███████▋  | 121/157 [00:01<00:00, 115.76it/s]\u001b[A\n",
      " 85%|████████▍ | 133/157 [00:01<00:00, 115.62it/s]\u001b[A\n",
      " 92%|█████████▏| 145/157 [00:01<00:00, 116.65it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 116.19it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 1505/1782 [00:35<00:39,  6.94it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27577392786140303\n",
      "f1: 0.7204627112607862\n",
      "Test Loss: 0.003917, Acc: 0.760947\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:40<00:00, 44.24it/s]\n",
      "  8%|▊         | 13/157 [00:00<00:01, 125.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 125.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/1782 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27776199943197954\n",
      "f1: 0.7137456440495404\n",
      "Test Loss: 0.003889, Acc: 0.762291\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 494/1782 [00:09<00:23, 54.00it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 116.35it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.499\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 116.43it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 116.16it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 116.67it/s]\u001b[A\n",
      " 38%|███▊      | 60/157 [00:00<00:00, 116.04it/s]\u001b[A\n",
      " 46%|████▌     | 72/157 [00:00<00:00, 116.35it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 116.92it/s]\u001b[A\n",
      " 62%|██████▏   | 97/157 [00:00<00:00, 118.05it/s]\u001b[A\n",
      " 69%|██████▉   | 109/157 [00:00<00:00, 117.17it/s]\u001b[A\n",
      " 77%|███████▋  | 121/157 [00:01<00:00, 116.18it/s]\u001b[A\n",
      " 85%|████████▍ | 133/157 [00:01<00:00, 115.89it/s]\u001b[A\n",
      " 92%|█████████▏| 145/157 [00:01<00:00, 115.25it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 113.78it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 505/1782 [00:12<02:19,  9.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2874183470604942\n",
      "f1: 0.7050242761120401\n",
      "Test Loss: 0.003895, Acc: 0.765277\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 997/1782 [00:21<00:16, 48.06it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 122.62it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.481\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 20/157 [00:00<00:01, 95.37it/s] \u001b[A\n",
      " 21%|██        | 33/157 [00:00<00:01, 101.73it/s]\u001b[A\n",
      " 29%|██▉       | 46/157 [00:00<00:01, 106.74it/s]\u001b[A\n",
      " 37%|███▋      | 58/157 [00:00<00:00, 109.99it/s]\u001b[A\n",
      " 45%|████▌     | 71/157 [00:00<00:00, 113.22it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 116.29it/s]\u001b[A\n",
      " 61%|██████    | 96/157 [00:00<00:00, 116.73it/s]\u001b[A\n",
      " 69%|██████▉   | 108/157 [00:00<00:00, 115.71it/s]\u001b[A\n",
      " 76%|███████▋  | 120/157 [00:01<00:00, 116.87it/s]\u001b[A\n",
      " 84%|████████▍ | 132/157 [00:01<00:00, 116.65it/s]\u001b[A\n",
      " 92%|█████████▏| 144/157 [00:01<00:00, 116.30it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 113.87it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▌    | 1002/1782 [00:24<02:15,  5.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2806021016756603\n",
      "f1: 0.7075817205609943\n",
      "Test Loss: 0.003890, Acc: 0.763535\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1494/1782 [00:33<00:05, 54.75it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 120.58it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4378\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 17%|█▋        | 26/157 [00:00<00:01, 120.74it/s]\u001b[A\n",
      " 24%|██▍       | 38/157 [00:00<00:00, 120.26it/s]\u001b[A\n",
      " 32%|███▏      | 51/157 [00:00<00:00, 121.10it/s]\u001b[A\n",
      " 41%|████      | 64/157 [00:00<00:00, 121.66it/s]\u001b[A\n",
      " 49%|████▉     | 77/157 [00:00<00:00, 122.31it/s]\u001b[A\n",
      " 57%|█████▋    | 90/157 [00:00<00:00, 122.46it/s]\u001b[A\n",
      " 65%|██████▍   | 102/157 [00:00<00:00, 121.17it/s]\u001b[A\n",
      " 73%|███████▎  | 115/157 [00:00<00:00, 121.97it/s]\u001b[A\n",
      " 82%|████████▏ | 128/157 [00:01<00:00, 121.96it/s]\u001b[A\n",
      " 90%|████████▉ | 141/157 [00:01<00:00, 122.17it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 121.71it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1505/1782 [00:36<00:29,  9.39it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27037773359840955\n",
      "f1: 0.7230438232284367\n",
      "Test Loss: 0.003931, Acc: 0.759902\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:41<00:00, 43.04it/s]\n",
      "  8%|▊         | 13/157 [00:00<00:01, 121.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 113.55it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/1782 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27747798920761146\n",
      "f1: 0.7202997434891848\n",
      "Test Loss: 0.003904, Acc: 0.762689\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 497/1782 [00:09<00:22, 56.13it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 114.06it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4909\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 21/157 [00:00<00:01, 102.82it/s]\u001b[A\n",
      " 21%|██        | 33/157 [00:00<00:01, 105.40it/s]\u001b[A\n",
      " 29%|██▊       | 45/157 [00:00<00:01, 107.67it/s]\u001b[A\n",
      " 36%|███▋      | 57/157 [00:00<00:00, 109.00it/s]\u001b[A\n",
      " 44%|████▍     | 69/157 [00:00<00:00, 110.45it/s]\u001b[A\n",
      " 52%|█████▏    | 81/157 [00:00<00:00, 110.87it/s]\u001b[A\n",
      " 59%|█████▉    | 93/157 [00:00<00:00, 111.55it/s]\u001b[A\n",
      " 67%|██████▋   | 105/157 [00:00<00:00, 111.75it/s]\u001b[A\n",
      " 74%|███████▍  | 116/157 [00:01<00:00, 97.73it/s] \u001b[A\n",
      " 82%|████████▏ | 128/157 [00:01<00:00, 103.04it/s]\u001b[A\n",
      " 90%|████████▉ | 141/157 [00:01<00:00, 107.88it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 108.65it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 503/1782 [00:11<03:10,  6.72it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2885543879579665\n",
      "f1: 0.7150304522073954\n",
      "Test Loss: 0.003883, Acc: 0.766023\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 997/1782 [00:20<00:13, 56.08it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 122.40it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.481\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 17%|█▋        | 26/157 [00:00<00:01, 122.45it/s]\u001b[A\n",
      " 25%|██▍       | 39/157 [00:00<00:00, 122.24it/s]\u001b[A\n",
      " 33%|███▎      | 52/157 [00:00<00:00, 122.20it/s]\u001b[A\n",
      " 41%|████▏     | 65/157 [00:00<00:00, 122.13it/s]\u001b[A\n",
      " 50%|████▉     | 78/157 [00:00<00:00, 122.34it/s]\u001b[A\n",
      " 58%|█████▊    | 91/157 [00:00<00:00, 122.72it/s]\u001b[A\n",
      " 66%|██████▌   | 103/157 [00:00<00:00, 121.26it/s]\u001b[A\n",
      " 74%|███████▍  | 116/157 [00:00<00:00, 121.53it/s]\u001b[A\n",
      " 82%|████████▏ | 129/157 [00:01<00:00, 122.90it/s]\u001b[A\n",
      " 90%|█████████ | 142/157 [00:01<00:00, 122.92it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 122.31it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▋    | 1003/1782 [00:23<01:50,  7.07it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2780460096563476\n",
      "f1: 0.6783104096509415\n",
      "Test Loss: 0.003922, Acc: 0.761843\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1497/1782 [00:32<00:05, 55.10it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 119.40it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4398\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 19/157 [00:00<00:01, 96.55it/s] \u001b[A\n",
      " 20%|██        | 32/157 [00:00<00:01, 102.89it/s]\u001b[A\n",
      " 29%|██▊       | 45/157 [00:00<00:01, 107.88it/s]\u001b[A\n",
      " 37%|███▋      | 58/157 [00:00<00:00, 112.17it/s]\u001b[A\n",
      " 45%|████▌     | 71/157 [00:00<00:00, 114.29it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 116.50it/s]\u001b[A\n",
      " 62%|██████▏   | 97/157 [00:00<00:00, 118.25it/s]\u001b[A\n",
      " 70%|███████   | 110/157 [00:00<00:00, 119.51it/s]\u001b[A\n",
      " 78%|███████▊  | 123/157 [00:01<00:00, 120.15it/s]\u001b[A\n",
      " 87%|████████▋ | 136/157 [00:01<00:00, 120.63it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 117.12it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 1503/1782 [00:35<00:40,  6.94it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25788128372621416\n",
      "f1: 0.7279100904775497\n",
      "Test Loss: 0.003969, Acc: 0.757116\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:40<00:00, 44.48it/s]\n",
      "  8%|▊         | 13/157 [00:00<00:01, 129.41it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 130.12it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 6/1782 [00:00<00:31, 55.72it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28997443907980686\n",
      "f1: 0.7052756026201116\n",
      "Test Loss: 0.003879, Acc: 0.765426\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 494/1782 [00:08<00:22, 57.39it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 124.23it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4914\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 17%|█▋        | 26/157 [00:00<00:01, 124.47it/s]\u001b[A\n",
      " 21%|██        | 33/157 [00:00<00:01, 99.65it/s] \u001b[A\n",
      " 27%|██▋       | 43/157 [00:00<00:01, 99.01it/s]\u001b[A\n",
      " 33%|███▎      | 52/157 [00:00<00:01, 96.10it/s]\u001b[A\n",
      " 41%|████▏     | 65/157 [00:00<00:00, 102.71it/s]\u001b[A\n",
      " 50%|████▉     | 78/157 [00:00<00:00, 108.06it/s]\u001b[A\n",
      " 58%|█████▊    | 91/157 [00:00<00:00, 111.50it/s]\u001b[A\n",
      " 66%|██████▌   | 104/157 [00:00<00:00, 114.43it/s]\u001b[A\n",
      " 75%|███████▍  | 117/157 [00:01<00:00, 116.55it/s]\u001b[A\n",
      " 83%|████████▎ | 130/157 [00:01<00:00, 118.56it/s]\u001b[A\n",
      " 91%|█████████ | 143/157 [00:01<00:00, 119.43it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 113.88it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 505/1782 [00:11<02:19,  9.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27406986651519455\n",
      "f1: 0.7229705666229431\n",
      "Test Loss: 0.003916, Acc: 0.761545\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 996/1782 [00:20<00:13, 56.16it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 119.87it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4775\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 119.50it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 116.47it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 116.12it/s]\u001b[A\n",
      " 38%|███▊      | 60/157 [00:00<00:00, 115.13it/s]\u001b[A\n",
      " 45%|████▌     | 71/157 [00:00<00:00, 111.89it/s]\u001b[A\n",
      " 52%|█████▏    | 82/157 [00:00<00:00, 111.13it/s]\u001b[A\n",
      " 59%|█████▉    | 93/157 [00:00<00:00, 109.18it/s]\u001b[A\n",
      " 67%|██████▋   | 105/157 [00:00<00:00, 109.91it/s]\u001b[A\n",
      " 75%|███████▍  | 117/157 [00:01<00:00, 110.15it/s]\u001b[A\n",
      " 82%|████████▏ | 129/157 [00:01<00:00, 111.09it/s]\u001b[A\n",
      " 89%|████████▉ | 140/157 [00:01<00:00, 109.00it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 111.11it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▌    | 1002/1782 [00:23<01:56,  6.68it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28628230616302186\n",
      "f1: 0.7068441955973962\n",
      "Test Loss: 0.003880, Acc: 0.765476\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1495/1782 [00:32<00:04, 63.18it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 11/157 [00:00<00:01, 109.59it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4331\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▍        | 23/157 [00:00<00:01, 110.39it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 113.26it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 113.33it/s]\u001b[A\n",
      " 39%|███▉      | 61/157 [00:00<00:00, 116.63it/s]\u001b[A\n",
      " 47%|████▋     | 74/157 [00:00<00:00, 118.30it/s]\u001b[A\n",
      " 55%|█████▌    | 87/157 [00:00<00:00, 119.82it/s]\u001b[A\n",
      " 64%|██████▎   | 100/157 [00:00<00:00, 120.80it/s]\u001b[A\n",
      " 72%|███████▏  | 113/157 [00:00<00:00, 121.44it/s]\u001b[A\n",
      " 80%|████████  | 126/157 [00:01<00:00, 121.83it/s]\u001b[A\n",
      " 89%|████████▊ | 139/157 [00:01<00:00, 122.20it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 119.83it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 1495/1782 [00:47<00:04, 63.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 85%|████████▍ | 1507/1782 [00:47<03:00,  1.52it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2717977847202499\n",
      "f1: 0.7207383815023747\n",
      "Test Loss: 0.003942, Acc: 0.759902\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:52<00:00, 34.00it/s]\n",
      "  8%|▊         | 12/157 [00:00<00:01, 119.17it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 120.32it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 6/1782 [00:00<00:33, 52.93it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28628230616302186\n",
      "f1: 0.7024203982454532\n",
      "Test Loss: 0.003881, Acc: 0.764132\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 496/1782 [00:09<00:24, 52.97it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 10/157 [00:00<00:01, 97.20it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4881\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 20/157 [00:00<00:01, 96.13it/s]\u001b[A\n",
      " 20%|██        | 32/157 [00:00<00:01, 101.26it/s]\u001b[A\n",
      " 29%|██▊       | 45/157 [00:00<00:01, 106.45it/s]\u001b[A\n",
      " 36%|███▋      | 57/157 [00:00<00:00, 109.27it/s]\u001b[A\n",
      " 45%|████▍     | 70/157 [00:00<00:00, 113.34it/s]\u001b[A\n",
      " 52%|█████▏    | 82/157 [00:00<00:00, 115.17it/s]\u001b[A\n",
      " 61%|██████    | 95/157 [00:00<00:00, 118.00it/s]\u001b[A\n",
      " 68%|██████▊   | 107/157 [00:00<00:00, 116.37it/s]\u001b[A\n",
      " 76%|███████▌  | 119/157 [00:01<00:00, 116.85it/s]\u001b[A\n",
      " 83%|████████▎ | 131/157 [00:01<00:00, 103.56it/s]\u001b[A\n",
      " 91%|█████████ | 143/157 [00:01<00:00, 107.71it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 111.93it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 502/1782 [00:12<03:09,  6.74it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28770235728486226\n",
      "f1: 0.6976001136040944\n",
      "Test Loss: 0.003882, Acc: 0.765774\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 998/1782 [00:21<00:13, 59.09it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 11/157 [00:00<00:01, 106.31it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4812\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 22/157 [00:00<00:01, 104.97it/s]\u001b[A\n",
      " 21%|██        | 33/157 [00:00<00:01, 104.73it/s]\u001b[A\n",
      " 28%|██▊       | 44/157 [00:00<00:01, 103.84it/s]\u001b[A\n",
      " 35%|███▌      | 55/157 [00:00<00:00, 104.00it/s]\u001b[A\n",
      " 42%|████▏     | 66/157 [00:00<00:00, 104.78it/s]\u001b[A\n",
      " 49%|████▉     | 77/157 [00:00<00:00, 103.97it/s]\u001b[A\n",
      " 56%|█████▌    | 88/157 [00:00<00:00, 105.35it/s]\u001b[A\n",
      " 63%|██████▎   | 99/157 [00:00<00:00, 106.19it/s]\u001b[A\n",
      " 71%|███████   | 111/157 [00:01<00:00, 107.67it/s]\u001b[A\n",
      " 78%|███████▊  | 122/157 [00:01<00:00, 105.95it/s]\u001b[A\n",
      " 85%|████████▍ | 133/157 [00:01<00:00, 107.09it/s]\u001b[A\n",
      " 92%|█████████▏| 144/157 [00:01<00:00, 107.06it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 105.52it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▋    | 1004/1782 [00:24<01:58,  6.56it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2709457540471457\n",
      "f1: 0.6639313777200774\n",
      "Test Loss: 0.003923, Acc: 0.759007\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1495/1782 [00:33<00:05, 48.56it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 123.56it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4481\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 16%|█▌        | 25/157 [00:00<00:01, 120.27it/s]\u001b[A\n",
      " 24%|██▍       | 38/157 [00:00<00:00, 120.84it/s]\u001b[A\n",
      " 32%|███▏      | 51/157 [00:00<00:00, 121.52it/s]\u001b[A\n",
      " 41%|████      | 64/157 [00:00<00:00, 122.02it/s]\u001b[A\n",
      " 48%|████▊     | 76/157 [00:00<00:00, 121.18it/s]\u001b[A\n",
      " 57%|█████▋    | 89/157 [00:00<00:00, 122.82it/s]\u001b[A\n",
      " 65%|██████▍   | 102/157 [00:00<00:00, 123.45it/s]\u001b[A\n",
      " 73%|███████▎  | 115/157 [00:00<00:00, 124.13it/s]\u001b[A\n",
      " 82%|████████▏ | 128/157 [00:01<00:00, 123.06it/s]\u001b[A\n",
      " 90%|████████▉ | 141/157 [00:01<00:00, 122.49it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 121.77it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 85%|████████▍ | 1506/1782 [00:35<00:33,  8.16it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26043737574552683\n",
      "f1: 0.7294214306000784\n",
      "Test Loss: 0.003972, Acc: 0.758808\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:41<00:00, 43.30it/s]\n",
      "  8%|▊         | 12/157 [00:00<00:01, 119.50it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 119.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/1782 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28259017324623686\n",
      "f1: 0.7159088552584778\n",
      "Test Loss: 0.003894, Acc: 0.763635\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 498/1782 [00:09<00:23, 55.59it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 125.02it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4888\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 16%|█▌        | 25/157 [00:00<00:01, 123.24it/s]\u001b[A\n",
      " 24%|██▎       | 37/157 [00:00<00:00, 121.84it/s]\u001b[A\n",
      " 29%|██▊       | 45/157 [00:00<00:01, 99.63it/s] \u001b[A\n",
      " 37%|███▋      | 58/157 [00:00<00:00, 106.30it/s]\u001b[A\n",
      " 45%|████▌     | 71/157 [00:00<00:00, 111.31it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 114.21it/s]\u001b[A\n",
      " 61%|██████    | 95/157 [00:00<00:00, 106.57it/s]\u001b[A\n",
      " 69%|██████▉   | 108/157 [00:00<00:00, 110.71it/s]\u001b[A\n",
      " 77%|███████▋  | 121/157 [00:01<00:00, 113.69it/s]\u001b[A\n",
      " 85%|████████▌ | 134/157 [00:01<00:00, 116.49it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 115.42it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 504/1782 [00:12<03:05,  6.88it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2800340812269242\n",
      "f1: 0.7062892486351783\n",
      "Test Loss: 0.003885, Acc: 0.764879\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 995/1782 [00:21<00:15, 52.45it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 116.37it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4877\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 116.41it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 115.25it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 115.16it/s]\u001b[A\n",
      " 38%|███▊      | 59/157 [00:00<00:00, 112.81it/s]\u001b[A\n",
      " 45%|████▌     | 71/157 [00:00<00:00, 113.06it/s]\u001b[A\n",
      " 53%|█████▎    | 83/157 [00:00<00:00, 114.26it/s]\u001b[A\n",
      " 61%|██████    | 95/157 [00:00<00:00, 115.13it/s]\u001b[A\n",
      " 68%|██████▊   | 107/157 [00:00<00:00, 115.32it/s]\u001b[A\n",
      " 76%|███████▌  | 119/157 [00:01<00:00, 115.66it/s]\u001b[A\n",
      " 83%|████████▎ | 131/157 [00:01<00:00, 114.41it/s]\u001b[A\n",
      " 91%|█████████ | 143/157 [00:01<00:00, 114.51it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 114.39it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 57%|█████▋    | 1007/1782 [00:24<01:24,  9.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27918205055381995\n",
      "f1: 0.6727796486387837\n",
      "Test Loss: 0.003932, Acc: 0.761346\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1499/1782 [00:33<00:05, 54.02it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 119.75it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4397\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 117.86it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 117.66it/s]\u001b[A\n",
      " 31%|███       | 48/157 [00:00<00:00, 117.78it/s]\u001b[A\n",
      " 38%|███▊      | 60/157 [00:00<00:00, 118.43it/s]\u001b[A\n",
      " 46%|████▌     | 72/157 [00:00<00:00, 117.20it/s]\u001b[A\n",
      " 54%|█████▎    | 84/157 [00:00<00:00, 117.92it/s]\u001b[A\n",
      " 62%|██████▏   | 97/157 [00:00<00:00, 118.74it/s]\u001b[A\n",
      " 69%|██████▉   | 109/157 [00:00<00:00, 118.58it/s]\u001b[A\n",
      " 77%|███████▋  | 121/157 [00:01<00:00, 117.67it/s]\u001b[A\n",
      " 85%|████████▍ | 133/157 [00:01<00:00, 117.49it/s]\u001b[A\n",
      " 92%|█████████▏| 145/157 [00:01<00:00, 117.25it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 117.48it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 1505/1782 [00:36<00:40,  6.87it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2510650383413803\n",
      "f1: 0.7296423274412536\n",
      "Test Loss: 0.004006, Acc: 0.754479\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:41<00:00, 42.91it/s]\n",
      "  7%|▋         | 11/157 [00:00<00:01, 106.59it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 106.48it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/1782 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28429423459244535\n",
      "f1: 0.7142165149691471\n",
      "Test Loss: 0.003890, Acc: 0.765326\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 498/1782 [00:09<00:24, 52.94it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 114.63it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4786\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 102.22it/s]\u001b[A\n",
      " 22%|██▏       | 34/157 [00:00<00:01, 98.95it/s] \u001b[A\n",
      " 29%|██▉       | 46/157 [00:00<00:01, 103.05it/s]\u001b[A\n",
      " 38%|███▊      | 59/157 [00:00<00:00, 108.68it/s]\u001b[A\n",
      " 46%|████▌     | 72/157 [00:00<00:00, 113.65it/s]\u001b[A\n",
      " 54%|█████▍    | 85/157 [00:00<00:00, 117.53it/s]\u001b[A\n",
      " 62%|██████▏   | 98/157 [00:00<00:00, 119.94it/s]\u001b[A\n",
      " 71%|███████   | 111/157 [00:00<00:00, 121.34it/s]\u001b[A\n",
      " 79%|███████▉  | 124/157 [00:01<00:00, 122.62it/s]\u001b[A\n",
      " 87%|████████▋ | 137/157 [00:01<00:00, 122.88it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 116.13it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 504/1782 [00:11<03:04,  6.94it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28628230616302186\n",
      "f1: 0.7078513048692023\n",
      "Test Loss: 0.003887, Acc: 0.764729\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 994/1782 [00:21<00:15, 50.94it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 12/157 [00:00<00:01, 115.21it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4769\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 24/157 [00:00<00:01, 116.06it/s]\u001b[A\n",
      " 23%|██▎       | 36/157 [00:00<00:01, 116.57it/s]\u001b[A\n",
      " 31%|███       | 49/157 [00:00<00:00, 117.44it/s]\u001b[A\n",
      " 39%|███▉      | 62/157 [00:00<00:00, 118.66it/s]\u001b[A\n",
      " 47%|████▋     | 74/157 [00:00<00:00, 117.63it/s]\u001b[A\n",
      " 55%|█████▍    | 86/157 [00:00<00:00, 118.28it/s]\u001b[A\n",
      " 63%|██████▎   | 99/157 [00:00<00:00, 119.42it/s]\u001b[A\n",
      " 71%|███████▏  | 112/157 [00:00<00:00, 119.99it/s]\u001b[A\n",
      " 79%|███████▉  | 124/157 [00:01<00:00, 119.30it/s]\u001b[A\n",
      " 87%|████████▋ | 136/157 [00:01<00:00, 118.66it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 119.01it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▋    | 1005/1782 [00:23<01:23,  9.30it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2797500710025561\n",
      "f1: 0.7094803063704003\n",
      "Test Loss: 0.003895, Acc: 0.764182\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1496/1782 [00:32<00:04, 57.57it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 123.93it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4249\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 17%|█▋        | 26/157 [00:00<00:01, 123.63it/s]\u001b[A\n",
      " 25%|██▍       | 39/157 [00:00<00:00, 124.40it/s]\u001b[A\n",
      " 33%|███▎      | 52/157 [00:00<00:00, 124.69it/s]\u001b[A\n",
      " 41%|████▏     | 65/157 [00:00<00:00, 125.26it/s]\u001b[A\n",
      " 50%|████▉     | 78/157 [00:00<00:00, 126.39it/s]\u001b[A\n",
      " 58%|█████▊    | 91/157 [00:00<00:00, 126.37it/s]\u001b[A\n",
      " 66%|██████▌   | 104/157 [00:00<00:00, 127.07it/s]\u001b[A\n",
      " 75%|███████▍  | 117/157 [00:00<00:00, 127.28it/s]\u001b[A\n",
      " 83%|████████▎ | 130/157 [00:01<00:00, 126.49it/s]\u001b[A\n",
      " 91%|█████████ | 143/157 [00:01<00:00, 125.97it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 125.75it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 1502/1782 [00:34<00:39,  7.12it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27378585629082647\n",
      "f1: 0.7167448821808434\n",
      "Test Loss: 0.003909, Acc: 0.760898\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:39<00:00, 44.66it/s]\n",
      "  8%|▊         | 13/157 [00:00<00:01, 124.25it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 123.88it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 6/1782 [00:00<00:32, 55.19it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2817381425731326\n",
      "f1: 0.7181284177024082\n",
      "Test Loss: 0.003898, Acc: 0.764381\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 28%|██▊       | 495/1782 [00:08<00:22, 56.58it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 124.47it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4809\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 21/157 [00:00<00:01, 98.60it/s] \u001b[A\n",
      " 20%|█▉        | 31/157 [00:00<00:01, 98.78it/s]\u001b[A\n",
      " 27%|██▋       | 42/157 [00:00<00:01, 101.22it/s]\u001b[A\n",
      " 34%|███▍      | 54/157 [00:00<00:00, 105.43it/s]\u001b[A\n",
      " 42%|████▏     | 66/157 [00:00<00:00, 108.86it/s]\u001b[A\n",
      " 50%|████▉     | 78/157 [00:00<00:00, 111.77it/s]\u001b[A\n",
      " 57%|█████▋    | 90/157 [00:00<00:00, 112.75it/s]\u001b[A\n",
      " 65%|██████▍   | 102/157 [00:00<00:00, 114.13it/s]\u001b[A\n",
      " 73%|███████▎  | 114/157 [00:01<00:00, 115.17it/s]\u001b[A\n",
      " 80%|████████  | 126/157 [00:01<00:00, 115.97it/s]\u001b[A\n",
      " 88%|████████▊ | 138/157 [00:01<00:00, 116.43it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 111.29it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 28%|██▊       | 501/1782 [00:11<03:11,  6.70it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2859982959386538\n",
      "f1: 0.7053028766178485\n",
      "Test Loss: 0.003885, Acc: 0.765028\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 994/1782 [00:20<00:14, 55.78it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 13/157 [00:00<00:01, 124.16it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4767\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 17%|█▋        | 26/157 [00:00<00:01, 123.92it/s]\u001b[A\n",
      " 25%|██▍       | 39/157 [00:00<00:00, 123.36it/s]\u001b[A\n",
      " 33%|███▎      | 52/157 [00:00<00:00, 122.90it/s]\u001b[A\n",
      " 41%|████▏     | 65/157 [00:00<00:00, 122.24it/s]\u001b[A\n",
      " 48%|████▊     | 76/157 [00:00<00:00, 103.30it/s]\u001b[A\n",
      " 57%|█████▋    | 89/157 [00:00<00:00, 109.64it/s]\u001b[A\n",
      " 65%|██████▍   | 102/157 [00:00<00:00, 114.01it/s]\u001b[A\n",
      " 73%|███████▎  | 115/157 [00:00<00:00, 117.06it/s]\u001b[A\n",
      " 82%|████████▏ | 128/157 [00:01<00:00, 117.87it/s]\u001b[A\n",
      " 90%|████████▉ | 141/157 [00:01<00:00, 119.09it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 117.70it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▌    | 1000/1782 [00:22<01:52,  6.97it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2820221527975007\n",
      "f1: 0.7121802067414167\n",
      "Test Loss: 0.003897, Acc: 0.764978\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 1499/1782 [00:31<00:04, 57.71it/s]\n",
      "  0%|          | 0/157 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 11/157 [00:00<00:01, 106.75it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.429\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 22/157 [00:00<00:01, 106.60it/s]\u001b[A\n",
      " 22%|██▏       | 34/157 [00:00<00:01, 107.90it/s]\u001b[A\n",
      " 29%|██▊       | 45/157 [00:00<00:01, 106.01it/s]\u001b[A\n",
      " 36%|███▌      | 56/157 [00:00<00:00, 106.97it/s]\u001b[A\n",
      " 43%|████▎     | 67/157 [00:00<00:00, 107.63it/s]\u001b[A\n",
      " 50%|████▉     | 78/157 [00:00<00:00, 107.49it/s]\u001b[A\n",
      " 57%|█████▋    | 89/157 [00:00<00:00, 106.79it/s]\u001b[A\n",
      " 64%|██████▎   | 100/157 [00:00<00:00, 107.67it/s]\u001b[A\n",
      " 71%|███████   | 111/157 [00:01<00:00, 107.13it/s]\u001b[A\n",
      " 78%|███████▊  | 122/157 [00:01<00:00, 106.05it/s]\u001b[A\n",
      " 85%|████████▍ | 133/157 [00:01<00:00, 106.67it/s]\u001b[A\n",
      " 92%|█████████▏| 144/157 [00:01<00:00, 107.14it/s]\u001b[A\n",
      "100%|██████████| 157/157 [00:01<00:00, 106.82it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 1505/1782 [00:34<00:43,  6.40it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2621414370917353\n",
      "f1: 0.7293716161083442\n",
      "Test Loss: 0.003979, Acc: 0.759305\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1782/1782 [00:39<00:00, 45.56it/s]\n",
      "  8%|▊         | 12/157 [00:00<00:01, 119.10it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 157/157 [00:01<00:00, 118.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20096 dev_label_list: 20096 example_id_list: 20096\n",
      "num: 20096\n",
      "n: 3521\n",
      "em: 0.2854302754899176\n",
      "f1: 0.7019323965504075\n",
      "Test Loss: 0.003883, Acc: 0.764082\n"
     ]
    }
   ],
   "source": [
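    "# Experiment tag: sent10verb3 (NOTE(review): presumably names the sentence/verb setting of the data behind train_loader/dev_loader -- confirm)\n",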
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▊        | 496/2681 [00:09<00:41, 52.53it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/226 [00:00<00:02, 97.79it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4674\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 19/226 [00:00<00:02, 94.97it/s]\u001b[A\n",
      " 11%|█         | 24/226 [00:00<00:02, 70.67it/s]\u001b[A\n",
      " 15%|█▌        | 35/226 [00:00<00:02, 77.91it/s]\u001b[A\n",
      " 20%|██        | 46/226 [00:00<00:02, 83.53it/s]\u001b[A\n",
      " 25%|██▍       | 56/226 [00:00<00:01, 87.64it/s]\u001b[A\n",
      " 29%|██▉       | 66/226 [00:00<00:01, 90.66it/s]\u001b[A\n",
      " 34%|███▎      | 76/226 [00:00<00:01, 92.68it/s]\u001b[A\n",
      " 38%|███▊      | 86/226 [00:00<00:01, 93.84it/s]\u001b[A\n",
      " 43%|████▎     | 97/226 [00:01<00:01, 95.83it/s]\u001b[A\n",
      " 47%|████▋     | 107/226 [00:01<00:01, 96.39it/s]\u001b[A\n",
      " 52%|█████▏    | 117/226 [00:01<00:01, 97.35it/s]\u001b[A\n",
      " 56%|█████▌    | 127/226 [00:01<00:01, 97.27it/s]\u001b[A\n",
      " 61%|██████    | 138/226 [00:01<00:00, 98.13it/s]\u001b[A\n",
      " 65%|██████▌   | 148/226 [00:01<00:00, 98.64it/s]\u001b[A\n",
      " 70%|███████   | 159/226 [00:01<00:00, 99.60it/s]\u001b[A\n",
      " 75%|███████▍  | 169/226 [00:01<00:00, 99.19it/s]\u001b[A\n",
      " 80%|███████▉  | 180/226 [00:01<00:00, 101.03it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 102.98it/s]\u001b[A\n",
      " 90%|████████▉ | 203/226 [00:02<00:00, 105.15it/s]\u001b[A\n",
      " 95%|█████████▍| 214/226 [00:02<00:00, 105.68it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 97.99it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 19%|█▊        | 502/2681 [00:13<08:07,  4.47it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2418391719745223\n",
      "f1: 0.6383284046102567\n",
      "Test Loss: 0.004050, Acc: 0.752247\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|███▋      | 999/2681 [00:23<00:32, 51.45it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 118.69it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4836\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 118.00it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 118.09it/s]\u001b[A\n",
      " 22%|██▏       | 49/226 [00:00<00:01, 119.14it/s]\u001b[A\n",
      " 27%|██▋       | 61/226 [00:00<00:01, 119.31it/s]\u001b[A\n",
      " 31%|███▏      | 71/226 [00:00<00:01, 99.74it/s] \u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 106.14it/s]\u001b[A\n",
      " 43%|████▎     | 97/226 [00:00<00:01, 110.64it/s]\u001b[A\n",
      " 49%|████▊     | 110/226 [00:00<00:01, 113.81it/s]\u001b[A\n",
      " 54%|█████▍    | 123/226 [00:01<00:00, 116.08it/s]\u001b[A\n",
      " 60%|██████    | 136/226 [00:01<00:00, 118.43it/s]\u001b[A\n",
      " 66%|██████▌   | 149/226 [00:01<00:00, 120.65it/s]\u001b[A\n",
      " 72%|███████▏  | 162/226 [00:01<00:00, 122.20it/s]\u001b[A\n",
      " 77%|███████▋  | 175/226 [00:01<00:00, 121.85it/s]\u001b[A\n",
      " 83%|████████▎ | 188/226 [00:01<00:00, 122.87it/s]\u001b[A\n",
      " 89%|████████▉ | 201/226 [00:01<00:00, 122.30it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 118.12it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 37%|███▋      | 1005/2681 [00:26<05:40,  4.93it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2575636942675159\n",
      "f1: 0.6805112228977749\n",
      "Test Loss: 0.004005, Acc: 0.759403\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 1495/2681 [00:35<00:21, 54.85it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 115.28it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4558\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 115.61it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 117.26it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 118.40it/s]\u001b[A\n",
      " 28%|██▊       | 63/226 [00:00<00:01, 119.33it/s]\u001b[A\n",
      " 34%|███▎      | 76/226 [00:00<00:01, 120.11it/s]\u001b[A\n",
      " 39%|███▉      | 89/226 [00:00<00:01, 120.69it/s]\u001b[A\n",
      " 45%|████▌     | 102/226 [00:00<00:01, 121.79it/s]\u001b[A\n",
      " 50%|█████     | 114/226 [00:00<00:00, 121.10it/s]\u001b[A\n",
      " 56%|█████▌    | 126/226 [00:01<00:00, 120.59it/s]\u001b[A\n",
      " 62%|██████▏   | 139/226 [00:01<00:00, 121.41it/s]\u001b[A\n",
      " 67%|██████▋   | 152/226 [00:01<00:00, 122.39it/s]\u001b[A\n",
      " 73%|███████▎  | 165/226 [00:01<00:00, 123.10it/s]\u001b[A\n",
      " 79%|███████▉  | 178/226 [00:01<00:00, 123.23it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 122.60it/s]\u001b[A\n",
      " 90%|█████████ | 204/226 [00:01<00:00, 122.57it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 121.05it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▌    | 1501/2681 [00:39<03:59,  4.92it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26711783439490444\n",
      "f1: 0.6984666242727637\n",
      "Test Loss: 0.003999, Acc: 0.761615\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▍  | 1996/2681 [00:49<00:13, 51.91it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/226 [00:00<00:02, 105.37it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5247\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|▉         | 22/226 [00:00<00:01, 105.85it/s]\u001b[A\n",
      " 15%|█▌        | 34/226 [00:00<00:01, 107.84it/s]\u001b[A\n",
      " 20%|██        | 46/226 [00:00<00:01, 110.93it/s]\u001b[A\n",
      " 26%|██▌       | 59/226 [00:00<00:01, 114.22it/s]\u001b[A\n",
      " 32%|███▏      | 72/226 [00:00<00:01, 116.61it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 116.97it/s]\u001b[A\n",
      " 42%|████▏     | 96/226 [00:00<00:01, 117.74it/s]\u001b[A\n",
      " 48%|████▊     | 109/226 [00:00<00:00, 118.91it/s]\u001b[A\n",
      " 54%|█████▍    | 122/226 [00:01<00:00, 120.03it/s]\u001b[A\n",
      " 60%|█████▉    | 135/226 [00:01<00:00, 120.78it/s]\u001b[A\n",
      " 65%|██████▌   | 147/226 [00:01<00:00, 118.37it/s]\u001b[A\n",
      " 71%|███████   | 160/226 [00:01<00:00, 119.25it/s]\u001b[A\n",
      " 76%|███████▌  | 172/226 [00:01<00:00, 110.68it/s]\u001b[A\n",
      " 82%|████████▏ | 185/226 [00:01<00:00, 114.37it/s]\u001b[A\n",
      " 87%|████████▋ | 197/226 [00:01<00:00, 115.99it/s]\u001b[A\n",
      " 92%|█████████▏| 209/226 [00:01<00:00, 115.64it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 116.41it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 75%|███████▍  | 2002/2681 [00:52<02:21,  4.80it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2633359872611465\n",
      "f1: 0.6843608399739044\n",
      "Test Loss: 0.003953, Acc: 0.760509\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2493/2681 [01:02<00:03, 60.28it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 117.20it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4199\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 116.48it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 117.91it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 118.79it/s]\u001b[A\n",
      " 28%|██▊       | 63/226 [00:00<00:01, 119.50it/s]\u001b[A\n",
      " 33%|███▎      | 75/226 [00:00<00:01, 119.04it/s]\u001b[A\n",
      " 39%|███▉      | 88/226 [00:00<00:01, 119.69it/s]\u001b[A\n",
      " 45%|████▍     | 101/226 [00:00<00:01, 120.44it/s]\u001b[A\n",
      " 50%|█████     | 114/226 [00:00<00:00, 120.74it/s]\u001b[A\n",
      " 56%|█████▌    | 127/226 [00:01<00:00, 120.95it/s]\u001b[A\n",
      " 62%|██████▏   | 139/226 [00:01<00:00, 120.29it/s]\u001b[A\n",
      " 67%|██████▋   | 152/226 [00:01<00:00, 120.42it/s]\u001b[A\n",
      " 73%|███████▎  | 165/226 [00:01<00:00, 120.81it/s]\u001b[A\n",
      " 79%|███████▉  | 178/226 [00:01<00:00, 120.87it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 120.71it/s]\u001b[A\n",
      " 90%|█████████ | 204/226 [00:01<00:00, 111.38it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 118.11it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2506/2681 [01:06<00:22,  7.72it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.24940286624203822\n",
      "f1: 0.7172612297910987\n",
      "Test Loss: 0.004026, Acc: 0.754010\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2681/2681 [01:09<00:00, 38.63it/s]\n",
      "  4%|▍         | 10/226 [00:00<00:02, 97.19it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 226/226 [00:02<00:00, 104.32it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2681 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.22054140127388536\n",
      "f1: 0.6027556617126701\n",
      "Test Loss: 0.004076, Acc: 0.745368\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▊        | 497/2681 [00:09<00:40, 54.51it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 114.77it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4599\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 114.64it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 116.70it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 118.12it/s]\u001b[A\n",
      " 27%|██▋       | 62/226 [00:00<00:01, 118.01it/s]\u001b[A\n",
      " 33%|███▎      | 74/226 [00:00<00:01, 117.09it/s]\u001b[A\n",
      " 38%|███▊      | 86/226 [00:00<00:01, 117.56it/s]\u001b[A\n",
      " 44%|████▍     | 99/226 [00:00<00:01, 119.01it/s]\u001b[A\n",
      " 49%|████▉     | 111/226 [00:00<00:00, 119.12it/s]\u001b[A\n",
      " 55%|█████▍    | 124/226 [00:01<00:00, 120.50it/s]\u001b[A\n",
      " 61%|██████    | 137/226 [00:01<00:00, 120.62it/s]\u001b[A\n",
      " 66%|██████▌   | 149/226 [00:01<00:00, 119.43it/s]\u001b[A\n",
      " 71%|███████   | 161/226 [00:01<00:00, 118.71it/s]\u001b[A\n",
      " 77%|███████▋  | 174/226 [00:01<00:00, 119.19it/s]\u001b[A\n",
      " 82%|████████▏ | 186/226 [00:01<00:00, 119.31it/s]\u001b[A\n",
      " 88%|████████▊ | 198/226 [00:01<00:00, 118.15it/s]\u001b[A\n",
      " 93%|█████████▎| 210/226 [00:01<00:00, 118.58it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 118.50it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 19%|█▊        | 497/2681 [00:21<00:40, 54.51it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▉        | 505/2681 [00:22<33:08,  1.09it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2633359872611465\n",
      "f1: 0.7152036458764279\n",
      "Test Loss: 0.003974, Acc: 0.758020\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|███▋      | 997/2681 [00:30<00:29, 57.75it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 119.73it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4577\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 118.59it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 121.00it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 122.48it/s]\u001b[A\n",
      " 28%|██▊       | 63/226 [00:00<00:01, 123.21it/s]\u001b[A\n",
      " 34%|███▎      | 76/226 [00:00<00:01, 123.39it/s]\u001b[A\n",
      " 39%|███▉      | 89/226 [00:00<00:01, 124.01it/s]\u001b[A\n",
      " 45%|████▌     | 102/226 [00:00<00:00, 124.69it/s]\u001b[A\n",
      " 51%|█████     | 115/226 [00:00<00:00, 123.98it/s]\u001b[A\n",
      " 57%|█████▋    | 128/226 [00:01<00:00, 125.28it/s]\u001b[A\n",
      " 62%|██████▏   | 141/226 [00:01<00:00, 125.19it/s]\u001b[A\n",
      " 68%|██████▊   | 154/226 [00:01<00:00, 125.36it/s]\u001b[A\n",
      " 74%|███████▍  | 167/226 [00:01<00:00, 124.17it/s]\u001b[A\n",
      " 80%|███████▉  | 180/226 [00:01<00:00, 123.05it/s]\u001b[A\n",
      " 85%|████████▌ | 193/226 [00:01<00:00, 123.16it/s]\u001b[A\n",
      " 91%|█████████ | 206/226 [00:01<00:00, 124.20it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 123.77it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 37%|███▋      | 1003/2681 [00:34<05:35,  5.01it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2649283439490446\n",
      "f1: 0.690740534898584\n",
      "Test Loss: 0.003927, Acc: 0.761062\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 1494/2681 [00:43<00:21, 54.36it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 112.27it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4179\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 112.53it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 113.27it/s]\u001b[A\n",
      " 21%|██        | 48/226 [00:00<00:01, 113.82it/s]\u001b[A\n",
      " 27%|██▋       | 60/226 [00:00<00:01, 113.99it/s]\u001b[A\n",
      " 32%|███▏      | 72/226 [00:00<00:01, 114.97it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 116.05it/s]\u001b[A\n",
      " 42%|████▏     | 96/226 [00:00<00:01, 116.82it/s]\u001b[A\n",
      " 48%|████▊     | 108/226 [00:00<00:01, 116.66it/s]\u001b[A\n",
      " 53%|█████▎    | 120/226 [00:01<00:00, 115.92it/s]\u001b[A\n",
      " 58%|█████▊    | 132/226 [00:01<00:00, 115.92it/s]\u001b[A\n",
      " 64%|██████▎   | 144/226 [00:01<00:00, 116.30it/s]\u001b[A\n",
      " 69%|██████▉   | 156/226 [00:01<00:00, 115.95it/s]\u001b[A\n",
      " 74%|███████▍  | 168/226 [00:01<00:00, 116.65it/s]\u001b[A\n",
      " 80%|███████▉  | 180/226 [00:01<00:00, 116.20it/s]\u001b[A\n",
      " 85%|████████▍ | 192/226 [00:01<00:00, 116.26it/s]\u001b[A\n",
      " 90%|█████████ | 204/226 [00:01<00:00, 115.95it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 115.70it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 56%|█████▌    | 1500/2681 [00:47<04:04,  4.84it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2679140127388535\n",
      "f1: 0.704420269000295\n",
      "Test Loss: 0.003949, Acc: 0.761373\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▍  | 1996/2681 [00:57<00:12, 56.08it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 118.64it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5188\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 118.09it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 119.81it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 120.26it/s]\u001b[A\n",
      " 28%|██▊       | 63/226 [00:00<00:01, 122.21it/s]\u001b[A\n",
      " 34%|███▎      | 76/226 [00:00<00:01, 123.13it/s]\u001b[A\n",
      " 39%|███▉      | 89/226 [00:00<00:01, 124.05it/s]\u001b[A\n",
      " 45%|████▌     | 102/226 [00:00<00:00, 124.95it/s]\u001b[A\n",
      " 51%|█████     | 115/226 [00:00<00:00, 124.03it/s]\u001b[A\n",
      " 57%|█████▋    | 128/226 [00:01<00:00, 124.66it/s]\u001b[A\n",
      " 62%|██████▏   | 141/226 [00:01<00:00, 124.65it/s]\u001b[A\n",
      " 68%|██████▊   | 154/226 [00:01<00:00, 124.97it/s]\u001b[A\n",
      " 74%|███████▍  | 167/226 [00:01<00:00, 125.75it/s]\u001b[A\n",
      " 80%|███████▉  | 180/226 [00:01<00:00, 124.27it/s]\u001b[A\n",
      " 85%|████████▌ | 193/226 [00:01<00:00, 124.20it/s]\u001b[A\n",
      " 91%|█████████ | 206/226 [00:01<00:00, 124.16it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 123.74it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 75%|███████▍  | 2002/2681 [01:00<02:15,  5.00it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2718949044585987\n",
      "f1: 0.6935457997858563\n",
      "Test Loss: 0.003929, Acc: 0.762030\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2494/2681 [01:09<00:03, 56.24it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 117.26it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4101\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 16/226 [00:00<00:02, 74.20it/s] \u001b[A\n",
      " 12%|█▏        | 28/226 [00:00<00:02, 83.48it/s]\u001b[A\n",
      " 18%|█▊        | 40/226 [00:00<00:02, 91.48it/s]\u001b[A\n",
      " 23%|██▎       | 52/226 [00:00<00:01, 98.30it/s]\u001b[A\n",
      " 29%|██▉       | 65/226 [00:00<00:01, 104.32it/s]\u001b[A\n",
      " 35%|███▍      | 78/226 [00:00<00:01, 109.36it/s]\u001b[A\n",
      " 40%|███▉      | 90/226 [00:00<00:01, 112.17it/s]\u001b[A\n",
      " 46%|████▌     | 103/226 [00:00<00:01, 115.23it/s]\u001b[A\n",
      " 51%|█████▏    | 116/226 [00:01<00:00, 117.37it/s]\u001b[A\n",
      " 57%|█████▋    | 129/226 [00:01<00:00, 119.26it/s]\u001b[A\n",
      " 63%|██████▎   | 142/226 [00:01<00:00, 120.17it/s]\u001b[A\n",
      " 68%|██████▊   | 154/226 [00:01<00:00, 119.96it/s]\u001b[A\n",
      " 73%|███████▎  | 166/226 [00:01<00:00, 118.25it/s]\u001b[A\n",
      " 79%|███████▉  | 179/226 [00:01<00:00, 119.61it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 119.28it/s]\u001b[A\n",
      " 90%|█████████ | 204/226 [00:01<00:00, 120.28it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 115.39it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2505/2681 [01:12<00:26,  6.68it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2382563694267516\n",
      "f1: 0.7193524617191144\n",
      "Test Loss: 0.004039, Acc: 0.751106\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2681/2681 [01:16<00:00, 35.12it/s]\n",
      "  4%|▍         | 9/226 [00:00<00:02, 80.68it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 226/226 [00:02<00:00, 103.20it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2681 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2577627388535032\n",
      "f1: 0.662571460740256\n",
      "Test Loss: 0.003949, Acc: 0.757605\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▊        | 497/2681 [00:09<00:39, 55.69it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 114.25it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.469\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 113.86it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 114.31it/s]\u001b[A\n",
      " 19%|█▉        | 44/226 [00:00<00:01, 94.97it/s] \u001b[A\n",
      " 25%|██▌       | 57/226 [00:00<00:01, 101.80it/s]\u001b[A\n",
      " 31%|███       | 69/226 [00:00<00:01, 106.59it/s]\u001b[A\n",
      " 35%|███▍      | 79/226 [00:00<00:01, 101.88it/s]\u001b[A\n",
      " 41%|████      | 92/226 [00:00<00:01, 107.96it/s]\u001b[A\n",
      " 46%|████▋     | 105/226 [00:00<00:01, 112.01it/s]\u001b[A\n",
      " 52%|█████▏    | 117/226 [00:01<00:00, 113.19it/s]\u001b[A\n",
      " 58%|█████▊    | 130/226 [00:01<00:00, 115.45it/s]\u001b[A\n",
      " 63%|██████▎   | 143/226 [00:01<00:00, 117.49it/s]\u001b[A\n",
      " 69%|██████▉   | 156/226 [00:01<00:00, 119.08it/s]\u001b[A\n",
      " 75%|███████▍  | 169/226 [00:01<00:00, 121.22it/s]\u001b[A\n",
      " 81%|████████  | 182/226 [00:01<00:00, 122.11it/s]\u001b[A\n",
      " 86%|████████▋ | 195/226 [00:01<00:00, 122.09it/s]\u001b[A\n",
      " 92%|█████████▏| 208/226 [00:01<00:00, 120.32it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 115.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 19%|█▉        | 503/2681 [00:13<07:40,  4.73it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2557722929936306\n",
      "f1: 0.717827401057906\n",
      "Test Loss: 0.004012, Acc: 0.756326\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|███▋      | 997/2681 [00:21<00:32, 51.64it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 116.36it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4548\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 115.06it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 116.73it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 118.84it/s]\u001b[A\n",
      " 28%|██▊       | 63/226 [00:00<00:01, 120.41it/s]\u001b[A\n",
      " 34%|███▎      | 76/226 [00:00<00:01, 121.13it/s]\u001b[A\n",
      " 39%|███▉      | 89/226 [00:00<00:01, 122.15it/s]\u001b[A\n",
      " 45%|████▍     | 101/226 [00:00<00:01, 120.82it/s]\u001b[A\n",
      " 50%|█████     | 114/226 [00:00<00:00, 121.88it/s]\u001b[A\n",
      " 56%|█████▌    | 127/226 [00:01<00:00, 121.95it/s]\u001b[A\n",
      " 62%|██████▏   | 140/226 [00:01<00:00, 123.45it/s]\u001b[A\n",
      " 68%|██████▊   | 153/226 [00:01<00:00, 123.71it/s]\u001b[A\n",
      " 73%|███████▎  | 166/226 [00:01<00:00, 122.79it/s]\u001b[A\n",
      " 79%|███████▉  | 179/226 [00:01<00:00, 122.48it/s]\u001b[A\n",
      " 85%|████████▍ | 192/226 [00:01<00:00, 122.10it/s]\u001b[A\n",
      " 91%|█████████ | 205/226 [00:01<00:00, 122.32it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 121.45it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 37%|███▋      | 1003/2681 [00:25<05:38,  4.95it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26393312101910826\n",
      "f1: 0.6994264614985255\n",
      "Test Loss: 0.003931, Acc: 0.760336\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 1499/2681 [00:34<00:21, 55.58it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 118.34it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4365\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 117.44it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 118.58it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 120.67it/s]\u001b[A\n",
      " 28%|██▊       | 63/226 [00:00<00:01, 121.30it/s]\u001b[A\n",
      " 34%|███▎      | 76/226 [00:00<00:01, 121.78it/s]\u001b[A\n",
      " 39%|███▉      | 89/226 [00:00<00:01, 122.65it/s]\u001b[A\n",
      " 45%|████▌     | 102/226 [00:00<00:01, 123.04it/s]\u001b[A\n",
      " 51%|█████     | 115/226 [00:00<00:00, 124.10it/s]\u001b[A\n",
      " 57%|█████▋    | 128/226 [00:01<00:00, 123.37it/s]\u001b[A\n",
      " 62%|██████▏   | 141/226 [00:01<00:00, 123.77it/s]\u001b[A\n",
      " 68%|██████▊   | 154/226 [00:01<00:00, 122.93it/s]\u001b[A\n",
      " 74%|███████▍  | 167/226 [00:01<00:00, 123.04it/s]\u001b[A\n",
      " 80%|███████▉  | 180/226 [00:01<00:00, 124.04it/s]\u001b[A\n",
      " 85%|████████▌ | 193/226 [00:01<00:00, 123.70it/s]\u001b[A\n",
      " 91%|█████████ | 206/226 [00:01<00:00, 123.98it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 122.58it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.23168789808917198\n",
      "f1: 0.7226083055302908\n",
      "Test Loss: 0.004106, Acc: 0.745368\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 75%|███████▍  | 1998/2681 [00:47<00:12, 54.86it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 115.91it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5132\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 116.80it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 118.10it/s]\u001b[A\n",
      " 22%|██▏       | 50/226 [00:00<00:01, 119.13it/s]\u001b[A\n",
      " 27%|██▋       | 62/226 [00:00<00:01, 119.38it/s]\u001b[A\n",
      " 33%|███▎      | 75/226 [00:00<00:01, 120.55it/s]\u001b[A\n",
      " 39%|███▉      | 88/226 [00:00<00:01, 120.58it/s]\u001b[A\n",
      " 45%|████▍     | 101/226 [00:00<00:01, 121.07it/s]\u001b[A\n",
      " 50%|█████     | 114/226 [00:00<00:00, 121.20it/s]\u001b[A\n",
      " 56%|█████▌    | 127/226 [00:01<00:00, 121.32it/s]\u001b[A\n",
      " 62%|██████▏   | 140/226 [00:01<00:00, 121.86it/s]\u001b[A\n",
      " 68%|██████▊   | 153/226 [00:01<00:00, 122.25it/s]\u001b[A\n",
      " 73%|███████▎  | 166/226 [00:01<00:00, 122.51it/s]\u001b[A\n",
      " 79%|███████▉  | 179/226 [00:01<00:00, 123.64it/s]\u001b[A\n",
      " 85%|████████▍ | 192/226 [00:01<00:00, 122.21it/s]\u001b[A\n",
      " 91%|█████████ | 205/226 [00:01<00:00, 122.35it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 121.38it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.2571656050955414\n",
      "f1: 0.7167792000992745\n",
      "Test Loss: 0.004004, Acc: 0.756015\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2496/2681 [01:00<00:03, 53.33it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/226 [00:00<00:02, 91.69it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.3879\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 20/226 [00:00<00:02, 92.46it/s]\u001b[A\n",
      " 14%|█▍        | 32/226 [00:00<00:01, 99.19it/s]\u001b[A\n",
      " 20%|█▉        | 45/226 [00:00<00:01, 105.03it/s]\u001b[A\n",
      " 25%|██▌       | 57/226 [00:00<00:01, 108.49it/s]\u001b[A\n",
      " 31%|███       | 70/226 [00:00<00:01, 111.92it/s]\u001b[A\n",
      " 37%|███▋      | 83/226 [00:00<00:01, 114.55it/s]\u001b[A\n",
      " 42%|████▏     | 96/226 [00:00<00:01, 116.53it/s]\u001b[A\n",
      " 48%|████▊     | 109/226 [00:00<00:00, 117.74it/s]\u001b[A\n",
      " 54%|█████▎    | 121/226 [00:01<00:00, 117.68it/s]\u001b[A\n",
      " 59%|█████▉    | 134/226 [00:01<00:00, 118.69it/s]\u001b[A\n",
      " 65%|██████▍   | 146/226 [00:01<00:00, 118.97it/s]\u001b[A\n",
      " 70%|███████   | 159/226 [00:01<00:00, 119.73it/s]\u001b[A\n",
      " 76%|███████▌  | 171/226 [00:01<00:00, 119.77it/s]\u001b[A\n",
      " 81%|████████▏ | 184/226 [00:01<00:00, 120.31it/s]\u001b[A\n",
      " 87%|████████▋ | 196/226 [00:01<00:00, 118.17it/s]\u001b[A\n",
      " 92%|█████████▏| 208/226 [00:01<00:00, 118.08it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 116.44it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 93%|█████████▎| 2502/2681 [01:04<00:36,  4.84it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2535828025477707\n",
      "f1: 0.7142969877344995\n",
      "Test Loss: 0.003980, Acc: 0.756361\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2681/2681 [01:07<00:00, 39.48it/s]\n",
      "  5%|▌         | 12/226 [00:00<00:01, 111.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 226/226 [00:01<00:00, 118.96it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2681 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2635350318471338\n",
      "f1: 0.6985332884118807\n",
      "Test Loss: 0.003925, Acc: 0.759887\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▊        | 499/2681 [00:09<00:40, 54.10it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/226 [00:00<00:02, 98.58it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4573\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 20/226 [00:00<00:02, 97.73it/s]\u001b[A\n",
      " 14%|█▎        | 31/226 [00:00<00:01, 100.21it/s]\u001b[A\n",
      " 18%|█▊        | 40/226 [00:00<00:01, 94.31it/s] \u001b[A\n",
      " 23%|██▎       | 51/226 [00:00<00:01, 98.28it/s]\u001b[A\n",
      " 27%|██▋       | 62/226 [00:00<00:01, 101.28it/s]\u001b[A\n",
      " 32%|███▏      | 73/226 [00:00<00:01, 101.48it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 103.50it/s]\u001b[A\n",
      " 42%|████▏     | 95/226 [00:00<00:01, 104.64it/s]\u001b[A\n",
      " 47%|████▋     | 107/226 [00:01<00:01, 106.58it/s]\u001b[A\n",
      " 53%|█████▎    | 119/226 [00:01<00:00, 107.86it/s]\u001b[A\n",
      " 58%|█████▊    | 130/226 [00:01<00:00, 107.54it/s]\u001b[A\n",
      " 62%|██████▏   | 141/226 [00:01<00:00, 107.40it/s]\u001b[A\n",
      " 67%|██████▋   | 152/226 [00:01<00:00, 105.93it/s]\u001b[A\n",
      " 72%|███████▏  | 163/226 [00:01<00:00, 106.38it/s]\u001b[A\n",
      " 77%|███████▋  | 174/226 [00:01<00:00, 107.09it/s]\u001b[A\n",
      " 82%|████████▏ | 185/226 [00:01<00:00, 107.24it/s]\u001b[A\n",
      " 87%|████████▋ | 196/226 [00:01<00:00, 105.46it/s]\u001b[A\n",
      " 92%|█████████▏| 207/226 [00:01<00:00, 105.57it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 104.09it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.2669187898089172\n",
      "f1: 0.7003445941443648\n",
      "Test Loss: 0.003950, Acc: 0.761650\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|███▋      | 994/2681 [00:22<00:30, 55.81it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/226 [00:00<00:01, 109.04it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4535\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|▉         | 22/226 [00:00<00:01, 108.96it/s]\u001b[A\n",
      " 15%|█▌        | 34/226 [00:00<00:01, 110.74it/s]\u001b[A\n",
      " 20%|██        | 46/226 [00:00<00:01, 111.44it/s]\u001b[A\n",
      " 26%|██▌       | 58/226 [00:00<00:01, 112.86it/s]\u001b[A\n",
      " 31%|███       | 70/226 [00:00<00:01, 112.67it/s]\u001b[A\n",
      " 36%|███▋      | 82/226 [00:00<00:01, 113.25it/s]\u001b[A\n",
      " 42%|████▏     | 94/226 [00:00<00:01, 113.81it/s]\u001b[A\n",
      " 47%|████▋     | 106/226 [00:00<00:01, 115.27it/s]\u001b[A\n",
      " 52%|█████▏    | 118/226 [00:01<00:00, 115.01it/s]\u001b[A\n",
      " 58%|█████▊    | 130/226 [00:01<00:00, 116.30it/s]\u001b[A\n",
      " 63%|██████▎   | 142/226 [00:01<00:00, 117.19it/s]\u001b[A\n",
      " 68%|██████▊   | 154/226 [00:01<00:00, 117.78it/s]\u001b[A\n",
      " 73%|███████▎  | 166/226 [00:01<00:00, 116.98it/s]\u001b[A\n",
      " 79%|███████▉  | 178/226 [00:01<00:00, 108.67it/s]\u001b[A\n",
      " 84%|████████▍ | 190/226 [00:01<00:00, 110.77it/s]\u001b[A\n",
      " 89%|████████▉ | 202/226 [00:01<00:00, 113.32it/s]\u001b[A\n",
      " 95%|█████████▍| 214/226 [00:01<00:00, 115.05it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 114.16it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|███▋      | 1005/2681 [00:26<04:16,  6.54it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2561703821656051\n",
      "f1: 0.6919316429766358\n",
      "Test Loss: 0.003937, Acc: 0.758193\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 1499/2681 [00:35<00:18, 64.22it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 112.56it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4303\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 112.76it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 114.28it/s]\u001b[A\n",
      " 19%|█▉        | 44/226 [00:00<00:01, 95.13it/s] \u001b[A\n",
      " 25%|██▌       | 57/226 [00:00<00:01, 101.70it/s]\u001b[A\n",
      " 31%|███       | 69/226 [00:00<00:01, 105.76it/s]\u001b[A\n",
      " 36%|███▌      | 81/226 [00:00<00:01, 109.42it/s]\u001b[A\n",
      " 41%|████      | 93/226 [00:00<00:01, 111.44it/s]\u001b[A\n",
      " 46%|████▋     | 105/226 [00:00<00:01, 113.54it/s]\u001b[A\n",
      " 52%|█████▏    | 118/226 [00:01<00:00, 116.02it/s]\u001b[A\n",
      " 58%|█████▊    | 131/226 [00:01<00:00, 117.47it/s]\u001b[A\n",
      " 63%|██████▎   | 143/226 [00:01<00:00, 117.24it/s]\u001b[A\n",
      " 69%|██████▊   | 155/226 [00:01<00:00, 117.88it/s]\u001b[A\n",
      " 74%|███████▍  | 167/226 [00:01<00:00, 118.37it/s]\u001b[A\n",
      " 79%|███████▉  | 179/226 [00:01<00:00, 118.05it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 118.27it/s]\u001b[A\n",
      " 90%|████████▉ | 203/226 [00:01<00:00, 118.02it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 114.86it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.24263535031847133\n",
      "f1: 0.7204079724680945\n",
      "Test Loss: 0.004035, Acc: 0.749620\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▍  | 1997/2681 [00:47<00:12, 54.70it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 110.76it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5081\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 110.86it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 111.39it/s]\u001b[A\n",
      " 21%|██        | 48/226 [00:00<00:01, 111.88it/s]\u001b[A\n",
      " 27%|██▋       | 60/226 [00:00<00:01, 113.27it/s]\u001b[A\n",
      " 32%|███▏      | 72/226 [00:00<00:01, 113.94it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 115.11it/s]\u001b[A\n",
      " 42%|████▏     | 96/226 [00:00<00:01, 114.48it/s]\u001b[A\n",
      " 48%|████▊     | 108/226 [00:00<00:01, 115.14it/s]\u001b[A\n",
      " 53%|█████▎    | 120/226 [00:01<00:00, 116.30it/s]\u001b[A\n",
      " 58%|█████▊    | 132/226 [00:01<00:00, 117.07it/s]\u001b[A\n",
      " 64%|██████▎   | 144/226 [00:01<00:00, 117.31it/s]\u001b[A\n",
      " 69%|██████▉   | 156/226 [00:01<00:00, 117.10it/s]\u001b[A\n",
      " 74%|███████▍  | 168/226 [00:01<00:00, 116.09it/s]\u001b[A\n",
      " 80%|███████▉  | 180/226 [00:01<00:00, 115.67it/s]\u001b[A\n",
      " 85%|████████▍ | 192/226 [00:01<00:00, 116.18it/s]\u001b[A\n",
      " 90%|█████████ | 204/226 [00:01<00:00, 116.47it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 115.31it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 75%|███████▍  | 2003/2681 [00:51<02:22,  4.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25398089171974525\n",
      "f1: 0.7161365704359446\n",
      "Test Loss: 0.003990, Acc: 0.755566\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2499/2681 [01:00<00:03, 55.21it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 114.33it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.3943\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 113.43it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 114.78it/s]\u001b[A\n",
      " 21%|██        | 48/226 [00:00<00:01, 114.92it/s]\u001b[A\n",
      " 27%|██▋       | 60/226 [00:00<00:01, 115.60it/s]\u001b[A\n",
      " 32%|███▏      | 72/226 [00:00<00:01, 115.75it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 116.99it/s]\u001b[A\n",
      " 43%|████▎     | 97/226 [00:00<00:01, 118.08it/s]\u001b[A\n",
      " 48%|████▊     | 109/226 [00:00<00:00, 117.54it/s]\u001b[A\n",
      " 54%|█████▎    | 121/226 [00:01<00:00, 117.68it/s]\u001b[A\n",
      " 59%|█████▉    | 134/226 [00:01<00:00, 118.88it/s]\u001b[A\n",
      " 65%|██████▍   | 146/226 [00:01<00:00, 118.64it/s]\u001b[A\n",
      " 70%|███████   | 159/226 [00:01<00:00, 119.29it/s]\u001b[A\n",
      " 76%|███████▌  | 172/226 [00:01<00:00, 119.74it/s]\u001b[A\n",
      " 81%|████████▏ | 184/226 [00:01<00:00, 117.01it/s]\u001b[A\n",
      " 87%|████████▋ | 196/226 [00:01<00:00, 116.61it/s]\u001b[A\n",
      " 92%|█████████▏| 208/226 [00:01<00:00, 116.92it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 117.28it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 93%|█████████▎| 2505/2681 [01:04<00:35,  4.93it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2326831210191083\n",
      "f1: 0.7197716616805117\n",
      "Test Loss: 0.004040, Acc: 0.749412\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2681/2681 [01:07<00:00, 39.51it/s]\n",
      "  5%|▌         | 12/226 [00:00<00:01, 116.10it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 226/226 [00:10<00:00, 20.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2681 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2677149681528662\n",
      "f1: 0.6866506425722277\n",
      "Test Loss: 0.003923, Acc: 0.761166\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 18%|█▊        | 495/2681 [00:08<00:38, 57.43it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 110.54it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4594\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 109.65it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 112.47it/s]\u001b[A\n",
      " 22%|██▏       | 49/226 [00:00<00:01, 115.26it/s]\u001b[A\n",
      " 27%|██▋       | 61/226 [00:00<00:01, 116.64it/s]\u001b[A\n",
      " 33%|███▎      | 74/226 [00:00<00:01, 118.91it/s]\u001b[A\n",
      " 38%|███▊      | 87/226 [00:00<00:01, 121.83it/s]\u001b[A\n",
      " 44%|████▍     | 100/226 [00:00<00:01, 122.59it/s]\u001b[A\n",
      " 50%|█████     | 113/226 [00:00<00:00, 122.35it/s]\u001b[A\n",
      " 56%|█████▌    | 126/226 [00:01<00:00, 123.33it/s]\u001b[A\n",
      " 62%|██████▏   | 139/226 [00:01<00:00, 123.73it/s]\u001b[A\n",
      " 67%|██████▋   | 152/226 [00:01<00:00, 125.40it/s]\u001b[A\n",
      " 73%|███████▎  | 165/226 [00:01<00:00, 126.17it/s]\u001b[A\n",
      " 79%|███████▉  | 178/226 [00:01<00:00, 125.98it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 126.17it/s]\u001b[A\n",
      " 90%|█████████ | 204/226 [00:01<00:00, 126.17it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 123.09it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 19%|█▊        | 501/2681 [00:12<07:16,  4.99it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25557324840764334\n",
      "f1: 0.7156012036883957\n",
      "Test Loss: 0.003977, Acc: 0.755531\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|███▋      | 998/2681 [00:20<00:28, 58.07it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 114.94it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4554\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 115.70it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 117.47it/s]\u001b[A\n",
      " 22%|██▏       | 49/226 [00:00<00:01, 118.20it/s]\u001b[A\n",
      " 27%|██▋       | 62/226 [00:00<00:01, 118.92it/s]\u001b[A\n",
      " 33%|███▎      | 74/226 [00:00<00:01, 119.22it/s]\u001b[A\n",
      " 38%|███▊      | 86/226 [00:00<00:01, 118.23it/s]\u001b[A\n",
      " 44%|████▍     | 99/226 [00:00<00:01, 119.49it/s]\u001b[A\n",
      " 49%|████▉     | 111/226 [00:00<00:01, 111.53it/s]\u001b[A\n",
      " 55%|█████▍    | 124/226 [00:01<00:00, 115.35it/s]\u001b[A\n",
      " 61%|██████    | 137/226 [00:01<00:00, 117.86it/s]\u001b[A\n",
      " 66%|██████▌   | 149/226 [00:01<00:00, 118.08it/s]\u001b[A\n",
      " 71%|███████   | 161/226 [00:01<00:00, 118.30it/s]\u001b[A\n",
      " 77%|███████▋  | 174/226 [00:01<00:00, 119.12it/s]\u001b[A\n",
      " 83%|████████▎ | 187/226 [00:01<00:00, 119.61it/s]\u001b[A\n",
      " 88%|████████▊ | 199/226 [00:01<00:00, 119.64it/s]\u001b[A\n",
      " 93%|█████████▎| 211/226 [00:01<00:00, 119.74it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 118.06it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.2567675159235669\n",
      "f1: 0.703911125587091\n",
      "Test Loss: 0.003952, Acc: 0.756845\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 1496/2681 [00:33<00:22, 51.84it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 119.55it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4038\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 118.97it/s]\u001b[A\n",
      " 16%|█▋        | 37/226 [00:00<00:01, 120.69it/s]\u001b[A\n",
      " 22%|██▏       | 49/226 [00:00<00:01, 120.42it/s]\u001b[A\n",
      " 27%|██▋       | 62/226 [00:00<00:01, 121.94it/s]\u001b[A\n",
      " 32%|███▏      | 73/226 [00:00<00:01, 111.30it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 108.42it/s]\u001b[A\n",
      " 42%|████▏     | 96/226 [00:00<00:01, 110.39it/s]\u001b[A\n",
      " 48%|████▊     | 108/226 [00:00<00:01, 111.37it/s]\u001b[A\n",
      " 53%|█████▎    | 119/226 [00:01<00:00, 110.68it/s]\u001b[A\n",
      " 58%|█████▊    | 131/226 [00:01<00:00, 112.30it/s]\u001b[A\n",
      " 63%|██████▎   | 143/226 [00:01<00:00, 112.86it/s]\u001b[A\n",
      " 69%|██████▊   | 155/226 [00:01<00:00, 114.83it/s]\u001b[A\n",
      " 74%|███████▍  | 167/226 [00:01<00:00, 115.29it/s]\u001b[A\n",
      " 79%|███████▉  | 179/226 [00:01<00:00, 115.62it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 113.91it/s]\u001b[A\n",
      " 90%|████████▉ | 203/226 [00:01<00:00, 114.77it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 114.27it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.2635350318471338\n",
      "f1: 0.6748052780994729\n",
      "Test Loss: 0.003934, Acc: 0.759334\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▍  | 1994/2681 [00:47<00:13, 52.44it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/226 [00:00<00:02, 106.52it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5078\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|█         | 23/226 [00:00<00:01, 109.33it/s]\u001b[A\n",
      " 15%|█▌        | 35/226 [00:00<00:01, 111.78it/s]\u001b[A\n",
      " 21%|██        | 48/226 [00:00<00:01, 115.06it/s]\u001b[A\n",
      " 27%|██▋       | 61/226 [00:00<00:01, 117.05it/s]\u001b[A\n",
      " 33%|███▎      | 74/226 [00:00<00:01, 118.70it/s]\u001b[A\n",
      " 38%|███▊      | 87/226 [00:00<00:01, 119.75it/s]\u001b[A\n",
      " 44%|████▍     | 100/226 [00:00<00:01, 120.60it/s]\u001b[A\n",
      " 50%|█████     | 113/226 [00:00<00:00, 121.47it/s]\u001b[A\n",
      " 56%|█████▌    | 126/226 [00:01<00:00, 122.15it/s]\u001b[A\n",
      " 62%|██████▏   | 139/226 [00:01<00:00, 122.55it/s]\u001b[A\n",
      " 67%|██████▋   | 152/226 [00:01<00:00, 122.86it/s]\u001b[A\n",
      " 73%|███████▎  | 165/226 [00:01<00:00, 121.94it/s]\u001b[A\n",
      " 79%|███████▉  | 178/226 [00:01<00:00, 122.81it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 122.60it/s]\u001b[A\n",
      " 90%|█████████ | 204/226 [00:01<00:00, 122.54it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:01<00:00, 120.86it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 75%|███████▍  | 2005/2681 [00:51<01:41,  6.69it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2703025477707006\n",
      "f1: 0.69213969196056\n",
      "Test Loss: 0.003931, Acc: 0.761166\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2495/2681 [01:00<00:03, 50.61it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 112.58it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4026\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|█         | 23/226 [00:00<00:01, 110.99it/s]\u001b[A\n",
      " 15%|█▌        | 35/226 [00:00<00:01, 112.14it/s]\u001b[A\n",
      " 21%|██        | 47/226 [00:00<00:01, 113.96it/s]\u001b[A\n",
      " 26%|██▌       | 59/226 [00:00<00:01, 114.17it/s]\u001b[A\n",
      " 31%|███       | 69/226 [00:00<00:01, 98.81it/s] \u001b[A\n",
      " 36%|███▌      | 81/226 [00:00<00:01, 103.59it/s]\u001b[A\n",
      " 41%|████      | 93/226 [00:00<00:01, 106.89it/s]\u001b[A\n",
      " 46%|████▋     | 105/226 [00:00<00:01, 109.79it/s]\u001b[A\n",
      " 52%|█████▏    | 117/226 [00:01<00:00, 111.93it/s]\u001b[A\n",
      " 57%|█████▋    | 129/226 [00:01<00:00, 112.70it/s]\u001b[A\n",
      " 62%|██████▏   | 141/226 [00:01<00:00, 114.44it/s]\u001b[A\n",
      " 68%|██████▊   | 153/226 [00:01<00:00, 114.02it/s]\u001b[A\n",
      " 73%|███████▎  | 165/226 [00:01<00:00, 115.59it/s]\u001b[A\n",
      " 78%|███████▊  | 177/226 [00:01<00:00, 116.26it/s]\u001b[A\n",
      " 84%|████████▎ | 189/226 [00:01<00:00, 115.82it/s]\u001b[A\n",
      " 89%|████████▉ | 201/226 [00:01<00:00, 115.54it/s]\u001b[A\n",
      " 94%|█████████▍| 213/226 [00:01<00:00, 115.10it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 112.47it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 93%|█████████▎| 2501/2681 [01:04<00:37,  4.78it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2366640127388535\n",
      "f1: 0.7193086460602497\n",
      "Test Loss: 0.004045, Acc: 0.749654\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2681/2681 [01:07<00:00, 39.56it/s]\n",
      "  5%|▌         | 12/226 [00:00<00:01, 116.78it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 226/226 [00:01<00:00, 117.96it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2681 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26054936305732485\n",
      "f1: 0.6638657244648544\n",
      "Test Loss: 0.003968, Acc: 0.757812\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▊        | 497/2681 [00:09<00:42, 51.14it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/226 [00:00<00:02, 97.72it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4541\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 20/226 [00:00<00:02, 96.96it/s]\u001b[A\n",
      " 14%|█▎        | 31/226 [00:00<00:01, 99.81it/s]\u001b[A\n",
      " 19%|█▉        | 43/226 [00:00<00:01, 103.15it/s]\u001b[A\n",
      " 24%|██▍       | 54/226 [00:00<00:01, 105.05it/s]\u001b[A\n",
      " 29%|██▉       | 65/226 [00:00<00:01, 104.86it/s]\u001b[A\n",
      " 34%|███▎      | 76/226 [00:00<00:01, 106.10it/s]\u001b[A\n",
      " 38%|███▊      | 87/226 [00:00<00:01, 106.95it/s]\u001b[A\n",
      " 43%|████▎     | 98/226 [00:00<00:01, 107.81it/s]\u001b[A\n",
      " 48%|████▊     | 109/226 [00:01<00:01, 107.62it/s]\u001b[A\n",
      " 53%|█████▎    | 120/226 [00:01<00:00, 107.41it/s]\u001b[A\n",
      " 58%|█████▊    | 131/226 [00:01<00:00, 107.14it/s]\u001b[A\n",
      " 63%|██████▎   | 142/226 [00:01<00:00, 106.37it/s]\u001b[A\n",
      " 68%|██████▊   | 153/226 [00:01<00:00, 106.89it/s]\u001b[A\n",
      " 73%|███████▎  | 164/226 [00:01<00:00, 107.31it/s]\u001b[A\n",
      " 77%|███████▋  | 175/226 [00:01<00:00, 107.03it/s]\u001b[A\n",
      " 82%|████████▏ | 186/226 [00:01<00:00, 107.34it/s]\u001b[A\n",
      " 87%|████████▋ | 197/226 [00:01<00:00, 106.66it/s]\u001b[A\n",
      " 92%|█████████▏| 208/226 [00:01<00:00, 106.95it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 105.83it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 19%|█▉        | 503/2681 [00:13<07:46,  4.67it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2693073248407643\n",
      "f1: 0.6983639425418972\n",
      "Test Loss: 0.003947, Acc: 0.761892\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|███▋      | 995/2681 [00:22<00:31, 53.26it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 8/226 [00:00<00:03, 69.12it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4545\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 18/226 [00:00<00:02, 75.90it/s]\u001b[A\n",
      " 11%|█         | 25/226 [00:00<00:02, 71.59it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:02, 79.05it/s]\u001b[A\n",
      " 21%|██        | 48/226 [00:00<00:02, 86.87it/s]\u001b[A\n",
      " 27%|██▋       | 60/226 [00:00<00:01, 93.80it/s]\u001b[A\n",
      " 32%|███▏      | 72/226 [00:00<00:01, 99.22it/s]\u001b[A\n",
      " 37%|███▋      | 83/226 [00:00<00:01, 101.94it/s]\u001b[A\n",
      " 42%|████▏     | 95/226 [00:00<00:01, 105.90it/s]\u001b[A\n",
      " 47%|████▋     | 107/226 [00:01<00:01, 109.29it/s]\u001b[A\n",
      " 53%|█████▎    | 119/226 [00:01<00:00, 112.26it/s]\u001b[A\n",
      " 58%|█████▊    | 131/226 [00:01<00:00, 113.25it/s]\u001b[A\n",
      " 63%|██████▎   | 143/226 [00:01<00:00, 114.40it/s]\u001b[A\n",
      " 69%|██████▊   | 155/226 [00:01<00:00, 115.33it/s]\u001b[A\n",
      " 74%|███████▍  | 167/226 [00:01<00:00, 114.12it/s]\u001b[A\n",
      " 79%|███████▉  | 179/226 [00:01<00:00, 114.86it/s]\u001b[A\n",
      " 85%|████████▍ | 191/226 [00:01<00:00, 115.16it/s]\u001b[A\n",
      " 90%|████████▉ | 203/226 [00:01<00:00, 115.72it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 108.68it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 37%|███▋      | 1001/2681 [00:26<06:03,  4.62it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2559713375796178\n",
      "f1: 0.7048779135761722\n",
      "Test Loss: 0.003953, Acc: 0.756983\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 56%|█████▌    | 1497/2681 [00:36<00:22, 52.35it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/226 [00:00<00:02, 103.77it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4033\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|▉         | 22/226 [00:00<00:01, 104.81it/s]\u001b[A\n",
      " 15%|█▌        | 34/226 [00:00<00:01, 107.43it/s]\u001b[A\n",
      " 20%|██        | 46/226 [00:00<00:01, 110.00it/s]\u001b[A\n",
      " 26%|██▌       | 58/226 [00:00<00:01, 111.65it/s]\u001b[A\n",
      " 31%|███       | 70/226 [00:00<00:01, 112.82it/s]\u001b[A\n",
      " 36%|███▌      | 81/226 [00:00<00:01, 96.91it/s] \u001b[A\n",
      " 40%|████      | 91/226 [00:00<00:01, 89.84it/s]\u001b[A\n",
      " 45%|████▍     | 101/226 [00:01<00:01, 92.60it/s]\u001b[A\n",
      " 50%|█████     | 113/226 [00:01<00:01, 98.99it/s]\u001b[A\n",
      " 56%|█████▌    | 126/226 [00:01<00:00, 104.75it/s]\u001b[A\n",
      " 61%|██████    | 138/226 [00:01<00:00, 107.99it/s]\u001b[A\n",
      " 67%|██████▋   | 151/226 [00:01<00:00, 111.67it/s]\u001b[A\n",
      " 73%|███████▎  | 164/226 [00:01<00:00, 114.23it/s]\u001b[A\n",
      " 78%|███████▊  | 176/226 [00:01<00:00, 115.82it/s]\u001b[A\n",
      " 84%|████████▎ | 189/226 [00:01<00:00, 117.55it/s]\u001b[A\n",
      " 89%|████████▉ | 201/226 [00:01<00:00, 117.07it/s]\u001b[A\n",
      " 94%|█████████▍| 213/226 [00:01<00:00, 117.61it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 109.86it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.26731687898089174\n",
      "f1: 0.6889148537237796\n",
      "Test Loss: 0.003927, Acc: 0.761027\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 75%|███████▍  | 1998/2681 [00:49<00:13, 51.85it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 114.45it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5034\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 113.43it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 114.53it/s]\u001b[A\n",
      " 21%|██        | 48/226 [00:00<00:01, 115.46it/s]\u001b[A\n",
      " 27%|██▋       | 60/226 [00:00<00:01, 115.28it/s]\u001b[A\n",
      " 32%|███▏      | 72/226 [00:00<00:01, 116.04it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 116.37it/s]\u001b[A\n",
      " 42%|████▏     | 96/226 [00:00<00:01, 116.83it/s]\u001b[A\n",
      " 48%|████▊     | 108/226 [00:00<00:01, 101.36it/s]\u001b[A\n",
      " 53%|█████▎    | 120/226 [00:01<00:01, 105.31it/s]\u001b[A\n",
      " 58%|█████▊    | 131/226 [00:01<00:00, 100.60it/s]\u001b[A\n",
      " 63%|██████▎   | 143/226 [00:01<00:00, 105.50it/s]\u001b[A\n",
      " 68%|██████▊   | 154/226 [00:01<00:00, 101.97it/s]\u001b[A\n",
      " 73%|███████▎  | 165/226 [00:01<00:00, 102.87it/s]\u001b[A\n",
      " 78%|███████▊  | 177/226 [00:01<00:00, 106.96it/s]\u001b[A\n",
      " 84%|████████▍ | 190/226 [00:01<00:00, 111.15it/s]\u001b[A\n",
      " 89%|████████▉ | 202/226 [00:01<00:00, 113.65it/s]\u001b[A\n",
      " 95%|█████████▍| 214/226 [00:01<00:00, 115.30it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 110.64it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n",
      "em: 0.2705015923566879\n",
      "f1: 0.6942593588294311\n",
      "Test Loss: 0.003937, Acc: 0.760163\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 93%|█████████▎| 2499/2681 [01:03<00:03, 52.92it/s]\n",
      "  0%|          | 0/226 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/226 [00:00<00:01, 112.03it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.404\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 24/226 [00:00<00:01, 111.82it/s]\u001b[A\n",
      " 16%|█▌        | 36/226 [00:00<00:01, 112.37it/s]\u001b[A\n",
      " 21%|██        | 48/226 [00:00<00:01, 113.75it/s]\u001b[A\n",
      " 27%|██▋       | 60/226 [00:00<00:01, 115.18it/s]\u001b[A\n",
      " 32%|███▏      | 72/226 [00:00<00:01, 115.30it/s]\u001b[A\n",
      " 37%|███▋      | 84/226 [00:00<00:01, 116.44it/s]\u001b[A\n",
      " 42%|████▏     | 96/226 [00:00<00:01, 115.38it/s]\u001b[A\n",
      " 48%|████▊     | 108/226 [00:00<00:01, 115.92it/s]\u001b[A\n",
      " 53%|█████▎    | 120/226 [00:01<00:00, 116.94it/s]\u001b[A\n",
      " 58%|█████▊    | 132/226 [00:01<00:00, 117.37it/s]\u001b[A\n",
      " 64%|██████▎   | 144/226 [00:01<00:00, 116.83it/s]\u001b[A\n",
      " 69%|██████▉   | 156/226 [00:01<00:00, 116.65it/s]\u001b[A\n",
      " 74%|███████▍  | 168/226 [00:01<00:00, 88.13it/s] \u001b[A\n",
      " 80%|████████  | 181/226 [00:01<00:00, 96.28it/s]\u001b[A\n",
      " 86%|████████▌ | 194/226 [00:01<00:00, 102.69it/s]\u001b[A\n",
      " 91%|█████████ | 206/226 [00:01<00:00, 106.63it/s]\u001b[A\n",
      "100%|██████████| 226/226 [00:02<00:00, 110.21it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 93%|█████████▎| 2505/2681 [01:07<00:37,  4.73it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2366640127388535\n",
      "f1: 0.7189015524903365\n",
      "Test Loss: 0.004034, Acc: 0.749412\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2681/2681 [01:10<00:00, 37.99it/s]\n",
      "  5%|▌         | 12/226 [00:00<00:01, 112.98it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 226/226 [00:01<00:00, 118.87it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 28928 dev_label_list: 28928 example_id_list: 28928\n",
      "num: 28928\n",
      "n: 5024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2681 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26253980891719747\n",
      "f1: 0.6704593133335243\n",
      "Test Loss: 0.003949, Acc: 0.758193\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 13%|█▎        | 357/2681 [00:06<00:43, 53.05it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-147-042e5cd32d73>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent9verb4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-144-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m                 \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     79\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m                 \u001b[0mepoch\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m     91\u001b[0m     Variable._execution_engine.run_backward(\n\u001b[1;32m     92\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m         allow_unreachable=True)  # allow_unreachable flag\n\u001b[0m\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#sent9verb4\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 29%|██▉       | 496/1683 [00:09<00:22, 53.03it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 6/147 [00:00<00:02, 54.33it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5258\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 11/147 [00:00<00:02, 52.35it/s]\u001b[A\n",
      " 12%|█▏        | 17/147 [00:00<00:02, 51.81it/s]\u001b[A\n",
      " 15%|█▍        | 22/147 [00:00<00:02, 50.61it/s]\u001b[A\n",
      " 19%|█▉        | 28/147 [00:00<00:02, 50.63it/s]\u001b[A\n",
      " 23%|██▎       | 34/147 [00:00<00:02, 51.12it/s]\u001b[A\n",
      " 28%|██▊       | 41/147 [00:00<00:01, 53.77it/s]\u001b[A\n",
      " 33%|███▎      | 49/147 [00:00<00:01, 58.88it/s]\u001b[A\n",
      " 39%|███▉      | 58/147 [00:00<00:01, 65.53it/s]\u001b[A\n",
      " 46%|████▌     | 67/147 [00:01<00:01, 70.83it/s]\u001b[A\n",
      " 52%|█████▏    | 77/147 [00:01<00:00, 75.78it/s]\u001b[A\n",
      " 59%|█████▉    | 87/147 [00:01<00:00, 80.57it/s]\u001b[A\n",
      " 66%|██████▌   | 97/147 [00:01<00:00, 84.35it/s]\u001b[A\n",
      " 73%|███████▎  | 107/147 [00:01<00:00, 86.63it/s]\u001b[A\n",
      " 79%|███████▉  | 116/147 [00:01<00:00, 87.24it/s]\u001b[A\n",
      " 86%|████████▌ | 126/147 [00:01<00:00, 88.55it/s]\u001b[A\n",
      " 93%|█████████▎| 136/147 [00:01<00:00, 90.48it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 75.00it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 30%|██▉       | 502/1683 [00:13<03:52,  5.09it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26049674748669427\n",
      "f1: 0.7323714888173843\n",
      "Test Loss: 0.004033, Acc: 0.751488\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 59%|█████▉    | 998/1683 [00:22<00:12, 52.97it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 7/147 [00:00<00:02, 69.70it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.476\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 13/147 [00:00<00:02, 65.69it/s]\u001b[A\n",
      " 13%|█▎        | 19/147 [00:00<00:02, 63.62it/s]\u001b[A\n",
      " 18%|█▊        | 26/147 [00:00<00:01, 64.47it/s]\u001b[A\n",
      " 22%|██▏       | 32/147 [00:00<00:01, 62.72it/s]\u001b[A\n",
      " 27%|██▋       | 39/147 [00:00<00:01, 63.49it/s]\u001b[A\n",
      " 32%|███▏      | 47/147 [00:00<00:01, 67.49it/s]\u001b[A\n",
      " 39%|███▉      | 58/147 [00:00<00:01, 75.24it/s]\u001b[A\n",
      " 46%|████▋     | 68/147 [00:00<00:00, 80.60it/s]\u001b[A\n",
      " 53%|█████▎    | 78/147 [00:01<00:00, 83.82it/s]\u001b[A\n",
      " 60%|█████▉    | 88/147 [00:01<00:00, 87.33it/s]\u001b[A\n",
      " 67%|██████▋   | 98/147 [00:01<00:00, 89.70it/s]\u001b[A\n",
      " 73%|███████▎  | 108/147 [00:01<00:00, 92.00it/s]\u001b[A\n",
      " 80%|████████  | 118/147 [00:01<00:00, 92.05it/s]\u001b[A\n",
      " 87%|████████▋ | 128/147 [00:01<00:00, 92.06it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 82.98it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.2773506800709639\n",
      "f1: 0.7014568255845648\n",
      "Test Loss: 0.003942, Acc: 0.758769\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 89%|████████▉ | 1495/1683 [00:35<00:03, 51.26it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 7/147 [00:00<00:02, 66.76it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4883\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 13/147 [00:00<00:02, 63.81it/s]\u001b[A\n",
      " 13%|█▎        | 19/147 [00:00<00:02, 60.13it/s]\u001b[A\n",
      " 16%|█▋        | 24/147 [00:00<00:02, 54.76it/s]\u001b[A\n",
      " 20%|█▉        | 29/147 [00:00<00:02, 52.42it/s]\u001b[A\n",
      " 24%|██▍       | 35/147 [00:00<00:02, 52.47it/s]\u001b[A\n",
      " 28%|██▊       | 41/147 [00:00<00:02, 52.59it/s]\u001b[A\n",
      " 33%|███▎      | 49/147 [00:00<00:01, 58.16it/s]\u001b[A\n",
      " 40%|████      | 59/147 [00:00<00:01, 65.90it/s]\u001b[A\n",
      " 46%|████▋     | 68/147 [00:01<00:01, 70.98it/s]\u001b[A\n",
      " 52%|█████▏    | 77/147 [00:01<00:00, 74.96it/s]\u001b[A\n",
      " 59%|█████▊    | 86/147 [00:01<00:00, 77.92it/s]\u001b[A\n",
      " 65%|██████▍   | 95/147 [00:01<00:00, 80.91it/s]\u001b[A\n",
      " 71%|███████   | 104/147 [00:01<00:00, 82.31it/s]\u001b[A\n",
      " 77%|███████▋  | 113/147 [00:01<00:00, 83.00it/s]\u001b[A\n",
      " 83%|████████▎ | 122/147 [00:01<00:00, 84.61it/s]\u001b[A\n",
      " 89%|████████▉ | 131/147 [00:01<00:00, 85.69it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 73.94it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.2814902424600828\n",
      "f1: 0.7193766250833121\n",
      "Test Loss: 0.003959, Acc: 0.760736\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1683/1683 [00:43<00:00, 39.03it/s]\n",
      "  5%|▌         | 8/147 [00:00<00:01, 75.81it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 147/147 [00:01<00:00, 89.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/1683 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28503843879361324\n",
      "f1: 0.7172521659955176\n",
      "Test Loss: 0.003937, Acc: 0.759938\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 29%|██▉       | 496/1683 [00:09<00:23, 50.46it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 8/147 [00:00<00:01, 78.30it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5535\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|█         | 15/147 [00:00<00:01, 75.26it/s]\u001b[A\n",
      " 15%|█▍        | 22/147 [00:00<00:01, 72.95it/s]\u001b[A\n",
      " 20%|██        | 30/147 [00:00<00:01, 74.04it/s]\u001b[A\n",
      " 26%|██▌       | 38/147 [00:00<00:01, 74.05it/s]\u001b[A\n",
      " 31%|███▏      | 46/147 [00:00<00:01, 75.15it/s]\u001b[A\n",
      " 38%|███▊      | 56/147 [00:00<00:01, 81.04it/s]\u001b[A\n",
      " 45%|████▍     | 66/147 [00:00<00:00, 85.21it/s]\u001b[A\n",
      " 52%|█████▏    | 76/147 [00:00<00:00, 86.93it/s]\u001b[A\n",
      " 59%|█████▊    | 86/147 [00:01<00:00, 90.01it/s]\u001b[A\n",
      " 65%|██████▌   | 96/147 [00:01<00:00, 92.10it/s]\u001b[A\n",
      " 72%|███████▏  | 106/147 [00:01<00:00, 91.91it/s]\u001b[A\n",
      " 79%|███████▉  | 116/147 [00:01<00:00, 93.85it/s]\u001b[A\n",
      " 86%|████████▌ | 126/147 [00:01<00:00, 95.07it/s]\u001b[A\n",
      " 93%|█████████▎| 136/147 [00:01<00:00, 96.31it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 87.69it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.2826729745712596\n",
      "f1: 0.7063930894652402\n",
      "Test Loss: 0.003938, Acc: 0.759673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 59%|█████▉    | 994/1683 [00:22<00:13, 49.67it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 9/147 [00:00<00:01, 89.57it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 16.84\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|▉         | 14/147 [00:00<00:01, 70.84it/s]\u001b[A\n",
      " 14%|█▎        | 20/147 [00:00<00:01, 66.65it/s]\u001b[A\n",
      " 18%|█▊        | 26/147 [00:00<00:01, 64.32it/s]\u001b[A\n",
      " 22%|██▏       | 33/147 [00:00<00:01, 64.04it/s]\u001b[A\n",
      " 27%|██▋       | 40/147 [00:00<00:01, 64.53it/s]\u001b[A\n",
      " 33%|███▎      | 49/147 [00:00<00:01, 69.59it/s]\u001b[A\n",
      " 40%|████      | 59/147 [00:00<00:01, 76.57it/s]\u001b[A\n",
      " 47%|████▋     | 69/147 [00:00<00:00, 82.23it/s]\u001b[A\n",
      " 54%|█████▎    | 79/147 [00:01<00:00, 86.13it/s]\u001b[A\n",
      " 61%|██████    | 90/147 [00:01<00:00, 90.04it/s]\u001b[A\n",
      " 68%|██████▊   | 100/147 [00:01<00:00, 92.53it/s]\u001b[A\n",
      " 75%|███████▍  | 110/147 [00:01<00:00, 94.46it/s]\u001b[A\n",
      " 82%|████████▏ | 120/147 [00:01<00:00, 94.34it/s]\u001b[A\n",
      " 88%|████████▊ | 130/147 [00:01<00:00, 95.75it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 84.54it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.06091070372560615\n",
      "f1: 0.6230046770037929\n",
      "Test Loss: 0.123284, Acc: 0.428890\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 89%|████████▉ | 1496/1683 [00:35<00:03, 52.90it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 9/147 [00:00<00:01, 80.09it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 17.92\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 17/147 [00:00<00:01, 77.06it/s]\u001b[A\n",
      " 16%|█▋        | 24/147 [00:00<00:01, 73.69it/s]\u001b[A\n",
      " 22%|██▏       | 32/147 [00:00<00:01, 74.65it/s]\u001b[A\n",
      " 26%|██▌       | 38/147 [00:00<00:01, 63.37it/s]\u001b[A\n",
      " 32%|███▏      | 47/147 [00:00<00:01, 68.15it/s]\u001b[A\n",
      " 39%|███▉      | 58/147 [00:00<00:01, 76.62it/s]\u001b[A\n",
      " 47%|████▋     | 69/147 [00:00<00:00, 83.10it/s]\u001b[A\n",
      " 54%|█████▍    | 80/147 [00:00<00:00, 89.11it/s]\u001b[A\n",
      " 62%|██████▏   | 91/147 [00:01<00:00, 93.20it/s]\u001b[A\n",
      " 70%|███████   | 103/147 [00:01<00:00, 97.82it/s]\u001b[A\n",
      " 78%|███████▊  | 114/147 [00:01<00:00, 100.00it/s]\u001b[A\n",
      " 85%|████████▌ | 125/147 [00:01<00:00, 92.61it/s] \u001b[A\n",
      " 93%|█████████▎| 136/147 [00:01<00:00, 95.57it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 89.14it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.06091070372560615\n",
      "f1: 0.6230046770037929\n",
      "Test Loss: 0.123284, Acc: 0.428890\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1683/1683 [00:41<00:00, 40.19it/s]\n",
      "  7%|▋         | 10/147 [00:00<00:01, 96.83it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 147/147 [00:01<00:00, 98.40it/s] \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.06091070372560615\n",
      "f1: 0.6230046770037929\n",
      "Test Loss: 0.123284, Acc: 0.428890\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 29%|██▉       | 495/1683 [00:09<00:21, 55.96it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 7/147 [00:00<00:02, 61.49it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 15.11\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 13/147 [00:00<00:02, 60.18it/s]\u001b[A\n",
      " 12%|█▏        | 18/147 [00:00<00:02, 55.40it/s]\u001b[A\n",
      " 16%|█▋        | 24/147 [00:00<00:02, 56.18it/s]\u001b[A\n",
      " 20%|█▉        | 29/147 [00:00<00:02, 43.50it/s]\u001b[A\n",
      " 24%|██▍       | 35/147 [00:00<00:02, 47.12it/s]\u001b[A\n",
      " 29%|██▊       | 42/147 [00:00<00:02, 50.27it/s]\u001b[A\n",
      " 35%|███▍      | 51/147 [00:00<00:01, 57.06it/s]\u001b[A\n",
      " 41%|████▏     | 61/147 [00:01<00:01, 64.69it/s]\u001b[A\n",
      " 49%|████▉     | 72/147 [00:01<00:01, 72.75it/s]\u001b[A\n",
      " 56%|█████▌    | 82/147 [00:01<00:00, 79.07it/s]\u001b[A\n",
      " 63%|██████▎   | 93/147 [00:01<00:00, 85.46it/s]\u001b[A\n",
      " 71%|███████   | 104/147 [00:01<00:00, 89.73it/s]\u001b[A\n",
      " 78%|███████▊  | 115/147 [00:01<00:00, 93.46it/s]\u001b[A\n",
      " 85%|████████▌ | 125/147 [00:01<00:00, 94.19it/s]\u001b[A\n",
      " 93%|█████████▎| 136/147 [00:01<00:00, 97.15it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 78.07it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.06091070372560615\n",
      "f1: 0.6230046770037929\n",
      "Test Loss: 0.123284, Acc: 0.428890\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 59%|█████▉    | 999/1683 [00:21<00:12, 56.68it/s]\n",
      "  0%|          | 0/147 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 10/147 [00:00<00:01, 92.98it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 16.84\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 18/147 [00:00<00:01, 88.00it/s]\u001b[A\n",
      " 18%|█▊        | 26/147 [00:00<00:01, 83.03it/s]\u001b[A\n",
      " 23%|██▎       | 34/147 [00:00<00:01, 81.78it/s]\u001b[A\n",
      " 29%|██▉       | 43/147 [00:00<00:01, 83.44it/s]\u001b[A\n",
      " 37%|███▋      | 55/147 [00:00<00:01, 90.25it/s]\u001b[A\n",
      " 46%|████▌     | 67/147 [00:00<00:00, 96.00it/s]\u001b[A\n",
      " 53%|█████▎    | 78/147 [00:00<00:00, 98.91it/s]\u001b[A\n",
      " 61%|██████    | 89/147 [00:00<00:00, 99.90it/s]\u001b[A\n",
      " 68%|██████▊   | 100/147 [00:01<00:00, 102.40it/s]\u001b[A\n",
      " 76%|███████▌  | 111/147 [00:01<00:00, 103.28it/s]\u001b[A\n",
      " 83%|████████▎ | 122/147 [00:01<00:00, 104.07it/s]\u001b[A\n",
      " 90%|█████████ | 133/147 [00:01<00:00, 105.05it/s]\u001b[A\n",
      "100%|██████████| 147/147 [00:01<00:00, 98.20it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 18816 dev_label_list: 18816 example_id_list: 18816\n",
      "num: 18816\n",
      "n: 3382\n",
      "em: 0.06091070372560615\n",
      "f1: 0.6230046770037929\n",
      "Test Loss: 0.123284, Acc: 0.428890\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 81%|████████  | 1363/1683 [00:30<00:07, 44.03it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-135-1fae45a67e3f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent9verb3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-130-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m                 \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     79\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m                 \u001b[0mepoch\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m     91\u001b[0m     Variable._execution_engine.run_backward(\n\u001b[1;32m     92\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m         allow_unreachable=True)  # allow_unreachable flag\n\u001b[0m\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#sent9verb3\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 494/4749 [00:08<01:09, 61.02it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 145.03it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5226\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 29/403 [00:00<00:02, 142.83it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 144.23it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 144.64it/s]\u001b[A\n",
      " 18%|█▊        | 72/403 [00:00<00:02, 138.39it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:02, 126.33it/s]\u001b[A\n",
      " 24%|██▍       | 97/403 [00:00<00:02, 127.37it/s]\u001b[A\n",
      " 27%|██▋       | 110/403 [00:00<00:02, 126.04it/s]\u001b[A\n",
      " 31%|███       | 124/403 [00:00<00:02, 128.79it/s]\u001b[A\n",
      " 34%|███▍      | 138/403 [00:01<00:02, 131.25it/s]\u001b[A\n",
      " 38%|███▊      | 153/403 [00:01<00:01, 134.24it/s]\u001b[A\n",
      " 41%|████▏     | 167/403 [00:01<00:01, 133.28it/s]\u001b[A\n",
      " 45%|████▌     | 182/403 [00:01<00:01, 136.42it/s]\u001b[A\n",
      " 49%|████▊     | 196/403 [00:01<00:01, 131.00it/s]\u001b[A\n",
      " 52%|█████▏    | 210/403 [00:01<00:01, 131.84it/s]\u001b[A\n",
      " 56%|█████▌    | 224/403 [00:01<00:01, 133.39it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 133.90it/s]\u001b[A\n",
      " 63%|██████▎   | 252/403 [00:01<00:01, 134.13it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 136.30it/s]\u001b[A\n",
      " 70%|██████▉   | 281/403 [00:02<00:01, 119.33it/s]\u001b[A\n",
      " 73%|███████▎  | 296/403 [00:02<00:00, 126.09it/s]\u001b[A\n",
      " 77%|███████▋  | 309/403 [00:02<00:00, 116.79it/s]\u001b[A\n",
      " 80%|███████▉  | 322/403 [00:02<00:00, 117.40it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 120.75it/s]\u001b[A\n",
      " 87%|████████▋ | 350/403 [00:02<00:00, 125.53it/s]\u001b[A\n",
      " 90%|█████████ | 363/403 [00:02<00:00, 126.55it/s]\u001b[A\n",
      " 94%|█████████▎| 377/403 [00:02<00:00, 127.66it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:03<00:00, 129.84it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 11%|█         | 501/4749 [00:13<15:42,  4.51it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.24695056708752408\n",
      "f1: 0.6466290646367746\n",
      "Test Loss: 0.008434, Acc: 0.746627\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 993/4749 [00:21<00:59, 63.14it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 147.19it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5646\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 146.72it/s]\u001b[A\n",
      " 11%|█▏        | 46/403 [00:00<00:02, 148.10it/s]\u001b[A\n",
      " 15%|█▌        | 61/403 [00:00<00:02, 148.64it/s]\u001b[A\n",
      " 19%|█▉        | 77/403 [00:00<00:02, 149.50it/s]\u001b[A\n",
      " 23%|██▎       | 93/403 [00:00<00:02, 150.09it/s]\u001b[A\n",
      " 27%|██▋       | 109/403 [00:00<00:01, 150.98it/s]\u001b[A\n",
      " 31%|███       | 124/403 [00:00<00:01, 148.89it/s]\u001b[A\n",
      " 35%|███▍      | 140/403 [00:00<00:01, 150.33it/s]\u001b[A\n",
      " 38%|███▊      | 155/403 [00:01<00:01, 149.84it/s]\u001b[A\n",
      " 42%|████▏     | 171/403 [00:01<00:01, 151.26it/s]\u001b[A\n",
      " 46%|████▋     | 187/403 [00:01<00:01, 151.73it/s]\u001b[A\n",
      " 50%|█████     | 202/403 [00:01<00:01, 133.49it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 139.36it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 144.78it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:01, 148.27it/s]\u001b[A\n",
      " 66%|██████▌   | 266/403 [00:01<00:00, 150.02it/s]\u001b[A\n",
      " 70%|██████▉   | 282/403 [00:01<00:00, 152.31it/s]\u001b[A\n",
      " 74%|███████▍  | 299/403 [00:02<00:00, 154.62it/s]\u001b[A\n",
      " 78%|███████▊  | 315/403 [00:02<00:00, 155.23it/s]\u001b[A\n",
      " 82%|████████▏ | 332/403 [00:02<00:00, 156.92it/s]\u001b[A\n",
      " 86%|████████▋ | 348/403 [00:02<00:00, 157.16it/s]\u001b[A\n",
      " 91%|█████████ | 365/403 [00:02<00:00, 159.06it/s]\u001b[A\n",
      " 95%|█████████▍| 381/403 [00:02<00:00, 158.40it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 151.53it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 1005/4749 [00:25<09:06,  6.85it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.21848919323774876\n",
      "f1: 0.5988162323920962\n",
      "Test Loss: 0.008328, Acc: 0.735848\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 31%|███▏      | 1495/4749 [00:33<00:52, 61.79it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 14/403 [00:00<00:02, 135.47it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5304\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 29/403 [00:00<00:02, 137.77it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 139.14it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 141.21it/s]\u001b[A\n",
      " 18%|█▊        | 74/403 [00:00<00:02, 142.30it/s]\u001b[A\n",
      " 22%|██▏       | 90/403 [00:00<00:02, 144.83it/s]\u001b[A\n",
      " 26%|██▌       | 105/403 [00:00<00:02, 143.79it/s]\u001b[A\n",
      " 30%|███       | 121/403 [00:00<00:01, 146.68it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 147.11it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:01<00:01, 148.54it/s]\u001b[A\n",
      " 42%|████▏     | 168/403 [00:01<00:01, 149.42it/s]\u001b[A\n",
      " 45%|████▌     | 183/403 [00:01<00:01, 149.31it/s]\u001b[A\n",
      " 49%|████▉     | 198/403 [00:01<00:01, 148.52it/s]\u001b[A\n",
      " 53%|█████▎    | 213/403 [00:01<00:01, 147.67it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 148.16it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 148.54it/s]\u001b[A\n",
      " 65%|██████▍   | 260/403 [00:01<00:00, 149.32it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 149.32it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:01<00:00, 149.99it/s]\u001b[A\n",
      " 76%|███████▌  | 307/403 [00:02<00:00, 148.20it/s]\u001b[A\n",
      " 80%|████████  | 323/403 [00:02<00:00, 149.41it/s]\u001b[A\n",
      " 84%|████████▍ | 339/403 [00:02<00:00, 150.08it/s]\u001b[A\n",
      " 88%|████████▊ | 355/403 [00:02<00:00, 151.63it/s]\u001b[A\n",
      " 92%|█████████▏| 371/403 [00:02<00:00, 150.29it/s]\u001b[A\n",
      " 96%|█████████▌| 387/403 [00:02<00:00, 151.74it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 146.01it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2501604964690777\n",
      "f1: 0.6537693742166298\n",
      "Test Loss: 0.008132, Acc: 0.748682\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1994/4749 [00:46<00:49, 56.16it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  2%|▏         | 9/403 [00:00<00:04, 86.17it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5465\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  5%|▌         | 21/403 [00:00<00:04, 93.05it/s]\u001b[A\n",
      "  8%|▊         | 33/403 [00:00<00:03, 99.62it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:03, 104.47it/s]\u001b[A\n",
      " 14%|█▍        | 58/403 [00:00<00:03, 109.47it/s]\u001b[A\n",
      " 18%|█▊        | 71/403 [00:00<00:02, 113.55it/s]\u001b[A\n",
      " 21%|██▏       | 86/403 [00:00<00:02, 120.79it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:02, 127.70it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:02, 134.58it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:01<00:01, 138.25it/s]\u001b[A\n",
      " 36%|███▋      | 147/403 [00:01<00:01, 141.38it/s]\u001b[A\n",
      " 40%|████      | 162/403 [00:01<00:01, 142.88it/s]\u001b[A\n",
      " 44%|████▍     | 178/403 [00:01<00:01, 145.60it/s]\u001b[A\n",
      " 48%|████▊     | 193/403 [00:01<00:01, 146.77it/s]\u001b[A\n",
      " 52%|█████▏    | 209/403 [00:01<00:01, 148.46it/s]\u001b[A\n",
      " 56%|█████▌    | 224/403 [00:01<00:01, 148.17it/s]\u001b[A\n",
      " 60%|█████▉    | 240/403 [00:01<00:01, 149.91it/s]\u001b[A\n",
      " 64%|██████▎   | 256/403 [00:01<00:00, 150.87it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 152.27it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:02<00:00, 152.18it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:02<00:00, 152.58it/s]\u001b[A\n",
      " 79%|███████▉  | 320/403 [00:02<00:00, 153.61it/s]\u001b[A\n",
      " 83%|████████▎ | 336/403 [00:02<00:00, 154.55it/s]\u001b[A\n",
      " 87%|████████▋ | 352/403 [00:02<00:00, 152.59it/s]\u001b[A\n",
      " 91%|█████████▏| 368/403 [00:02<00:00, 153.65it/s]\u001b[A\n",
      " 95%|█████████▌| 384/403 [00:02<00:00, 154.08it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 143.21it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2000/4749 [00:50<11:09,  4.11it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2582923175690135\n",
      "f1: 0.7267070138887058\n",
      "Test Loss: 0.008316, Acc: 0.750543\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 2494/4749 [00:59<00:36, 60.98it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 154.66it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5219\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 155.03it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 155.02it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 154.74it/s]\u001b[A\n",
      " 20%|█▉        | 79/403 [00:00<00:02, 152.93it/s]\u001b[A\n",
      " 24%|██▎       | 95/403 [00:00<00:02, 153.82it/s]\u001b[A\n",
      " 28%|██▊       | 111/403 [00:00<00:01, 153.71it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 155.06it/s]\u001b[A\n",
      " 35%|███▌      | 142/403 [00:00<00:01, 153.35it/s]\u001b[A\n",
      " 39%|███▉      | 158/403 [00:01<00:01, 154.07it/s]\u001b[A\n",
      " 43%|████▎     | 174/403 [00:01<00:01, 154.10it/s]\u001b[A\n",
      " 47%|████▋     | 190/403 [00:01<00:01, 155.55it/s]\u001b[A\n",
      " 51%|█████     | 206/403 [00:01<00:01, 144.05it/s]\u001b[A\n",
      " 55%|█████▌    | 222/403 [00:01<00:01, 147.27it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 150.17it/s]\u001b[A\n",
      " 63%|██████▎   | 254/403 [00:01<00:00, 151.18it/s]\u001b[A\n",
      " 67%|██████▋   | 270/403 [00:01<00:00, 152.20it/s]\u001b[A\n",
      " 71%|███████   | 286/403 [00:01<00:00, 152.16it/s]\u001b[A\n",
      " 75%|███████▍  | 302/403 [00:01<00:00, 152.81it/s]\u001b[A\n",
      " 79%|███████▉  | 318/403 [00:02<00:00, 154.28it/s]\u001b[A\n",
      " 83%|████████▎ | 334/403 [00:02<00:00, 154.32it/s]\u001b[A\n",
      " 87%|████████▋ | 350/403 [00:02<00:00, 155.40it/s]\u001b[A\n",
      " 91%|█████████ | 366/403 [00:02<00:00, 155.36it/s]\u001b[A\n",
      " 95%|█████████▍| 382/403 [00:02<00:00, 156.12it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 153.41it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 53%|█████▎    | 2501/4749 [01:03<07:30,  4.99it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26043227049004924\n",
      "f1: 0.721120439823639\n",
      "Test Loss: 0.008079, Acc: 0.752792\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2999/4749 [01:11<00:31, 55.57it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 152.09it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4793\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 142.34it/s]\u001b[A\n",
      " 10%|▉         | 40/403 [00:00<00:03, 118.94it/s]\u001b[A\n",
      " 14%|█▍        | 56/403 [00:00<00:02, 127.42it/s]\u001b[A\n",
      " 18%|█▊        | 72/403 [00:00<00:02, 135.68it/s]\u001b[A\n",
      " 22%|██▏       | 88/403 [00:00<00:02, 141.97it/s]\u001b[A\n",
      " 26%|██▌       | 105/403 [00:00<00:02, 147.40it/s]\u001b[A\n",
      " 30%|███       | 121/403 [00:00<00:01, 149.36it/s]\u001b[A\n",
      " 34%|███▍      | 137/403 [00:00<00:01, 152.34it/s]\u001b[A\n",
      " 38%|███▊      | 154/403 [00:01<00:01, 155.32it/s]\u001b[A\n",
      " 42%|████▏     | 171/403 [00:01<00:01, 157.59it/s]\u001b[A\n",
      " 46%|████▋     | 187/403 [00:01<00:01, 157.32it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 158.74it/s]\u001b[A\n",
      " 55%|█████▍    | 221/403 [00:01<00:01, 160.16it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 160.89it/s]\u001b[A\n",
      " 63%|██████▎   | 255/403 [00:01<00:00, 158.40it/s]\u001b[A\n",
      " 67%|██████▋   | 271/403 [00:01<00:00, 158.41it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 159.76it/s]\u001b[A\n",
      " 76%|███████▌  | 305/403 [00:01<00:00, 160.26it/s]\u001b[A\n",
      " 80%|███████▉  | 322/403 [00:02<00:00, 159.46it/s]\u001b[A\n",
      " 84%|████████▍ | 339/403 [00:02<00:00, 160.65it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 161.46it/s]\u001b[A\n",
      " 93%|█████████▎| 373/403 [00:02<00:00, 162.14it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 155.13it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2687780868820886\n",
      "f1: 0.7145259726364039\n",
      "Test Loss: 0.008277, Acc: 0.755661\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3493/4749 [01:24<00:20, 60.25it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 161.55it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3841\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 161.41it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 158.85it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 160.15it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:01, 161.47it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:01, 162.00it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:01, 160.84it/s]\u001b[A\n",
      " 33%|███▎      | 134/403 [00:00<00:01, 161.49it/s]\u001b[A\n",
      " 37%|███▋      | 150/403 [00:00<00:01, 139.92it/s]\u001b[A\n",
      " 41%|████▏     | 167/403 [00:01<00:01, 146.00it/s]\u001b[A\n",
      " 46%|████▌     | 184/403 [00:01<00:01, 151.63it/s]\u001b[A\n",
      " 50%|████▉     | 200/403 [00:01<00:01, 153.85it/s]\u001b[A\n",
      " 54%|█████▍    | 217/403 [00:01<00:01, 156.07it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 158.22it/s]\u001b[A\n",
      " 62%|██████▏   | 251/403 [00:01<00:00, 159.87it/s]\u001b[A\n",
      " 67%|██████▋   | 268/403 [00:01<00:00, 161.05it/s]\u001b[A\n",
      " 71%|███████   | 285/403 [00:01<00:00, 162.27it/s]\u001b[A\n",
      " 75%|███████▍  | 302/403 [00:01<00:00, 163.77it/s]\u001b[A\n",
      " 79%|███████▉  | 319/403 [00:02<00:00, 162.20it/s]\u001b[A\n",
      " 83%|████████▎ | 336/403 [00:02<00:00, 163.25it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 162.84it/s]\u001b[A\n",
      " 92%|█████████▏| 370/403 [00:02<00:00, 162.65it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 158.89it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▎  | 3500/4749 [01:28<04:01,  5.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26599614808474215\n",
      "f1: 0.67668708113819\n",
      "Test Loss: 0.008006, Acc: 0.754265\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3993/4749 [01:36<00:11, 64.26it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 164.74it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.5917\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 163.60it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 164.44it/s]\u001b[A\n",
      " 15%|█▌        | 62/403 [00:00<00:02, 136.70it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 145.77it/s]\u001b[A\n",
      " 24%|██▍       | 98/403 [00:00<00:01, 153.01it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 157.13it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:00<00:01, 159.11it/s]\u001b[A\n",
      " 37%|███▋      | 149/403 [00:00<00:01, 161.57it/s]\u001b[A\n",
      " 41%|████      | 166/403 [00:01<00:01, 162.95it/s]\u001b[A\n",
      " 45%|████▌     | 183/403 [00:01<00:01, 164.11it/s]\u001b[A\n",
      " 50%|████▉     | 201/403 [00:01<00:01, 166.40it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 167.35it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:00, 168.47it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:00, 164.83it/s]\u001b[A\n",
      " 67%|██████▋   | 270/403 [00:01<00:00, 163.87it/s]\u001b[A\n",
      " 71%|███████   | 287/403 [00:01<00:00, 164.48it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:01<00:00, 151.51it/s]\u001b[A\n",
      " 80%|███████▉  | 321/403 [00:02<00:00, 154.92it/s]\u001b[A\n",
      " 84%|████████▎ | 337/403 [00:02<00:00, 155.28it/s]\u001b[A\n",
      " 88%|████████▊ | 354/403 [00:02<00:00, 157.78it/s]\u001b[A\n",
      " 92%|█████████▏| 371/403 [00:02<00:00, 160.77it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 160.79it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4000/4749 [01:40<02:23,  5.22it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27134603038733146\n",
      "f1: 0.6925329230058609\n",
      "Test Loss: 0.007941, Acc: 0.756746\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4493/4749 [01:48<00:04, 63.38it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 161.64it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5306\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 161.54it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 162.11it/s]\u001b[A\n",
      " 17%|█▋        | 68/403 [00:00<00:02, 162.71it/s]\u001b[A\n",
      " 21%|██        | 85/403 [00:00<00:01, 163.68it/s]\u001b[A\n",
      " 25%|██▌       | 102/403 [00:00<00:01, 164.35it/s]\u001b[A\n",
      " 30%|██▉       | 119/403 [00:00<00:01, 164.04it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 164.10it/s]\u001b[A\n",
      " 38%|███▊      | 153/403 [00:00<00:01, 163.22it/s]\u001b[A\n",
      " 42%|████▏     | 170/403 [00:01<00:01, 165.15it/s]\u001b[A\n",
      " 46%|████▋     | 187/403 [00:01<00:01, 164.05it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 164.21it/s]\u001b[A\n",
      " 55%|█████▍    | 221/403 [00:01<00:01, 163.38it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 163.08it/s]\u001b[A\n",
      " 63%|██████▎   | 255/403 [00:01<00:00, 162.90it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 163.76it/s]\u001b[A\n",
      " 72%|███████▏  | 289/403 [00:01<00:00, 163.17it/s]\u001b[A\n",
      " 76%|███████▌  | 306/403 [00:01<00:00, 163.47it/s]\u001b[A\n",
      " 80%|████████  | 323/403 [00:01<00:00, 163.70it/s]\u001b[A\n",
      " 84%|████████▍ | 340/403 [00:02<00:00, 163.51it/s]\u001b[A\n",
      " 89%|████████▊ | 357/403 [00:02<00:00, 163.32it/s]\u001b[A\n",
      " 93%|█████████▎| 374/403 [00:02<00:00, 163.67it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 163.71it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4505/4749 [01:52<00:33,  7.19it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2377487695270704\n",
      "f1: 0.6356594281909959\n",
      "Test Loss: 0.008426, Acc: 0.744417\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [01:55<00:00, 40.96it/s]\n",
      "  4%|▍         | 17/403 [00:00<00:02, 162.39it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 163.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/4749 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2490905200085598\n",
      "f1: 0.6562357548768899\n",
      "Test Loss: 0.008066, Acc: 0.750775\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 495/4749 [00:07<01:08, 62.02it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 150.67it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.443\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 149.95it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 150.38it/s]\u001b[A\n",
      " 15%|█▌        | 62/403 [00:00<00:02, 150.22it/s]\u001b[A\n",
      " 19%|█▉        | 77/403 [00:00<00:02, 150.14it/s]\u001b[A\n",
      " 23%|██▎       | 93/403 [00:00<00:02, 150.75it/s]\u001b[A\n",
      " 27%|██▋       | 108/403 [00:00<00:01, 150.52it/s]\u001b[A\n",
      " 31%|███       | 124/403 [00:00<00:01, 151.56it/s]\u001b[A\n",
      " 35%|███▍      | 140/403 [00:00<00:01, 152.63it/s]\u001b[A\n",
      " 39%|███▊      | 156/403 [00:01<00:01, 153.83it/s]\u001b[A\n",
      " 43%|████▎     | 172/403 [00:01<00:01, 154.98it/s]\u001b[A\n",
      " 47%|████▋     | 188/403 [00:01<00:01, 154.42it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 155.07it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 155.91it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:01, 156.12it/s]\u001b[A\n",
      " 63%|██████▎   | 252/403 [00:01<00:00, 155.95it/s]\u001b[A\n",
      " 67%|██████▋   | 268/403 [00:01<00:00, 139.64it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 140.40it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:01<00:00, 145.93it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:02<00:00, 149.41it/s]\u001b[A\n",
      " 83%|████████▎ | 333/403 [00:02<00:00, 152.56it/s]\u001b[A\n",
      " 87%|████████▋ | 350/403 [00:02<00:00, 154.91it/s]\u001b[A\n",
      " 91%|█████████ | 366/403 [00:02<00:00, 156.39it/s]\u001b[A\n",
      " 95%|█████████▍| 382/403 [00:02<00:00, 156.12it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 152.35it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 11%|█         | 502/4749 [00:12<14:02,  5.04it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27113203509522793\n",
      "f1: 0.7022491584550321\n",
      "Test Loss: 0.008113, Acc: 0.754808\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 999/4749 [00:20<01:06, 56.58it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 150.46it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5255\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 150.20it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 151.36it/s]\u001b[A\n",
      " 15%|█▌        | 62/403 [00:00<00:02, 150.95it/s]\u001b[A\n",
      " 19%|█▉        | 78/403 [00:00<00:02, 151.90it/s]\u001b[A\n",
      " 23%|██▎       | 94/403 [00:00<00:02, 153.86it/s]\u001b[A\n",
      " 28%|██▊       | 111/403 [00:00<00:01, 156.41it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 156.87it/s]\u001b[A\n",
      " 35%|███▌      | 142/403 [00:00<00:01, 147.47it/s]\u001b[A\n",
      " 39%|███▉      | 157/403 [00:01<00:01, 141.28it/s]\u001b[A\n",
      " 43%|████▎     | 172/403 [00:01<00:01, 142.98it/s]\u001b[A\n",
      " 47%|████▋     | 188/403 [00:01<00:01, 147.20it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 148.63it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 151.17it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:01, 151.02it/s]\u001b[A\n",
      " 63%|██████▎   | 252/403 [00:01<00:00, 153.00it/s]\u001b[A\n",
      " 67%|██████▋   | 268/403 [00:01<00:00, 151.98it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 153.58it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:01<00:00, 153.15it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:02<00:00, 153.68it/s]\u001b[A\n",
      " 82%|████████▏ | 332/403 [00:02<00:00, 153.81it/s]\u001b[A\n",
      " 86%|████████▋ | 348/403 [00:02<00:00, 153.73it/s]\u001b[A\n",
      " 90%|█████████ | 364/403 [00:02<00:00, 153.02it/s]\u001b[A\n",
      " 94%|█████████▍| 380/403 [00:02<00:00, 153.34it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 151.62it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 21%|██        | 999/4749 [00:31<01:06, 56.58it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 1005/4749 [00:33<2:47:50,  2.69s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2672801198373636\n",
      "f1: 0.7271335840135424\n",
      "Test Loss: 0.008169, Acc: 0.752714\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|███▏      | 1498/4749 [00:41<00:58, 55.69it/s]  \n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 14/403 [00:00<00:02, 138.43it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.483\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:02, 138.67it/s]\u001b[A\n",
      " 10%|█         | 42/403 [00:00<00:02, 136.46it/s]\u001b[A\n",
      " 14%|█▍        | 56/403 [00:00<00:02, 137.00it/s]\u001b[A\n",
      " 18%|█▊        | 71/403 [00:00<00:02, 139.31it/s]\u001b[A\n",
      " 22%|██▏       | 89/403 [00:00<00:02, 148.23it/s]\u001b[A\n",
      " 26%|██▋       | 106/403 [00:00<00:01, 153.37it/s]\u001b[A\n",
      " 31%|███       | 123/403 [00:00<00:01, 157.84it/s]\u001b[A\n",
      " 35%|███▍      | 141/403 [00:00<00:01, 161.58it/s]\u001b[A\n",
      " 39%|███▉      | 158/403 [00:01<00:01, 163.83it/s]\u001b[A\n",
      " 43%|████▎     | 175/403 [00:01<00:01, 165.57it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 158.23it/s]\u001b[A\n",
      " 52%|█████▏    | 210/403 [00:01<00:01, 162.97it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 165.75it/s]\u001b[A\n",
      " 61%|██████    | 246/403 [00:01<00:00, 167.55it/s]\u001b[A\n",
      " 66%|██████▌   | 264/403 [00:01<00:00, 170.11it/s]\u001b[A\n",
      " 70%|██████▉   | 282/403 [00:01<00:00, 169.47it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:01<00:00, 171.49it/s]\u001b[A\n",
      " 79%|███████▉  | 318/403 [00:01<00:00, 172.01it/s]\u001b[A\n",
      " 83%|████████▎ | 336/403 [00:02<00:00, 172.49it/s]\u001b[A\n",
      " 88%|████████▊ | 354/403 [00:02<00:00, 173.47it/s]\u001b[A\n",
      " 92%|█████████▏| 372/403 [00:02<00:00, 173.16it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 163.78it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 32%|███▏      | 1504/4749 [00:45<12:00,  4.51it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2732719880162636\n",
      "f1: 0.7221876371245182\n",
      "Test Loss: 0.007954, Acc: 0.755195\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1993/4749 [00:53<00:46, 59.52it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 158.76it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5247\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 159.07it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 160.57it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 159.50it/s]\u001b[A\n",
      " 21%|██        | 83/403 [00:00<00:02, 156.79it/s]\u001b[A\n",
      " 25%|██▍       | 99/403 [00:00<00:01, 157.63it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 157.68it/s]\u001b[A\n",
      " 33%|███▎      | 131/403 [00:00<00:01, 156.36it/s]\u001b[A\n",
      " 36%|███▋      | 147/403 [00:00<00:01, 156.39it/s]\u001b[A\n",
      " 40%|████      | 163/403 [00:01<00:01, 156.21it/s]\u001b[A\n",
      " 44%|████▍     | 179/403 [00:01<00:01, 157.32it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 155.75it/s]\u001b[A\n",
      " 52%|█████▏    | 211/403 [00:01<00:01, 156.87it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 158.41it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 157.71it/s]\u001b[A\n",
      " 65%|██████▍   | 260/403 [00:01<00:01, 122.42it/s]\u001b[A\n",
      " 68%|██████▊   | 274/403 [00:01<00:01, 125.04it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 126.19it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:02<00:00, 132.58it/s]\u001b[A\n",
      " 80%|███████▉  | 321/403 [00:02<00:00, 140.90it/s]\u001b[A\n",
      " 84%|████████▎ | 337/403 [00:02<00:00, 145.46it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 149.43it/s]\u001b[A\n",
      " 92%|█████████▏| 369/403 [00:02<00:00, 151.55it/s]\u001b[A\n",
      " 96%|█████████▌| 385/403 [00:02<00:00, 153.41it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 149.73it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2000/4749 [00:57<09:20,  4.90it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2730579927241601\n",
      "f1: 0.6949629584339706\n",
      "Test Loss: 0.007985, Acc: 0.756514\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 2497/4749 [01:05<00:38, 58.79it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 14/403 [00:00<00:02, 134.51it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5149\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:02, 135.47it/s]\u001b[A\n",
      " 11%|█         | 43/403 [00:00<00:02, 137.68it/s]\u001b[A\n",
      " 14%|█▍        | 58/403 [00:00<00:02, 138.83it/s]\u001b[A\n",
      " 18%|█▊        | 73/403 [00:00<00:02, 139.91it/s]\u001b[A\n",
      " 22%|██▏       | 87/403 [00:00<00:02, 138.38it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:02, 138.05it/s]\u001b[A\n",
      " 29%|██▉       | 116/403 [00:00<00:02, 139.49it/s]\u001b[A\n",
      " 32%|███▏      | 130/403 [00:00<00:01, 138.06it/s]\u001b[A\n",
      " 36%|███▌      | 145/403 [00:01<00:01, 140.18it/s]\u001b[A\n",
      " 39%|███▉      | 159/403 [00:01<00:01, 138.72it/s]\u001b[A\n",
      " 43%|████▎     | 174/403 [00:01<00:01, 140.81it/s]\u001b[A\n",
      " 47%|████▋     | 188/403 [00:01<00:01, 139.46it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 142.08it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 143.14it/s]\u001b[A\n",
      " 58%|█████▊    | 233/403 [00:01<00:01, 142.00it/s]\u001b[A\n",
      " 62%|██████▏   | 248/403 [00:01<00:01, 143.13it/s]\u001b[A\n",
      " 65%|██████▌   | 263/403 [00:01<00:00, 144.50it/s]\u001b[A\n",
      " 69%|██████▉   | 278/403 [00:01<00:00, 143.23it/s]\u001b[A\n",
      " 73%|███████▎  | 293/403 [00:02<00:00, 141.10it/s]\u001b[A\n",
      " 76%|███████▋  | 308/403 [00:02<00:00, 143.08it/s]\u001b[A\n",
      " 80%|████████  | 323/403 [00:02<00:00, 143.75it/s]\u001b[A\n",
      " 84%|████████▍ | 338/403 [00:02<00:00, 144.14it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 143.89it/s]\u001b[A\n",
      " 91%|█████████▏| 368/403 [00:02<00:00, 144.92it/s]\u001b[A\n",
      " 95%|█████████▌| 383/403 [00:02<00:00, 127.92it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 139.95it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27220201155574575\n",
      "f1: 0.7191132380946308\n",
      "Test Loss: 0.007946, Acc: 0.755777\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2994/4749 [01:18<00:31, 55.94it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 14/403 [00:00<00:02, 134.68it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4772\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  6%|▋         | 26/403 [00:00<00:02, 129.18it/s]\u001b[A\n",
      " 11%|█         | 43/403 [00:00<00:02, 138.46it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 146.23it/s]\u001b[A\n",
      " 19%|█▉        | 77/403 [00:00<00:02, 152.08it/s]\u001b[A\n",
      " 23%|██▎       | 94/403 [00:00<00:01, 156.36it/s]\u001b[A\n",
      " 27%|██▋       | 110/403 [00:00<00:01, 156.15it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 157.54it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 157.61it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 159.22it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 159.06it/s]\u001b[A\n",
      " 48%|████▊     | 193/403 [00:01<00:01, 160.09it/s]\u001b[A\n",
      " 52%|█████▏    | 209/403 [00:01<00:01, 157.52it/s]\u001b[A\n",
      " 56%|█████▌    | 225/403 [00:01<00:01, 157.56it/s]\u001b[A\n",
      " 60%|██████    | 242/403 [00:01<00:01, 158.97it/s]\u001b[A\n",
      " 64%|██████▍   | 259/403 [00:01<00:00, 161.45it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 160.70it/s]\u001b[A\n",
      " 73%|███████▎  | 293/403 [00:01<00:00, 159.69it/s]\u001b[A\n",
      " 77%|███████▋  | 309/403 [00:01<00:00, 158.90it/s]\u001b[A\n",
      " 81%|████████  | 326/403 [00:02<00:00, 161.40it/s]\u001b[A\n",
      " 85%|████████▌ | 343/403 [00:02<00:00, 161.04it/s]\u001b[A\n",
      " 89%|████████▉ | 360/403 [00:02<00:00, 162.44it/s]\u001b[A\n",
      " 94%|█████████▎| 377/403 [00:02<00:00, 162.17it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 159.01it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 63%|██████▎   | 3000/4749 [01:23<06:39,  4.38it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2719880162636422\n",
      "f1: 0.7081671473068953\n",
      "Test Loss: 0.007979, Acc: 0.756514\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3494/4749 [01:31<00:20, 61.67it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 147.39it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3716\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 147.73it/s]\u001b[A\n",
      " 11%|█▏        | 46/403 [00:00<00:02, 149.37it/s]\u001b[A\n",
      " 15%|█▌        | 62/403 [00:00<00:02, 149.95it/s]\u001b[A\n",
      " 19%|█▉        | 78/403 [00:00<00:02, 150.19it/s]\u001b[A\n",
      " 23%|██▎       | 94/403 [00:00<00:02, 151.15it/s]\u001b[A\n",
      " 27%|██▋       | 109/403 [00:00<00:01, 150.16it/s]\u001b[A\n",
      " 31%|███       | 125/403 [00:00<00:01, 151.22it/s]\u001b[A\n",
      " 35%|███▍      | 140/403 [00:00<00:01, 146.79it/s]\u001b[A\n",
      " 39%|███▊      | 156/403 [00:01<00:01, 148.35it/s]\u001b[A\n",
      " 43%|████▎     | 172/403 [00:01<00:01, 150.29it/s]\u001b[A\n",
      " 47%|████▋     | 188/403 [00:01<00:01, 151.50it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 153.24it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 151.27it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:01, 152.25it/s]\u001b[A\n",
      " 63%|██████▎   | 252/403 [00:01<00:00, 154.02it/s]\u001b[A\n",
      " 67%|██████▋   | 268/403 [00:01<00:00, 154.46it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 155.37it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:02<00:00, 142.88it/s]\u001b[A\n",
      " 79%|███████▊  | 317/403 [00:02<00:00, 149.18it/s]\u001b[A\n",
      " 83%|████████▎ | 333/403 [00:02<00:00, 152.09it/s]\u001b[A\n",
      " 87%|████████▋ | 349/403 [00:02<00:00, 152.51it/s]\u001b[A\n",
      " 91%|█████████ | 365/403 [00:02<00:00, 154.38it/s]\u001b[A\n",
      " 95%|█████████▍| 381/403 [00:02<00:00, 154.14it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 151.80it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▎  | 3501/4749 [01:35<04:12,  4.95it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27562593622940296\n",
      "f1: 0.6968800845111651\n",
      "Test Loss: 0.007962, Acc: 0.757871\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3993/4749 [01:42<00:12, 62.55it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 157.65it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.5771\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 157.30it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 157.44it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 123.56it/s]\u001b[A\n",
      " 19%|█▉        | 76/403 [00:00<00:02, 132.68it/s]\u001b[A\n",
      " 23%|██▎       | 92/403 [00:00<00:02, 138.71it/s]\u001b[A\n",
      " 27%|██▋       | 109/403 [00:00<00:02, 144.71it/s]\u001b[A\n",
      " 31%|███▏      | 126/403 [00:00<00:01, 149.28it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 153.11it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 155.31it/s]\u001b[A\n",
      " 44%|████▍     | 177/403 [00:01<00:01, 157.49it/s]\u001b[A\n",
      " 48%|████▊     | 194/403 [00:01<00:01, 159.27it/s]\u001b[A\n",
      " 52%|█████▏    | 211/403 [00:01<00:01, 160.33it/s]\u001b[A\n",
      " 56%|█████▋    | 227/403 [00:01<00:01, 141.57it/s]\u001b[A\n",
      " 61%|██████    | 245/403 [00:01<00:01, 149.57it/s]\u001b[A\n",
      " 65%|██████▌   | 262/403 [00:01<00:00, 155.12it/s]\u001b[A\n",
      " 69%|██████▉   | 279/403 [00:01<00:00, 157.88it/s]\u001b[A\n",
      " 73%|███████▎  | 296/403 [00:01<00:00, 160.16it/s]\u001b[A\n",
      " 78%|███████▊  | 313/403 [00:02<00:00, 161.66it/s]\u001b[A\n",
      " 82%|████████▏ | 330/403 [00:02<00:00, 163.69it/s]\u001b[A\n",
      " 86%|████████▌ | 347/403 [00:02<00:00, 162.86it/s]\u001b[A\n",
      " 90%|█████████ | 364/403 [00:02<00:00, 164.00it/s]\u001b[A\n",
      " 95%|█████████▍| 381/403 [00:02<00:00, 165.36it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 156.21it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4000/4749 [01:47<02:25,  5.14it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27091803980312434\n",
      "f1: 0.6974300184443653\n",
      "Test Loss: 0.007928, Acc: 0.756281\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4494/4749 [01:55<00:04, 59.51it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 149.54it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5213\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 149.39it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:02, 148.76it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 148.86it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 148.87it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 149.35it/s]\u001b[A\n",
      " 26%|██▋       | 106/403 [00:00<00:02, 148.39it/s]\u001b[A\n",
      " 30%|███       | 122/403 [00:00<00:01, 149.77it/s]\u001b[A\n",
      " 34%|███▍      | 138/403 [00:00<00:01, 151.63it/s]\u001b[A\n",
      " 38%|███▊      | 154/403 [00:01<00:01, 151.79it/s]\u001b[A\n",
      " 42%|████▏     | 170/403 [00:01<00:01, 151.77it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 151.58it/s]\u001b[A\n",
      " 50%|█████     | 202/403 [00:01<00:01, 152.14it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 152.36it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 150.90it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:01, 150.12it/s]\u001b[A\n",
      " 66%|██████▌   | 266/403 [00:01<00:00, 150.71it/s]\u001b[A\n",
      " 70%|██████▉   | 282/403 [00:01<00:00, 149.96it/s]\u001b[A\n",
      " 74%|███████▎  | 297/403 [00:01<00:00, 149.84it/s]\u001b[A\n",
      " 78%|███████▊  | 313/403 [00:02<00:00, 150.98it/s]\u001b[A\n",
      " 82%|████████▏ | 329/403 [00:02<00:00, 150.42it/s]\u001b[A\n",
      " 86%|████████▌ | 345/403 [00:02<00:00, 151.63it/s]\u001b[A\n",
      " 90%|████████▉ | 361/403 [00:02<00:00, 150.53it/s]\u001b[A\n",
      " 94%|█████████▎| 377/403 [00:02<00:00, 151.37it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 150.85it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 95%|█████████▍| 4500/4749 [01:59<00:57,  4.30it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27541194093729937\n",
      "f1: 0.7169701200195625\n",
      "Test Loss: 0.007973, Acc: 0.755622\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [02:03<00:00, 38.31it/s]\n",
      "  4%|▎         | 15/403 [00:00<00:02, 145.98it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 158.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/4749 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27070404451102076\n",
      "f1: 0.7145289216335755\n",
      "Test Loss: 0.007927, Acc: 0.756359\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 493/4749 [00:07<01:07, 63.07it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 149.34it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4477\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 151.64it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 153.63it/s]\u001b[A\n",
      " 16%|█▌        | 63/403 [00:00<00:02, 153.17it/s]\u001b[A\n",
      " 20%|█▉        | 79/403 [00:00<00:02, 153.28it/s]\u001b[A\n",
      " 24%|██▎       | 95/403 [00:00<00:01, 154.16it/s]\u001b[A\n",
      " 27%|██▋       | 110/403 [00:00<00:01, 151.24it/s]\u001b[A\n",
      " 31%|███▏      | 126/403 [00:00<00:01, 152.92it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 155.11it/s]\u001b[A\n",
      " 39%|███▉      | 159/403 [00:01<00:01, 156.22it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 157.77it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 157.27it/s]\u001b[A\n",
      " 52%|█████▏    | 208/403 [00:01<00:01, 156.38it/s]\u001b[A\n",
      " 56%|█████▌    | 224/403 [00:01<00:01, 156.96it/s]\u001b[A\n",
      " 60%|█████▉    | 240/403 [00:01<00:01, 157.58it/s]\u001b[A\n",
      " 64%|██████▎   | 256/403 [00:01<00:00, 157.97it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 156.53it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 157.26it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:01<00:00, 142.31it/s]\u001b[A\n",
      " 79%|███████▉  | 320/403 [00:02<00:00, 146.17it/s]\u001b[A\n",
      " 83%|████████▎ | 336/403 [00:02<00:00, 149.33it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 154.00it/s]\u001b[A\n",
      " 92%|█████████▏| 370/403 [00:02<00:00, 156.58it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 152.32it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 11%|█         | 500/4749 [00:11<14:01,  5.05it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2751979456451958\n",
      "f1: 0.7103862445185039\n",
      "Test Loss: 0.007978, Acc: 0.756048\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 994/4749 [00:20<01:01, 61.41it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 151.61it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5135\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 152.67it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 153.52it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 155.01it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 154.49it/s]\u001b[A\n",
      " 24%|██▎       | 95/403 [00:00<00:02, 152.75it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:01, 155.33it/s]\u001b[A\n",
      " 32%|███▏      | 128/403 [00:00<00:01, 156.03it/s]\u001b[A\n",
      " 36%|███▌      | 144/403 [00:00<00:01, 156.90it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 157.02it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 156.59it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 155.64it/s]\u001b[A\n",
      " 52%|█████▏    | 209/403 [00:01<00:01, 157.39it/s]\u001b[A\n",
      " 56%|█████▌    | 226/403 [00:01<00:01, 158.82it/s]\u001b[A\n",
      " 60%|██████    | 242/403 [00:01<00:01, 149.05it/s]\u001b[A\n",
      " 64%|██████▍   | 259/403 [00:01<00:00, 152.35it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 154.90it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:01<00:00, 155.64it/s]\u001b[A\n",
      " 77%|███████▋  | 309/403 [00:01<00:00, 157.02it/s]\u001b[A\n",
      " 81%|████████  | 326/403 [00:02<00:00, 158.60it/s]\u001b[A\n",
      " 85%|████████▍ | 342/403 [00:02<00:00, 158.95it/s]\u001b[A\n",
      " 89%|████████▉ | 359/403 [00:02<00:00, 160.44it/s]\u001b[A\n",
      " 93%|█████████▎| 376/403 [00:02<00:00, 160.43it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 156.67it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 21%|██        | 1001/4749 [00:24<12:19,  5.07it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25551037877166705\n",
      "f1: 0.72728446150232\n",
      "Test Loss: 0.008174, Acc: 0.746045\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 31%|███▏      | 1495/4749 [00:32<00:52, 62.12it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 146.29it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4804\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 148.59it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 150.66it/s]\u001b[A\n",
      " 16%|█▌        | 63/403 [00:00<00:02, 151.41it/s]\u001b[A\n",
      " 20%|█▉        | 79/403 [00:00<00:02, 153.06it/s]\u001b[A\n",
      " 24%|██▎       | 95/403 [00:00<00:01, 154.55it/s]\u001b[A\n",
      " 28%|██▊       | 111/403 [00:00<00:01, 154.93it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 154.22it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 154.86it/s]\u001b[A\n",
      " 39%|███▉      | 159/403 [00:01<00:01, 156.14it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 157.66it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 157.84it/s]\u001b[A\n",
      " 52%|█████▏    | 209/403 [00:01<00:01, 158.82it/s]\u001b[A\n",
      " 56%|█████▌    | 225/403 [00:01<00:01, 157.78it/s]\u001b[A\n",
      " 60%|██████    | 242/403 [00:01<00:01, 158.79it/s]\u001b[A\n",
      " 64%|██████▍   | 259/403 [00:01<00:00, 160.28it/s]\u001b[A\n",
      " 68%|██████▊   | 275/403 [00:01<00:00, 160.13it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:01<00:00, 160.85it/s]\u001b[A\n",
      " 77%|███████▋  | 309/403 [00:01<00:00, 161.10it/s]\u001b[A\n",
      " 81%|████████  | 326/403 [00:02<00:00, 160.49it/s]\u001b[A\n",
      " 85%|████████▌ | 343/403 [00:02<00:00, 161.58it/s]\u001b[A\n",
      " 89%|████████▉ | 360/403 [00:02<00:00, 145.04it/s]\u001b[A\n",
      " 94%|█████████▎| 377/403 [00:02<00:00, 149.34it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 155.98it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 32%|███▏      | 1502/4749 [00:36<10:43,  5.04it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26407019045581\n",
      "f1: 0.7256672685091361\n",
      "Test Loss: 0.008010, Acc: 0.752404\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1997/4749 [00:45<00:47, 57.72it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 14/403 [00:00<00:03, 118.44it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5335\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 27/403 [00:00<00:03, 119.70it/s]\u001b[A\n",
      " 11%|█         | 43/403 [00:00<00:02, 127.68it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 135.49it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 141.12it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 144.06it/s]\u001b[A\n",
      " 27%|██▋       | 107/403 [00:00<00:02, 147.80it/s]\u001b[A\n",
      " 31%|███       | 123/403 [00:00<00:01, 150.71it/s]\u001b[A\n",
      " 34%|███▍      | 139/403 [00:00<00:01, 152.80it/s]\u001b[A\n",
      " 39%|███▊      | 156/403 [00:01<00:01, 155.02it/s]\u001b[A\n",
      " 43%|████▎     | 172/403 [00:01<00:01, 155.26it/s]\u001b[A\n",
      " 47%|████▋     | 189/403 [00:01<00:01, 157.10it/s]\u001b[A\n",
      " 51%|█████     | 206/403 [00:01<00:01, 158.89it/s]\u001b[A\n",
      " 55%|█████▌    | 222/403 [00:01<00:01, 159.16it/s]\u001b[A\n",
      " 59%|█████▉    | 239/403 [00:01<00:01, 159.94it/s]\u001b[A\n",
      " 63%|██████▎   | 255/403 [00:01<00:00, 157.14it/s]\u001b[A\n",
      " 67%|██████▋   | 271/403 [00:01<00:00, 157.64it/s]\u001b[A\n",
      " 71%|███████   | 287/403 [00:01<00:00, 157.62it/s]\u001b[A\n",
      " 75%|███████▌  | 303/403 [00:01<00:00, 157.44it/s]\u001b[A\n",
      " 79%|███████▉  | 320/403 [00:02<00:00, 158.62it/s]\u001b[A\n",
      " 83%|████████▎ | 336/403 [00:02<00:00, 158.73it/s]\u001b[A\n",
      " 87%|████████▋ | 352/403 [00:02<00:00, 156.69it/s]\u001b[A\n",
      " 91%|█████████▏| 368/403 [00:02<00:00, 157.24it/s]\u001b[A\n",
      " 95%|█████████▌| 384/403 [00:02<00:00, 157.06it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 154.44it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2003/4749 [00:49<10:27,  4.38it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2769099079820244\n",
      "f1: 0.6969222659785553\n",
      "Test Loss: 0.008047, Acc: 0.757250\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 2494/4749 [00:57<00:37, 59.76it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 154.82it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.526\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 155.06it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 155.93it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 156.22it/s]\u001b[A\n",
      " 20%|██        | 81/403 [00:00<00:02, 157.69it/s]\u001b[A\n",
      " 24%|██▍       | 98/403 [00:00<00:01, 158.58it/s]\u001b[A\n",
      " 28%|██▊       | 114/403 [00:00<00:01, 156.76it/s]\u001b[A\n",
      " 33%|███▎      | 131/403 [00:00<00:01, 158.38it/s]\u001b[A\n",
      " 36%|███▋      | 147/403 [00:00<00:01, 158.69it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 159.70it/s]\u001b[A\n",
      " 45%|████▍     | 181/403 [00:01<00:01, 161.20it/s]\u001b[A\n",
      " 49%|████▉     | 198/403 [00:01<00:01, 161.90it/s]\u001b[A\n",
      " 53%|█████▎    | 215/403 [00:01<00:01, 161.75it/s]\u001b[A\n",
      " 58%|█████▊    | 232/403 [00:01<00:01, 161.45it/s]\u001b[A\n",
      " 62%|██████▏   | 249/403 [00:01<00:00, 161.50it/s]\u001b[A\n",
      " 66%|██████▌   | 266/403 [00:01<00:00, 162.99it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 163.40it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:01<00:00, 163.42it/s]\u001b[A\n",
      " 79%|███████▊  | 317/403 [00:01<00:00, 161.72it/s]\u001b[A\n",
      " 83%|████████▎ | 334/403 [00:02<00:00, 161.51it/s]\u001b[A\n",
      " 87%|████████▋ | 351/403 [00:02<00:00, 163.18it/s]\u001b[A\n",
      " 91%|█████████▏| 368/403 [00:02<00:00, 163.64it/s]\u001b[A\n",
      " 96%|█████████▌| 385/403 [00:02<00:00, 164.58it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 161.28it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 53%|█████▎    | 2501/4749 [01:01<07:16,  5.15it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27241600684784933\n",
      "f1: 0.7197830155673179\n",
      "Test Loss: 0.007951, Acc: 0.755506\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2995/4749 [01:09<00:26, 66.28it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 148.00it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4818\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 149.64it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 150.67it/s]\u001b[A\n",
      " 16%|█▌        | 63/403 [00:00<00:02, 150.86it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 126.48it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 134.11it/s]\u001b[A\n",
      " 27%|██▋       | 107/403 [00:00<00:02, 140.44it/s]\u001b[A\n",
      " 31%|███       | 123/403 [00:00<00:01, 144.29it/s]\u001b[A\n",
      " 34%|███▍      | 139/403 [00:00<00:01, 148.59it/s]\u001b[A\n",
      " 38%|███▊      | 155/403 [00:01<00:01, 150.54it/s]\u001b[A\n",
      " 42%|████▏     | 171/403 [00:01<00:01, 151.84it/s]\u001b[A\n",
      " 46%|████▋     | 187/403 [00:01<00:01, 152.71it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 151.01it/s]\u001b[A\n",
      " 54%|█████▍    | 219/403 [00:01<00:01, 151.09it/s]\u001b[A\n",
      " 58%|█████▊    | 235/403 [00:01<00:01, 151.75it/s]\u001b[A\n",
      " 62%|██████▏   | 251/403 [00:01<00:00, 153.43it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 153.07it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 140.22it/s]\u001b[A\n",
      " 74%|███████▍  | 299/403 [00:02<00:00, 144.71it/s]\u001b[A\n",
      " 78%|███████▊  | 315/403 [00:02<00:00, 147.20it/s]\u001b[A\n",
      " 82%|████████▏ | 331/403 [00:02<00:00, 150.19it/s]\u001b[A\n",
      " 86%|████████▌ | 347/403 [00:02<00:00, 151.51it/s]\u001b[A\n",
      " 90%|█████████ | 363/403 [00:02<00:00, 153.52it/s]\u001b[A\n",
      " 94%|█████████▍| 379/403 [00:02<00:00, 154.72it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 149.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 63%|██████▎   | 3002/4749 [01:14<05:54,  4.93it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27626792210571366\n",
      "f1: 0.7136972735517664\n",
      "Test Loss: 0.007963, Acc: 0.756552\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3496/4749 [01:22<00:20, 62.40it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 158.65it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3639\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 158.86it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 157.14it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 157.61it/s]\u001b[A\n",
      " 20%|██        | 81/403 [00:00<00:02, 159.35it/s]\u001b[A\n",
      " 24%|██▍       | 98/403 [00:00<00:01, 160.07it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 160.34it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:00<00:01, 161.30it/s]\u001b[A\n",
      " 37%|███▋      | 149/403 [00:00<00:01, 161.09it/s]\u001b[A\n",
      " 41%|████      | 165/403 [00:01<00:01, 159.43it/s]\u001b[A\n",
      " 45%|████▌     | 182/403 [00:01<00:01, 161.36it/s]\u001b[A\n",
      " 49%|████▉     | 199/403 [00:01<00:01, 163.15it/s]\u001b[A\n",
      " 54%|█████▎    | 216/403 [00:01<00:01, 164.59it/s]\u001b[A\n",
      " 58%|█████▊    | 233/403 [00:01<00:01, 165.08it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:00, 164.90it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 164.92it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 164.12it/s]\u001b[A\n",
      " 75%|███████▍  | 301/403 [00:01<00:00, 162.89it/s]\u001b[A\n",
      " 79%|███████▉  | 318/403 [00:01<00:00, 163.23it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 164.22it/s]\u001b[A\n",
      " 87%|████████▋ | 352/403 [00:02<00:00, 165.43it/s]\u001b[A\n",
      " 92%|█████████▏| 369/403 [00:02<00:00, 165.87it/s]\u001b[A\n",
      " 96%|█████████▌| 386/403 [00:02<00:00, 166.75it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 162.75it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▍  | 3503/4749 [01:26<03:58,  5.23it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2739139738925744\n",
      "f1: 0.718650924888899\n",
      "Test Loss: 0.007996, Acc: 0.755389\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3996/4749 [01:34<00:12, 58.31it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 163.77it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.5893\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 163.28it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 163.90it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 161.77it/s]\u001b[A\n",
      " 21%|██        | 83/403 [00:00<00:01, 160.80it/s]\u001b[A\n",
      " 25%|██▍       | 100/403 [00:00<00:01, 161.01it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:01, 162.06it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:00<00:01, 141.42it/s]\u001b[A\n",
      " 37%|███▋      | 149/403 [00:00<00:01, 147.60it/s]\u001b[A\n",
      " 41%|████      | 165/403 [00:01<00:01, 150.06it/s]\u001b[A\n",
      " 45%|████▌     | 182/403 [00:01<00:01, 153.49it/s]\u001b[A\n",
      " 49%|████▉     | 199/403 [00:01<00:01, 155.75it/s]\u001b[A\n",
      " 54%|█████▎    | 216/403 [00:01<00:01, 159.02it/s]\u001b[A\n",
      " 58%|█████▊    | 233/403 [00:01<00:01, 161.61it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:00, 162.72it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 161.12it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 161.20it/s]\u001b[A\n",
      " 75%|███████▍  | 301/403 [00:01<00:00, 162.28it/s]\u001b[A\n",
      " 79%|███████▉  | 318/403 [00:02<00:00, 162.39it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 161.95it/s]\u001b[A\n",
      " 87%|████████▋ | 352/403 [00:02<00:00, 162.52it/s]\u001b[A\n",
      " 92%|█████████▏| 369/403 [00:02<00:00, 162.11it/s]\u001b[A\n",
      " 96%|█████████▌| 386/403 [00:02<00:00, 161.95it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 159.27it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4002/4749 [01:38<02:46,  4.47it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2677081104215707\n",
      "f1: 0.6964588874282953\n",
      "Test Loss: 0.007965, Acc: 0.755389\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4498/4749 [01:46<00:04, 60.75it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 149.49it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5515\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 149.33it/s]\u001b[A\n",
      " 11%|█▏        | 46/403 [00:00<00:02, 150.26it/s]\u001b[A\n",
      " 14%|█▍        | 56/403 [00:00<00:02, 121.98it/s]\u001b[A\n",
      " 18%|█▊        | 71/403 [00:00<00:02, 128.89it/s]\u001b[A\n",
      " 21%|██▏       | 86/403 [00:00<00:02, 133.70it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:02, 137.19it/s]\u001b[A\n",
      " 29%|██▉       | 116/403 [00:00<00:02, 139.63it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:00<00:01, 142.96it/s]\u001b[A\n",
      " 36%|███▋      | 147/403 [00:01<00:01, 143.68it/s]\u001b[A\n",
      " 40%|████      | 163/403 [00:01<00:01, 146.49it/s]\u001b[A\n",
      " 44%|████▍     | 179/403 [00:01<00:01, 148.85it/s]\u001b[A\n",
      " 48%|████▊     | 194/403 [00:01<00:01, 147.75it/s]\u001b[A\n",
      " 52%|█████▏    | 209/403 [00:01<00:01, 147.71it/s]\u001b[A\n",
      " 56%|█████▌    | 224/403 [00:01<00:01, 147.40it/s]\u001b[A\n",
      " 95%|█████████▍| 4498/4749 [01:58<00:04, 60.75it/s][A\n",
      " 58%|█████▊    | 233/403 [00:11<00:59,  2.85it/s] \u001b[A\n",
      " 62%|██████▏   | 250/403 [00:12<00:37,  4.04it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:12<00:23,  5.72it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:12<00:14,  8.05it/s]\u001b[A\n",
      " 75%|███████▍  | 302/403 [00:12<00:08, 11.27it/s]\u001b[A\n",
      " 79%|███████▉  | 319/403 [00:12<00:05, 15.65it/s]\u001b[A\n",
      " 84%|████████▎ | 337/403 [00:12<00:03, 21.51it/s]\u001b[A\n",
      " 88%|████████▊ | 355/403 [00:12<00:01, 29.18it/s]\u001b[A\n",
      " 92%|█████████▏| 372/403 [00:12<00:00, 38.78it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:12<00:00, 31.14it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 95%|█████████▍| 4500/4749 [02:01<09:12,  2.22s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26428418574791357\n",
      "f1: 0.7257741580767577\n",
      "Test Loss: 0.008118, Acc: 0.750892\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [02:05<00:00, 37.98it/s]\n",
      "  4%|▍         | 16/403 [00:00<00:02, 158.23it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 162.49it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27006205863471006\n",
      "f1: 0.6952758609296258\n",
      "Test Loss: 0.007911, Acc: 0.756901\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 498/4749 [00:08<01:07, 63.22it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  2%|▏         | 7/403 [00:00<00:05, 68.36it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4396\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  6%|▌         | 23/403 [00:00<00:04, 82.32it/s]\u001b[A\n",
      " 10%|▉         | 39/403 [00:00<00:03, 95.87it/s]\u001b[A\n",
      " 14%|█▎        | 55/403 [00:00<00:03, 108.51it/s]\u001b[A\n",
      " 18%|█▊        | 71/403 [00:00<00:02, 119.63it/s]\u001b[A\n",
      " 22%|██▏       | 87/403 [00:00<00:02, 128.37it/s]\u001b[A\n",
      " 26%|██▌       | 103/403 [00:00<00:02, 136.11it/s]\u001b[A\n",
      " 30%|██▉       | 119/403 [00:00<00:02, 140.75it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 146.77it/s]\u001b[A\n",
      " 38%|███▊      | 153/403 [00:01<00:01, 150.62it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 151.70it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 154.82it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 156.70it/s]\u001b[A\n",
      " 54%|█████▍    | 219/403 [00:01<00:01, 155.19it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:01, 157.05it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:00, 158.13it/s]\u001b[A\n",
      " 67%|██████▋   | 270/403 [00:01<00:00, 159.22it/s]\u001b[A\n",
      " 71%|███████   | 286/403 [00:01<00:00, 157.84it/s]\u001b[A\n",
      " 75%|███████▌  | 303/403 [00:01<00:00, 159.33it/s]\u001b[A\n",
      " 79%|███████▉  | 320/403 [00:02<00:00, 160.07it/s]\u001b[A\n",
      " 84%|████████▎ | 337/403 [00:02<00:00, 158.65it/s]\u001b[A\n",
      " 88%|████████▊ | 354/403 [00:02<00:00, 159.32it/s]\u001b[A\n",
      " 92%|█████████▏| 371/403 [00:02<00:00, 160.52it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 155.29it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27626792210571366\n",
      "f1: 0.7077975808341841\n",
      "Test Loss: 0.007993, Acc: 0.757095\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 993/4749 [00:20<01:01, 60.85it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 11/403 [00:00<00:04, 91.29it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5211\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:03, 104.89it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:03, 117.38it/s]\u001b[A\n",
      " 15%|█▌        | 62/403 [00:00<00:02, 127.82it/s]\u001b[A\n",
      " 20%|█▉        | 79/403 [00:00<00:02, 136.85it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:02, 144.95it/s]\u001b[A\n",
      " 28%|██▊       | 113/403 [00:00<00:01, 149.89it/s]\u001b[A\n",
      " 32%|███▏      | 129/403 [00:00<00:01, 152.22it/s]\u001b[A\n",
      " 36%|███▌      | 146/403 [00:00<00:01, 155.00it/s]\u001b[A\n",
      " 40%|████      | 163/403 [00:01<00:01, 157.82it/s]\u001b[A\n",
      " 45%|████▍     | 180/403 [00:01<00:01, 158.91it/s]\u001b[A\n",
      " 49%|████▉     | 197/403 [00:01<00:01, 160.57it/s]\u001b[A\n",
      " 53%|█████▎    | 214/403 [00:01<00:01, 161.44it/s]\u001b[A\n",
      " 57%|█████▋    | 231/403 [00:01<00:01, 162.13it/s]\u001b[A\n",
      " 62%|██████▏   | 248/403 [00:01<00:00, 163.82it/s]\u001b[A\n",
      " 66%|██████▌   | 265/403 [00:01<00:00, 162.85it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 165.16it/s]\u001b[A\n",
      " 75%|███████▍  | 301/403 [00:01<00:00, 166.86it/s]\u001b[A\n",
      " 79%|███████▉  | 319/403 [00:01<00:00, 168.56it/s]\u001b[A\n",
      " 84%|████████▎ | 337/403 [00:02<00:00, 169.65it/s]\u001b[A\n",
      " 88%|████████▊ | 355/403 [00:02<00:00, 170.41it/s]\u001b[A\n",
      " 93%|█████████▎| 373/403 [00:02<00:00, 169.70it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 162.13it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 21%|██        | 1000/4749 [00:24<11:51,  5.27it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26086026107425636\n",
      "f1: 0.7241806805791511\n",
      "Test Loss: 0.008110, Acc: 0.749147\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 31%|███▏      | 1495/4749 [00:32<00:53, 60.96it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 142.48it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4845\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 143.81it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:02, 143.43it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 143.75it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 145.44it/s]\u001b[A\n",
      " 22%|██▏       | 88/403 [00:00<00:02, 118.96it/s]\u001b[A\n",
      " 26%|██▌       | 103/403 [00:00<00:02, 125.92it/s]\u001b[A\n",
      " 29%|██▉       | 118/403 [00:00<00:02, 131.91it/s]\u001b[A\n",
      " 33%|███▎      | 133/403 [00:00<00:01, 136.62it/s]\u001b[A\n",
      " 37%|███▋      | 148/403 [00:01<00:01, 139.27it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 143.19it/s]\u001b[A\n",
      " 45%|████▍     | 180/403 [00:01<00:01, 146.05it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 137.20it/s]\u001b[A\n",
      " 52%|█████▏    | 211/403 [00:01<00:01, 141.16it/s]\u001b[A\n",
      " 56%|█████▌    | 226/403 [00:01<00:01, 143.62it/s]\u001b[A\n",
      " 60%|██████    | 242/403 [00:01<00:01, 145.63it/s]\u001b[A\n",
      " 64%|██████▍   | 258/403 [00:01<00:00, 147.34it/s]\u001b[A\n",
      " 68%|██████▊   | 273/403 [00:01<00:00, 147.30it/s]\u001b[A\n",
      " 72%|███████▏  | 289/403 [00:02<00:00, 148.68it/s]\u001b[A\n",
      " 76%|███████▌  | 305/403 [00:02<00:00, 149.68it/s]\u001b[A\n",
      " 80%|███████▉  | 321/403 [00:02<00:00, 150.85it/s]\u001b[A\n",
      " 84%|████████▎ | 337/403 [00:02<00:00, 151.25it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 151.92it/s]\u001b[A\n",
      " 92%|█████████▏| 369/403 [00:02<00:00, 132.77it/s]\u001b[A\n",
      " 96%|█████████▌| 385/403 [00:02<00:00, 138.78it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 142.13it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2781938797346458\n",
      "f1: 0.7040322827183608\n",
      "Test Loss: 0.007918, Acc: 0.756436\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1994/4749 [00:45<00:45, 61.04it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 14/403 [00:00<00:02, 138.93it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5255\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 29/403 [00:00<00:02, 141.09it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 141.97it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 144.50it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 145.29it/s]\u001b[A\n",
      " 22%|██▏       | 90/403 [00:00<00:02, 146.55it/s]\u001b[A\n",
      " 26%|██▌       | 105/403 [00:00<00:02, 147.49it/s]\u001b[A\n",
      " 30%|███       | 121/403 [00:00<00:01, 148.58it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 146.41it/s]\u001b[A\n",
      " 37%|███▋      | 151/403 [00:01<00:01, 147.25it/s]\u001b[A\n",
      " 41%|████      | 166/403 [00:01<00:01, 147.97it/s]\u001b[A\n",
      " 45%|████▍     | 181/403 [00:01<00:01, 147.85it/s]\u001b[A\n",
      " 49%|████▉     | 197/403 [00:01<00:01, 148.96it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 148.77it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 149.68it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 150.97it/s]\u001b[A\n",
      " 65%|██████▍   | 260/403 [00:01<00:00, 152.59it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 152.25it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:01<00:00, 153.04it/s]\u001b[A\n",
      " 76%|███████▋  | 308/403 [00:02<00:00, 153.45it/s]\u001b[A\n",
      " 80%|████████  | 324/403 [00:02<00:00, 153.06it/s]\u001b[A\n",
      " 84%|████████▍ | 340/403 [00:02<00:00, 150.71it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 133.08it/s]\u001b[A\n",
      " 92%|█████████▏| 372/403 [00:02<00:00, 139.35it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 147.73it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2001/4749 [00:50<09:19,  4.91it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27220201155574575\n",
      "f1: 0.7247222307141097\n",
      "Test Loss: 0.008075, Acc: 0.754885\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 2497/4749 [00:58<00:37, 60.02it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 146.92it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5243\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:02, 126.47it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 133.32it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 138.59it/s]\u001b[A\n",
      " 19%|█▉        | 76/403 [00:00<00:02, 143.21it/s]\u001b[A\n",
      " 23%|██▎       | 92/403 [00:00<00:02, 147.30it/s]\u001b[A\n",
      " 27%|██▋       | 108/403 [00:00<00:01, 149.13it/s]\u001b[A\n",
      " 31%|███       | 124/403 [00:00<00:01, 151.20it/s]\u001b[A\n",
      " 35%|███▍      | 140/403 [00:00<00:01, 153.06it/s]\u001b[A\n",
      " 39%|███▊      | 156/403 [00:01<00:01, 154.21it/s]\u001b[A\n",
      " 43%|████▎     | 172/403 [00:01<00:01, 154.29it/s]\u001b[A\n",
      " 47%|████▋     | 188/403 [00:01<00:01, 155.80it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 156.45it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 156.88it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:01, 155.93it/s]\u001b[A\n",
      " 63%|██████▎   | 252/403 [00:01<00:00, 155.81it/s]\u001b[A\n",
      " 67%|██████▋   | 268/403 [00:01<00:00, 156.35it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 156.21it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:01<00:00, 157.05it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:02<00:00, 157.52it/s]\u001b[A\n",
      " 82%|████████▏ | 332/403 [00:02<00:00, 157.50it/s]\u001b[A\n",
      " 86%|████████▋ | 348/403 [00:02<00:00, 156.30it/s]\u001b[A\n",
      " 90%|█████████ | 364/403 [00:02<00:00, 156.85it/s]\u001b[A\n",
      " 94%|█████████▍| 380/403 [00:02<00:00, 156.81it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 152.88it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.26599614808474215\n",
      "f1: 0.7250163430026598\n",
      "Test Loss: 0.008046, Acc: 0.750233\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2993/4749 [01:10<00:23, 73.47it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 13/403 [00:00<00:03, 107.19it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4917\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:03, 116.12it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 126.02it/s]\u001b[A\n",
      " 15%|█▌        | 61/403 [00:00<00:02, 134.67it/s]\u001b[A\n",
      " 19%|█▉        | 77/403 [00:00<00:02, 141.30it/s]\u001b[A\n",
      " 23%|██▎       | 94/403 [00:00<00:02, 146.74it/s]\u001b[A\n",
      " 28%|██▊       | 111/403 [00:00<00:01, 150.61it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 147.55it/s]\u001b[A\n",
      " 35%|███▌      | 142/403 [00:00<00:01, 146.63it/s]\u001b[A\n",
      " 39%|███▉      | 159/403 [00:01<00:01, 150.60it/s]\u001b[A\n",
      " 43%|████▎     | 175/403 [00:01<00:01, 151.61it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 154.22it/s]\u001b[A\n",
      " 52%|█████▏    | 209/403 [00:01<00:01, 156.09it/s]\u001b[A\n",
      " 56%|█████▌    | 226/403 [00:01<00:01, 157.36it/s]\u001b[A\n",
      " 60%|██████    | 243/403 [00:01<00:01, 159.62it/s]\u001b[A\n",
      " 65%|██████▍   | 260/403 [00:01<00:00, 160.03it/s]\u001b[A\n",
      " 69%|██████▊   | 277/403 [00:01<00:00, 160.40it/s]\u001b[A\n",
      " 73%|███████▎  | 294/403 [00:01<00:00, 160.18it/s]\u001b[A\n",
      " 77%|███████▋  | 311/403 [00:02<00:00, 161.36it/s]\u001b[A\n",
      " 81%|████████▏ | 328/403 [00:02<00:00, 162.44it/s]\u001b[A\n",
      " 86%|████████▌ | 345/403 [00:02<00:00, 162.08it/s]\u001b[A\n",
      " 90%|████████▉ | 362/403 [00:02<00:00, 163.24it/s]\u001b[A\n",
      " 94%|█████████▍| 379/403 [00:02<00:00, 164.04it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 156.27it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 63%|██████▎   | 3001/4749 [01:14<04:57,  5.87it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2687780868820886\n",
      "f1: 0.6903323380853952\n",
      "Test Loss: 0.007988, Acc: 0.754730\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3498/4749 [01:22<00:19, 63.65it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 159.93it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3679\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 159.11it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 160.15it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 160.42it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:01, 161.67it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:01, 161.84it/s]\u001b[A\n",
      " 29%|██▉       | 118/403 [00:00<00:01, 162.23it/s]\u001b[A\n",
      " 33%|███▎      | 135/403 [00:00<00:01, 163.51it/s]\u001b[A\n",
      " 37%|███▋      | 151/403 [00:00<00:01, 162.19it/s]\u001b[A\n",
      " 42%|████▏     | 168/403 [00:01<00:01, 161.91it/s]\u001b[A\n",
      " 46%|████▌     | 185/403 [00:01<00:01, 162.19it/s]\u001b[A\n",
      " 50%|████▉     | 201/403 [00:01<00:01, 161.47it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 162.89it/s]\u001b[A\n",
      " 58%|█████▊    | 235/403 [00:01<00:01, 162.88it/s]\u001b[A\n",
      " 63%|██████▎   | 252/403 [00:01<00:00, 161.64it/s]\u001b[A\n",
      " 67%|██████▋   | 269/403 [00:01<00:00, 144.43it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 142.06it/s]\u001b[A\n",
      " 74%|███████▍  | 299/403 [00:01<00:00, 137.15it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:02<00:00, 144.97it/s]\u001b[A\n",
      " 83%|████████▎ | 333/403 [00:02<00:00, 150.50it/s]\u001b[A\n",
      " 87%|████████▋ | 349/403 [00:02<00:00, 152.83it/s]\u001b[A\n",
      " 91%|█████████ | 366/403 [00:02<00:00, 155.64it/s]\u001b[A\n",
      " 95%|█████████▌| 383/403 [00:02<00:00, 157.81it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 156.90it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27241600684784933\n",
      "f1: 0.7057105091451441\n",
      "Test Loss: 0.007981, Acc: 0.756630\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3993/4749 [01:34<00:12, 61.30it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 145.94it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.571\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 146.56it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:02, 146.57it/s]\u001b[A\n",
      " 15%|█▌        | 61/403 [00:00<00:02, 147.72it/s]\u001b[A\n",
      " 19%|█▉        | 76/403 [00:00<00:02, 147.19it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 147.42it/s]\u001b[A\n",
      " 27%|██▋       | 107/403 [00:00<00:01, 148.51it/s]\u001b[A\n",
      " 31%|███       | 123/403 [00:00<00:01, 149.91it/s]\u001b[A\n",
      " 34%|███▍      | 138/403 [00:00<00:01, 149.07it/s]\u001b[A\n",
      " 38%|███▊      | 154/403 [00:01<00:01, 149.58it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 148.43it/s]\u001b[A\n",
      " 46%|████▌     | 184/403 [00:01<00:01, 148.73it/s]\u001b[A\n",
      " 50%|████▉     | 200/403 [00:01<00:01, 149.83it/s]\u001b[A\n",
      " 54%|█████▎    | 216/403 [00:01<00:01, 150.23it/s]\u001b[A\n",
      " 58%|█████▊    | 232/403 [00:01<00:01, 151.01it/s]\u001b[A\n",
      " 62%|██████▏   | 248/403 [00:01<00:01, 150.96it/s]\u001b[A\n",
      " 66%|██████▌   | 264/403 [00:01<00:00, 150.55it/s]\u001b[A\n",
      " 69%|██████▉   | 280/403 [00:01<00:00, 150.73it/s]\u001b[A\n",
      " 73%|███████▎  | 296/403 [00:01<00:00, 151.19it/s]\u001b[A\n",
      " 77%|███████▋  | 312/403 [00:02<00:00, 151.77it/s]\u001b[A\n",
      " 81%|████████▏ | 328/403 [00:02<00:00, 152.45it/s]\u001b[A\n",
      " 85%|████████▌ | 344/403 [00:02<00:00, 152.50it/s]\u001b[A\n",
      " 89%|████████▉ | 360/403 [00:02<00:00, 152.59it/s]\u001b[A\n",
      " 93%|█████████▎| 376/403 [00:02<00:00, 152.53it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 150.64it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4000/4749 [01:38<02:30,  4.97it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27070404451102076\n",
      "f1: 0.6911665800495326\n",
      "Test Loss: 0.008021, Acc: 0.755350\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4495/4749 [01:47<00:03, 67.54it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 13/403 [00:00<00:03, 122.34it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5388\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  6%|▋         | 26/403 [00:00<00:03, 123.40it/s]\u001b[A\n",
      " 10%|█         | 42/403 [00:00<00:02, 131.62it/s]\u001b[A\n",
      " 14%|█▍        | 58/403 [00:00<00:02, 138.09it/s]\u001b[A\n",
      " 18%|█▊        | 73/403 [00:00<00:02, 141.35it/s]\u001b[A\n",
      " 22%|██▏       | 89/403 [00:00<00:02, 146.24it/s]\u001b[A\n",
      " 26%|██▌       | 105/403 [00:00<00:01, 149.47it/s]\u001b[A\n",
      " 30%|███       | 121/403 [00:00<00:01, 152.06it/s]\u001b[A\n",
      " 34%|███▍      | 137/403 [00:00<00:01, 153.40it/s]\u001b[A\n",
      " 38%|███▊      | 153/403 [00:01<00:01, 154.25it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 153.45it/s]\u001b[A\n",
      " 46%|████▌     | 185/403 [00:01<00:01, 154.84it/s]\u001b[A\n",
      " 50%|████▉     | 201/403 [00:01<00:01, 155.71it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 157.38it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 157.29it/s]\u001b[A\n",
      " 62%|██████▏   | 251/403 [00:01<00:00, 158.93it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 159.04it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 159.16it/s]\u001b[A\n",
      " 74%|███████▍  | 299/403 [00:01<00:00, 156.92it/s]\u001b[A\n",
      " 78%|███████▊  | 315/403 [00:02<00:00, 157.28it/s]\u001b[A\n",
      " 82%|████████▏ | 331/403 [00:02<00:00, 136.89it/s]\u001b[A\n",
      " 86%|████████▌ | 347/403 [00:02<00:00, 142.82it/s]\u001b[A\n",
      " 90%|█████████ | 363/403 [00:02<00:00, 147.19it/s]\u001b[A\n",
      " 94%|█████████▍| 379/403 [00:02<00:00, 148.84it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 151.12it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2689920821741922\n",
      "f1: 0.7187186591851784\n",
      "Test Loss: 0.008194, Acc: 0.751279\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [01:55<00:00, 41.22it/s]\n",
      "  4%|▍         | 16/403 [00:00<00:02, 159.04it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 159.61it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/4749 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26171624224267065\n",
      "f1: 0.6701546200904274\n",
      "Test Loss: 0.008027, Acc: 0.754381\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 11%|█         | 499/4749 [00:08<01:11, 59.45it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 152.28it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4331\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 152.00it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 152.39it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 135.07it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 140.31it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 144.53it/s]\u001b[A\n",
      " 26%|██▋       | 106/403 [00:00<00:02, 145.49it/s]\u001b[A\n",
      " 30%|███       | 122/403 [00:00<00:01, 148.87it/s]\u001b[A\n",
      " 34%|███▍      | 138/403 [00:00<00:01, 150.62it/s]\u001b[A\n",
      " 38%|███▊      | 154/403 [00:01<00:01, 152.38it/s]\u001b[A\n",
      " 42%|████▏     | 170/403 [00:01<00:01, 153.58it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 153.84it/s]\u001b[A\n",
      " 50%|█████     | 202/403 [00:01<00:01, 152.35it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 153.97it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 153.67it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:00, 154.88it/s]\u001b[A\n",
      " 66%|██████▌   | 266/403 [00:01<00:00, 155.74it/s]\u001b[A\n",
      " 70%|██████▉   | 282/403 [00:01<00:00, 154.93it/s]\u001b[A\n",
      " 74%|███████▍  | 298/403 [00:01<00:00, 156.22it/s]\u001b[A\n",
      " 78%|███████▊  | 314/403 [00:02<00:00, 156.94it/s]\u001b[A\n",
      " 82%|████████▏ | 330/403 [00:02<00:00, 154.18it/s]\u001b[A\n",
      " 86%|████████▌ | 346/403 [00:02<00:00, 154.53it/s]\u001b[A\n",
      " 90%|████████▉ | 362/403 [00:02<00:00, 155.28it/s]\u001b[A\n",
      " 94%|█████████▍| 378/403 [00:02<00:00, 155.99it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 153.10it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2696340680505029\n",
      "f1: 0.7037661473035012\n",
      "Test Loss: 0.008023, Acc: 0.754924\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 995/4749 [00:21<01:02, 59.72it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 143.25it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5015\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 144.54it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:02, 144.91it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 145.41it/s]\u001b[A\n",
      " 19%|█▉        | 76/403 [00:00<00:02, 147.24it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 146.55it/s]\u001b[A\n",
      " 26%|██▌       | 104/403 [00:00<00:02, 124.48it/s]\u001b[A\n",
      " 30%|██▉       | 120/403 [00:00<00:02, 131.89it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 137.02it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:01<00:01, 141.59it/s]\u001b[A\n",
      " 42%|████▏     | 168/403 [00:01<00:01, 144.33it/s]\u001b[A\n",
      " 46%|████▌     | 184/403 [00:01<00:01, 147.24it/s]\u001b[A\n",
      " 50%|████▉     | 200/403 [00:01<00:01, 149.03it/s]\u001b[A\n",
      " 53%|█████▎    | 215/403 [00:01<00:01, 147.82it/s]\u001b[A\n",
      " 57%|█████▋    | 231/403 [00:01<00:01, 150.05it/s]\u001b[A\n",
      " 61%|██████▏   | 247/403 [00:01<00:01, 151.64it/s]\u001b[A\n",
      " 65%|██████▌   | 263/403 [00:01<00:00, 152.50it/s]\u001b[A\n",
      " 69%|██████▉   | 279/403 [00:01<00:00, 152.86it/s]\u001b[A\n",
      " 73%|███████▎  | 295/403 [00:02<00:00, 142.29it/s]\u001b[A\n",
      " 77%|███████▋  | 311/403 [00:02<00:00, 146.47it/s]\u001b[A\n",
      " 81%|████████  | 327/403 [00:02<00:00, 148.54it/s]\u001b[A\n",
      " 85%|████████▌ | 343/403 [00:02<00:00, 151.15it/s]\u001b[A\n",
      " 89%|████████▉ | 359/403 [00:02<00:00, 134.79it/s]\u001b[A\n",
      " 93%|█████████▎| 375/403 [00:02<00:00, 140.63it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 145.05it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.25551037877166705\n",
      "f1: 0.7283773636544998\n",
      "Test Loss: 0.008161, Acc: 0.748023\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 31%|███▏      | 1495/4749 [00:33<00:52, 62.20it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 155.47it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4801\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 153.47it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 154.31it/s]\u001b[A\n",
      " 16%|█▌        | 63/403 [00:00<00:02, 154.85it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 156.65it/s]\u001b[A\n",
      " 24%|██▍       | 97/403 [00:00<00:01, 158.69it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:02, 136.50it/s]\u001b[A\n",
      " 32%|███▏      | 128/403 [00:00<00:01, 142.14it/s]\u001b[A\n",
      " 36%|███▌      | 145/403 [00:00<00:01, 147.58it/s]\u001b[A\n",
      " 40%|████      | 162/403 [00:01<00:01, 151.82it/s]\u001b[A\n",
      " 44%|████▍     | 179/403 [00:01<00:01, 154.43it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 155.96it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 158.30it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 158.55it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 147.49it/s]\u001b[A\n",
      " 65%|██████▍   | 261/403 [00:01<00:00, 151.75it/s]\u001b[A\n",
      " 69%|██████▉   | 278/403 [00:01<00:00, 155.03it/s]\u001b[A\n",
      " 73%|███████▎  | 295/403 [00:01<00:00, 159.02it/s]\u001b[A\n",
      " 77%|███████▋  | 312/403 [00:02<00:00, 161.42it/s]\u001b[A\n",
      " 82%|████████▏ | 329/403 [00:02<00:00, 160.43it/s]\u001b[A\n",
      " 86%|████████▌ | 346/403 [00:02<00:00, 161.02it/s]\u001b[A\n",
      " 90%|█████████ | 363/403 [00:02<00:00, 162.60it/s]\u001b[A\n",
      " 94%|█████████▍| 380/403 [00:02<00:00, 162.41it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 155.93it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 32%|███▏      | 1502/4749 [00:38<10:37,  5.09it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28054782794778516\n",
      "f1: 0.7119251424087811\n",
      "Test Loss: 0.007912, Acc: 0.757832\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1994/4749 [00:46<00:45, 60.53it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 14/403 [00:00<00:02, 136.38it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5142\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:02, 136.77it/s]\u001b[A\n",
      " 11%|█         | 43/403 [00:00<00:02, 138.12it/s]\u001b[A\n",
      " 14%|█▍        | 58/403 [00:00<00:02, 141.03it/s]\u001b[A\n",
      " 18%|█▊        | 73/403 [00:00<00:02, 141.73it/s]\u001b[A\n",
      " 22%|██▏       | 88/403 [00:00<00:02, 141.90it/s]\u001b[A\n",
      " 26%|██▌       | 103/403 [00:00<00:02, 141.85it/s]\u001b[A\n",
      " 29%|██▉       | 118/403 [00:00<00:02, 142.18it/s]\u001b[A\n",
      " 33%|███▎      | 133/403 [00:00<00:01, 142.47it/s]\u001b[A\n",
      " 37%|███▋      | 148/403 [00:01<00:01, 142.55it/s]\u001b[A\n",
      " 40%|████      | 163/403 [00:01<00:01, 143.30it/s]\u001b[A\n",
      " 44%|████▍     | 178/403 [00:01<00:01, 143.74it/s]\u001b[A\n",
      " 48%|████▊     | 193/403 [00:01<00:01, 145.18it/s]\u001b[A\n",
      " 52%|█████▏    | 208/403 [00:01<00:01, 145.91it/s]\u001b[A\n",
      " 55%|█████▌    | 223/403 [00:01<00:01, 143.70it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 145.47it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:01, 145.39it/s]\u001b[A\n",
      " 67%|██████▋   | 268/403 [00:01<00:00, 143.72it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 142.38it/s]\u001b[A\n",
      " 74%|███████▍  | 298/403 [00:02<00:00, 142.84it/s]\u001b[A\n",
      " 78%|███████▊  | 313/403 [00:02<00:00, 142.78it/s]\u001b[A\n",
      " 81%|████████▏ | 328/403 [00:02<00:00, 142.75it/s]\u001b[A\n",
      " 85%|████████▌ | 343/403 [00:02<00:00, 143.41it/s]\u001b[A\n",
      " 89%|████████▉ | 358/403 [00:02<00:00, 143.22it/s]\u001b[A\n",
      " 93%|█████████▎| 373/403 [00:02<00:00, 128.20it/s]\u001b[A\n",
      " 96%|█████████▋| 388/403 [00:02<00:00, 133.53it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 141.23it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2001/4749 [00:50<09:47,  4.67it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26193023753477424\n",
      "f1: 0.7301966811275704\n",
      "Test Loss: 0.008212, Acc: 0.752210\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 2497/4749 [00:58<00:36, 61.77it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 144.36it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5242\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 146.76it/s]\u001b[A\n",
      " 11%|█▏        | 46/403 [00:00<00:02, 147.59it/s]\u001b[A\n",
      " 15%|█▌        | 62/403 [00:00<00:02, 149.12it/s]\u001b[A\n",
      " 19%|█▉        | 78/403 [00:00<00:02, 149.93it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 139.82it/s]\u001b[A\n",
      " 26%|██▋       | 106/403 [00:00<00:02, 141.69it/s]\u001b[A\n",
      " 30%|███       | 122/403 [00:00<00:01, 145.34it/s]\u001b[A\n",
      " 34%|███▍      | 138/403 [00:00<00:01, 147.38it/s]\u001b[A\n",
      " 38%|███▊      | 154/403 [00:01<00:01, 149.93it/s]\u001b[A\n",
      " 42%|████▏     | 170/403 [00:01<00:01, 151.87it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 153.65it/s]\u001b[A\n",
      " 50%|█████     | 202/403 [00:01<00:01, 153.68it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 154.55it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 154.23it/s]\u001b[A\n",
      " 62%|██████▏   | 251/403 [00:01<00:00, 156.34it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 156.20it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 156.10it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:01<00:00, 157.63it/s]\u001b[A\n",
      " 79%|███████▊  | 317/403 [00:02<00:00, 158.85it/s]\u001b[A\n",
      " 83%|████████▎ | 333/403 [00:02<00:00, 158.59it/s]\u001b[A\n",
      " 87%|████████▋ | 349/403 [00:02<00:00, 158.34it/s]\u001b[A\n",
      " 91%|█████████ | 366/403 [00:02<00:00, 159.59it/s]\u001b[A\n",
      " 95%|█████████▌| 383/403 [00:02<00:00, 161.61it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 154.24it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 53%|█████▎    | 2497/4749 [01:10<00:36, 61.77it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 53%|█████▎    | 2500/4749 [01:11<51:27,  1.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26514016691632786\n",
      "f1: 0.7233596859426217\n",
      "Test Loss: 0.008029, Acc: 0.751939\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2999/4749 [01:19<00:23, 74.05it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 154.10it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4949\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 150.73it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 152.77it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 155.37it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 154.12it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:01, 154.33it/s]\u001b[A\n",
      " 27%|██▋       | 110/403 [00:00<00:02, 143.35it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 148.17it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 150.28it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 154.24it/s]\u001b[A\n",
      " 44%|████▍     | 177/403 [00:01<00:01, 156.40it/s]\u001b[A\n",
      " 48%|████▊     | 193/403 [00:01<00:01, 156.72it/s]\u001b[A\n",
      " 52%|█████▏    | 210/403 [00:01<00:01, 157.75it/s]\u001b[A\n",
      " 56%|█████▋    | 227/403 [00:01<00:01, 159.28it/s]\u001b[A\n",
      " 60%|██████    | 243/403 [00:01<00:01, 157.96it/s]\u001b[A\n",
      " 64%|██████▍   | 259/403 [00:01<00:00, 157.38it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 159.89it/s]\u001b[A\n",
      " 73%|███████▎  | 293/403 [00:01<00:00, 161.24it/s]\u001b[A\n",
      " 77%|███████▋  | 310/403 [00:01<00:00, 161.35it/s]\u001b[A\n",
      " 81%|████████  | 327/403 [00:02<00:00, 161.54it/s]\u001b[A\n",
      " 85%|████████▌ | 344/403 [00:02<00:00, 162.21it/s]\u001b[A\n",
      " 90%|████████▉ | 361/403 [00:02<00:00, 162.22it/s]\u001b[A\n",
      " 94%|█████████▍| 378/403 [00:02<00:00, 164.00it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 157.55it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27562593622940296\n",
      "f1: 0.6942110876735393\n",
      "Test Loss: 0.007984, Acc: 0.756397\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3492/4749 [01:31<00:19, 63.63it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 161.23it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3679\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 160.54it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 158.77it/s]\u001b[A\n",
      " 16%|█▌        | 65/403 [00:00<00:02, 158.13it/s]\u001b[A\n",
      " 20%|██        | 82/403 [00:00<00:02, 159.12it/s]\u001b[A\n",
      " 25%|██▍       | 99/403 [00:00<00:01, 161.41it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 160.47it/s]\u001b[A\n",
      " 33%|███▎      | 131/403 [00:00<00:01, 158.19it/s]\u001b[A\n",
      " 37%|███▋      | 148/403 [00:00<00:01, 159.51it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 158.77it/s]\u001b[A\n",
      " 45%|████▍     | 181/403 [00:01<00:01, 160.15it/s]\u001b[A\n",
      " 49%|████▉     | 197/403 [00:01<00:01, 159.97it/s]\u001b[A\n",
      " 53%|█████▎    | 214/403 [00:01<00:01, 161.54it/s]\u001b[A\n",
      " 57%|█████▋    | 231/403 [00:01<00:01, 160.21it/s]\u001b[A\n",
      " 61%|██████▏   | 247/403 [00:01<00:00, 160.12it/s]\u001b[A\n",
      " 66%|██████▌   | 264/403 [00:01<00:00, 160.53it/s]\u001b[A\n",
      " 70%|██████▉   | 281/403 [00:01<00:00, 161.25it/s]\u001b[A\n",
      " 74%|███████▍  | 298/403 [00:01<00:00, 162.29it/s]\u001b[A\n",
      " 78%|███████▊  | 315/403 [00:01<00:00, 160.75it/s]\u001b[A\n",
      " 82%|████████▏ | 332/403 [00:02<00:00, 161.17it/s]\u001b[A\n",
      " 87%|████████▋ | 349/403 [00:02<00:00, 162.77it/s]\u001b[A\n",
      " 91%|█████████ | 366/403 [00:02<00:00, 162.69it/s]\u001b[A\n",
      " 95%|█████████▌| 383/403 [00:02<00:00, 162.08it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 160.67it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▎  | 3500/4749 [01:35<03:31,  5.90it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2726300021399529\n",
      "f1: 0.686246386706484\n",
      "Test Loss: 0.008050, Acc: 0.757212\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3993/4749 [01:42<00:11, 63.31it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 159.27it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.6238\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 157.90it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 157.06it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 157.90it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 157.73it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:01, 156.33it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:01, 155.79it/s]\u001b[A\n",
      " 32%|███▏      | 128/403 [00:00<00:01, 155.87it/s]\u001b[A\n",
      " 36%|███▌      | 145/403 [00:00<00:01, 157.90it/s]\u001b[A\n",
      " 40%|███▉      | 161/403 [00:01<00:01, 158.28it/s]\u001b[A\n",
      " 44%|████▍     | 178/403 [00:01<00:01, 158.96it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 159.55it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 160.50it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 158.37it/s]\u001b[A\n",
      " 61%|██████    | 245/403 [00:01<00:00, 159.20it/s]\u001b[A\n",
      " 65%|██████▍   | 261/403 [00:01<00:00, 159.35it/s]\u001b[A\n",
      " 69%|██████▉   | 278/403 [00:01<00:00, 160.28it/s]\u001b[A\n",
      " 73%|███████▎  | 294/403 [00:01<00:00, 159.60it/s]\u001b[A\n",
      " 77%|███████▋  | 310/403 [00:01<00:00, 159.07it/s]\u001b[A\n",
      " 81%|████████  | 327/403 [00:02<00:00, 160.34it/s]\u001b[A\n",
      " 85%|████████▌ | 344/403 [00:02<00:00, 162.23it/s]\u001b[A\n",
      " 90%|████████▉ | 361/403 [00:02<00:00, 161.64it/s]\u001b[A\n",
      " 94%|█████████▍| 378/403 [00:02<00:00, 161.88it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 159.52it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.26578215279263856\n",
      "f1: 0.6907368910899918\n",
      "Test Loss: 0.008104, Acc: 0.754226\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4495/4749 [01:55<00:04, 60.87it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 145.25it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5319\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:02, 138.05it/s]\u001b[A\n",
      " 10%|█         | 42/403 [00:00<00:02, 136.29it/s]\u001b[A\n",
      " 14%|█▍        | 58/403 [00:00<00:02, 140.92it/s]\u001b[A\n",
      " 17%|█▋        | 69/403 [00:00<00:02, 116.79it/s]\u001b[A\n",
      " 21%|██        | 85/403 [00:00<00:02, 125.79it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:02, 133.39it/s]\u001b[A\n",
      " 29%|██▉       | 116/403 [00:00<00:02, 137.29it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:00<00:01, 140.04it/s]\u001b[A\n",
      " 37%|███▋      | 148/403 [00:01<00:01, 142.99it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 145.56it/s]\u001b[A\n",
      " 45%|████▍     | 180/403 [00:01<00:01, 147.59it/s]\u001b[A\n",
      " 49%|████▊     | 196/403 [00:01<00:01, 149.96it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 151.03it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 149.44it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 150.84it/s]\u001b[A\n",
      " 65%|██████▍   | 260/403 [00:01<00:00, 152.01it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 152.81it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:02<00:00, 150.50it/s]\u001b[A\n",
      " 76%|███████▋  | 308/403 [00:02<00:00, 150.35it/s]\u001b[A\n",
      " 80%|████████  | 324/403 [00:02<00:00, 150.47it/s]\u001b[A\n",
      " 84%|████████▍ | 340/403 [00:02<00:00, 151.51it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 150.96it/s]\u001b[A\n",
      " 92%|█████████▏| 372/403 [00:02<00:00, 151.43it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 146.63it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2636421998716028\n",
      "f1: 0.6793927628830321\n",
      "Test Loss: 0.008210, Acc: 0.754187\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [02:04<00:00, 38.25it/s]\n",
      "  4%|▍         | 16/403 [00:00<00:02, 155.77it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 154.52it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/4749 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.265568157500535\n",
      "f1: 0.6720765016185586\n",
      "Test Loss: 0.008031, Acc: 0.754691\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 493/4749 [00:08<01:08, 61.78it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 157.61it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4291\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 157.43it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 158.45it/s]\u001b[A\n",
      " 16%|█▌        | 65/403 [00:00<00:02, 158.80it/s]\u001b[A\n",
      " 20%|██        | 81/403 [00:00<00:02, 158.76it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:02, 149.31it/s]\u001b[A\n",
      " 28%|██▊       | 111/403 [00:00<00:01, 148.24it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 151.58it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 153.40it/s]\u001b[A\n",
      " 39%|███▉      | 159/403 [00:01<00:01, 154.88it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 158.27it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 157.90it/s]\u001b[A\n",
      " 52%|█████▏    | 209/403 [00:01<00:01, 160.38it/s]\u001b[A\n",
      " 56%|█████▌    | 225/403 [00:01<00:01, 158.82it/s]\u001b[A\n",
      " 60%|█████▉    | 241/403 [00:01<00:01, 157.95it/s]\u001b[A\n",
      " 64%|██████▍   | 258/403 [00:01<00:00, 159.29it/s]\u001b[A\n",
      " 68%|██████▊   | 274/403 [00:01<00:00, 159.28it/s]\u001b[A\n",
      " 72%|███████▏  | 290/403 [00:01<00:00, 157.97it/s]\u001b[A\n",
      " 76%|███████▌  | 306/403 [00:01<00:00, 158.26it/s]\u001b[A\n",
      " 80%|████████  | 323/403 [00:02<00:00, 158.93it/s]\u001b[A\n",
      " 84%|████████▍ | 340/403 [00:02<00:00, 159.88it/s]\u001b[A\n",
      " 89%|████████▊ | 357/403 [00:02<00:00, 160.34it/s]\u001b[A\n",
      " 93%|█████████▎| 374/403 [00:02<00:00, 160.26it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 157.94it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 11%|█         | 500/4749 [00:12<13:59,  5.06it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27134603038733146\n",
      "f1: 0.7105107971896014\n",
      "Test Loss: 0.007976, Acc: 0.756552\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 995/4749 [00:20<01:00, 62.51it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 167.26it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5017\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 164.93it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 164.65it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 165.19it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:01, 165.43it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:01, 165.65it/s]\u001b[A\n",
      " 29%|██▉       | 118/403 [00:00<00:01, 165.89it/s]\u001b[A\n",
      " 33%|███▎      | 134/403 [00:00<00:01, 163.33it/s]\u001b[A\n",
      " 37%|███▋      | 151/403 [00:00<00:01, 163.88it/s]\u001b[A\n",
      " 42%|████▏     | 168/403 [00:01<00:01, 164.20it/s]\u001b[A\n",
      " 46%|████▌     | 185/403 [00:01<00:01, 164.63it/s]\u001b[A\n",
      " 50%|█████     | 202/403 [00:01<00:01, 164.44it/s]\u001b[A\n",
      " 54%|█████▍    | 219/403 [00:01<00:01, 163.87it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:01, 163.57it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:00, 163.43it/s]\u001b[A\n",
      " 67%|██████▋   | 270/403 [00:01<00:00, 145.28it/s]\u001b[A\n",
      " 71%|███████   | 287/403 [00:01<00:00, 150.89it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:01<00:00, 154.93it/s]\u001b[A\n",
      " 80%|███████▉  | 321/403 [00:01<00:00, 158.19it/s]\u001b[A\n",
      " 84%|████████▍ | 338/403 [00:02<00:00, 158.98it/s]\u001b[A\n",
      " 88%|████████▊ | 355/403 [00:02<00:00, 159.74it/s]\u001b[A\n",
      " 92%|█████████▏| 372/403 [00:02<00:00, 162.48it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 161.55it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 21%|██        | 1002/4749 [00:24<11:58,  5.22it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25144446822169914\n",
      "f1: 0.7282601757564421\n",
      "Test Loss: 0.008185, Acc: 0.746627\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|███▏      | 1497/4749 [00:31<00:51, 63.51it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 164.71it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4776\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 163.80it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 164.75it/s]\u001b[A\n",
      " 17%|█▋        | 68/403 [00:00<00:02, 165.01it/s]\u001b[A\n",
      " 21%|██        | 85/403 [00:00<00:01, 164.95it/s]\u001b[A\n",
      " 25%|██▌       | 102/403 [00:00<00:01, 165.49it/s]\u001b[A\n",
      " 30%|██▉       | 119/403 [00:00<00:01, 166.56it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 165.64it/s]\u001b[A\n",
      " 38%|███▊      | 153/403 [00:00<00:01, 165.12it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 146.83it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 152.49it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 155.43it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 158.33it/s]\u001b[A\n",
      " 59%|█████▉    | 237/403 [00:01<00:01, 160.38it/s]\u001b[A\n",
      " 63%|██████▎   | 254/403 [00:01<00:00, 161.91it/s]\u001b[A\n",
      " 67%|██████▋   | 271/403 [00:01<00:00, 163.78it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 164.59it/s]\u001b[A\n",
      " 76%|███████▌  | 305/403 [00:01<00:00, 164.88it/s]\u001b[A\n",
      " 80%|███████▉  | 322/403 [00:01<00:00, 166.13it/s]\u001b[A\n",
      " 84%|████████▍ | 339/403 [00:02<00:00, 164.84it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 164.77it/s]\u001b[A\n",
      " 93%|█████████▎| 373/403 [00:02<00:00, 164.41it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 162.94it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27156002567943505\n",
      "f1: 0.6873041348645966\n",
      "Test Loss: 0.007990, Acc: 0.757987\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1994/4749 [00:43<00:43, 62.86it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 159.78it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5066\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 159.58it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 159.67it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 159.53it/s]\u001b[A\n",
      " 20%|██        | 81/403 [00:00<00:01, 161.09it/s]\u001b[A\n",
      " 24%|██▍       | 98/403 [00:00<00:01, 161.38it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 162.01it/s]\u001b[A\n",
      " 33%|███▎      | 131/403 [00:00<00:01, 161.19it/s]\u001b[A\n",
      " 37%|███▋      | 148/403 [00:00<00:01, 162.40it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 139.48it/s]\u001b[A\n",
      " 45%|████▍     | 181/403 [00:01<00:01, 145.38it/s]\u001b[A\n",
      " 49%|████▉     | 197/403 [00:01<00:01, 149.42it/s]\u001b[A\n",
      " 53%|█████▎    | 214/403 [00:01<00:01, 154.55it/s]\u001b[A\n",
      " 57%|█████▋    | 231/403 [00:01<00:01, 158.15it/s]\u001b[A\n",
      " 62%|██████▏   | 248/403 [00:01<00:00, 161.04it/s]\u001b[A\n",
      " 66%|██████▌   | 265/403 [00:01<00:00, 161.98it/s]\u001b[A\n",
      " 70%|███████   | 283/403 [00:01<00:00, 164.62it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:01<00:00, 164.98it/s]\u001b[A\n",
      " 79%|███████▊  | 317/403 [00:01<00:00, 165.10it/s]\u001b[A\n",
      " 83%|████████▎ | 334/403 [00:02<00:00, 166.37it/s]\u001b[A\n",
      " 87%|████████▋ | 351/403 [00:02<00:00, 167.17it/s]\u001b[A\n",
      " 91%|█████████▏| 368/403 [00:02<00:00, 165.95it/s]\u001b[A\n",
      " 96%|█████████▌| 385/403 [00:02<00:00, 164.69it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 160.12it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2001/4749 [00:47<09:01,  5.07it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2630002139952921\n",
      "f1: 0.7267064889363307\n",
      "Test Loss: 0.008130, Acc: 0.752326\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 2497/4749 [00:55<00:36, 61.48it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 160.26it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5293\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 159.95it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 160.62it/s]\u001b[A\n",
      " 16%|█▋        | 66/403 [00:00<00:02, 158.50it/s]\u001b[A\n",
      " 21%|██        | 83/403 [00:00<00:02, 159.20it/s]\u001b[A\n",
      " 25%|██▍       | 100/403 [00:00<00:01, 160.05it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 146.96it/s]\u001b[A\n",
      " 33%|███▎      | 131/403 [00:00<00:01, 148.19it/s]\u001b[A\n",
      " 36%|███▋      | 147/403 [00:00<00:01, 151.42it/s]\u001b[A\n",
      " 40%|████      | 163/403 [00:01<00:01, 151.54it/s]\u001b[A\n",
      " 44%|████▍     | 179/403 [00:01<00:01, 153.61it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 143.90it/s]\u001b[A\n",
      " 52%|█████▏    | 210/403 [00:01<00:01, 145.63it/s]\u001b[A\n",
      " 56%|█████▌    | 226/403 [00:01<00:01, 149.35it/s]\u001b[A\n",
      " 60%|██████    | 242/403 [00:01<00:01, 151.90it/s]\u001b[A\n",
      " 64%|██████▍   | 258/403 [00:01<00:00, 151.96it/s]\u001b[A\n",
      " 68%|██████▊   | 274/403 [00:01<00:00, 153.51it/s]\u001b[A\n",
      " 72%|███████▏  | 290/403 [00:01<00:00, 154.34it/s]\u001b[A\n",
      " 76%|███████▌  | 306/403 [00:02<00:00, 155.36it/s]\u001b[A\n",
      " 80%|███████▉  | 322/403 [00:02<00:00, 156.13it/s]\u001b[A\n",
      " 84%|████████▍ | 339/403 [00:02<00:00, 157.81it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 159.42it/s]\u001b[A\n",
      " 92%|█████████▏| 372/403 [00:02<00:00, 158.56it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 154.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.26021827519794566\n",
      "f1: 0.7269115600380436\n",
      "Test Loss: 0.008069, Acc: 0.749574\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2999/4749 [01:08<00:24, 72.65it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 163.50it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4913\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 163.84it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 162.40it/s]\u001b[A\n",
      " 17%|█▋        | 68/403 [00:00<00:02, 164.92it/s]\u001b[A\n",
      " 21%|██        | 85/403 [00:00<00:01, 165.49it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:01, 162.10it/s]\u001b[A\n",
      " 29%|██▉       | 118/403 [00:00<00:01, 163.41it/s]\u001b[A\n",
      " 33%|███▎      | 135/403 [00:00<00:01, 163.79it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:00<00:01, 165.09it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 166.33it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 165.63it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 166.21it/s]\u001b[A\n",
      " 55%|█████▍    | 221/403 [00:01<00:01, 167.79it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:00, 167.31it/s]\u001b[A\n",
      " 63%|██████▎   | 255/403 [00:01<00:00, 167.28it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 167.43it/s]\u001b[A\n",
      " 72%|███████▏  | 289/403 [00:01<00:00, 166.52it/s]\u001b[A\n",
      " 76%|███████▌  | 307/403 [00:01<00:00, 167.74it/s]\u001b[A\n",
      " 80%|████████  | 324/403 [00:01<00:00, 167.55it/s]\u001b[A\n",
      " 85%|████████▍ | 341/403 [00:02<00:00, 166.97it/s]\u001b[A\n",
      " 89%|████████▉ | 358/403 [00:02<00:00, 167.78it/s]\u001b[A\n",
      " 93%|█████████▎| 375/403 [00:02<00:00, 166.66it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 166.13it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2775518938583351\n",
      "f1: 0.6982747903355732\n",
      "Test Loss: 0.007961, Acc: 0.757560\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3495/4749 [01:20<00:22, 56.28it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 13/403 [00:00<00:03, 125.21it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3665\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  6%|▋         | 26/403 [00:00<00:02, 126.57it/s]\u001b[A\n",
      " 10%|█         | 42/403 [00:00<00:02, 134.45it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 142.16it/s]\u001b[A\n",
      " 19%|█▉        | 76/403 [00:00<00:02, 148.29it/s]\u001b[A\n",
      " 23%|██▎       | 93/403 [00:00<00:02, 153.03it/s]\u001b[A\n",
      " 27%|██▋       | 108/403 [00:00<00:02, 127.09it/s]\u001b[A\n",
      " 30%|███       | 122/403 [00:00<00:02, 129.12it/s]\u001b[A\n",
      " 34%|███▍      | 139/403 [00:00<00:01, 138.29it/s]\u001b[A\n",
      " 39%|███▊      | 156/403 [00:01<00:01, 145.19it/s]\u001b[A\n",
      " 43%|████▎     | 173/403 [00:01<00:01, 150.97it/s]\u001b[A\n",
      " 47%|████▋     | 190/403 [00:01<00:01, 154.10it/s]\u001b[A\n",
      " 51%|█████     | 206/403 [00:01<00:01, 154.35it/s]\u001b[A\n",
      " 55%|█████▌    | 223/403 [00:01<00:01, 157.32it/s]\u001b[A\n",
      " 60%|█████▉    | 240/403 [00:01<00:01, 159.26it/s]\u001b[A\n",
      " 64%|██████▍   | 257/403 [00:01<00:00, 160.11it/s]\u001b[A\n",
      " 68%|██████▊   | 274/403 [00:01<00:00, 161.68it/s]\u001b[A\n",
      " 72%|███████▏  | 291/403 [00:01<00:00, 163.16it/s]\u001b[A\n",
      " 76%|███████▋  | 308/403 [00:02<00:00, 162.29it/s]\u001b[A\n",
      " 81%|████████  | 325/403 [00:02<00:00, 162.83it/s]\u001b[A\n",
      " 85%|████████▍ | 342/403 [00:02<00:00, 164.38it/s]\u001b[A\n",
      " 89%|████████▉ | 359/403 [00:02<00:00, 165.36it/s]\u001b[A\n",
      " 93%|█████████▎| 376/403 [00:02<00:00, 164.73it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 155.23it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▎  | 3501/4749 [01:24<04:44,  4.38it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27541194093729937\n",
      "f1: 0.7030559484978581\n",
      "Test Loss: 0.007982, Acc: 0.757444\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3993/4749 [01:32<00:11, 63.89it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 157.95it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.5726\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 158.44it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 158.68it/s]\u001b[A\n",
      " 16%|█▌        | 65/403 [00:00<00:02, 160.43it/s]\u001b[A\n",
      " 20%|██        | 82/403 [00:00<00:01, 160.88it/s]\u001b[A\n",
      " 24%|██▍       | 98/403 [00:00<00:01, 160.42it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 161.92it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:00<00:01, 162.86it/s]\u001b[A\n",
      " 37%|███▋      | 149/403 [00:00<00:01, 164.39it/s]\u001b[A\n",
      " 41%|████      | 166/403 [00:01<00:01, 164.31it/s]\u001b[A\n",
      " 45%|████▌     | 182/403 [00:01<00:01, 162.97it/s]\u001b[A\n",
      " 49%|████▉     | 199/403 [00:01<00:01, 162.22it/s]\u001b[A\n",
      " 54%|█████▎    | 216/403 [00:01<00:01, 163.63it/s]\u001b[A\n",
      " 58%|█████▊    | 233/403 [00:01<00:01, 164.74it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:00, 165.79it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 164.73it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 165.79it/s]\u001b[A\n",
      " 75%|███████▍  | 301/403 [00:01<00:00, 163.65it/s]\u001b[A\n",
      " 79%|███████▉  | 318/403 [00:01<00:00, 163.88it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 153.39it/s]\u001b[A\n",
      " 87%|████████▋ | 352/403 [00:02<00:00, 156.93it/s]\u001b[A\n",
      " 91%|█████████▏| 368/403 [00:02<00:00, 140.47it/s]\u001b[A\n",
      " 96%|█████████▌| 385/403 [00:02<00:00, 146.96it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 159.17it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4000/4749 [01:36<02:23,  5.21it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27091803980312434\n",
      "f1: 0.6925172979527864\n",
      "Test Loss: 0.007957, Acc: 0.756048\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4494/4749 [01:44<00:04, 63.50it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 158.06it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5356\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 157.94it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 159.99it/s]\u001b[A\n",
      " 16%|█▋        | 66/403 [00:00<00:02, 160.63it/s]\u001b[A\n",
      " 21%|██        | 83/403 [00:00<00:01, 161.93it/s]\u001b[A\n",
      " 25%|██▍       | 100/403 [00:00<00:01, 162.92it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:01, 163.22it/s]\u001b[A\n",
      " 33%|███▎      | 134/403 [00:00<00:01, 164.44it/s]\u001b[A\n",
      " 37%|███▋      | 151/403 [00:00<00:01, 164.74it/s]\u001b[A\n",
      " 42%|████▏     | 168/403 [00:01<00:01, 164.42it/s]\u001b[A\n",
      " 46%|████▌     | 185/403 [00:01<00:01, 165.23it/s]\u001b[A\n",
      " 50%|█████     | 202/403 [00:01<00:01, 165.13it/s]\u001b[A\n",
      " 54%|█████▍    | 219/403 [00:01<00:01, 165.61it/s]\u001b[A\n",
      " 59%|█████▊    | 236/403 [00:01<00:01, 166.31it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:00, 166.15it/s]\u001b[A\n",
      " 67%|██████▋   | 270/403 [00:01<00:00, 166.10it/s]\u001b[A\n",
      " 71%|███████   | 287/403 [00:01<00:00, 166.07it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:01<00:00, 166.72it/s]\u001b[A\n",
      " 80%|███████▉  | 322/403 [00:01<00:00, 167.92it/s]\u001b[A\n",
      " 84%|████████▍ | 339/403 [00:02<00:00, 167.87it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 167.90it/s]\u001b[A\n",
      " 93%|█████████▎| 373/403 [00:02<00:00, 167.52it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 165.44it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 95%|█████████▍| 4501/4749 [01:48<00:47,  5.19it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2743419644767815\n",
      "f1: 0.7195939400732985\n",
      "Test Loss: 0.008012, Acc: 0.758065\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [01:52<00:00, 42.13it/s]\n",
      "  4%|▎         | 15/403 [00:00<00:02, 149.62it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 149.87it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/4749 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2781938797346458\n",
      "f1: 0.7168468642896306\n",
      "Test Loss: 0.007910, Acc: 0.758801\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 494/4749 [00:07<01:06, 63.96it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 160.21it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4237\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 158.67it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 158.18it/s]\u001b[A\n",
      " 16%|█▌        | 65/403 [00:00<00:02, 158.63it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 155.42it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:01, 156.06it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:01, 157.15it/s]\u001b[A\n",
      " 32%|███▏      | 128/403 [00:00<00:01, 157.84it/s]\u001b[A\n",
      " 36%|███▌      | 144/403 [00:00<00:01, 157.87it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 157.86it/s]\u001b[A\n",
      " 44%|████▍     | 177/403 [00:01<00:01, 159.02it/s]\u001b[A\n",
      " 48%|████▊     | 194/403 [00:01<00:01, 160.13it/s]\u001b[A\n",
      " 52%|█████▏    | 210/403 [00:01<00:01, 159.50it/s]\u001b[A\n",
      " 56%|█████▋    | 227/403 [00:01<00:01, 160.63it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:00, 161.02it/s]\u001b[A\n",
      " 65%|██████▍   | 261/403 [00:01<00:00, 161.27it/s]\u001b[A\n",
      " 69%|██████▉   | 278/403 [00:01<00:00, 161.84it/s]\u001b[A\n",
      " 73%|███████▎  | 295/403 [00:01<00:00, 161.45it/s]\u001b[A\n",
      " 77%|███████▋  | 312/403 [00:01<00:00, 160.99it/s]\u001b[A\n",
      " 82%|████████▏ | 329/403 [00:02<00:00, 161.87it/s]\u001b[A\n",
      " 86%|████████▌ | 346/403 [00:02<00:00, 162.74it/s]\u001b[A\n",
      " 90%|█████████ | 363/403 [00:02<00:00, 163.61it/s]\u001b[A\n",
      " 94%|█████████▍| 380/403 [00:02<00:00, 164.49it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 160.50it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 11%|█         | 500/4749 [00:20<43:43,  1.62it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27113203509522793\n",
      "f1: 0.7027517077164094\n",
      "Test Loss: 0.008014, Acc: 0.756940\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 996/4749 [00:27<01:01, 61.12it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 169.95it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.497\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 169.13it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 164.92it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 166.25it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:01, 167.28it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:01, 167.43it/s]\u001b[A\n",
      " 29%|██▉       | 118/403 [00:00<00:01, 167.75it/s]\u001b[A\n",
      " 33%|███▎      | 135/403 [00:00<00:01, 167.61it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:00<00:01, 167.64it/s]\u001b[A\n",
      " 42%|████▏     | 170/403 [00:01<00:01, 168.62it/s]\u001b[A\n",
      " 46%|████▋     | 187/403 [00:01<00:01, 166.96it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 167.64it/s]\u001b[A\n",
      " 55%|█████▍    | 221/403 [00:01<00:01, 167.26it/s]\u001b[A\n",
      " 59%|█████▉    | 239/403 [00:01<00:00, 168.20it/s]\u001b[A\n",
      " 64%|██████▎   | 256/403 [00:01<00:00, 168.73it/s]\u001b[A\n",
      " 68%|██████▊   | 273/403 [00:01<00:00, 168.68it/s]\u001b[A\n",
      " 72%|███████▏  | 290/403 [00:01<00:00, 166.63it/s]\u001b[A\n",
      " 76%|███████▌  | 307/403 [00:01<00:00, 166.68it/s]\u001b[A\n",
      " 80%|████████  | 324/403 [00:01<00:00, 167.29it/s]\u001b[A\n",
      " 85%|████████▍ | 341/403 [00:02<00:00, 167.54it/s]\u001b[A\n",
      " 89%|████████▉ | 358/403 [00:02<00:00, 167.44it/s]\u001b[A\n",
      " 93%|█████████▎| 375/403 [00:02<00:00, 167.35it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 166.95it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 21%|██        | 1003/4749 [00:31<11:50,  5.27it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2490905200085598\n",
      "f1: 0.7296744746562956\n",
      "Test Loss: 0.008213, Acc: 0.746433\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|███▏      | 1496/4749 [00:39<00:52, 61.89it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 168.04it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4791\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 167.73it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 167.65it/s]\u001b[A\n",
      " 17%|█▋        | 68/403 [00:00<00:02, 166.87it/s]\u001b[A\n",
      " 20%|██        | 81/403 [00:00<00:02, 138.10it/s]\u001b[A\n",
      " 24%|██▍       | 97/403 [00:00<00:02, 143.64it/s]\u001b[A\n",
      " 29%|██▊       | 115/403 [00:00<00:01, 151.82it/s]\u001b[A\n",
      " 33%|███▎      | 133/403 [00:00<00:01, 158.10it/s]\u001b[A\n",
      " 37%|███▋      | 151/403 [00:00<00:01, 162.74it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 165.97it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 166.35it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 168.36it/s]\u001b[A\n",
      " 55%|█████▍    | 221/403 [00:01<00:01, 168.50it/s]\u001b[A\n",
      " 59%|█████▉    | 239/403 [00:01<00:00, 170.07it/s]\u001b[A\n",
      " 64%|██████▎   | 256/403 [00:01<00:00, 169.82it/s]\u001b[A\n",
      " 68%|██████▊   | 274/403 [00:01<00:00, 171.23it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:01<00:00, 171.35it/s]\u001b[A\n",
      " 77%|███████▋  | 310/403 [00:01<00:00, 170.26it/s]\u001b[A\n",
      " 81%|████████▏ | 328/403 [00:02<00:00, 163.14it/s]\u001b[A\n",
      " 86%|████████▌ | 345/403 [00:02<00:00, 162.20it/s]\u001b[A\n",
      " 90%|████████▉ | 362/403 [00:02<00:00, 162.73it/s]\u001b[A\n",
      " 94%|█████████▍| 379/403 [00:02<00:00, 161.65it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 164.08it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27733789856623153\n",
      "f1: 0.6909121634244761\n",
      "Test Loss: 0.007973, Acc: 0.757367\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1997/4749 [00:51<00:43, 63.92it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 152.37it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5122\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 154.84it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 155.44it/s]\u001b[A\n",
      " 16%|█▋        | 66/403 [00:00<00:02, 158.21it/s]\u001b[A\n",
      " 21%|██        | 83/403 [00:00<00:01, 161.32it/s]\u001b[A\n",
      " 25%|██▍       | 100/403 [00:00<00:01, 162.91it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:01, 164.50it/s]\u001b[A\n",
      " 33%|███▎      | 134/403 [00:00<00:01, 163.36it/s]\u001b[A\n",
      " 37%|███▋      | 151/403 [00:00<00:01, 164.83it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 166.90it/s]\u001b[A\n",
      " 46%|████▋     | 187/403 [00:01<00:01, 168.39it/s]\u001b[A\n",
      " 51%|█████     | 205/403 [00:01<00:01, 170.07it/s]\u001b[A\n",
      " 55%|█████▌    | 223/403 [00:01<00:01, 171.97it/s]\u001b[A\n",
      " 60%|█████▉    | 241/403 [00:01<00:00, 170.61it/s]\u001b[A\n",
      " 64%|██████▍   | 258/403 [00:01<00:00, 148.75it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 155.54it/s]\u001b[A\n",
      " 73%|███████▎  | 294/403 [00:01<00:00, 160.34it/s]\u001b[A\n",
      " 77%|███████▋  | 311/403 [00:01<00:00, 162.59it/s]\u001b[A\n",
      " 82%|████████▏ | 329/403 [00:02<00:00, 166.23it/s]\u001b[A\n",
      " 86%|████████▌ | 346/403 [00:02<00:00, 166.99it/s]\u001b[A\n",
      " 90%|█████████ | 364/403 [00:02<00:00, 168.63it/s]\u001b[A\n",
      " 95%|█████████▍| 381/403 [00:02<00:00, 168.77it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 164.88it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2004/4749 [00:55<08:36,  5.31it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2704900492189172\n",
      "f1: 0.7256651686996324\n",
      "Test Loss: 0.008127, Acc: 0.755467\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 2492/4749 [01:02<00:29, 75.54it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 160.64it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5249\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 160.71it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 161.52it/s]\u001b[A\n",
      " 17%|█▋        | 68/403 [00:00<00:02, 162.28it/s]\u001b[A\n",
      " 21%|██        | 85/403 [00:00<00:01, 162.48it/s]\u001b[A\n",
      " 25%|██▌       | 102/403 [00:00<00:01, 162.57it/s]\u001b[A\n",
      " 30%|██▉       | 119/403 [00:00<00:01, 162.22it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 163.69it/s]\u001b[A\n",
      " 38%|███▊      | 153/403 [00:00<00:01, 164.70it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 162.89it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 164.62it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 163.77it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 163.84it/s]\u001b[A\n",
      " 59%|█████▉    | 237/403 [00:01<00:01, 164.71it/s]\u001b[A\n",
      " 63%|██████▎   | 254/403 [00:01<00:00, 164.90it/s]\u001b[A\n",
      " 67%|██████▋   | 271/403 [00:01<00:00, 151.76it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 155.58it/s]\u001b[A\n",
      " 76%|███████▌  | 305/403 [00:01<00:00, 159.37it/s]\u001b[A\n",
      " 80%|███████▉  | 322/403 [00:01<00:00, 162.26it/s]\u001b[A\n",
      " 84%|████████▍ | 339/403 [00:02<00:00, 162.79it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 164.83it/s]\u001b[A\n",
      " 93%|█████████▎| 373/403 [00:02<00:00, 165.36it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 162.91it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 53%|█████▎    | 2500/4749 [01:07<06:26,  5.83it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2597902846137385\n",
      "f1: 0.7253774021589257\n",
      "Test Loss: 0.008048, Acc: 0.750078\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2995/4749 [01:15<00:28, 61.38it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 13/403 [00:00<00:03, 123.65it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4905\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 28/403 [00:00<00:02, 129.40it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 135.55it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 139.00it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 142.50it/s]\u001b[A\n",
      " 22%|██▏       | 90/403 [00:00<00:02, 144.63it/s]\u001b[A\n",
      " 26%|██▋       | 106/403 [00:00<00:02, 146.89it/s]\u001b[A\n",
      " 30%|███       | 122/403 [00:00<00:01, 149.21it/s]\u001b[A\n",
      " 34%|███▍      | 137/403 [00:00<00:01, 149.07it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:01<00:01, 149.28it/s]\u001b[A\n",
      " 42%|████▏     | 168/403 [00:01<00:01, 151.03it/s]\u001b[A\n",
      " 45%|████▌     | 183/403 [00:01<00:01, 150.45it/s]\u001b[A\n",
      " 49%|████▉     | 199/403 [00:01<00:01, 151.37it/s]\u001b[A\n",
      " 53%|█████▎    | 215/403 [00:01<00:01, 152.60it/s]\u001b[A\n",
      " 57%|█████▋    | 231/403 [00:01<00:01, 152.83it/s]\u001b[A\n",
      " 61%|██████▏   | 247/403 [00:01<00:01, 153.49it/s]\u001b[A\n",
      " 65%|██████▌   | 263/403 [00:01<00:00, 153.86it/s]\u001b[A\n",
      " 69%|██████▉   | 279/403 [00:01<00:00, 155.09it/s]\u001b[A\n",
      " 73%|███████▎  | 295/403 [00:01<00:00, 154.79it/s]\u001b[A\n",
      " 77%|███████▋  | 311/403 [00:02<00:00, 155.29it/s]\u001b[A\n",
      " 81%|████████  | 327/403 [00:02<00:00, 156.55it/s]\u001b[A\n",
      " 85%|████████▌ | 343/403 [00:02<00:00, 156.80it/s]\u001b[A\n",
      " 89%|████████▉ | 359/403 [00:02<00:00, 155.16it/s]\u001b[A\n",
      " 93%|█████████▎| 375/403 [00:02<00:00, 156.44it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 152.20it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2760539268136101\n",
      "f1: 0.6996504743562392\n",
      "Test Loss: 0.007950, Acc: 0.758181\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3496/4749 [01:27<00:22, 54.74it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 154.37it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.377\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 153.84it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 152.47it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 155.10it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 156.34it/s]\u001b[A\n",
      " 24%|██▍       | 97/403 [00:00<00:01, 158.61it/s]\u001b[A\n",
      " 28%|██▊       | 114/403 [00:00<00:01, 159.50it/s]\u001b[A\n",
      " 32%|███▏      | 130/403 [00:00<00:01, 159.39it/s]\u001b[A\n",
      " 36%|███▋      | 147/403 [00:00<00:01, 160.01it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 160.50it/s]\u001b[A\n",
      " 45%|████▍     | 180/403 [00:01<00:01, 159.92it/s]\u001b[A\n",
      " 49%|████▉     | 197/403 [00:01<00:01, 161.00it/s]\u001b[A\n",
      " 53%|█████▎    | 214/403 [00:01<00:01, 162.57it/s]\u001b[A\n",
      " 57%|█████▋    | 231/403 [00:01<00:01, 162.36it/s]\u001b[A\n",
      " 62%|██████▏   | 248/403 [00:01<00:00, 161.65it/s]\u001b[A\n",
      " 66%|██████▌   | 265/403 [00:01<00:00, 161.92it/s]\u001b[A\n",
      " 70%|██████▉   | 282/403 [00:01<00:00, 160.75it/s]\u001b[A\n",
      " 74%|███████▍  | 299/403 [00:01<00:00, 160.21it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:01<00:00, 161.66it/s]\u001b[A\n",
      " 83%|████████▎ | 333/403 [00:02<00:00, 161.89it/s]\u001b[A\n",
      " 87%|████████▋ | 350/403 [00:02<00:00, 162.15it/s]\u001b[A\n",
      " 91%|█████████ | 367/403 [00:02<00:00, 162.75it/s]\u001b[A\n",
      " 95%|█████████▌| 384/403 [00:02<00:00, 163.64it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 160.29it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▎  | 3502/4749 [01:32<04:40,  4.45it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27177402097153863\n",
      "f1: 0.6868878630701949\n",
      "Test Loss: 0.008050, Acc: 0.756591\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3994/4749 [01:40<00:11, 63.14it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 158.01it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.5776\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 144.11it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 148.93it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 149.70it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 152.36it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:02, 153.37it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:01, 155.01it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:02, 136.75it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 142.60it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 147.70it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 149.59it/s]\u001b[A\n",
      " 48%|████▊     | 193/403 [00:01<00:01, 153.26it/s]\u001b[A\n",
      " 52%|█████▏    | 210/403 [00:01<00:01, 155.77it/s]\u001b[A\n",
      " 56%|█████▋    | 227/403 [00:01<00:01, 157.45it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 158.75it/s]\u001b[A\n",
      " 65%|██████▍   | 261/403 [00:01<00:00, 159.94it/s]\u001b[A\n",
      " 69%|██████▊   | 277/403 [00:01<00:00, 157.57it/s]\u001b[A\n",
      " 73%|███████▎  | 294/403 [00:01<00:00, 158.97it/s]\u001b[A\n",
      " 77%|███████▋  | 310/403 [00:02<00:00, 158.81it/s]\u001b[A\n",
      " 81%|████████  | 327/403 [00:02<00:00, 160.26it/s]\u001b[A\n",
      " 85%|████████▌ | 344/403 [00:02<00:00, 161.11it/s]\u001b[A\n",
      " 90%|████████▉ | 361/403 [00:02<00:00, 161.10it/s]\u001b[A\n",
      " 94%|█████████▍| 378/403 [00:02<00:00, 160.83it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 154.77it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4001/4749 [01:44<02:27,  5.08it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2719880162636422\n",
      "f1: 0.6939053801133911\n",
      "Test Loss: 0.007985, Acc: 0.757871\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4499/4749 [01:53<00:04, 59.85it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 153.72it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5346\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 151.96it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 153.19it/s]\u001b[A\n",
      " 16%|█▌        | 63/403 [00:00<00:02, 152.71it/s]\u001b[A\n",
      " 20%|█▉        | 79/403 [00:00<00:02, 152.96it/s]\u001b[A\n",
      " 24%|██▎       | 95/403 [00:00<00:02, 152.69it/s]\u001b[A\n",
      " 28%|██▊       | 111/403 [00:00<00:01, 153.89it/s]\u001b[A\n",
      " 32%|███▏      | 127/403 [00:00<00:01, 153.25it/s]\u001b[A\n",
      " 35%|███▌      | 143/403 [00:00<00:01, 154.04it/s]\u001b[A\n",
      " 39%|███▉      | 159/403 [00:01<00:01, 152.85it/s]\u001b[A\n",
      " 43%|████▎     | 175/403 [00:01<00:01, 154.67it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 156.28it/s]\u001b[A\n",
      " 52%|█████▏    | 208/403 [00:01<00:01, 155.40it/s]\u001b[A\n",
      " 56%|█████▌    | 224/403 [00:01<00:01, 156.49it/s]\u001b[A\n",
      " 60%|█████▉    | 240/403 [00:01<00:01, 156.37it/s]\u001b[A\n",
      " 64%|██████▎   | 256/403 [00:01<00:01, 145.13it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 149.13it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 152.20it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:01<00:00, 152.74it/s]\u001b[A\n",
      " 79%|███████▉  | 320/403 [00:02<00:00, 134.53it/s]\u001b[A\n",
      " 84%|████████▎ | 337/403 [00:02<00:00, 141.46it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 146.38it/s]\u001b[A\n",
      " 92%|█████████▏| 370/403 [00:02<00:00, 151.11it/s]\u001b[A\n",
      " 96%|█████████▌| 386/403 [00:02<00:00, 152.24it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 151.50it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 95%|█████████▍| 4505/4749 [01:57<00:56,  4.29it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2719880162636422\n",
      "f1: 0.7131323475962977\n",
      "Test Loss: 0.008030, Acc: 0.756475\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [02:01<00:00, 39.11it/s]\n",
      "  4%|▍         | 16/403 [00:00<00:02, 156.36it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 155.58it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2694200727583993\n",
      "f1: 0.6755335446112313\n",
      "Test Loss: 0.008013, Acc: 0.755855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 495/4749 [00:08<01:08, 62.28it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 157.12it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4231\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 31/403 [00:00<00:02, 154.70it/s]\u001b[A\n",
      " 12%|█▏        | 47/403 [00:00<00:02, 154.61it/s]\u001b[A\n",
      " 15%|█▌        | 62/403 [00:00<00:02, 152.74it/s]\u001b[A\n",
      " 19%|█▉        | 78/403 [00:00<00:02, 152.61it/s]\u001b[A\n",
      " 23%|██▎       | 94/403 [00:00<00:02, 152.22it/s]\u001b[A\n",
      " 27%|██▋       | 110/403 [00:00<00:01, 153.66it/s]\u001b[A\n",
      " 31%|███▏      | 126/403 [00:00<00:01, 154.15it/s]\u001b[A\n",
      " 35%|███▌      | 142/403 [00:00<00:01, 154.17it/s]\u001b[A\n",
      " 39%|███▉      | 158/403 [00:01<00:01, 155.60it/s]\u001b[A\n",
      " 43%|████▎     | 174/403 [00:01<00:01, 155.20it/s]\u001b[A\n",
      " 47%|████▋     | 190/403 [00:01<00:01, 156.55it/s]\u001b[A\n",
      " 51%|█████     | 206/403 [00:01<00:01, 155.70it/s]\u001b[A\n",
      " 55%|█████▌    | 222/403 [00:01<00:01, 156.67it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 156.32it/s]\u001b[A\n",
      " 63%|██████▎   | 254/403 [00:01<00:00, 155.83it/s]\u001b[A\n",
      " 67%|██████▋   | 270/403 [00:01<00:00, 154.76it/s]\u001b[A\n",
      " 71%|███████   | 286/403 [00:01<00:00, 134.68it/s]\u001b[A\n",
      " 75%|███████▌  | 303/403 [00:02<00:00, 141.79it/s]\u001b[A\n",
      " 79%|███████▉  | 319/403 [00:02<00:00, 145.63it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 148.94it/s]\u001b[A\n",
      " 87%|████████▋ | 352/403 [00:02<00:00, 152.63it/s]\u001b[A\n",
      " 92%|█████████▏| 369/403 [00:02<00:00, 155.53it/s]\u001b[A\n",
      " 96%|█████████▌| 385/403 [00:02<00:00, 156.59it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 152.80it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 11%|█         | 502/4749 [00:12<14:05,  5.03it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.268564091589985\n",
      "f1: 0.7090535911528961\n",
      "Test Loss: 0.008010, Acc: 0.755738\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 996/4749 [00:20<00:59, 62.92it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 163.40it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5031\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 162.27it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 161.53it/s]\u001b[A\n",
      " 16%|█▋        | 66/403 [00:00<00:02, 161.97it/s]\u001b[A\n",
      " 21%|██        | 83/403 [00:00<00:01, 163.43it/s]\u001b[A\n",
      " 25%|██▍       | 100/403 [00:00<00:01, 163.76it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:01, 163.30it/s]\u001b[A\n",
      " 33%|███▎      | 134/403 [00:00<00:01, 164.11it/s]\u001b[A\n",
      " 37%|███▋      | 151/403 [00:00<00:01, 165.58it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 167.16it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 166.21it/s]\u001b[A\n",
      " 51%|█████     | 204/403 [00:01<00:01, 169.14it/s]\u001b[A\n",
      " 55%|█████▌    | 222/403 [00:01<00:01, 169.86it/s]\u001b[A\n",
      " 60%|█████▉    | 240/403 [00:01<00:00, 170.91it/s]\u001b[A\n",
      " 64%|██████▍   | 257/403 [00:01<00:00, 168.36it/s]\u001b[A\n",
      " 68%|██████▊   | 275/403 [00:01<00:00, 169.12it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:01<00:00, 169.02it/s]\u001b[A\n",
      " 77%|███████▋  | 309/403 [00:01<00:00, 167.92it/s]\u001b[A\n",
      " 81%|████████  | 326/403 [00:01<00:00, 164.97it/s]\u001b[A\n",
      " 85%|████████▌ | 343/403 [00:02<00:00, 165.84it/s]\u001b[A\n",
      " 89%|████████▉ | 360/403 [00:02<00:00, 165.62it/s]\u001b[A\n",
      " 94%|█████████▎| 377/403 [00:02<00:00, 166.85it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 166.15it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2523004493901134\n",
      "f1: 0.7283955232061486\n",
      "Test Loss: 0.008177, Acc: 0.746937\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|███▏      | 1496/4749 [00:32<00:54, 59.58it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 143.50it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4818\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 142.78it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:02, 144.64it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 146.09it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 145.86it/s]\u001b[A\n",
      " 22%|██▏       | 90/403 [00:00<00:02, 145.60it/s]\u001b[A\n",
      " 26%|██▌       | 103/403 [00:00<00:02, 137.86it/s]\u001b[A\n",
      " 29%|██▉       | 116/403 [00:00<00:02, 131.68it/s]\u001b[A\n",
      " 32%|███▏      | 130/403 [00:00<00:02, 133.78it/s]\u001b[A\n",
      " 36%|███▌      | 145/403 [00:01<00:01, 137.11it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 139.89it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 143.53it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 145.97it/s]\u001b[A\n",
      " 51%|█████▏    | 207/403 [00:01<00:01, 146.67it/s]\u001b[A\n",
      " 55%|█████▌    | 222/403 [00:01<00:01, 147.22it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 148.88it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:01, 148.63it/s]\u001b[A\n",
      " 67%|██████▋   | 269/403 [00:01<00:00, 149.24it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 147.72it/s]\u001b[A\n",
      " 74%|███████▍  | 300/403 [00:02<00:00, 148.56it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:02<00:00, 149.93it/s]\u001b[A\n",
      " 82%|████████▏ | 331/403 [00:02<00:00, 149.41it/s]\u001b[A\n",
      " 86%|████████▌ | 346/403 [00:02<00:00, 149.18it/s]\u001b[A\n",
      " 90%|████████▉ | 362/403 [00:02<00:00, 149.86it/s]\u001b[A\n",
      " 94%|█████████▎| 377/403 [00:02<00:00, 149.75it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 145.75it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 32%|███▏      | 1502/4749 [00:37<12:52,  4.20it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2818317997004066\n",
      "f1: 0.696560789948343\n",
      "Test Loss: 0.007956, Acc: 0.758801\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1997/4749 [00:45<00:44, 61.31it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 154.00it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5185\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 153.40it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 154.69it/s]\u001b[A\n",
      " 16%|█▌        | 65/403 [00:00<00:02, 156.77it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 154.11it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:01, 155.13it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:01, 156.04it/s]\u001b[A\n",
      " 32%|███▏      | 129/403 [00:00<00:01, 157.61it/s]\u001b[A\n",
      " 36%|███▌      | 145/403 [00:00<00:01, 158.15it/s]\u001b[A\n",
      " 40%|███▉      | 161/403 [00:01<00:01, 157.42it/s]\u001b[A\n",
      " 44%|████▍     | 178/403 [00:01<00:01, 158.39it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 159.64it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 161.24it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 159.11it/s]\u001b[A\n",
      " 61%|██████    | 245/403 [00:01<00:00, 159.59it/s]\u001b[A\n",
      " 65%|██████▍   | 261/403 [00:01<00:00, 159.22it/s]\u001b[A\n",
      " 69%|██████▊   | 277/403 [00:01<00:00, 158.00it/s]\u001b[A\n",
      " 73%|███████▎  | 293/403 [00:01<00:00, 156.63it/s]\u001b[A\n",
      " 77%|███████▋  | 309/403 [00:01<00:00, 157.46it/s]\u001b[A\n",
      " 81%|████████  | 325/403 [00:02<00:00, 158.04it/s]\u001b[A\n",
      " 85%|████████▍ | 341/403 [00:02<00:00, 158.52it/s]\u001b[A\n",
      " 89%|████████▊ | 357/403 [00:02<00:00, 148.09it/s]\u001b[A\n",
      " 93%|█████████▎| 374/403 [00:02<00:00, 152.14it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 156.97it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27626792210571366\n",
      "f1: 0.7153948151380297\n",
      "Test Loss: 0.008057, Acc: 0.757289\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 2493/4749 [00:57<00:38, 58.79it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 152.96it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5241\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 152.32it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 152.70it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 153.11it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 152.82it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:02, 153.07it/s]\u001b[A\n",
      " 27%|██▋       | 110/403 [00:00<00:02, 136.16it/s]\u001b[A\n",
      " 31%|███▏      | 126/403 [00:00<00:01, 141.87it/s]\u001b[A\n",
      " 35%|███▌      | 142/403 [00:00<00:01, 146.36it/s]\u001b[A\n",
      " 39%|███▉      | 158/403 [00:01<00:01, 147.75it/s]\u001b[A\n",
      " 43%|████▎     | 174/403 [00:01<00:01, 148.73it/s]\u001b[A\n",
      " 47%|████▋     | 189/403 [00:01<00:01, 149.07it/s]\u001b[A\n",
      " 51%|█████     | 205/403 [00:01<00:01, 150.15it/s]\u001b[A\n",
      " 55%|█████▍    | 221/403 [00:01<00:01, 151.07it/s]\u001b[A\n",
      " 59%|█████▉    | 237/403 [00:01<00:01, 151.17it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:00, 151.98it/s]\u001b[A\n",
      " 67%|██████▋   | 269/403 [00:01<00:00, 152.84it/s]\u001b[A\n",
      " 71%|███████   | 285/403 [00:01<00:00, 151.92it/s]\u001b[A\n",
      " 75%|███████▍  | 301/403 [00:02<00:00, 152.99it/s]\u001b[A\n",
      " 79%|███████▊  | 317/403 [00:02<00:00, 153.71it/s]\u001b[A\n",
      " 83%|████████▎ | 333/403 [00:02<00:00, 153.94it/s]\u001b[A\n",
      " 87%|████████▋ | 349/403 [00:02<00:00, 153.42it/s]\u001b[A\n",
      " 91%|█████████ | 365/403 [00:02<00:00, 151.44it/s]\u001b[A\n",
      " 95%|█████████▍| 381/403 [00:02<00:00, 152.47it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 150.86it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 53%|█████▎    | 2500/4749 [01:02<07:28,  5.01it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26193023753477424\n",
      "f1: 0.7242985478273424\n",
      "Test Loss: 0.008031, Acc: 0.751435\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2996/4749 [01:10<00:28, 60.99it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 165.41it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.49\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 166.18it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 165.29it/s]\u001b[A\n",
      " 17%|█▋        | 68/403 [00:00<00:02, 164.08it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:01, 162.34it/s]\u001b[A\n",
      " 25%|██▌       | 101/403 [00:00<00:01, 162.92it/s]\u001b[A\n",
      " 29%|██▉       | 118/403 [00:00<00:01, 163.58it/s]\u001b[A\n",
      " 33%|███▎      | 135/403 [00:00<00:01, 163.19it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:00<00:01, 164.14it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 165.31it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 164.27it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 163.48it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 163.16it/s]\u001b[A\n",
      " 59%|█████▉    | 237/403 [00:01<00:01, 163.69it/s]\u001b[A\n",
      " 63%|██████▎   | 254/403 [00:01<00:00, 164.85it/s]\u001b[A\n",
      " 67%|██████▋   | 271/403 [00:01<00:00, 164.61it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 164.53it/s]\u001b[A\n",
      " 76%|███████▌  | 305/403 [00:01<00:00, 162.42it/s]\u001b[A\n",
      " 80%|███████▉  | 322/403 [00:01<00:00, 162.69it/s]\u001b[A\n",
      " 84%|████████▍ | 339/403 [00:02<00:00, 163.45it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 164.10it/s]\u001b[A\n",
      " 93%|█████████▎| 373/403 [00:02<00:00, 163.47it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 163.27it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 63%|██████▎   | 3003/4749 [01:14<05:40,  5.12it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27498395035309225\n",
      "f1: 0.6921117259229894\n",
      "Test Loss: 0.007983, Acc: 0.757328\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3499/4749 [01:22<00:17, 69.95it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 160.83it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3728\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 159.52it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 159.52it/s]\u001b[A\n",
      " 16%|█▌        | 65/403 [00:00<00:02, 159.33it/s]\u001b[A\n",
      " 20%|██        | 81/403 [00:00<00:02, 158.02it/s]\u001b[A\n",
      " 24%|██▍       | 97/403 [00:00<00:01, 157.41it/s]\u001b[A\n",
      " 28%|██▊       | 113/403 [00:00<00:01, 157.48it/s]\u001b[A\n",
      " 32%|███▏      | 129/403 [00:00<00:01, 157.17it/s]\u001b[A\n",
      " 36%|███▌      | 146/403 [00:00<00:01, 158.41it/s]\u001b[A\n",
      " 40%|████      | 163/403 [00:01<00:01, 159.04it/s]\u001b[A\n",
      " 44%|████▍     | 179/403 [00:01<00:01, 147.23it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 150.56it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 153.60it/s]\u001b[A\n",
      " 57%|█████▋    | 229/403 [00:01<00:01, 156.40it/s]\u001b[A\n",
      " 61%|██████    | 246/403 [00:01<00:00, 158.35it/s]\u001b[A\n",
      " 65%|██████▌   | 263/403 [00:01<00:00, 159.81it/s]\u001b[A\n",
      " 69%|██████▉   | 280/403 [00:01<00:00, 160.23it/s]\u001b[A\n",
      " 74%|███████▎  | 297/403 [00:01<00:00, 160.84it/s]\u001b[A\n",
      " 78%|███████▊  | 314/403 [00:01<00:00, 161.23it/s]\u001b[A\n",
      " 82%|████████▏ | 331/403 [00:02<00:00, 160.92it/s]\u001b[A\n",
      " 86%|████████▋ | 348/403 [00:02<00:00, 161.14it/s]\u001b[A\n",
      " 91%|█████████ | 365/403 [00:02<00:00, 161.15it/s]\u001b[A\n",
      " 95%|█████████▍| 382/403 [00:02<00:00, 160.91it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 158.22it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▎  | 3499/4749 [01:33<00:17, 69.95it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 74%|███████▎  | 3500/4749 [01:34<1:14:53,  3.60s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2769099079820244\n",
      "f1: 0.7015065206805078\n",
      "Test Loss: 0.007995, Acc: 0.757871\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3992/4749 [01:40<00:09, 76.47it/s]  \n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 161.55it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.5782\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 162.09it/s]\u001b[A\n",
      " 13%|█▎        | 51/403 [00:00<00:02, 163.17it/s]\u001b[A\n",
      " 17%|█▋        | 68/403 [00:00<00:02, 163.33it/s]\u001b[A\n",
      " 20%|██        | 81/403 [00:00<00:02, 138.22it/s]\u001b[A\n",
      " 24%|██▍       | 97/403 [00:00<00:02, 144.04it/s]\u001b[A\n",
      " 28%|██▊       | 114/403 [00:00<00:01, 149.98it/s]\u001b[A\n",
      " 33%|███▎      | 131/403 [00:00<00:01, 154.36it/s]\u001b[A\n",
      " 37%|███▋      | 148/403 [00:00<00:01, 157.80it/s]\u001b[A\n",
      " 41%|████      | 165/403 [00:01<00:01, 160.17it/s]\u001b[A\n",
      " 45%|████▌     | 182/403 [00:01<00:01, 160.97it/s]\u001b[A\n",
      " 49%|████▉     | 199/403 [00:01<00:01, 163.12it/s]\u001b[A\n",
      " 54%|█████▍    | 217/403 [00:01<00:01, 165.43it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 163.27it/s]\u001b[A\n",
      " 62%|██████▏   | 251/403 [00:01<00:00, 164.00it/s]\u001b[A\n",
      " 67%|██████▋   | 268/403 [00:01<00:00, 164.77it/s]\u001b[A\n",
      " 71%|███████   | 285/403 [00:01<00:00, 165.69it/s]\u001b[A\n",
      " 75%|███████▍  | 302/403 [00:01<00:00, 165.59it/s]\u001b[A\n",
      " 79%|███████▉  | 319/403 [00:01<00:00, 165.95it/s]\u001b[A\n",
      " 83%|████████▎ | 336/403 [00:02<00:00, 166.11it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 164.26it/s]\u001b[A\n",
      " 92%|█████████▏| 371/403 [00:02<00:00, 166.45it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 162.00it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4000/4749 [01:45<02:04,  6.01it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2736999786004708\n",
      "f1: 0.6953936664187117\n",
      "Test Loss: 0.007945, Acc: 0.758258\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4493/4749 [01:51<00:03, 77.15it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 169.36it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5478\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 34/403 [00:00<00:02, 168.53it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 165.01it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 165.38it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:01, 166.72it/s]\u001b[A\n",
      " 25%|██▌       | 102/403 [00:00<00:01, 167.70it/s]\u001b[A\n",
      " 30%|██▉       | 120/403 [00:00<00:01, 168.72it/s]\u001b[A\n",
      " 34%|███▍      | 138/403 [00:00<00:01, 170.27it/s]\u001b[A\n",
      " 38%|███▊      | 155/403 [00:00<00:01, 155.47it/s]\u001b[A\n",
      " 43%|████▎     | 173/403 [00:01<00:01, 160.13it/s]\u001b[A\n",
      " 47%|████▋     | 190/403 [00:01<00:01, 162.36it/s]\u001b[A\n",
      " 52%|█████▏    | 208/403 [00:01<00:01, 165.78it/s]\u001b[A\n",
      " 56%|█████▌    | 226/403 [00:01<00:01, 167.92it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:00, 169.76it/s]\u001b[A\n",
      " 65%|██████▌   | 262/403 [00:01<00:00, 171.56it/s]\u001b[A\n",
      " 69%|██████▉   | 280/403 [00:01<00:00, 173.04it/s]\u001b[A\n",
      " 74%|███████▍  | 298/403 [00:01<00:00, 171.11it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:01<00:00, 172.83it/s]\u001b[A\n",
      " 83%|████████▎ | 334/403 [00:01<00:00, 173.58it/s]\u001b[A\n",
      " 87%|████████▋ | 352/403 [00:02<00:00, 174.55it/s]\u001b[A\n",
      " 92%|█████████▏| 370/403 [00:02<00:00, 174.79it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 169.13it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 95%|█████████▍| 4501/4749 [01:55<00:40,  6.11it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2677081104215707\n",
      "f1: 0.7160146603758936\n",
      "Test Loss: 0.008062, Acc: 0.755002\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [01:59<00:00, 39.64it/s]\n",
      "  4%|▍         | 18/403 [00:00<00:02, 179.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 180.91it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/4749 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2781938797346458\n",
      "f1: 0.7138821339870017\n",
      "Test Loss: 0.007920, Acc: 0.758413\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 11%|█         | 499/4749 [00:08<01:05, 64.87it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 18/403 [00:00<00:02, 173.01it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4253\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 36/403 [00:00<00:02, 173.43it/s]\u001b[A\n",
      " 13%|█▎        | 54/403 [00:00<00:02, 173.28it/s]\u001b[A\n",
      " 18%|█▊        | 72/403 [00:00<00:01, 172.96it/s]\u001b[A\n",
      " 22%|██▏       | 90/403 [00:00<00:01, 173.19it/s]\u001b[A\n",
      " 27%|██▋       | 107/403 [00:00<00:01, 171.59it/s]\u001b[A\n",
      " 31%|███       | 125/403 [00:00<00:01, 171.87it/s]\u001b[A\n",
      " 35%|███▌      | 142/403 [00:00<00:01, 170.50it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:00<00:01, 171.37it/s]\u001b[A\n",
      " 44%|████▍     | 177/403 [00:01<00:01, 156.59it/s]\u001b[A\n",
      " 48%|████▊     | 194/403 [00:01<00:01, 158.06it/s]\u001b[A\n",
      " 52%|█████▏    | 211/403 [00:01<00:01, 160.60it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 161.51it/s]\u001b[A\n",
      " 61%|██████    | 245/403 [00:01<00:00, 163.31it/s]\u001b[A\n",
      " 65%|██████▌   | 262/403 [00:01<00:00, 162.57it/s]\u001b[A\n",
      " 69%|██████▉   | 279/403 [00:01<00:00, 164.44it/s]\u001b[A\n",
      " 73%|███████▎  | 296/403 [00:01<00:00, 165.44it/s]\u001b[A\n",
      " 78%|███████▊  | 313/403 [00:01<00:00, 164.24it/s]\u001b[A\n",
      " 82%|████████▏ | 330/403 [00:01<00:00, 165.35it/s]\u001b[A\n",
      " 86%|████████▌ | 347/403 [00:02<00:00, 165.64it/s]\u001b[A\n",
      " 90%|█████████ | 364/403 [00:02<00:00, 166.55it/s]\u001b[A\n",
      " 95%|█████████▍| 381/403 [00:02<00:00, 158.34it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 163.00it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27733789856623153\n",
      "f1: 0.7063381669095439\n",
      "Test Loss: 0.007988, Acc: 0.758258\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 993/4749 [00:20<01:03, 58.88it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 145.56it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4987\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 146.47it/s]\u001b[A\n",
      " 11%|█▏        | 46/403 [00:00<00:02, 148.34it/s]\u001b[A\n",
      " 15%|█▌        | 61/403 [00:00<00:02, 147.63it/s]\u001b[A\n",
      " 19%|█▉        | 76/403 [00:00<00:02, 147.91it/s]\u001b[A\n",
      " 23%|██▎       | 91/403 [00:00<00:02, 148.31it/s]\u001b[A\n",
      " 26%|██▋       | 106/403 [00:00<00:01, 148.68it/s]\u001b[A\n",
      " 30%|███       | 122/403 [00:00<00:01, 149.70it/s]\u001b[A\n",
      " 34%|███▍      | 138/403 [00:00<00:01, 150.25it/s]\u001b[A\n",
      " 38%|███▊      | 154/403 [00:01<00:01, 151.41it/s]\u001b[A\n",
      " 42%|████▏     | 170/403 [00:01<00:01, 152.60it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 152.54it/s]\u001b[A\n",
      " 50%|█████     | 202/403 [00:01<00:01, 153.52it/s]\u001b[A\n",
      " 54%|█████▍    | 218/403 [00:01<00:01, 153.23it/s]\u001b[A\n",
      " 58%|█████▊    | 234/403 [00:01<00:01, 153.46it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:00, 153.53it/s]\u001b[A\n",
      " 66%|██████▌   | 266/403 [00:01<00:00, 153.93it/s]\u001b[A\n",
      " 70%|██████▉   | 282/403 [00:01<00:00, 152.43it/s]\u001b[A\n",
      " 74%|███████▍  | 298/403 [00:01<00:00, 154.44it/s]\u001b[A\n",
      " 78%|███████▊  | 314/403 [00:02<00:00, 155.44it/s]\u001b[A\n",
      " 82%|████████▏ | 330/403 [00:02<00:00, 153.61it/s]\u001b[A\n",
      " 86%|████████▌ | 346/403 [00:02<00:00, 154.88it/s]\u001b[A\n",
      " 90%|████████▉ | 362/403 [00:02<00:00, 152.08it/s]\u001b[A\n",
      " 94%|█████████▍| 378/403 [00:02<00:00, 152.79it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 152.07it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 21%|██        | 1000/4749 [00:24<12:35,  4.96it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.24994650117697412\n",
      "f1: 0.7287338395727114\n",
      "Test Loss: 0.008179, Acc: 0.747053\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 31%|███▏      | 1495/4749 [00:33<00:58, 55.49it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 140.65it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4706\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 29/403 [00:00<00:02, 140.16it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 141.92it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 143.46it/s]\u001b[A\n",
      " 18%|█▊        | 74/403 [00:00<00:02, 143.99it/s]\u001b[A\n",
      " 22%|██▏       | 88/403 [00:00<00:02, 142.74it/s]\u001b[A\n",
      " 26%|██▌       | 103/403 [00:00<00:02, 143.94it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:02, 141.83it/s]\u001b[A\n",
      " 33%|███▎      | 132/403 [00:00<00:01, 142.73it/s]\u001b[A\n",
      " 36%|███▋      | 147/403 [00:01<00:01, 143.41it/s]\u001b[A\n",
      " 40%|████      | 162/403 [00:01<00:01, 142.92it/s]\u001b[A\n",
      " 44%|████▍     | 178/403 [00:01<00:01, 145.28it/s]\u001b[A\n",
      " 48%|████▊     | 193/403 [00:01<00:01, 143.78it/s]\u001b[A\n",
      " 52%|█████▏    | 208/403 [00:01<00:01, 144.63it/s]\u001b[A\n",
      " 55%|█████▌    | 223/403 [00:01<00:01, 142.82it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:01, 143.57it/s]\u001b[A\n",
      " 63%|██████▎   | 253/403 [00:01<00:01, 143.46it/s]\u001b[A\n",
      " 67%|██████▋   | 269/403 [00:01<00:00, 145.81it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 146.13it/s]\u001b[A\n",
      " 74%|███████▍  | 299/403 [00:02<00:00, 146.70it/s]\u001b[A\n",
      " 78%|███████▊  | 314/403 [00:02<00:00, 146.93it/s]\u001b[A\n",
      " 82%|████████▏ | 329/403 [00:02<00:00, 129.98it/s]\u001b[A\n",
      " 85%|████████▌ | 344/403 [00:02<00:00, 135.24it/s]\u001b[A\n",
      " 89%|████████▉ | 359/403 [00:02<00:00, 139.09it/s]\u001b[A\n",
      " 93%|█████████▎| 375/403 [00:02<00:00, 142.10it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 143.25it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 32%|███▏      | 1501/4749 [00:37<12:59,  4.17it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2501604964690777\n",
      "f1: 0.7198501631521208\n",
      "Test Loss: 0.008083, Acc: 0.749574\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1993/4749 [00:45<00:44, 61.90it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 12/403 [00:00<00:03, 118.49it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5145\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  6%|▌         | 25/403 [00:00<00:03, 119.35it/s]\u001b[A\n",
      " 10%|▉         | 40/403 [00:00<00:02, 125.45it/s]\u001b[A\n",
      " 14%|█▍        | 56/403 [00:00<00:02, 133.28it/s]\u001b[A\n",
      " 18%|█▊        | 72/403 [00:00<00:02, 140.04it/s]\u001b[A\n",
      " 22%|██▏       | 88/403 [00:00<00:02, 143.85it/s]\u001b[A\n",
      " 26%|██▌       | 104/403 [00:00<00:02, 147.72it/s]\u001b[A\n",
      " 30%|██▉       | 120/403 [00:00<00:01, 150.95it/s]\u001b[A\n",
      " 34%|███▎      | 136/403 [00:00<00:01, 151.95it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:01<00:01, 152.46it/s]\u001b[A\n",
      " 41%|████▏     | 167/403 [00:01<00:01, 143.86it/s]\u001b[A\n",
      " 45%|████▌     | 183/403 [00:01<00:01, 148.15it/s]\u001b[A\n",
      " 50%|████▉     | 200/403 [00:01<00:01, 151.80it/s]\u001b[A\n",
      " 54%|█████▎    | 216/403 [00:01<00:01, 153.57it/s]\u001b[A\n",
      " 58%|█████▊    | 232/403 [00:01<00:01, 155.11it/s]\u001b[A\n",
      " 62%|██████▏   | 249/403 [00:01<00:00, 157.22it/s]\u001b[A\n",
      " 66%|██████▌   | 266/403 [00:01<00:00, 158.84it/s]\u001b[A\n",
      " 70%|██████▉   | 282/403 [00:01<00:00, 158.51it/s]\u001b[A\n",
      " 74%|███████▍  | 299/403 [00:01<00:00, 159.93it/s]\u001b[A\n",
      " 78%|███████▊  | 316/403 [00:02<00:00, 145.70it/s]\u001b[A\n",
      " 82%|████████▏ | 331/403 [00:02<00:00, 141.61it/s]\u001b[A\n",
      " 86%|████████▌ | 347/403 [00:02<00:00, 145.08it/s]\u001b[A\n",
      " 90%|█████████ | 363/403 [00:02<00:00, 147.81it/s]\u001b[A\n",
      " 94%|█████████▍| 379/403 [00:02<00:00, 150.20it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 149.64it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 2000/4749 [00:49<09:11,  4.98it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2550823881874599\n",
      "f1: 0.7233025587722899\n",
      "Test Loss: 0.008202, Acc: 0.750155\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 53%|█████▎    | 2494/4749 [00:57<00:36, 62.44it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 151.86it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5234\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 152.08it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 152.67it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 153.29it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 153.88it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:01, 154.95it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:01, 154.72it/s]\u001b[A\n",
      " 32%|███▏      | 128/403 [00:00<00:01, 154.43it/s]\u001b[A\n",
      " 36%|███▌      | 144/403 [00:00<00:01, 155.63it/s]\u001b[A\n",
      " 40%|███▉      | 160/403 [00:01<00:01, 156.15it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 156.25it/s]\u001b[A\n",
      " 48%|████▊     | 192/403 [00:01<00:01, 156.91it/s]\u001b[A\n",
      " 52%|█████▏    | 208/403 [00:01<00:01, 156.81it/s]\u001b[A\n",
      " 56%|█████▌    | 224/403 [00:01<00:01, 157.33it/s]\u001b[A\n",
      " 60%|█████▉    | 240/403 [00:01<00:01, 156.61it/s]\u001b[A\n",
      " 64%|██████▎   | 256/403 [00:01<00:00, 156.35it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 157.12it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:01<00:00, 157.10it/s]\u001b[A\n",
      " 76%|███████▌  | 305/403 [00:01<00:00, 158.02it/s]\u001b[A\n",
      " 80%|███████▉  | 321/403 [00:02<00:00, 158.60it/s]\u001b[A\n",
      " 84%|████████▍ | 338/403 [00:02<00:00, 159.20it/s]\u001b[A\n",
      " 88%|████████▊ | 355/403 [00:02<00:00, 159.78it/s]\u001b[A\n",
      " 92%|█████████▏| 371/403 [00:02<00:00, 159.80it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 154.15it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 53%|█████▎    | 2501/4749 [01:02<07:31,  4.98it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2644981810400171\n",
      "f1: 0.7237628184878472\n",
      "Test Loss: 0.008016, Acc: 0.751822\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 2997/4749 [01:10<00:28, 62.29it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 150.68it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.4895\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 151.14it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 152.49it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 154.37it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 154.20it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:01, 154.50it/s]\u001b[A\n",
      " 28%|██▊       | 113/403 [00:00<00:01, 156.65it/s]\u001b[A\n",
      " 32%|███▏      | 130/403 [00:00<00:01, 158.25it/s]\u001b[A\n",
      " 36%|███▌      | 146/403 [00:00<00:01, 158.27it/s]\u001b[A\n",
      " 40%|████      | 162/403 [00:01<00:01, 157.50it/s]\u001b[A\n",
      " 44%|████▍     | 178/403 [00:01<00:01, 157.98it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 159.36it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 160.23it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 159.02it/s]\u001b[A\n",
      " 61%|██████    | 245/403 [00:01<00:00, 161.07it/s]\u001b[A\n",
      " 65%|██████▌   | 262/403 [00:01<00:00, 162.34it/s]\u001b[A\n",
      " 69%|██████▉   | 279/403 [00:01<00:00, 161.22it/s]\u001b[A\n",
      " 73%|███████▎  | 296/403 [00:01<00:00, 160.81it/s]\u001b[A\n",
      " 78%|███████▊  | 313/403 [00:01<00:00, 160.91it/s]\u001b[A\n",
      " 82%|████████▏ | 330/403 [00:02<00:00, 161.88it/s]\u001b[A\n",
      " 86%|████████▌ | 347/403 [00:02<00:00, 162.57it/s]\u001b[A\n",
      " 90%|█████████ | 364/403 [00:02<00:00, 160.31it/s]\u001b[A\n",
      " 95%|█████████▍| 381/403 [00:02<00:00, 161.31it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 159.27it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27498395035309225\n",
      "f1: 0.6919685528823206\n",
      "Test Loss: 0.007970, Acc: 0.757095\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 74%|███████▎  | 3499/4749 [01:22<00:22, 55.70it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 17/403 [00:00<00:02, 165.43it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.3761\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 33/403 [00:00<00:02, 163.73it/s]\u001b[A\n",
      " 12%|█▏        | 50/403 [00:00<00:02, 164.15it/s]\u001b[A\n",
      " 17%|█▋        | 67/403 [00:00<00:02, 165.08it/s]\u001b[A\n",
      " 21%|██        | 84/403 [00:00<00:01, 166.53it/s]\u001b[A\n",
      " 25%|██▍       | 100/403 [00:00<00:01, 163.00it/s]\u001b[A\n",
      " 29%|██▉       | 117/403 [00:00<00:01, 163.71it/s]\u001b[A\n",
      " 33%|███▎      | 135/403 [00:00<00:01, 165.82it/s]\u001b[A\n",
      " 38%|███▊      | 152/403 [00:00<00:01, 165.28it/s]\u001b[A\n",
      " 42%|████▏     | 169/403 [00:01<00:01, 165.71it/s]\u001b[A\n",
      " 46%|████▌     | 186/403 [00:01<00:01, 163.01it/s]\u001b[A\n",
      " 50%|█████     | 203/403 [00:01<00:01, 163.32it/s]\u001b[A\n",
      " 55%|█████▍    | 220/403 [00:01<00:01, 163.95it/s]\u001b[A\n",
      " 59%|█████▉    | 238/403 [00:01<00:00, 167.02it/s]\u001b[A\n",
      " 63%|██████▎   | 255/403 [00:01<00:00, 167.60it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 167.26it/s]\u001b[A\n",
      " 72%|███████▏  | 289/403 [00:01<00:00, 166.64it/s]\u001b[A\n",
      " 76%|███████▌  | 307/403 [00:01<00:00, 167.72it/s]\u001b[A\n",
      " 80%|████████  | 324/403 [00:01<00:00, 167.14it/s]\u001b[A\n",
      " 85%|████████▍ | 341/403 [00:02<00:00, 166.34it/s]\u001b[A\n",
      " 89%|████████▉ | 358/403 [00:02<00:00, 166.81it/s]\u001b[A\n",
      " 93%|█████████▎| 375/403 [00:02<00:00, 165.92it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 165.63it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27498395035309225\n",
      "f1: 0.6854974031841217\n",
      "Test Loss: 0.008040, Acc: 0.756669\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 84%|████████▍ | 3995/4749 [01:34<00:11, 63.58it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 146.49it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4000, loss: 0.591\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 150.16it/s]\u001b[A\n",
      " 12%|█▏        | 49/403 [00:00<00:02, 154.05it/s]\u001b[A\n",
      " 16%|█▋        | 66/403 [00:00<00:02, 156.93it/s]\u001b[A\n",
      " 21%|██        | 83/403 [00:00<00:02, 158.74it/s]\u001b[A\n",
      " 25%|██▍       | 100/403 [00:00<00:01, 159.72it/s]\u001b[A\n",
      " 29%|██▉       | 116/403 [00:00<00:01, 158.71it/s]\u001b[A\n",
      " 33%|███▎      | 131/403 [00:00<00:01, 137.13it/s]\u001b[A\n",
      " 37%|███▋      | 148/403 [00:00<00:01, 143.70it/s]\u001b[A\n",
      " 41%|████      | 165/403 [00:01<00:01, 149.03it/s]\u001b[A\n",
      " 45%|████▌     | 182/403 [00:01<00:01, 152.74it/s]\u001b[A\n",
      " 49%|████▉     | 199/403 [00:01<00:01, 157.24it/s]\u001b[A\n",
      " 54%|█████▎    | 216/403 [00:01<00:01, 159.33it/s]\u001b[A\n",
      " 58%|█████▊    | 233/403 [00:01<00:01, 160.01it/s]\u001b[A\n",
      " 62%|██████▏   | 250/403 [00:01<00:00, 160.79it/s]\u001b[A\n",
      " 66%|██████▋   | 267/403 [00:01<00:00, 161.46it/s]\u001b[A\n",
      " 70%|███████   | 284/403 [00:01<00:00, 161.79it/s]\u001b[A\n",
      " 75%|███████▍  | 301/403 [00:01<00:00, 162.90it/s]\u001b[A\n",
      " 79%|███████▉  | 318/403 [00:02<00:00, 134.07it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 142.07it/s]\u001b[A\n",
      " 88%|████████▊ | 353/403 [00:02<00:00, 149.69it/s]\u001b[A\n",
      " 92%|█████████▏| 370/403 [00:02<00:00, 153.25it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 154.94it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 84%|████████▍ | 4002/4749 [01:39<02:28,  5.04it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27220201155574575\n",
      "f1: 0.7041491309413505\n",
      "Test Loss: 0.007952, Acc: 0.757405\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 95%|█████████▍| 4494/4749 [01:47<00:03, 70.94it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 143.06it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 4500, loss: 0.5339\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 29/403 [00:00<00:02, 141.60it/s]\u001b[A\n",
      " 11%|█         | 44/403 [00:00<00:02, 142.82it/s]\u001b[A\n",
      " 15%|█▍        | 59/403 [00:00<00:02, 142.96it/s]\u001b[A\n",
      " 18%|█▊        | 74/403 [00:00<00:02, 142.54it/s]\u001b[A\n",
      " 22%|██▏       | 89/403 [00:00<00:02, 142.94it/s]\u001b[A\n",
      " 26%|██▌       | 104/403 [00:00<00:02, 144.76it/s]\u001b[A\n",
      " 30%|██▉       | 119/403 [00:00<00:01, 144.95it/s]\u001b[A\n",
      " 33%|███▎      | 134/403 [00:00<00:01, 145.84it/s]\u001b[A\n",
      " 37%|███▋      | 149/403 [00:01<00:01, 146.98it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 146.72it/s]\u001b[A\n",
      " 45%|████▍     | 180/403 [00:01<00:01, 148.06it/s]\u001b[A\n",
      " 49%|████▊     | 196/403 [00:01<00:01, 149.90it/s]\u001b[A\n",
      " 53%|█████▎    | 212/403 [00:01<00:01, 150.95it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 151.58it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 152.12it/s]\u001b[A\n",
      " 65%|██████▍   | 260/403 [00:01<00:00, 153.48it/s]\u001b[A\n",
      " 68%|██████▊   | 276/403 [00:01<00:00, 152.03it/s]\u001b[A\n",
      " 72%|███████▏  | 292/403 [00:01<00:00, 152.45it/s]\u001b[A\n",
      " 76%|███████▋  | 308/403 [00:02<00:00, 153.65it/s]\u001b[A\n",
      " 80%|████████  | 324/403 [00:02<00:00, 154.59it/s]\u001b[A\n",
      " 84%|████████▍ | 340/403 [00:02<00:00, 155.19it/s]\u001b[A\n",
      " 88%|████████▊ | 356/403 [00:02<00:00, 155.89it/s]\u001b[A\n",
      " 92%|█████████▏| 372/403 [00:02<00:00, 155.60it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 147.48it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.27156002567943505\n",
      "f1: 0.7134845906406017\n",
      "Test Loss: 0.008010, Acc: 0.756824\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4749/4749 [01:55<00:00, 41.06it/s]\n",
      "  4%|▍         | 16/403 [00:00<00:02, 159.98it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 403/403 [00:02<00:00, 160.75it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/4749 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2704900492189172\n",
      "f1: 0.701754591557725\n",
      "Test Loss: 0.007908, Acc: 0.757638\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 10%|█         | 498/4749 [00:07<00:59, 71.60it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 15/403 [00:00<00:02, 142.90it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4341\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 30/403 [00:00<00:02, 143.43it/s]\u001b[A\n",
      " 11%|█         | 45/403 [00:00<00:02, 143.32it/s]\u001b[A\n",
      " 15%|█▍        | 60/403 [00:00<00:02, 145.18it/s]\u001b[A\n",
      " 19%|█▊        | 75/403 [00:00<00:02, 143.27it/s]\u001b[A\n",
      " 22%|██▏       | 90/403 [00:00<00:02, 144.19it/s]\u001b[A\n",
      " 26%|██▌       | 105/403 [00:00<00:02, 145.43it/s]\u001b[A\n",
      " 30%|██▉       | 119/403 [00:00<00:01, 143.72it/s]\u001b[A\n",
      " 33%|███▎      | 134/403 [00:00<00:01, 143.81it/s]\u001b[A\n",
      " 37%|███▋      | 149/403 [00:01<00:01, 143.79it/s]\u001b[A\n",
      " 41%|████      | 164/403 [00:01<00:01, 144.46it/s]\u001b[A\n",
      " 44%|████▍     | 179/403 [00:01<00:01, 144.27it/s]\u001b[A\n",
      " 48%|████▊     | 195/403 [00:01<00:01, 146.84it/s]\u001b[A\n",
      " 52%|█████▏    | 210/403 [00:01<00:01, 146.67it/s]\u001b[A\n",
      " 56%|█████▌    | 226/403 [00:01<00:01, 148.38it/s]\u001b[A\n",
      " 60%|█████▉    | 241/403 [00:01<00:01, 148.68it/s]\u001b[A\n",
      " 64%|██████▍   | 257/403 [00:01<00:00, 149.69it/s]\u001b[A\n",
      " 67%|██████▋   | 272/403 [00:01<00:00, 132.27it/s]\u001b[A\n",
      " 71%|███████▏  | 288/403 [00:02<00:00, 137.56it/s]\u001b[A\n",
      " 75%|███████▌  | 304/403 [00:02<00:00, 141.85it/s]\u001b[A\n",
      " 79%|███████▉  | 320/403 [00:02<00:00, 144.50it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 128.78it/s]\u001b[A\n",
      " 87%|████████▋ | 351/403 [00:02<00:00, 135.50it/s]\u001b[A\n",
      " 91%|█████████ | 366/403 [00:02<00:00, 139.03it/s]\u001b[A\n",
      " 95%|█████████▍| 382/403 [00:02<00:00, 143.49it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 143.22it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.2732719880162636\n",
      "f1: 0.6960996810451213\n",
      "Test Loss: 0.008034, Acc: 0.756746\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 998/4749 [00:19<01:03, 58.70it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 152.41it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4925\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 152.16it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 151.84it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 152.08it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 152.51it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:02, 152.66it/s]\u001b[A\n",
      " 28%|██▊       | 112/403 [00:00<00:01, 153.99it/s]\u001b[A\n",
      " 32%|███▏      | 128/403 [00:00<00:01, 154.78it/s]\u001b[A\n",
      " 36%|███▌      | 144/403 [00:00<00:01, 154.69it/s]\u001b[A\n",
      " 39%|███▉      | 159/403 [00:01<00:01, 144.91it/s]\u001b[A\n",
      " 43%|████▎     | 175/403 [00:01<00:01, 148.54it/s]\u001b[A\n",
      " 47%|████▋     | 191/403 [00:01<00:01, 150.85it/s]\u001b[A\n",
      " 51%|█████▏    | 207/403 [00:01<00:01, 152.26it/s]\u001b[A\n",
      " 55%|█████▌    | 223/403 [00:01<00:01, 152.64it/s]\u001b[A\n",
      " 59%|█████▉    | 239/403 [00:01<00:01, 153.84it/s]\u001b[A\n",
      " 63%|██████▎   | 255/403 [00:01<00:00, 154.40it/s]\u001b[A\n",
      " 67%|██████▋   | 271/403 [00:01<00:00, 155.09it/s]\u001b[A\n",
      " 71%|███████   | 287/403 [00:01<00:00, 156.49it/s]\u001b[A\n",
      " 75%|███████▌  | 303/403 [00:01<00:00, 156.63it/s]\u001b[A\n",
      " 79%|███████▉  | 319/403 [00:02<00:00, 155.04it/s]\u001b[A\n",
      " 83%|████████▎ | 335/403 [00:02<00:00, 155.75it/s]\u001b[A\n",
      " 87%|████████▋ | 351/403 [00:02<00:00, 155.01it/s]\u001b[A\n",
      " 91%|█████████ | 367/403 [00:02<00:00, 154.47it/s]\u001b[A\n",
      " 95%|█████████▌| 383/403 [00:02<00:00, 153.91it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 153.31it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 21%|██        | 1004/4749 [00:23<14:22,  4.34it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2501604964690777\n",
      "f1: 0.7301117445457377\n",
      "Test Loss: 0.008189, Acc: 0.747674\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 32%|███▏      | 1498/4749 [00:31<00:54, 60.19it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 18/403 [00:00<00:02, 175.48it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4754\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▊         | 35/403 [00:00<00:02, 173.54it/s]\u001b[A\n",
      " 13%|█▎        | 53/403 [00:00<00:02, 173.94it/s]\u001b[A\n",
      " 17%|█▋        | 70/403 [00:00<00:01, 171.51it/s]\u001b[A\n",
      " 22%|██▏       | 88/403 [00:00<00:01, 171.99it/s]\u001b[A\n",
      " 26%|██▋       | 106/403 [00:00<00:01, 171.91it/s]\u001b[A\n",
      " 31%|███       | 124/403 [00:00<00:01, 172.13it/s]\u001b[A\n",
      " 35%|███▍      | 141/403 [00:00<00:01, 171.35it/s]\u001b[A\n",
      " 39%|███▉      | 158/403 [00:00<00:01, 170.45it/s]\u001b[A\n",
      " 44%|████▎     | 176/403 [00:01<00:01, 170.85it/s]\u001b[A\n",
      " 48%|████▊     | 194/403 [00:01<00:01, 172.14it/s]\u001b[A\n",
      " 52%|█████▏    | 211/403 [00:01<00:01, 169.80it/s]\u001b[A\n",
      " 57%|█████▋    | 228/403 [00:01<00:01, 169.46it/s]\u001b[A\n",
      " 61%|██████    | 246/403 [00:01<00:00, 170.50it/s]\u001b[A\n",
      " 65%|██████▌   | 263/403 [00:01<00:00, 170.32it/s]\u001b[A\n",
      " 69%|██████▉   | 280/403 [00:01<00:00, 169.70it/s]\u001b[A\n",
      " 74%|███████▎  | 297/403 [00:01<00:00, 168.90it/s]\u001b[A\n",
      " 78%|███████▊  | 314/403 [00:01<00:00, 168.68it/s]\u001b[A\n",
      " 82%|████████▏ | 332/403 [00:01<00:00, 169.04it/s]\u001b[A\n",
      " 87%|████████▋ | 349/403 [00:02<00:00, 167.77it/s]\u001b[A\n",
      " 91%|█████████ | 366/403 [00:02<00:00, 167.19it/s]\u001b[A\n",
      " 95%|█████████▌| 383/403 [00:02<00:00, 167.89it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 169.59it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n",
      "num: 25792\n",
      "n: 4673\n",
      "em: 0.278621870318853\n",
      "f1: 0.6963651371098484\n",
      "Test Loss: 0.007948, Acc: 0.758142\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 1993/4749 [00:43<00:46, 59.90it/s]\n",
      "  0%|          | 0/403 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 16/403 [00:00<00:02, 154.51it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.507\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 32/403 [00:00<00:02, 154.72it/s]\u001b[A\n",
      " 12%|█▏        | 48/403 [00:00<00:02, 154.55it/s]\u001b[A\n",
      " 16%|█▌        | 64/403 [00:00<00:02, 155.25it/s]\u001b[A\n",
      " 20%|█▉        | 80/403 [00:00<00:02, 155.70it/s]\u001b[A\n",
      " 24%|██▍       | 96/403 [00:00<00:01, 155.69it/s]\u001b[A\n",
      " 28%|██▊       | 113/403 [00:00<00:01, 157.30it/s]\u001b[A\n",
      " 32%|███▏      | 129/403 [00:00<00:01, 158.09it/s]\u001b[A\n",
      " 36%|███▌      | 145/403 [00:00<00:01, 157.94it/s]\u001b[A\n",
      " 40%|███▉      | 161/403 [00:01<00:01, 156.28it/s]\u001b[A\n",
      " 44%|████▍     | 178/403 [00:01<00:01, 157.73it/s]\u001b[A\n",
      " 48%|████▊     | 194/403 [00:01<00:01, 158.32it/s]\u001b[A\n",
      " 52%|█████▏    | 211/403 [00:01<00:01, 159.29it/s]\u001b[A\n",
      " 56%|█████▋    | 227/403 [00:01<00:01, 157.09it/s]\u001b[A\n",
      " 61%|██████    | 244/403 [00:01<00:01, 158.17it/s]\u001b[A\n",
      " 65%|██████▍   | 260/403 [00:01<00:00, 158.44it/s]\u001b[A\n",
      " 69%|██████▊   | 277/403 [00:01<00:00, 158.86it/s]\u001b[A\n",
      " 73%|███████▎  | 293/403 [00:01<00:00, 157.13it/s]\u001b[A\n",
      " 77%|███████▋  | 310/403 [00:01<00:00, 157.75it/s]\u001b[A\n",
      " 81%|████████  | 326/403 [00:02<00:00, 157.83it/s]\u001b[A\n",
      " 85%|████████▌ | 343/403 [00:02<00:00, 159.12it/s]\u001b[A\n",
      " 89%|████████▉ | 359/403 [00:02<00:00, 158.04it/s]\u001b[A\n",
      " 93%|█████████▎| 375/403 [00:02<00:00, 158.24it/s]\u001b[A\n",
      "100%|██████████| 403/403 [00:02<00:00, 157.85it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 25792 dev_label_list: 25792 example_id_list: 25792\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 42%|████▏     | 1993/4749 [00:55<00:46, 59.90it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 25792\n",
      "n: 4673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 42%|████▏     | 2006/4749 [00:56<18:01,  2.54it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2781938797346458\n",
      "f1: 0.713905787723472\n",
      "Test Loss: 0.008150, Acc: 0.759499\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▊     | 2314/4749 [01:01<01:04, 37.61it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-117-9725eba13125>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent8verb4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-114-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m                 \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     79\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m                 \u001b[0mepoch\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m     91\u001b[0m     Variable._execution_engine.run_backward(\n\u001b[1;32m     92\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m         allow_unreachable=True)  # allow_unreachable flag\n\u001b[0m\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Run label: sent8verb4 (presumably the data/model setting used for this run; TODO confirm and document)\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▋        | 497/3047 [00:08<00:41, 61.39it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 142.42it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5403\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 29/271 [00:00<00:01, 140.87it/s]\u001b[A\n",
      " 16%|█▌        | 44/271 [00:00<00:01, 140.92it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 141.09it/s]\u001b[A\n",
      " 27%|██▋       | 74/271 [00:00<00:01, 141.48it/s]\u001b[A\n",
      " 33%|███▎      | 89/271 [00:00<00:01, 142.94it/s]\u001b[A\n",
      " 38%|███▊      | 104/271 [00:00<00:01, 144.77it/s]\u001b[A\n",
      " 44%|████▎     | 118/271 [00:00<00:01, 143.20it/s]\u001b[A\n",
      " 49%|████▉     | 133/271 [00:00<00:00, 142.72it/s]\u001b[A\n",
      " 54%|█████▍    | 147/271 [00:01<00:00, 140.09it/s]\u001b[A\n",
      " 59%|█████▉    | 161/271 [00:01<00:00, 120.52it/s]\u001b[A\n",
      " 64%|██████▍   | 174/271 [00:01<00:00, 117.78it/s]\u001b[A\n",
      " 69%|██████▉   | 188/271 [00:01<00:00, 122.12it/s]\u001b[A\n",
      " 74%|███████▍  | 201/271 [00:01<00:00, 123.21it/s]\u001b[A\n",
      " 79%|███████▉  | 215/271 [00:01<00:00, 125.96it/s]\u001b[A\n",
      " 84%|████████▍ | 228/271 [00:01<00:00, 121.58it/s]\u001b[A\n",
      " 89%|████████▉ | 242/271 [00:01<00:00, 125.41it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:02<00:00, 132.83it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 504/3047 [00:11<06:24,  6.61it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28\n",
      "f1: 0.702068413928883\n",
      "Test Loss: 0.008089, Acc: 0.755535\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 995/3047 [00:19<00:35, 58.32it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 157.48it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4469\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 32/271 [00:00<00:01, 156.88it/s]\u001b[A\n",
      " 18%|█▊        | 48/271 [00:00<00:01, 157.70it/s]\u001b[A\n",
      " 24%|██▍       | 65/271 [00:00<00:01, 158.98it/s]\u001b[A\n",
      " 30%|██▉       | 81/271 [00:00<00:01, 158.45it/s]\u001b[A\n",
      " 36%|███▌      | 97/271 [00:00<00:01, 158.29it/s]\u001b[A\n",
      " 42%|████▏     | 113/271 [00:00<00:00, 158.47it/s]\u001b[A\n",
      " 48%|████▊     | 130/271 [00:00<00:00, 159.05it/s]\u001b[A\n",
      " 54%|█████▍    | 146/271 [00:00<00:00, 158.37it/s]\u001b[A\n",
      " 60%|█████▉    | 162/271 [00:01<00:00, 146.90it/s]\u001b[A\n",
      " 66%|██████▌   | 178/271 [00:01<00:00, 150.48it/s]\u001b[A\n",
      " 72%|███████▏  | 195/271 [00:01<00:00, 154.98it/s]\u001b[A\n",
      " 78%|███████▊  | 212/271 [00:01<00:00, 157.16it/s]\u001b[A\n",
      " 84%|████████▍ | 228/271 [00:01<00:00, 147.16it/s]\u001b[A\n",
      " 90%|████████▉ | 243/271 [00:01<00:00, 147.09it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 154.27it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 33%|███▎      | 1001/3047 [00:22<05:25,  6.28it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2877519379844961\n",
      "f1: 0.7104675772117671\n",
      "Test Loss: 0.007995, Acc: 0.758187\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1499/3047 [00:30<00:24, 62.62it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 166.00it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.3759\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 165.00it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 164.87it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 163.65it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 163.78it/s]\u001b[A\n",
      " 38%|███▊      | 102/271 [00:00<00:01, 162.81it/s]\u001b[A\n",
      " 44%|████▍     | 119/271 [00:00<00:00, 162.59it/s]\u001b[A\n",
      " 50%|████▉     | 135/271 [00:00<00:00, 159.52it/s]\u001b[A\n",
      " 56%|█████▌    | 151/271 [00:00<00:00, 147.83it/s]\u001b[A\n",
      " 61%|██████▏   | 166/271 [00:01<00:00, 147.22it/s]\u001b[A\n",
      " 67%|██████▋   | 181/271 [00:01<00:00, 147.23it/s]\u001b[A\n",
      " 73%|███████▎  | 197/271 [00:01<00:00, 148.80it/s]\u001b[A\n",
      " 78%|███████▊  | 212/271 [00:01<00:00, 147.53it/s]\u001b[A\n",
      " 84%|████████▍ | 227/271 [00:01<00:00, 146.63it/s]\u001b[A\n",
      " 89%|████████▉ | 242/271 [00:01<00:00, 145.38it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 152.25it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 49%|████▉     | 1506/3047 [00:33<03:40,  6.99it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2824806201550388\n",
      "f1: 0.7253190599237161\n",
      "Test Loss: 0.007955, Acc: 0.758476\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 66%|██████▌   | 1999/3047 [00:41<00:14, 73.63it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 150.65it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5192\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█▏        | 31/271 [00:00<00:01, 149.98it/s]\u001b[A\n",
      " 17%|█▋        | 47/271 [00:00<00:01, 150.26it/s]\u001b[A\n",
      " 23%|██▎       | 62/271 [00:00<00:01, 150.06it/s]\u001b[A\n",
      " 29%|██▉       | 78/271 [00:00<00:01, 151.56it/s]\u001b[A\n",
      " 35%|███▍      | 94/271 [00:00<00:01, 153.90it/s]\u001b[A\n",
      " 41%|████      | 110/271 [00:00<00:01, 155.62it/s]\u001b[A\n",
      " 46%|████▋     | 126/271 [00:00<00:00, 154.20it/s]\u001b[A\n",
      " 52%|█████▏    | 142/271 [00:00<00:00, 155.51it/s]\u001b[A\n",
      " 58%|█████▊    | 158/271 [00:01<00:00, 156.50it/s]\u001b[A\n",
      " 64%|██████▍   | 174/271 [00:01<00:00, 146.93it/s]\u001b[A\n",
      " 70%|███████   | 191/271 [00:01<00:00, 152.57it/s]\u001b[A\n",
      " 77%|███████▋  | 208/271 [00:01<00:00, 137.58it/s]\u001b[A\n",
      " 83%|████████▎ | 225/271 [00:01<00:00, 144.70it/s]\u001b[A\n",
      " 90%|████████▉ | 243/271 [00:01<00:00, 151.85it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 152.66it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2007/3047 [00:44<02:05,  8.30it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28589147286821703\n",
      "f1: 0.6992865756121605\n",
      "Test Loss: 0.007934, Acc: 0.757322\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2493/3047 [00:51<00:08, 62.61it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 162.72it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4848\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 160.06it/s]\u001b[A\n",
      " 18%|█▊        | 50/271 [00:00<00:01, 160.19it/s]\u001b[A\n",
      " 25%|██▍       | 67/271 [00:00<00:01, 160.83it/s]\u001b[A\n",
      " 31%|███       | 83/271 [00:00<00:01, 159.69it/s]\u001b[A\n",
      " 37%|███▋      | 100/271 [00:00<00:01, 160.92it/s]\u001b[A\n",
      " 43%|████▎     | 116/271 [00:00<00:00, 160.54it/s]\u001b[A\n",
      " 49%|████▊     | 132/271 [00:00<00:00, 159.65it/s]\u001b[A\n",
      " 55%|█████▍    | 149/271 [00:00<00:00, 160.10it/s]\u001b[A\n",
      " 61%|██████▏   | 166/271 [00:01<00:00, 160.92it/s]\u001b[A\n",
      " 68%|██████▊   | 183/271 [00:01<00:00, 161.10it/s]\u001b[A\n",
      " 74%|███████▍  | 200/271 [00:01<00:00, 161.90it/s]\u001b[A\n",
      " 80%|████████  | 217/271 [00:01<00:00, 161.94it/s]\u001b[A\n",
      " 86%|████████▋ | 234/271 [00:01<00:00, 160.77it/s]\u001b[A\n",
      " 93%|█████████▎| 251/271 [00:01<00:00, 161.24it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 160.50it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 2500/3047 [00:54<01:13,  7.44it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2908527131782946\n",
      "f1: 0.7104931709117792\n",
      "Test Loss: 0.007951, Acc: 0.759225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2997/3047 [01:02<00:00, 62.38it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 164.54it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3085\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 165.77it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 166.54it/s]\u001b[A\n",
      " 25%|██▌       | 69/271 [00:00<00:01, 167.91it/s]\u001b[A\n",
      " 32%|███▏      | 87/271 [00:00<00:01, 168.88it/s]\u001b[A\n",
      " 38%|███▊      | 104/271 [00:00<00:00, 168.06it/s]\u001b[A\n",
      " 45%|████▍     | 121/271 [00:00<00:00, 167.75it/s]\u001b[A\n",
      " 51%|█████     | 138/271 [00:00<00:00, 168.30it/s]\u001b[A\n",
      " 57%|█████▋    | 155/271 [00:00<00:00, 168.48it/s]\u001b[A\n",
      " 63%|██████▎   | 172/271 [00:01<00:00, 167.72it/s]\u001b[A\n",
      " 70%|███████   | 190/271 [00:01<00:00, 170.20it/s]\u001b[A\n",
      " 77%|███████▋  | 208/271 [00:01<00:00, 170.24it/s]\u001b[A\n",
      " 83%|████████▎ | 226/271 [00:01<00:00, 170.52it/s]\u001b[A\n",
      " 90%|████████▉ | 243/271 [00:01<00:00, 167.22it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 168.84it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 99%|█████████▊| 3004/3047 [01:05<00:05,  7.53it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29023255813953486\n",
      "f1: 0.7040241171404003\n",
      "Test Loss: 0.007962, Acc: 0.758879\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:06<00:00, 45.89it/s]\n",
      "  6%|▋         | 17/271 [00:00<00:01, 167.64it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 173.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3047 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2744186046511628\n",
      "f1: 0.6678191214470321\n",
      "Test Loss: 0.007984, Acc: 0.752940\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▋        | 499/3047 [00:08<00:43, 58.56it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 146.67it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.488\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 30/271 [00:00<00:01, 146.11it/s]\u001b[A\n",
      " 16%|█▌        | 44/271 [00:00<00:01, 143.87it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 144.34it/s]\u001b[A\n",
      " 27%|██▋       | 74/271 [00:00<00:01, 144.34it/s]\u001b[A\n",
      " 33%|███▎      | 90/271 [00:00<00:01, 146.47it/s]\u001b[A\n",
      " 39%|███▊      | 105/271 [00:00<00:01, 147.49it/s]\u001b[A\n",
      " 45%|████▍     | 121/271 [00:00<00:01, 148.53it/s]\u001b[A\n",
      " 50%|█████     | 136/271 [00:00<00:00, 148.16it/s]\u001b[A\n",
      " 56%|█████▌    | 151/271 [00:01<00:00, 148.43it/s]\u001b[A\n",
      " 62%|██████▏   | 167/271 [00:01<00:00, 149.59it/s]\u001b[A\n",
      " 68%|██████▊   | 183/271 [00:01<00:00, 152.09it/s]\u001b[A\n",
      " 73%|███████▎  | 199/271 [00:01<00:00, 153.36it/s]\u001b[A\n",
      " 79%|███████▉  | 215/271 [00:01<00:00, 154.43it/s]\u001b[A\n",
      " 85%|████████▌ | 231/271 [00:01<00:00, 153.15it/s]\u001b[A\n",
      " 91%|█████████ | 247/271 [00:01<00:00, 152.94it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 150.27it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 505/3047 [00:11<06:57,  6.09it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27813953488372095\n",
      "f1: 0.7314042081949106\n",
      "Test Loss: 0.007918, Acc: 0.757611\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 994/3047 [00:18<00:33, 62.21it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 166.52it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4584\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 166.39it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 166.67it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 166.29it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 166.86it/s]\u001b[A\n",
      " 38%|███▊      | 102/271 [00:00<00:01, 167.68it/s]\u001b[A\n",
      " 44%|████▍     | 119/271 [00:00<00:00, 165.59it/s]\u001b[A\n",
      " 50%|█████     | 136/271 [00:00<00:00, 166.48it/s]\u001b[A\n",
      " 56%|█████▋    | 153/271 [00:00<00:00, 166.13it/s]\u001b[A\n",
      " 63%|██████▎   | 171/271 [00:01<00:00, 168.07it/s]\u001b[A\n",
      " 69%|██████▉   | 188/271 [00:01<00:00, 168.56it/s]\u001b[A\n",
      " 76%|███████▌  | 205/271 [00:01<00:00, 168.90it/s]\u001b[A\n",
      " 82%|████████▏ | 222/271 [00:01<00:00, 168.57it/s]\u001b[A\n",
      " 88%|████████▊ | 239/271 [00:01<00:00, 165.74it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 166.24it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 33%|███▎      | 1001/3047 [00:21<04:35,  7.41it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29023255813953486\n",
      "f1: 0.7024186046511669\n",
      "Test Loss: 0.007915, Acc: 0.760032\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1498/3047 [00:29<00:20, 74.20it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 164.02it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.383\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 162.79it/s]\u001b[A\n",
      " 18%|█▊        | 50/271 [00:00<00:01, 162.49it/s]\u001b[A\n",
      " 25%|██▍       | 67/271 [00:00<00:01, 163.04it/s]\u001b[A\n",
      " 31%|███       | 84/271 [00:00<00:01, 163.64it/s]\u001b[A\n",
      " 37%|███▋      | 101/271 [00:00<00:01, 163.26it/s]\u001b[A\n",
      " 43%|████▎     | 117/271 [00:00<00:00, 160.76it/s]\u001b[A\n",
      " 49%|████▉     | 134/271 [00:00<00:00, 162.42it/s]\u001b[A\n",
      " 56%|█████▌    | 151/271 [00:00<00:00, 162.76it/s]\u001b[A\n",
      " 62%|██████▏   | 168/271 [00:01<00:00, 164.51it/s]\u001b[A\n",
      " 68%|██████▊   | 185/271 [00:01<00:00, 164.72it/s]\u001b[A\n",
      " 75%|███████▍  | 202/271 [00:01<00:00, 165.43it/s]\u001b[A\n",
      " 81%|████████  | 219/271 [00:01<00:00, 166.02it/s]\u001b[A\n",
      " 87%|████████▋ | 236/271 [00:01<00:00, 164.69it/s]\u001b[A\n",
      " 93%|█████████▎| 253/271 [00:01<00:00, 164.79it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 160.12it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 49%|████▉     | 1506/3047 [00:32<03:00,  8.55it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28589147286821703\n",
      "f1: 0.6987530454042117\n",
      "Test Loss: 0.007902, Acc: 0.757611\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 65%|██████▌   | 1995/3047 [00:40<00:16, 62.69it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 18/271 [00:00<00:01, 171.84it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.526\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 36/271 [00:00<00:01, 171.63it/s]\u001b[A\n",
      " 20%|█▉        | 54/271 [00:00<00:01, 171.78it/s]\u001b[A\n",
      " 26%|██▌       | 71/271 [00:00<00:01, 170.10it/s]\u001b[A\n",
      " 33%|███▎      | 89/271 [00:00<00:01, 171.25it/s]\u001b[A\n",
      " 39%|███▉      | 107/271 [00:00<00:00, 172.82it/s]\u001b[A\n",
      " 46%|████▌     | 124/271 [00:00<00:00, 171.19it/s]\u001b[A\n",
      " 52%|█████▏    | 141/271 [00:00<00:00, 169.39it/s]\u001b[A\n",
      " 58%|█████▊    | 157/271 [00:00<00:00, 146.75it/s]\u001b[A\n",
      " 65%|██████▍   | 175/271 [00:01<00:00, 153.43it/s]\u001b[A\n",
      " 71%|███████   | 192/271 [00:01<00:00, 157.03it/s]\u001b[A\n",
      " 77%|███████▋  | 210/271 [00:01<00:00, 161.10it/s]\u001b[A\n",
      " 84%|████████▍ | 227/271 [00:01<00:00, 162.01it/s]\u001b[A\n",
      " 90%|█████████ | 244/271 [00:01<00:00, 162.86it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 164.04it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2002/3047 [00:43<02:21,  7.37it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2874418604651163\n",
      "f1: 0.7068278577580943\n",
      "Test Loss: 0.007960, Acc: 0.756631\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2498/3047 [00:51<00:08, 61.95it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 162.00it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4798\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 160.01it/s]\u001b[A\n",
      " 18%|█▊        | 49/271 [00:00<00:01, 158.82it/s]\u001b[A\n",
      " 24%|██▍       | 65/271 [00:00<00:01, 158.21it/s]\u001b[A\n",
      " 30%|██▉       | 81/271 [00:00<00:01, 157.45it/s]\u001b[A\n",
      " 36%|███▌      | 98/271 [00:00<00:01, 158.42it/s]\u001b[A\n",
      " 42%|████▏     | 114/271 [00:00<00:00, 158.67it/s]\u001b[A\n",
      " 48%|████▊     | 130/271 [00:00<00:00, 158.72it/s]\u001b[A\n",
      " 54%|█████▍    | 146/271 [00:00<00:00, 156.54it/s]\u001b[A\n",
      " 60%|█████▉    | 162/271 [00:01<00:00, 157.37it/s]\u001b[A\n",
      " 66%|██████▌   | 179/271 [00:01<00:00, 158.28it/s]\u001b[A\n",
      " 72%|███████▏  | 195/271 [00:01<00:00, 158.37it/s]\u001b[A\n",
      " 78%|███████▊  | 211/271 [00:01<00:00, 158.29it/s]\u001b[A\n",
      " 84%|████████▍ | 227/271 [00:01<00:00, 157.63it/s]\u001b[A\n",
      " 90%|█████████ | 244/271 [00:01<00:00, 159.30it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 158.01it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 2505/3047 [00:54<01:14,  7.23it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28930232558139535\n",
      "f1: 0.7012235757352071\n",
      "Test Loss: 0.007881, Acc: 0.757726\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2994/3047 [01:02<00:00, 64.17it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 162.69it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3198\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 160.20it/s]\u001b[A\n",
      " 18%|█▊        | 49/271 [00:00<00:01, 160.09it/s]\u001b[A\n",
      " 24%|██▍       | 66/271 [00:00<00:01, 160.61it/s]\u001b[A\n",
      " 31%|███       | 83/271 [00:00<00:01, 162.40it/s]\u001b[A\n",
      " 37%|███▋      | 100/271 [00:00<00:01, 164.18it/s]\u001b[A\n",
      " 43%|████▎     | 117/271 [00:00<00:00, 163.80it/s]\u001b[A\n",
      " 49%|████▉     | 134/271 [00:00<00:00, 163.40it/s]\u001b[A\n",
      " 56%|█████▌    | 151/271 [00:00<00:00, 163.91it/s]\u001b[A\n",
      " 62%|██████▏   | 168/271 [00:01<00:00, 165.10it/s]\u001b[A\n",
      " 68%|██████▊   | 185/271 [00:01<00:00, 163.72it/s]\u001b[A\n",
      " 75%|███████▍  | 202/271 [00:01<00:00, 164.49it/s]\u001b[A\n",
      " 81%|████████  | 219/271 [00:01<00:00, 165.49it/s]\u001b[A\n",
      " 87%|████████▋ | 236/271 [00:01<00:00, 163.23it/s]\u001b[A\n",
      " 93%|█████████▎| 253/271 [00:01<00:00, 163.20it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 163.11it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 98%|█████████▊| 3001/3047 [01:05<00:06,  7.38it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29643410852713176\n",
      "f1: 0.7147862679955738\n",
      "Test Loss: 0.007912, Acc: 0.760609\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:05<00:00, 46.34it/s]\n",
      "  6%|▌         | 16/271 [00:00<00:01, 158.83it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 162.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3047 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26976744186046514\n",
      "f1: 0.6650882244370653\n",
      "Test Loss: 0.008033, Acc: 0.752018\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▋        | 499/3047 [00:08<00:40, 63.67it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 18/271 [00:00<00:01, 172.15it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4734\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 35/271 [00:00<00:01, 171.31it/s]\u001b[A\n",
      " 19%|█▉        | 52/271 [00:00<00:01, 170.77it/s]\u001b[A\n",
      " 26%|██▌       | 70/271 [00:00<00:01, 170.75it/s]\u001b[A\n",
      " 32%|███▏      | 88/271 [00:00<00:01, 171.02it/s]\u001b[A\n",
      " 39%|███▉      | 106/271 [00:00<00:00, 170.94it/s]\u001b[A\n",
      " 46%|████▌     | 124/271 [00:00<00:00, 170.97it/s]\u001b[A\n",
      " 52%|█████▏    | 140/271 [00:00<00:00, 146.32it/s]\u001b[A\n",
      " 58%|█████▊    | 158/271 [00:00<00:00, 152.82it/s]\u001b[A\n",
      " 65%|██████▍   | 176/271 [00:01<00:00, 158.17it/s]\u001b[A\n",
      " 72%|███████▏  | 194/271 [00:01<00:00, 162.72it/s]\u001b[A\n",
      " 78%|███████▊  | 212/271 [00:01<00:00, 165.68it/s]\u001b[A\n",
      " 85%|████████▍ | 229/271 [00:01<00:00, 164.92it/s]\u001b[A\n",
      " 91%|█████████ | 247/271 [00:01<00:00, 167.46it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 165.31it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 506/3047 [00:10<05:36,  7.56it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2750387596899225\n",
      "f1: 0.7349011935523607\n",
      "Test Loss: 0.007994, Acc: 0.755708\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 993/3047 [00:18<00:32, 63.36it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 142.22it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4657\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 30/271 [00:00<00:01, 142.34it/s]\u001b[A\n",
      " 17%|█▋        | 45/271 [00:00<00:01, 143.30it/s]\u001b[A\n",
      " 22%|██▏       | 60/271 [00:00<00:01, 143.97it/s]\u001b[A\n",
      " 28%|██▊       | 75/271 [00:00<00:01, 143.71it/s]\u001b[A\n",
      " 33%|███▎      | 90/271 [00:00<00:01, 145.42it/s]\u001b[A\n",
      " 39%|███▊      | 105/271 [00:00<00:01, 145.58it/s]\u001b[A\n",
      " 45%|████▍     | 121/271 [00:00<00:01, 147.16it/s]\u001b[A\n",
      " 51%|█████     | 137/271 [00:00<00:00, 148.37it/s]\u001b[A\n",
      " 56%|█████▋    | 153/271 [00:01<00:00, 150.14it/s]\u001b[A\n",
      " 62%|██████▏   | 169/271 [00:01<00:00, 152.81it/s]\u001b[A\n",
      " 68%|██████▊   | 185/271 [00:01<00:00, 154.46it/s]\u001b[A\n",
      " 74%|███████▍  | 201/271 [00:01<00:00, 155.73it/s]\u001b[A\n",
      " 80%|████████  | 217/271 [00:01<00:00, 154.90it/s]\u001b[A\n",
      " 86%|████████▌ | 233/271 [00:01<00:00, 153.32it/s]\u001b[A\n",
      " 92%|█████████▏| 249/271 [00:01<00:00, 155.03it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 150.80it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 33%|███▎      | 1000/3047 [00:21<04:50,  7.04it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.293953488372093\n",
      "f1: 0.7004956318444727\n",
      "Test Loss: 0.007932, Acc: 0.758879\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1498/3047 [00:29<00:24, 62.15it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 169.34it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.3931\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 168.15it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 166.89it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 166.28it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 166.28it/s]\u001b[A\n",
      " 38%|███▊      | 103/271 [00:00<00:00, 168.56it/s]\u001b[A\n",
      " 44%|████▍     | 120/271 [00:00<00:00, 167.80it/s]\u001b[A\n",
      " 51%|█████     | 137/271 [00:00<00:00, 166.90it/s]\u001b[A\n",
      " 57%|█████▋    | 155/271 [00:00<00:00, 169.19it/s]\u001b[A\n",
      " 64%|██████▍   | 173/271 [00:01<00:00, 169.99it/s]\u001b[A\n",
      " 70%|███████   | 191/271 [00:01<00:00, 171.45it/s]\u001b[A\n",
      " 77%|███████▋  | 209/271 [00:01<00:00, 172.70it/s]\u001b[A\n",
      " 84%|████████▍ | 227/271 [00:01<00:00, 171.78it/s]\u001b[A\n",
      " 90%|█████████ | 245/271 [00:01<00:00, 170.76it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 163.33it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 49%|████▉     | 1505/3047 [00:32<03:25,  7.49it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29023255813953486\n",
      "f1: 0.7075909929863455\n",
      "Test Loss: 0.007865, Acc: 0.760897\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 65%|██████▌   | 1995/3047 [00:40<00:16, 62.53it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 161.85it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.51\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 161.64it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 163.18it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 164.56it/s]\u001b[A\n",
      " 30%|██▉       | 81/271 [00:06<00:29,  6.45it/s] \u001b[A\n",
      " 37%|███▋      | 99/271 [00:07<00:18,  9.07it/s]\u001b[A\n",
      " 43%|████▎     | 117/271 [00:07<00:12, 12.67it/s]\u001b[A\n",
      " 50%|████▉     | 135/271 [00:07<00:07, 17.55it/s]\u001b[A\n",
      " 56%|█████▋    | 153/271 [00:07<00:04, 24.04it/s]\u001b[A\n",
      " 63%|██████▎   | 171/271 [00:07<00:03, 32.49it/s]\u001b[A\n",
      " 69%|██████▉   | 188/271 [00:07<00:01, 41.57it/s]\u001b[A\n",
      " 76%|███████▋  | 207/271 [00:07<00:01, 54.15it/s]\u001b[A\n",
      " 83%|████████▎ | 226/271 [00:07<00:00, 68.62it/s]\u001b[A\n",
      " 90%|█████████ | 244/271 [00:07<00:00, 84.11it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:08<00:00, 33.61it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2002/3047 [00:49<07:09,  2.43it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2868217054263566\n",
      "f1: 0.699338747385263\n",
      "Test Loss: 0.007915, Acc: 0.758591\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2498/3047 [00:57<00:08, 61.07it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 169.51it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4817\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 167.93it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 167.83it/s]\u001b[A\n",
      " 25%|██▌       | 69/271 [00:00<00:01, 168.86it/s]\u001b[A\n",
      " 32%|███▏      | 86/271 [00:00<00:01, 168.55it/s]\u001b[A\n",
      " 38%|███▊      | 104/271 [00:00<00:00, 170.66it/s]\u001b[A\n",
      " 45%|████▍     | 121/271 [00:00<00:00, 169.52it/s]\u001b[A\n",
      " 51%|█████▏    | 139/271 [00:00<00:00, 169.86it/s]\u001b[A\n",
      " 58%|█████▊    | 157/271 [00:00<00:00, 170.13it/s]\u001b[A\n",
      " 65%|██████▍   | 175/271 [00:01<00:00, 170.60it/s]\u001b[A\n",
      " 71%|███████   | 193/271 [00:01<00:00, 171.93it/s]\u001b[A\n",
      " 77%|███████▋  | 210/271 [00:01<00:00, 171.20it/s]\u001b[A\n",
      " 84%|████████▍ | 227/271 [00:01<00:00, 169.93it/s]\u001b[A\n",
      " 90%|█████████ | 244/271 [00:01<00:00, 167.77it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 169.21it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 2505/3047 [01:00<01:11,  7.59it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.1813953488372093\n",
      "f1: 0.7318551371109541\n",
      "Test Loss: 0.009078, Acc: 0.699954\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2996/3047 [01:07<00:00, 72.42it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 161.52it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3247\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 161.45it/s]\u001b[A\n",
      " 18%|█▊        | 50/271 [00:00<00:01, 159.89it/s]\u001b[A\n",
      " 25%|██▍       | 67/271 [00:00<00:01, 160.93it/s]\u001b[A\n",
      " 31%|███       | 83/271 [00:00<00:01, 160.44it/s]\u001b[A\n",
      " 37%|███▋      | 101/271 [00:00<00:01, 164.33it/s]\u001b[A\n",
      " 44%|████▎     | 118/271 [00:00<00:00, 165.30it/s]\u001b[A\n",
      " 49%|████▉     | 134/271 [00:00<00:00, 163.30it/s]\u001b[A\n",
      " 56%|█████▌    | 151/271 [00:00<00:00, 164.69it/s]\u001b[A\n",
      " 62%|██████▏   | 168/271 [00:01<00:00, 163.89it/s]\u001b[A\n",
      " 69%|██████▊   | 186/271 [00:01<00:00, 166.04it/s]\u001b[A\n",
      " 75%|███████▍  | 203/271 [00:01<00:00, 163.88it/s]\u001b[A\n",
      " 81%|████████  | 220/271 [00:01<00:00, 164.76it/s]\u001b[A\n",
      " 87%|████████▋ | 237/271 [00:01<00:00, 164.17it/s]\u001b[A\n",
      " 94%|█████████▎| 254/271 [00:01<00:00, 163.19it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 163.67it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 99%|█████████▊| 3004/3047 [01:10<00:05,  8.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2961240310077519\n",
      "f1: 0.7166745416512895\n",
      "Test Loss: 0.007886, Acc: 0.760263\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:11<00:00, 42.78it/s]\n",
      "  4%|▍         | 12/271 [00:00<00:02, 117.39it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 147.59it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3047 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2713178294573643\n",
      "f1: 0.6621698043558553\n",
      "Test Loss: 0.008051, Acc: 0.750461\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▌        | 494/3047 [00:08<00:41, 61.02it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 159.50it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4684\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 32/271 [00:00<00:01, 158.53it/s]\u001b[A\n",
      " 18%|█▊        | 48/271 [00:00<00:01, 157.92it/s]\u001b[A\n",
      " 24%|██▎       | 64/271 [00:00<00:01, 156.73it/s]\u001b[A\n",
      " 30%|██▉       | 81/271 [00:00<00:01, 158.67it/s]\u001b[A\n",
      " 36%|███▌      | 98/271 [00:00<00:01, 160.46it/s]\u001b[A\n",
      " 42%|████▏     | 113/271 [00:00<00:01, 139.57it/s]\u001b[A\n",
      " 48%|████▊     | 130/271 [00:00<00:00, 145.84it/s]\u001b[A\n",
      " 54%|█████▍    | 146/271 [00:00<00:00, 149.08it/s]\u001b[A\n",
      " 60%|██████    | 163/271 [00:01<00:00, 153.94it/s]\u001b[A\n",
      " 67%|██████▋   | 181/271 [00:01<00:00, 159.65it/s]\u001b[A\n",
      " 73%|███████▎  | 199/271 [00:01<00:00, 163.04it/s]\u001b[A\n",
      " 80%|████████  | 217/271 [00:01<00:00, 165.49it/s]\u001b[A\n",
      " 86%|████████▋ | 234/271 [00:01<00:00, 164.92it/s]\u001b[A\n",
      " 93%|█████████▎| 251/271 [00:01<00:00, 166.08it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 159.41it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 16%|█▋        | 501/3047 [00:10<05:45,  7.38it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2812403100775194\n",
      "f1: 0.729069275255326\n",
      "Test Loss: 0.007932, Acc: 0.756919\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 994/3047 [00:18<00:38, 53.19it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 150.79it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4648\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█▏        | 31/271 [00:00<00:01, 150.23it/s]\u001b[A\n",
      " 17%|█▋        | 46/271 [00:00<00:01, 148.71it/s]\u001b[A\n",
      " 23%|██▎       | 62/271 [00:00<00:01, 149.21it/s]\u001b[A\n",
      " 28%|██▊       | 77/271 [00:00<00:01, 149.19it/s]\u001b[A\n",
      " 34%|███▍      | 92/271 [00:00<00:01, 149.29it/s]\u001b[A\n",
      " 39%|███▉      | 106/271 [00:00<00:01, 145.88it/s]\u001b[A\n",
      " 44%|████▍     | 120/271 [00:00<00:01, 143.45it/s]\u001b[A\n",
      " 49%|████▉     | 134/271 [00:00<00:00, 139.02it/s]\u001b[A\n",
      " 55%|█████▍    | 148/271 [00:01<00:00, 138.31it/s]\u001b[A\n",
      " 60%|██████    | 163/271 [00:01<00:00, 139.26it/s]\u001b[A\n",
      " 65%|██████▌   | 177/271 [00:01<00:00, 138.26it/s]\u001b[A\n",
      " 71%|███████   | 192/271 [00:01<00:00, 140.12it/s]\u001b[A\n",
      " 76%|███████▌  | 206/271 [00:01<00:00, 139.76it/s]\u001b[A\n",
      " 82%|████████▏ | 221/271 [00:01<00:00, 140.91it/s]\u001b[A\n",
      " 87%|████████▋ | 236/271 [00:01<00:00, 137.67it/s]\u001b[A\n",
      " 93%|█████████▎| 251/271 [00:01<00:00, 139.85it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 142.22it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 1006/3047 [00:21<04:17,  7.93it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29426356589147284\n",
      "f1: 0.701414790205492\n",
      "Test Loss: 0.007935, Acc: 0.759917\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1493/3047 [00:30<00:26, 59.33it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 142.98it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.392\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 30/271 [00:00<00:01, 143.54it/s]\u001b[A\n",
      " 16%|█▌        | 44/271 [00:00<00:01, 141.86it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 143.23it/s]\u001b[A\n",
      " 27%|██▋       | 74/271 [00:00<00:01, 144.15it/s]\u001b[A\n",
      " 33%|███▎      | 89/271 [00:00<00:01, 145.85it/s]\u001b[A\n",
      " 38%|███▊      | 104/271 [00:00<00:01, 147.00it/s]\u001b[A\n",
      " 44%|████▍     | 120/271 [00:00<00:01, 148.48it/s]\u001b[A\n",
      " 50%|█████     | 136/271 [00:00<00:00, 149.12it/s]\u001b[A\n",
      " 56%|█████▌    | 152/271 [00:01<00:00, 151.88it/s]\u001b[A\n",
      " 62%|██████▏   | 167/271 [00:01<00:00, 150.88it/s]\u001b[A\n",
      " 68%|██████▊   | 183/271 [00:01<00:00, 152.64it/s]\u001b[A\n",
      " 74%|███████▍  | 200/271 [00:01<00:00, 155.48it/s]\u001b[A\n",
      " 80%|████████  | 217/271 [00:01<00:00, 157.13it/s]\u001b[A\n",
      " 86%|████████▌ | 233/271 [00:01<00:00, 153.73it/s]\u001b[A\n",
      " 92%|█████████▏| 250/271 [00:01<00:00, 156.32it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 151.80it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1506/3047 [00:33<02:43,  9.41it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29054263565891475\n",
      "f1: 0.7184454288175264\n",
      "Test Loss: 0.007860, Acc: 0.760839\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 65%|██████▌   | 1993/3047 [00:41<00:16, 62.27it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 164.57it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4972\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 164.93it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 164.54it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 165.26it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 165.59it/s]\u001b[A\n",
      " 38%|███▊      | 103/271 [00:00<00:01, 167.76it/s]\u001b[A\n",
      " 44%|████▎     | 118/271 [00:00<00:00, 154.67it/s]\u001b[A\n",
      " 49%|████▉     | 133/271 [00:00<00:00, 147.99it/s]\u001b[A\n",
      " 55%|█████▌    | 150/271 [00:00<00:00, 152.19it/s]\u001b[A\n",
      " 62%|██████▏   | 167/271 [00:01<00:00, 156.54it/s]\u001b[A\n",
      " 68%|██████▊   | 184/271 [00:01<00:00, 159.73it/s]\u001b[A\n",
      " 75%|███████▍  | 202/271 [00:01<00:00, 163.13it/s]\u001b[A\n",
      " 81%|████████  | 219/271 [00:01<00:00, 163.70it/s]\u001b[A\n",
      " 87%|████████▋ | 236/271 [00:01<00:00, 164.96it/s]\u001b[A\n",
      " 93%|█████████▎| 253/271 [00:01<00:00, 165.16it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 161.76it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2000/3047 [00:44<02:20,  7.46it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29364341085271317\n",
      "f1: 0.702960994216812\n",
      "Test Loss: 0.007906, Acc: 0.757899\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2498/3047 [00:52<00:10, 54.47it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 157.18it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4703\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 32/271 [00:00<00:01, 157.26it/s]\u001b[A\n",
      " 18%|█▊        | 48/271 [00:00<00:01, 157.79it/s]\u001b[A\n",
      " 24%|██▎       | 64/271 [00:00<00:01, 157.25it/s]\u001b[A\n",
      " 30%|██▉       | 80/271 [00:00<00:01, 155.86it/s]\u001b[A\n",
      " 36%|███▌      | 97/271 [00:00<00:01, 157.63it/s]\u001b[A\n",
      " 41%|████▏     | 112/271 [00:00<00:01, 154.94it/s]\u001b[A\n",
      " 47%|████▋     | 128/271 [00:00<00:00, 154.95it/s]\u001b[A\n",
      " 53%|█████▎    | 144/271 [00:00<00:00, 155.03it/s]\u001b[A\n",
      " 59%|█████▉    | 160/271 [00:01<00:00, 155.81it/s]\u001b[A\n",
      " 65%|██████▌   | 177/271 [00:01<00:00, 157.45it/s]\u001b[A\n",
      " 71%|███████   | 193/271 [00:01<00:00, 156.41it/s]\u001b[A\n",
      " 77%|███████▋  | 210/271 [00:01<00:00, 158.24it/s]\u001b[A\n",
      " 83%|████████▎ | 226/271 [00:01<00:00, 157.35it/s]\u001b[A\n",
      " 89%|████████▉ | 242/271 [00:01<00:00, 157.50it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 154.41it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 2504/3047 [00:55<01:27,  6.20it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25736434108527134\n",
      "f1: 0.7369267872523739\n",
      "Test Loss: 0.008131, Acc: 0.746425\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2993/3047 [01:03<00:00, 61.65it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 153.98it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3185\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█▏        | 31/271 [00:00<00:01, 151.51it/s]\u001b[A\n",
      " 17%|█▋        | 46/271 [00:00<00:01, 150.31it/s]\u001b[A\n",
      " 23%|██▎       | 61/271 [00:00<00:01, 149.46it/s]\u001b[A\n",
      " 28%|██▊       | 76/271 [00:00<00:01, 148.97it/s]\u001b[A\n",
      " 34%|███▎      | 91/271 [00:00<00:01, 147.10it/s]\u001b[A\n",
      " 39%|███▉      | 107/271 [00:00<00:01, 148.10it/s]\u001b[A\n",
      " 45%|████▌     | 122/271 [00:00<00:01, 147.76it/s]\u001b[A\n",
      " 51%|█████     | 137/271 [00:00<00:00, 148.18it/s]\u001b[A\n",
      " 56%|█████▌    | 152/271 [00:01<00:00, 148.08it/s]\u001b[A\n",
      " 62%|██████▏   | 168/271 [00:01<00:00, 149.82it/s]\u001b[A\n",
      " 68%|██████▊   | 184/271 [00:01<00:00, 151.26it/s]\u001b[A\n",
      " 74%|███████▍  | 201/271 [00:01<00:00, 153.92it/s]\u001b[A\n",
      " 80%|████████  | 217/271 [00:01<00:00, 154.81it/s]\u001b[A\n",
      " 86%|████████▌ | 233/271 [00:01<00:00, 132.72it/s]\u001b[A\n",
      " 92%|█████████▏| 249/271 [00:01<00:00, 138.74it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 146.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 99%|█████████▊| 3006/3047 [01:07<00:04,  9.30it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29457364341085274\n",
      "f1: 0.7155742586440299\n",
      "Test Loss: 0.007921, Acc: 0.759225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:07<00:00, 44.98it/s]\n",
      "  6%|▌         | 16/271 [00:00<00:01, 158.27it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 167.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3047 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27472868217054264\n",
      "f1: 0.6665404208194947\n",
      "Test Loss: 0.008031, Acc: 0.752018\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▋        | 496/3047 [00:07<00:40, 62.89it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 168.46it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4734\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 167.73it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 168.07it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 166.77it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 166.49it/s]\u001b[A\n",
      " 38%|███▊      | 102/271 [00:00<00:01, 166.95it/s]\u001b[A\n",
      " 44%|████▍     | 119/271 [00:00<00:00, 167.26it/s]\u001b[A\n",
      " 50%|█████     | 136/271 [00:00<00:00, 165.60it/s]\u001b[A\n",
      " 56%|█████▋    | 153/271 [00:00<00:00, 166.66it/s]\u001b[A\n",
      " 62%|██████▏   | 169/271 [00:01<00:00, 143.14it/s]\u001b[A\n",
      " 69%|██████▊   | 186/271 [00:01<00:00, 150.06it/s]\u001b[A\n",
      " 75%|███████▍  | 203/271 [00:01<00:00, 153.73it/s]\u001b[A\n",
      " 81%|████████  | 220/271 [00:01<00:00, 156.22it/s]\u001b[A\n",
      " 87%|████████▋ | 237/271 [00:01<00:00, 157.86it/s]\u001b[A\n",
      " 94%|█████████▎| 254/271 [00:01<00:00, 159.49it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 160.42it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 503/3047 [00:10<05:42,  7.42it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2682170542635659\n",
      "f1: 0.7305081826012103\n",
      "Test Loss: 0.008066, Acc: 0.750115\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 998/3047 [00:18<00:32, 63.02it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 165.92it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4691\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 164.57it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 165.30it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 163.98it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 163.68it/s]\u001b[A\n",
      " 38%|███▊      | 102/271 [00:00<00:01, 165.27it/s]\u001b[A\n",
      " 44%|████▍     | 119/271 [00:00<00:00, 164.95it/s]\u001b[A\n",
      " 50%|█████     | 136/271 [00:00<00:00, 165.87it/s]\u001b[A\n",
      " 56%|█████▌    | 152/271 [00:00<00:00, 152.01it/s]\u001b[A\n",
      " 62%|██████▏   | 169/271 [00:01<00:00, 155.37it/s]\u001b[A\n",
      " 69%|██████▊   | 186/271 [00:01<00:00, 159.22it/s]\u001b[A\n",
      " 75%|███████▍  | 203/271 [00:01<00:00, 161.47it/s]\u001b[A\n",
      " 81%|████████  | 220/271 [00:01<00:00, 163.49it/s]\u001b[A\n",
      " 87%|████████▋ | 237/271 [00:01<00:00, 163.63it/s]\u001b[A\n",
      " 94%|█████████▎| 254/271 [00:01<00:00, 165.25it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 162.67it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 33%|███▎      | 1005/3047 [00:21<04:37,  7.35it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27782945736434106\n",
      "f1: 0.6767606742955623\n",
      "Test Loss: 0.007968, Acc: 0.754440\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1494/3047 [00:29<00:25, 60.27it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 9/271 [00:00<00:03, 84.02it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.3871\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 25/271 [00:00<00:02, 97.85it/s]\u001b[A\n",
      " 15%|█▌        | 42/271 [00:00<00:02, 110.42it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 122.05it/s]\u001b[A\n",
      " 28%|██▊       | 75/271 [00:00<00:01, 131.35it/s]\u001b[A\n",
      " 34%|███▍      | 92/271 [00:00<00:01, 140.03it/s]\u001b[A\n",
      " 40%|████      | 109/271 [00:00<00:01, 145.83it/s]\u001b[A\n",
      " 46%|████▋     | 126/271 [00:00<00:00, 151.07it/s]\u001b[A\n",
      " 53%|█████▎    | 143/271 [00:00<00:00, 155.05it/s]\u001b[A\n",
      " 59%|█████▉    | 160/271 [00:01<00:00, 158.32it/s]\u001b[A\n",
      " 65%|██████▌   | 177/271 [00:01<00:00, 161.16it/s]\u001b[A\n",
      " 72%|███████▏  | 194/271 [00:01<00:00, 163.19it/s]\u001b[A\n",
      " 78%|███████▊  | 212/271 [00:01<00:00, 165.66it/s]\u001b[A\n",
      " 85%|████████▍ | 229/271 [00:01<00:00, 165.93it/s]\u001b[A\n",
      " 91%|█████████ | 246/271 [00:01<00:00, 167.00it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 159.77it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 49%|████▉     | 1501/3047 [00:32<03:30,  7.34it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2958139534883721\n",
      "f1: 0.7097305278700675\n",
      "Test Loss: 0.007864, Acc: 0.761243\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 66%|██████▌   | 1997/3047 [00:40<00:16, 64.51it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 18/271 [00:00<00:01, 174.06it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4834\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 36/271 [00:00<00:01, 173.52it/s]\u001b[A\n",
      " 20%|█▉        | 54/271 [00:00<00:01, 173.01it/s]\u001b[A\n",
      " 26%|██▌       | 71/271 [00:00<00:01, 171.85it/s]\u001b[A\n",
      " 33%|███▎      | 89/271 [00:00<00:01, 172.60it/s]\u001b[A\n",
      " 39%|███▉      | 107/271 [00:00<00:00, 174.62it/s]\u001b[A\n",
      " 46%|████▌     | 125/271 [00:00<00:00, 175.24it/s]\u001b[A\n",
      " 52%|█████▏    | 142/271 [00:00<00:00, 162.60it/s]\u001b[A\n",
      " 59%|█████▊    | 159/271 [00:00<00:00, 164.69it/s]\u001b[A\n",
      " 65%|██████▌   | 177/271 [00:01<00:00, 166.44it/s]\u001b[A\n",
      " 72%|███████▏  | 195/271 [00:01<00:00, 168.27it/s]\u001b[A\n",
      " 79%|███████▊  | 213/271 [00:01<00:00, 169.96it/s]\u001b[A\n",
      " 85%|████████▍ | 230/271 [00:01<00:00, 169.48it/s]\u001b[A\n",
      " 91%|█████████ | 247/271 [00:01<00:00, 168.44it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 169.60it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2004/3047 [00:43<02:15,  7.68it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2908527131782946\n",
      "f1: 0.6942977728559165\n",
      "Test Loss: 0.007982, Acc: 0.756458\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2493/3047 [00:51<00:09, 58.04it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 168.66it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4609\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 167.70it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 167.76it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 167.01it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 167.61it/s]\u001b[A\n",
      " 38%|███▊      | 103/271 [00:00<00:00, 168.40it/s]\u001b[A\n",
      " 44%|████▍     | 120/271 [00:00<00:00, 168.26it/s]\u001b[A\n",
      " 51%|█████     | 138/271 [00:00<00:00, 170.33it/s]\u001b[A\n",
      " 58%|█████▊    | 156/271 [00:00<00:00, 171.20it/s]\u001b[A\n",
      " 64%|██████▍   | 174/271 [00:01<00:00, 171.08it/s]\u001b[A\n",
      " 71%|███████   | 192/271 [00:01<00:00, 171.22it/s]\u001b[A\n",
      " 77%|███████▋  | 210/271 [00:01<00:00, 171.71it/s]\u001b[A\n",
      " 84%|████████▍ | 228/271 [00:01<00:00, 171.32it/s]\u001b[A\n",
      " 90%|█████████ | 245/271 [00:01<00:00, 170.39it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 169.79it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 2500/3047 [00:54<01:12,  7.57it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25333333333333335\n",
      "f1: 0.7368765842254267\n",
      "Test Loss: 0.008142, Acc: 0.745214\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2996/3047 [01:01<00:00, 65.29it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 161.79it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3255\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 161.18it/s]\u001b[A\n",
      " 18%|█▊        | 50/271 [00:00<00:01, 161.08it/s]\u001b[A\n",
      " 24%|██▍       | 66/271 [00:00<00:01, 160.71it/s]\u001b[A\n",
      " 31%|███       | 83/271 [00:00<00:01, 161.50it/s]\u001b[A\n",
      " 36%|███▌      | 97/271 [00:00<00:01, 144.49it/s]\u001b[A\n",
      " 42%|████▏     | 114/271 [00:00<00:01, 150.45it/s]\u001b[A\n",
      " 48%|████▊     | 129/271 [00:00<00:01, 133.32it/s]\u001b[A\n",
      " 54%|█████▍    | 146/271 [00:00<00:00, 140.48it/s]\u001b[A\n",
      " 60%|██████    | 163/271 [00:01<00:00, 146.85it/s]\u001b[A\n",
      " 66%|██████▋   | 180/271 [00:01<00:00, 151.03it/s]\u001b[A\n",
      " 73%|███████▎  | 197/271 [00:01<00:00, 154.85it/s]\u001b[A\n",
      " 79%|███████▉  | 214/271 [00:01<00:00, 157.82it/s]\u001b[A\n",
      " 85%|████████▌ | 231/271 [00:01<00:00, 158.68it/s]\u001b[A\n",
      " 92%|█████████▏| 248/271 [00:01<00:00, 161.33it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 154.94it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n",
      "em: 0.29643410852713176\n",
      "f1: 0.7219234649932368\n",
      "Test Loss: 0.007905, Acc: 0.758360\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:05<00:00, 46.49it/s]\n",
      "  6%|▋         | 17/271 [00:00<00:01, 161.39it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 168.24it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3047 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2924031007751938\n",
      "f1: 0.6913042943275547\n",
      "Test Loss: 0.007933, Acc: 0.757207\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▋        | 498/3047 [00:08<00:40, 62.43it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 14/271 [00:00<00:01, 138.77it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4719\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 29/271 [00:00<00:01, 140.31it/s]\u001b[A\n",
      " 16%|█▌        | 44/271 [00:00<00:01, 140.04it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 140.30it/s]\u001b[A\n",
      " 27%|██▋       | 73/271 [00:00<00:01, 139.80it/s]\u001b[A\n",
      " 32%|███▏      | 88/271 [00:00<00:01, 140.48it/s]\u001b[A\n",
      " 38%|███▊      | 103/271 [00:00<00:01, 143.11it/s]\u001b[A\n",
      " 44%|████▎     | 118/271 [00:00<00:01, 144.11it/s]\u001b[A\n",
      " 49%|████▉     | 133/271 [00:00<00:00, 142.84it/s]\u001b[A\n",
      " 54%|█████▍    | 147/271 [00:01<00:00, 127.88it/s]\u001b[A\n",
      " 60%|██████    | 163/271 [00:01<00:00, 134.50it/s]\u001b[A\n",
      " 66%|██████▌   | 179/271 [00:01<00:00, 139.52it/s]\u001b[A\n",
      " 72%|███████▏  | 195/271 [00:01<00:00, 143.17it/s]\u001b[A\n",
      " 78%|███████▊  | 211/271 [00:01<00:00, 146.37it/s]\u001b[A\n",
      " 83%|████████▎ | 226/271 [00:01<00:00, 146.74it/s]\u001b[A\n",
      " 89%|████████▉ | 242/271 [00:01<00:00, 148.24it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 143.48it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n",
      "em: 0.2896124031007752\n",
      "f1: 0.7069157130552517\n",
      "Test Loss: 0.007917, Acc: 0.755593\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 994/3047 [00:19<00:32, 62.30it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 162.13it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4605\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 161.87it/s]\u001b[A\n",
      " 18%|█▊        | 50/271 [00:00<00:01, 156.83it/s]\u001b[A\n",
      " 23%|██▎       | 61/271 [00:00<00:01, 134.50it/s]\u001b[A\n",
      " 29%|██▉       | 78/271 [00:00<00:01, 141.42it/s]\u001b[A\n",
      " 35%|███▍      | 94/271 [00:00<00:01, 145.86it/s]\u001b[A\n",
      " 41%|████      | 110/271 [00:00<00:01, 149.30it/s]\u001b[A\n",
      " 46%|████▋     | 126/271 [00:00<00:00, 150.67it/s]\u001b[A\n",
      " 52%|█████▏    | 142/271 [00:00<00:00, 151.05it/s]\u001b[A\n",
      " 58%|█████▊    | 158/271 [00:01<00:00, 153.00it/s]\u001b[A\n",
      " 64%|██████▍   | 174/271 [00:01<00:00, 152.44it/s]\u001b[A\n",
      " 70%|███████   | 190/271 [00:01<00:00, 154.28it/s]\u001b[A\n",
      " 76%|███████▌  | 206/271 [00:01<00:00, 155.21it/s]\u001b[A\n",
      " 82%|████████▏ | 222/271 [00:01<00:00, 155.09it/s]\u001b[A\n",
      " 88%|████████▊ | 238/271 [00:01<00:00, 154.58it/s]\u001b[A\n",
      " 94%|█████████▎| 254/271 [00:01<00:00, 155.73it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 152.69it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 33%|███▎      | 1001/3047 [00:22<04:46,  7.14it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29147286821705426\n",
      "f1: 0.6961124646240968\n",
      "Test Loss: 0.007959, Acc: 0.757841\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1494/3047 [00:30<00:24, 63.39it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 168.86it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.3854\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 167.03it/s]\u001b[A\n",
      " 17%|█▋        | 46/271 [00:00<00:01, 146.43it/s]\u001b[A\n",
      " 23%|██▎       | 63/271 [00:00<00:01, 152.45it/s]\u001b[A\n",
      " 30%|██▉       | 80/271 [00:00<00:01, 155.24it/s]\u001b[A\n",
      " 36%|███▌      | 97/271 [00:00<00:01, 157.88it/s]\u001b[A\n",
      " 42%|████▏     | 114/271 [00:00<00:00, 160.15it/s]\u001b[A\n",
      " 48%|████▊     | 131/271 [00:00<00:00, 162.41it/s]\u001b[A\n",
      " 54%|█████▍    | 147/271 [00:00<00:00, 143.31it/s]\u001b[A\n",
      " 61%|██████    | 165/271 [00:01<00:00, 150.91it/s]\u001b[A\n",
      " 67%|██████▋   | 182/271 [00:01<00:00, 155.93it/s]\u001b[A\n",
      " 73%|███████▎  | 199/271 [00:01<00:00, 159.79it/s]\u001b[A\n",
      " 80%|███████▉  | 216/271 [00:01<00:00, 160.75it/s]\u001b[A\n",
      " 86%|████████▋ | 234/271 [00:01<00:00, 163.99it/s]\u001b[A\n",
      " 93%|█████████▎| 252/271 [00:01<00:00, 166.24it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 159.50it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1508/3047 [00:33<02:32, 10.08it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.293953488372093\n",
      "f1: 0.7172715639227314\n",
      "Test Loss: 0.007878, Acc: 0.759513\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 66%|██████▌   | 1997/3047 [00:41<00:16, 64.18it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 18/271 [00:00<00:01, 167.94it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 35/271 [00:00<00:01, 167.95it/s]\u001b[A\n",
      " 20%|█▉        | 53/271 [00:00<00:01, 168.75it/s]\u001b[A\n",
      " 26%|██▌       | 71/271 [00:00<00:01, 169.54it/s]\u001b[A\n",
      " 33%|███▎      | 89/271 [00:00<00:01, 170.42it/s]\u001b[A\n",
      " 38%|███▊      | 104/271 [00:00<00:01, 158.89it/s]\u001b[A\n",
      " 44%|████▍     | 119/271 [00:00<00:00, 152.51it/s]\u001b[A\n",
      " 49%|████▉     | 134/271 [00:00<00:00, 147.50it/s]\u001b[A\n",
      " 55%|█████▌    | 150/271 [00:00<00:00, 149.93it/s]\u001b[A\n",
      " 62%|██████▏   | 167/271 [00:01<00:00, 153.96it/s]\u001b[A\n",
      " 68%|██████▊   | 184/271 [00:01<00:00, 157.63it/s]\u001b[A\n",
      " 75%|███████▍  | 202/271 [00:01<00:00, 161.43it/s]\u001b[A\n",
      " 81%|████████  | 219/271 [00:01<00:00, 163.33it/s]\u001b[A\n",
      " 87%|████████▋ | 236/271 [00:01<00:00, 164.04it/s]\u001b[A\n",
      " 93%|█████████▎| 253/271 [00:01<00:00, 164.77it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 161.25it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2004/3047 [00:50<06:56,  2.51it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.293953488372093\n",
      "f1: 0.7050129198966446\n",
      "Test Loss: 0.007900, Acc: 0.758649\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2496/3047 [00:57<00:07, 73.30it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 167.61it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4617\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 162.58it/s]\u001b[A\n",
      " 18%|█▊        | 50/271 [00:00<00:01, 162.91it/s]\u001b[A\n",
      " 25%|██▍       | 67/271 [00:00<00:01, 163.67it/s]\u001b[A\n",
      " 31%|███       | 84/271 [00:00<00:01, 164.19it/s]\u001b[A\n",
      " 37%|███▋      | 101/271 [00:00<00:01, 165.01it/s]\u001b[A\n",
      " 44%|████▎     | 118/271 [00:00<00:00, 165.41it/s]\u001b[A\n",
      " 50%|████▉     | 135/271 [00:00<00:00, 165.95it/s]\u001b[A\n",
      " 56%|█████▌    | 152/271 [00:00<00:00, 166.27it/s]\u001b[A\n",
      " 63%|██████▎   | 170/271 [00:01<00:00, 167.60it/s]\u001b[A\n",
      " 69%|██████▉   | 187/271 [00:01<00:00, 166.31it/s]\u001b[A\n",
      " 76%|███████▌  | 205/271 [00:01<00:00, 167.76it/s]\u001b[A\n",
      " 82%|████████▏ | 222/271 [00:01<00:00, 168.33it/s]\u001b[A\n",
      " 88%|████████▊ | 239/271 [00:01<00:00, 157.70it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 163.91it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 2504/3047 [01:00<01:04,  8.44it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.30046511627906974\n",
      "f1: 0.7137950043066358\n",
      "Test Loss: 0.007880, Acc: 0.760667\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2997/3047 [01:07<00:00, 61.48it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 163.39it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3182\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 35/271 [00:00<00:01, 165.68it/s]\u001b[A\n",
      " 20%|█▉        | 53/271 [00:00<00:01, 169.24it/s]\u001b[A\n",
      " 26%|██▌       | 71/271 [00:00<00:01, 170.76it/s]\u001b[A\n",
      " 33%|███▎      | 89/271 [00:00<00:01, 172.84it/s]\u001b[A\n",
      " 40%|███▉      | 108/271 [00:00<00:00, 175.27it/s]\u001b[A\n",
      " 46%|████▋     | 126/271 [00:00<00:00, 174.33it/s]\u001b[A\n",
      " 53%|█████▎    | 144/271 [00:00<00:00, 174.26it/s]\u001b[A\n",
      " 59%|█████▉    | 161/271 [00:00<00:00, 172.73it/s]\u001b[A\n",
      " 66%|██████▌   | 179/271 [00:01<00:00, 174.07it/s]\u001b[A\n",
      " 73%|███████▎  | 197/271 [00:01<00:00, 175.71it/s]\u001b[A\n",
      " 79%|███████▉  | 215/271 [00:01<00:00, 175.68it/s]\u001b[A\n",
      " 86%|████████▌ | 233/271 [00:01<00:00, 174.93it/s]\u001b[A\n",
      " 93%|█████████▎| 251/271 [00:01<00:00, 175.31it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 174.36it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 99%|█████████▊| 3004/3047 [01:09<00:05,  7.75it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29674418604651165\n",
      "f1: 0.7109875722899023\n",
      "Test Loss: 0.007920, Acc: 0.757495\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:10<00:00, 43.08it/s]\n",
      "  6%|▋         | 17/271 [00:00<00:01, 164.93it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 169.78it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 7/3047 [00:00<00:48, 63.21it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.266046511627907\n",
      "f1: 0.6484518272425296\n",
      "Test Loss: 0.008153, Acc: 0.747982\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▋        | 496/3047 [00:07<00:39, 65.25it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 147.54it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.456\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 30/271 [00:00<00:01, 146.56it/s]\u001b[A\n",
      " 17%|█▋        | 45/271 [00:00<00:01, 147.37it/s]\u001b[A\n",
      " 22%|██▏       | 60/271 [00:00<00:01, 147.31it/s]\u001b[A\n",
      " 28%|██▊       | 75/271 [00:00<00:01, 144.90it/s]\u001b[A\n",
      " 34%|███▎      | 91/271 [00:00<00:01, 147.59it/s]\u001b[A\n",
      " 39%|███▉      | 107/271 [00:00<00:01, 149.39it/s]\u001b[A\n",
      " 45%|████▌     | 123/271 [00:00<00:00, 150.98it/s]\u001b[A\n",
      " 51%|█████▏    | 139/271 [00:00<00:00, 152.19it/s]\u001b[A\n",
      " 58%|█████▊    | 156/271 [00:01<00:00, 154.85it/s]\u001b[A\n",
      " 64%|██████▍   | 173/271 [00:01<00:00, 157.05it/s]\u001b[A\n",
      " 70%|███████   | 190/271 [00:01<00:00, 159.20it/s]\u001b[A\n",
      " 76%|███████▌  | 206/271 [00:01<00:00, 159.01it/s]\u001b[A\n",
      " 82%|████████▏ | 222/271 [00:01<00:00, 155.80it/s]\u001b[A\n",
      " 88%|████████▊ | 238/271 [00:01<00:00, 155.20it/s]\u001b[A\n",
      " 94%|█████████▍| 255/271 [00:01<00:00, 157.62it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 153.73it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 503/3047 [00:10<05:54,  7.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.27968992248062013\n",
      "f1: 0.7240979451212048\n",
      "Test Loss: 0.007950, Acc: 0.755074\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 993/3047 [00:18<00:31, 64.31it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 168.10it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.461\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 166.28it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 165.93it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 166.03it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 165.63it/s]\u001b[A\n",
      " 38%|███▊      | 102/271 [00:00<00:01, 166.55it/s]\u001b[A\n",
      " 44%|████▎     | 118/271 [00:00<00:00, 163.40it/s]\u001b[A\n",
      " 50%|█████     | 136/271 [00:00<00:00, 165.49it/s]\u001b[A\n",
      " 57%|█████▋    | 154/271 [00:00<00:00, 168.46it/s]\u001b[A\n",
      " 63%|██████▎   | 172/271 [00:01<00:00, 171.39it/s]\u001b[A\n",
      " 70%|███████   | 190/271 [00:01<00:00, 173.10it/s]\u001b[A\n",
      " 77%|███████▋  | 208/271 [00:01<00:00, 172.76it/s]\u001b[A\n",
      " 83%|████████▎ | 226/271 [00:01<00:00, 171.25it/s]\u001b[A\n",
      " 90%|████████▉ | 243/271 [00:01<00:00, 170.48it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 168.47it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 1006/3047 [00:21<03:21, 10.13it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2837209302325581\n",
      "f1: 0.6851271071736226\n",
      "Test Loss: 0.008008, Acc: 0.754324\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1499/3047 [00:29<00:21, 71.73it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 163.33it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.3806\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 32/271 [00:00<00:01, 158.48it/s]\u001b[A\n",
      " 18%|█▊        | 48/271 [00:00<00:01, 157.75it/s]\u001b[A\n",
      " 24%|██▎       | 64/271 [00:00<00:01, 156.51it/s]\u001b[A\n",
      " 30%|██▉       | 80/271 [00:00<00:01, 157.14it/s]\u001b[A\n",
      " 36%|███▌      | 97/271 [00:00<00:01, 159.21it/s]\u001b[A\n",
      " 41%|████▏     | 112/271 [00:00<00:01, 137.64it/s]\u001b[A\n",
      " 48%|████▊     | 129/271 [00:00<00:00, 144.39it/s]\u001b[A\n",
      " 54%|█████▎    | 145/271 [00:00<00:00, 147.86it/s]\u001b[A\n",
      " 59%|█████▉    | 161/271 [00:01<00:00, 149.55it/s]\u001b[A\n",
      " 66%|██████▌   | 178/271 [00:01<00:00, 153.59it/s]\u001b[A\n",
      " 72%|███████▏  | 194/271 [00:01<00:00, 155.32it/s]\u001b[A\n",
      " 78%|███████▊  | 211/271 [00:01<00:00, 157.98it/s]\u001b[A\n",
      " 84%|████████▍ | 228/271 [00:01<00:00, 158.99it/s]\u001b[A\n",
      " 90%|█████████ | 245/271 [00:01<00:00, 160.76it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 154.93it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 49%|████▉     | 1507/3047 [00:32<03:05,  8.28it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.297984496124031\n",
      "f1: 0.7151896148640383\n",
      "Test Loss: 0.007868, Acc: 0.759340\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 66%|██████▌   | 1998/3047 [00:40<00:16, 62.50it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 162.64it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.494\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 160.47it/s]\u001b[A\n",
      " 18%|█▊        | 49/271 [00:00<00:01, 158.79it/s]\u001b[A\n",
      " 24%|██▍       | 66/271 [00:00<00:01, 159.89it/s]\u001b[A\n",
      " 30%|███       | 82/271 [00:00<00:01, 159.63it/s]\u001b[A\n",
      " 35%|███▌      | 96/271 [00:00<00:01, 135.98it/s]\u001b[A\n",
      " 42%|████▏     | 113/271 [00:00<00:01, 143.32it/s]\u001b[A\n",
      " 48%|████▊     | 130/271 [00:00<00:00, 148.22it/s]\u001b[A\n",
      " 54%|█████▍    | 146/271 [00:00<00:00, 149.10it/s]\u001b[A\n",
      " 60%|██████    | 163/271 [00:01<00:00, 152.86it/s]\u001b[A\n",
      " 66%|██████▋   | 180/271 [00:01<00:00, 155.29it/s]\u001b[A\n",
      " 73%|███████▎  | 197/271 [00:01<00:00, 157.87it/s]\u001b[A\n",
      " 79%|███████▉  | 214/271 [00:01<00:00, 160.02it/s]\u001b[A\n",
      " 85%|████████▍ | 230/271 [00:01<00:00, 158.50it/s]\u001b[A\n",
      " 91%|█████████ | 247/271 [00:01<00:00, 159.19it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 154.99it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2005/3047 [00:43<02:25,  7.17it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2924031007751938\n",
      "f1: 0.6998171527008774\n",
      "Test Loss: 0.007945, Acc: 0.756227\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2492/3047 [00:50<00:07, 73.63it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 168.88it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4605\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 168.09it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 166.97it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 167.22it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 167.94it/s]\u001b[A\n",
      " 38%|███▊      | 103/271 [00:00<00:00, 168.99it/s]\u001b[A\n",
      " 44%|████▍     | 120/271 [00:00<00:00, 168.33it/s]\u001b[A\n",
      " 51%|█████     | 137/271 [00:00<00:00, 168.21it/s]\u001b[A\n",
      " 57%|█████▋    | 155/271 [00:00<00:00, 169.08it/s]\u001b[A\n",
      " 64%|██████▍   | 173/271 [00:01<00:00, 169.65it/s]\u001b[A\n",
      " 70%|███████   | 191/271 [00:01<00:00, 169.99it/s]\u001b[A\n",
      " 77%|███████▋  | 209/271 [00:01<00:00, 170.66it/s]\u001b[A\n",
      " 83%|████████▎ | 226/271 [00:01<00:00, 170.12it/s]\u001b[A\n",
      " 90%|█████████ | 244/271 [00:01<00:00, 170.36it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 168.98it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2507/3047 [00:53<00:45, 11.83it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2970542635658915\n",
      "f1: 0.6983095853328447\n",
      "Test Loss: 0.007936, Acc: 0.757495\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2994/3047 [01:01<00:00, 61.96it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 158.80it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3099\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 32/271 [00:00<00:01, 158.64it/s]\u001b[A\n",
      " 18%|█▊        | 49/271 [00:00<00:01, 159.50it/s]\u001b[A\n",
      " 24%|██▍       | 66/271 [00:00<00:01, 160.48it/s]\u001b[A\n",
      " 30%|███       | 82/271 [00:00<00:01, 160.27it/s]\u001b[A\n",
      " 37%|███▋      | 99/271 [00:00<00:01, 160.69it/s]\u001b[A\n",
      " 42%|████▏     | 115/271 [00:00<00:00, 160.43it/s]\u001b[A\n",
      " 48%|████▊     | 131/271 [00:00<00:00, 159.56it/s]\u001b[A\n",
      " 54%|█████▍    | 147/271 [00:00<00:00, 157.48it/s]\u001b[A\n",
      " 61%|██████    | 164/271 [00:01<00:00, 159.10it/s]\u001b[A\n",
      " 67%|██████▋   | 181/271 [00:01<00:00, 161.38it/s]\u001b[A\n",
      " 73%|███████▎  | 198/271 [00:01<00:00, 162.88it/s]\u001b[A\n",
      " 79%|███████▉  | 215/271 [00:01<00:00, 146.45it/s]\u001b[A\n",
      " 86%|████████▌ | 232/271 [00:01<00:00, 151.28it/s]\u001b[A\n",
      " 92%|█████████▏| 249/271 [00:01<00:00, 155.08it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 157.60it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 98%|█████████▊| 3001/3047 [01:04<00:06,  7.10it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29364341085271317\n",
      "f1: 0.7053023255813998\n",
      "Test Loss: 0.008016, Acc: 0.757726\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:05<00:00, 46.81it/s]\n",
      "  6%|▋         | 17/271 [00:00<00:01, 161.01it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 163.30it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3047 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2868217054263566\n",
      "f1: 0.6963789836348012\n",
      "Test Loss: 0.007931, Acc: 0.757495\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▌        | 494/3047 [00:08<00:40, 63.62it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 167.12it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.458\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 165.56it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 165.82it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 166.40it/s]\u001b[A\n",
      " 30%|██▉       | 81/271 [00:00<00:01, 139.13it/s]\u001b[A\n",
      " 36%|███▌      | 98/271 [00:00<00:01, 146.78it/s]\u001b[A\n",
      " 43%|████▎     | 116/271 [00:00<00:01, 153.88it/s]\u001b[A\n",
      " 49%|████▉     | 133/271 [00:00<00:00, 157.57it/s]\u001b[A\n",
      " 55%|█████▌    | 150/271 [00:00<00:00, 160.26it/s]\u001b[A\n",
      " 62%|██████▏   | 168/271 [00:01<00:00, 163.39it/s]\u001b[A\n",
      " 69%|██████▊   | 186/271 [00:01<00:00, 165.64it/s]\u001b[A\n",
      " 75%|███████▌  | 204/271 [00:01<00:00, 168.27it/s]\u001b[A\n",
      " 82%|████████▏ | 221/271 [00:01<00:00, 166.07it/s]\u001b[A\n",
      " 88%|████████▊ | 238/271 [00:01<00:00, 166.57it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 163.22it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 16%|█▋        | 501/3047 [00:10<05:41,  7.47it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2837209302325581\n",
      "f1: 0.723562200073832\n",
      "Test Loss: 0.007924, Acc: 0.756573\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 996/3047 [00:18<00:33, 61.20it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 14/271 [00:00<00:01, 137.18it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4594\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|█         | 28/271 [00:00<00:01, 137.99it/s]\u001b[A\n",
      " 16%|█▌        | 43/271 [00:00<00:01, 139.35it/s]\u001b[A\n",
      " 21%|██▏       | 58/271 [00:00<00:01, 139.73it/s]\u001b[A\n",
      " 27%|██▋       | 73/271 [00:00<00:01, 139.72it/s]\u001b[A\n",
      " 32%|███▏      | 88/271 [00:00<00:01, 142.47it/s]\u001b[A\n",
      " 38%|███▊      | 103/271 [00:00<00:01, 142.66it/s]\u001b[A\n",
      " 43%|████▎     | 117/271 [00:00<00:01, 141.66it/s]\u001b[A\n",
      " 48%|████▊     | 131/271 [00:00<00:00, 140.70it/s]\u001b[A\n",
      " 54%|█████▎    | 145/271 [00:01<00:00, 139.96it/s]\u001b[A\n",
      " 59%|█████▉    | 160/271 [00:01<00:00, 141.57it/s]\u001b[A\n",
      " 65%|██████▍   | 175/271 [00:01<00:00, 143.90it/s]\u001b[A\n",
      " 70%|███████   | 191/271 [00:01<00:00, 146.82it/s]\u001b[A\n",
      " 76%|███████▌  | 206/271 [00:01<00:00, 146.69it/s]\u001b[A\n",
      " 82%|████████▏ | 221/271 [00:01<00:00, 146.61it/s]\u001b[A\n",
      " 87%|████████▋ | 236/271 [00:01<00:00, 146.36it/s]\u001b[A\n",
      " 93%|█████████▎| 252/271 [00:01<00:00, 147.56it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 143.92it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 33%|███▎      | 1003/3047 [00:22<05:04,  6.71it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29023255813953486\n",
      "f1: 0.6912430171034866\n",
      "Test Loss: 0.008013, Acc: 0.756169\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1497/3047 [00:30<00:25, 61.62it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 166.00it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.3822\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 164.74it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 163.62it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 162.93it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 163.13it/s]\u001b[A\n",
      " 38%|███▊      | 103/271 [00:00<00:01, 165.48it/s]\u001b[A\n",
      " 44%|████▍     | 120/271 [00:00<00:00, 165.47it/s]\u001b[A\n",
      " 51%|█████     | 137/271 [00:00<00:00, 166.01it/s]\u001b[A\n",
      " 57%|█████▋    | 154/271 [00:00<00:00, 166.30it/s]\u001b[A\n",
      " 63%|██████▎   | 171/271 [00:01<00:00, 167.33it/s]\u001b[A\n",
      " 69%|██████▉   | 188/271 [00:01<00:00, 165.93it/s]\u001b[A\n",
      " 76%|███████▌  | 205/271 [00:01<00:00, 165.94it/s]\u001b[A\n",
      " 82%|████████▏ | 222/271 [00:01<00:00, 151.86it/s]\u001b[A\n",
      " 88%|████████▊ | 239/271 [00:01<00:00, 155.93it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 162.87it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 49%|████▉     | 1504/3047 [00:33<03:26,  7.46it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2924031007751938\n",
      "f1: 0.7134475206103157\n",
      "Test Loss: 0.007907, Acc: 0.759629\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 65%|██████▌   | 1995/3047 [00:40<00:16, 62.06it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 16/271 [00:00<00:01, 155.27it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4978\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 33/271 [00:00<00:01, 156.88it/s]\u001b[A\n",
      " 18%|█▊        | 49/271 [00:00<00:01, 157.79it/s]\u001b[A\n",
      " 24%|██▍       | 65/271 [00:00<00:01, 157.89it/s]\u001b[A\n",
      " 30%|██▉       | 81/271 [00:00<00:01, 156.46it/s]\u001b[A\n",
      " 36%|███▌      | 98/271 [00:00<00:01, 158.12it/s]\u001b[A\n",
      " 42%|████▏     | 114/271 [00:00<00:00, 158.62it/s]\u001b[A\n",
      " 48%|████▊     | 131/271 [00:00<00:00, 159.58it/s]\u001b[A\n",
      " 55%|█████▍    | 148/271 [00:00<00:00, 160.70it/s]\u001b[A\n",
      " 61%|██████    | 164/271 [00:01<00:00, 159.84it/s]\u001b[A\n",
      " 67%|██████▋   | 181/271 [00:01<00:00, 161.27it/s]\u001b[A\n",
      " 73%|███████▎  | 198/271 [00:01<00:00, 163.46it/s]\u001b[A\n",
      " 79%|███████▉  | 215/271 [00:01<00:00, 163.40it/s]\u001b[A\n",
      " 86%|████████▌ | 232/271 [00:01<00:00, 162.82it/s]\u001b[A\n",
      " 92%|█████████▏| 249/271 [00:01<00:00, 161.67it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 160.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2002/3047 [00:43<02:23,  7.29it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2911627906976744\n",
      "f1: 0.7055183954718884\n",
      "Test Loss: 0.007891, Acc: 0.757841\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2495/3047 [00:51<00:08, 61.70it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 142.86it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4521\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 30/271 [00:00<00:01, 142.42it/s]\u001b[A\n",
      " 16%|█▌        | 44/271 [00:00<00:01, 141.39it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 142.37it/s]\u001b[A\n",
      " 27%|██▋       | 74/271 [00:00<00:01, 141.92it/s]\u001b[A\n",
      " 33%|███▎      | 90/271 [00:00<00:01, 144.84it/s]\u001b[A\n",
      " 39%|███▉      | 106/271 [00:00<00:01, 148.03it/s]\u001b[A\n",
      " 45%|████▌     | 122/271 [00:00<00:01, 148.71it/s]\u001b[A\n",
      " 51%|█████     | 137/271 [00:00<00:00, 138.05it/s]\u001b[A\n",
      " 56%|█████▋    | 153/271 [00:01<00:00, 143.08it/s]\u001b[A\n",
      " 62%|██████▏   | 169/271 [00:01<00:00, 146.93it/s]\u001b[A\n",
      " 69%|██████▊   | 186/271 [00:01<00:00, 151.43it/s]\u001b[A\n",
      " 75%|███████▍  | 202/271 [00:01<00:00, 152.98it/s]\u001b[A\n",
      " 80%|████████  | 218/271 [00:01<00:00, 154.89it/s]\u001b[A\n",
      " 86%|████████▋ | 234/271 [00:01<00:00, 152.94it/s]\u001b[A\n",
      " 92%|█████████▏| 250/271 [00:01<00:00, 153.95it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 148.37it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 82%|████████▏ | 2502/3047 [00:55<01:19,  6.87it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29364341085271317\n",
      "f1: 0.6971475329149783\n",
      "Test Loss: 0.007925, Acc: 0.756342\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 98%|█████████▊| 2997/3047 [01:03<00:00, 60.23it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 18/271 [00:00<00:01, 171.77it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.3267\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 36/271 [00:00<00:01, 172.26it/s]\u001b[A\n",
      " 20%|█▉        | 54/271 [00:00<00:01, 172.68it/s]\u001b[A\n",
      " 25%|██▌       | 69/271 [00:00<00:01, 161.30it/s]\u001b[A\n",
      " 30%|███       | 82/271 [00:00<00:01, 145.36it/s]\u001b[A\n",
      " 37%|███▋      | 99/271 [00:00<00:01, 151.04it/s]\u001b[A\n",
      " 42%|████▏     | 115/271 [00:00<00:01, 153.08it/s]\u001b[A\n",
      " 49%|████▊     | 132/271 [00:00<00:00, 157.04it/s]\u001b[A\n",
      " 55%|█████▍    | 149/271 [00:00<00:00, 159.73it/s]\u001b[A\n",
      " 62%|██████▏   | 167/271 [00:01<00:00, 163.73it/s]\u001b[A\n",
      " 69%|██████▊   | 186/271 [00:01<00:00, 168.35it/s]\u001b[A\n",
      " 75%|███████▍  | 203/271 [00:01<00:00, 168.37it/s]\u001b[A\n",
      " 82%|████████▏ | 221/271 [00:01<00:00, 169.88it/s]\u001b[A\n",
      " 88%|████████▊ | 238/271 [00:01<00:00, 169.36it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 164.17it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 99%|█████████▊| 3004/3047 [01:06<00:05,  7.46it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29333333333333333\n",
      "f1: 0.7232211148025142\n",
      "Test Loss: 0.007923, Acc: 0.758072\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3047/3047 [01:06<00:00, 45.67it/s]\n",
      "  6%|▋         | 17/271 [00:00<00:01, 162.74it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 271/271 [00:01<00:00, 165.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3047 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2834108527131783\n",
      "f1: 0.6805359911406467\n",
      "Test Loss: 0.008000, Acc: 0.754267\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 16%|█▋        | 497/3047 [00:08<00:43, 58.24it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 143.97it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4568\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 29/271 [00:00<00:01, 142.52it/s]\u001b[A\n",
      " 16%|█▌        | 44/271 [00:00<00:01, 143.05it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 143.35it/s]\u001b[A\n",
      " 27%|██▋       | 74/271 [00:00<00:01, 143.13it/s]\u001b[A\n",
      " 33%|███▎      | 89/271 [00:00<00:01, 143.36it/s]\u001b[A\n",
      " 39%|███▊      | 105/271 [00:00<00:01, 145.40it/s]\u001b[A\n",
      " 44%|████▍     | 120/271 [00:00<00:01, 145.92it/s]\u001b[A\n",
      " 50%|████▉     | 135/271 [00:00<00:00, 146.13it/s]\u001b[A\n",
      " 55%|█████▌    | 150/271 [00:01<00:00, 146.38it/s]\u001b[A\n",
      " 61%|██████▏   | 166/271 [00:01<00:00, 149.44it/s]\u001b[A\n",
      " 67%|██████▋   | 182/271 [00:01<00:00, 151.14it/s]\u001b[A\n",
      " 73%|███████▎  | 198/271 [00:01<00:00, 152.86it/s]\u001b[A\n",
      " 79%|███████▉  | 214/271 [00:01<00:00, 152.37it/s]\u001b[A\n",
      " 85%|████████▍ | 230/271 [00:01<00:00, 151.35it/s]\u001b[A\n",
      " 91%|█████████ | 246/271 [00:01<00:00, 151.63it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 148.71it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 503/3047 [00:11<07:04,  5.99it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2951937984496124\n",
      "f1: 0.7238580041835893\n",
      "Test Loss: 0.007884, Acc: 0.758418\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 33%|███▎      | 996/3047 [00:18<00:28, 72.69it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 161.57it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.464\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 29/271 [00:00<00:01, 130.24it/s]\u001b[A\n",
      " 16%|█▌        | 43/271 [00:00<00:01, 131.64it/s]\u001b[A\n",
      " 22%|██▏       | 59/271 [00:00<00:01, 137.55it/s]\u001b[A\n",
      " 28%|██▊       | 75/271 [00:00<00:01, 141.93it/s]\u001b[A\n",
      " 33%|███▎      | 90/271 [00:00<00:01, 141.30it/s]\u001b[A\n",
      " 39%|███▊      | 105/271 [00:00<00:01, 141.67it/s]\u001b[A\n",
      " 45%|████▍     | 121/271 [00:00<00:01, 145.50it/s]\u001b[A\n",
      " 50%|█████     | 136/271 [00:00<00:00, 146.71it/s]\u001b[A\n",
      " 56%|█████▌    | 152/271 [00:01<00:00, 149.58it/s]\u001b[A\n",
      " 62%|██████▏   | 168/271 [00:01<00:00, 152.39it/s]\u001b[A\n",
      " 68%|██████▊   | 184/271 [00:01<00:00, 154.19it/s]\u001b[A\n",
      " 74%|███████▍  | 200/271 [00:01<00:00, 155.15it/s]\u001b[A\n",
      " 80%|████████  | 217/271 [00:01<00:00, 156.62it/s]\u001b[A\n",
      " 86%|████████▌ | 233/271 [00:01<00:00, 155.89it/s]\u001b[A\n",
      " 92%|█████████▏| 249/271 [00:01<00:00, 156.73it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 148.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 33%|███▎      | 1004/3047 [00:21<04:13,  8.06it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2924031007751938\n",
      "f1: 0.701091423649567\n",
      "Test Loss: 0.007970, Acc: 0.758187\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▉     | 1495/3047 [00:29<00:24, 63.36it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 166.65it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.3848\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 165.74it/s]\u001b[A\n",
      " 18%|█▊        | 50/271 [00:00<00:01, 163.31it/s]\u001b[A\n",
      " 25%|██▍       | 67/271 [00:00<00:01, 163.62it/s]\u001b[A\n",
      " 31%|███       | 84/271 [00:00<00:01, 164.62it/s]\u001b[A\n",
      " 37%|███▋      | 101/271 [00:00<00:01, 165.15it/s]\u001b[A\n",
      " 44%|████▎     | 118/271 [00:00<00:00, 164.07it/s]\u001b[A\n",
      " 50%|████▉     | 135/271 [00:00<00:00, 163.42it/s]\u001b[A\n",
      " 56%|█████▌    | 152/271 [00:00<00:00, 163.92it/s]\u001b[A\n",
      " 62%|██████▏   | 169/271 [00:01<00:00, 164.98it/s]\u001b[A\n",
      " 69%|██████▊   | 186/271 [00:01<00:00, 165.06it/s]\u001b[A\n",
      " 75%|███████▍  | 203/271 [00:01<00:00, 166.33it/s]\u001b[A\n",
      " 81%|████████  | 220/271 [00:01<00:00, 166.48it/s]\u001b[A\n",
      " 87%|████████▋ | 237/271 [00:01<00:00, 165.77it/s]\u001b[A\n",
      " 94%|█████████▎| 254/271 [00:01<00:00, 165.76it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 164.63it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 49%|████▉     | 1502/3047 [00:32<03:25,  7.50it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2924031007751938\n",
      "f1: 0.7032238218284775\n",
      "Test Loss: 0.007934, Acc: 0.757957\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 65%|██████▌   | 1992/3047 [00:39<00:14, 73.67it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 17/271 [00:00<00:01, 166.32it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5124\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 34/271 [00:00<00:01, 166.16it/s]\u001b[A\n",
      " 19%|█▉        | 51/271 [00:00<00:01, 166.06it/s]\u001b[A\n",
      " 25%|██▌       | 68/271 [00:00<00:01, 164.55it/s]\u001b[A\n",
      " 31%|███▏      | 85/271 [00:00<00:01, 165.18it/s]\u001b[A\n",
      " 38%|███▊      | 102/271 [00:00<00:01, 159.75it/s]\u001b[A\n",
      " 43%|████▎     | 117/271 [00:00<00:01, 153.34it/s]\u001b[A\n",
      " 49%|████▉     | 133/271 [00:00<00:00, 154.75it/s]\u001b[A\n",
      " 55%|█████▌    | 150/271 [00:00<00:00, 156.93it/s]\u001b[A\n",
      " 62%|██████▏   | 167/271 [00:01<00:00, 160.36it/s]\u001b[A\n",
      " 68%|██████▊   | 184/271 [00:01<00:00, 162.14it/s]\u001b[A\n",
      " 74%|███████▍  | 201/271 [00:01<00:00, 161.86it/s]\u001b[A\n",
      " 80%|████████  | 218/271 [00:01<00:00, 162.84it/s]\u001b[A\n",
      " 87%|████████▋ | 235/271 [00:01<00:00, 163.46it/s]\u001b[A\n",
      " 93%|█████████▎| 253/271 [00:01<00:00, 165.88it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 162.06it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n",
      "num: 17344\n",
      "n: 3225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 66%|██████▌   | 2000/3047 [00:42<02:04,  8.44it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2886821705426357\n",
      "f1: 0.7133842746400932\n",
      "Test Loss: 0.007908, Acc: 0.755996\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2499/3047 [00:50<00:08, 62.36it/s]\n",
      "  0%|          | 0/271 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 15/271 [00:00<00:01, 148.57it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4578\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█         | 30/271 [00:00<00:01, 148.11it/s]\u001b[A\n",
      " 17%|█▋        | 45/271 [00:00<00:01, 147.65it/s]\u001b[A\n",
      " 23%|██▎       | 61/271 [00:00<00:01, 147.08it/s]\u001b[A\n",
      " 28%|██▊       | 76/271 [00:00<00:01, 147.30it/s]\u001b[A\n",
      " 34%|███▍      | 92/271 [00:00<00:01, 150.29it/s]\u001b[A\n",
      " 40%|███▉      | 108/271 [00:00<00:01, 151.23it/s]\u001b[A\n",
      " 45%|████▌     | 123/271 [00:00<00:00, 150.58it/s]\u001b[A\n",
      " 51%|█████▏    | 139/271 [00:00<00:00, 150.83it/s]\u001b[A\n",
      " 57%|█████▋    | 155/271 [00:01<00:00, 153.13it/s]\u001b[A\n",
      " 63%|██████▎   | 171/271 [00:01<00:00, 154.99it/s]\u001b[A\n",
      " 69%|██████▉   | 187/271 [00:01<00:00, 154.75it/s]\u001b[A\n",
      " 75%|███████▌  | 204/271 [00:01<00:00, 156.76it/s]\u001b[A\n",
      " 81%|████████  | 220/271 [00:01<00:00, 157.10it/s]\u001b[A\n",
      " 87%|████████▋ | 236/271 [00:01<00:00, 135.99it/s]\u001b[A\n",
      " 93%|█████████▎| 253/271 [00:01<00:00, 143.02it/s]\u001b[A\n",
      "100%|██████████| 271/271 [00:01<00:00, 149.26it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 17344 dev_label_list: 17344 example_id_list: 17344\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 82%|████████▏ | 2499/3047 [00:59<00:12, 42.29it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-105-5240e1104d98>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent8verb3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-102-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     81\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mepoch\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m500\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     82\u001b[0m                     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'epoch: {}, loss: {:.4}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 83\u001b[0;31m                     \u001b[0meval_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     85\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-104-f4090bbb5068>\u001b[0m in \u001b[0;36meval_code_confing\u001b[0;34m(dev_loader)\u001b[0m\n\u001b[1;32m     91\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"predict_label_list:\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpredict_label_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"dev_label_list:\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdev_label_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"example_id_list:\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexample_id_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     92\u001b[0m     \u001b[0msent_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m64\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m     \u001b[0mcontext_exact\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexact_example_num\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpredict_label_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_label_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mexample_id_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     94\u001b[0m     print('Test Loss: {:.6f}, Acc: {:.6f}'.format(\n\u001b[1;32m     95\u001b[0m         \u001b[0meval_loss\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0msent_num\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-103-b8a0024c5115>\u001b[0m in \u001b[0;36mexact_example_num\u001b[0;34m(predict_label_list, dev_label_list, example_id_list)\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0;31m#predict_label_list = predict_label_list.detach().cpu()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mexample_id_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mexample_id_list\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m     \u001b[0mpredict_label_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpredict_label_list\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m     \u001b[0mdev_label_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdev_label_list\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-103-b8a0024c5115>\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0;31m#predict_label_list = predict_label_list.detach().cpu()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mexample_id_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mexample_id_list\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m     \u001b[0mpredict_label_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpredict_label_list\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m     \u001b[0mdev_label_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdev_label_list\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#sent8verb3\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 13%|█▎        | 495/3920 [00:08<00:56, 60.21it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 15/328 [00:00<00:02, 146.88it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.39\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 30/328 [00:00<00:02, 147.00it/s]\u001b[A\n",
      " 14%|█▍        | 46/328 [00:00<00:01, 148.07it/s]\u001b[A\n",
      " 19%|█▊        | 61/328 [00:00<00:01, 147.41it/s]\u001b[A\n",
      " 23%|██▎       | 76/328 [00:00<00:01, 145.42it/s]\u001b[A\n",
      " 27%|██▋       | 90/328 [00:00<00:01, 143.32it/s]\u001b[A\n",
      " 32%|███▏      | 105/328 [00:00<00:01, 143.15it/s]\u001b[A\n",
      " 37%|███▋      | 120/328 [00:00<00:01, 143.02it/s]\u001b[A\n",
      " 41%|████▏     | 136/328 [00:00<00:01, 145.69it/s]\u001b[A\n",
      " 46%|████▌     | 151/328 [00:01<00:01, 144.34it/s]\u001b[A\n",
      " 51%|█████     | 166/328 [00:01<00:01, 143.15it/s]\u001b[A\n",
      " 55%|█████▌    | 181/328 [00:01<00:01, 142.62it/s]\u001b[A\n",
      " 60%|█████▉    | 196/328 [00:01<00:00, 141.95it/s]\u001b[A\n",
      " 64%|██████▍   | 211/328 [00:01<00:00, 141.70it/s]\u001b[A\n",
      " 69%|██████▉   | 226/328 [00:01<00:00, 139.92it/s]\u001b[A\n",
      " 73%|███████▎  | 241/328 [00:01<00:00, 142.01it/s]\u001b[A\n",
      " 78%|███████▊  | 256/328 [00:01<00:00, 141.53it/s]\u001b[A\n",
      " 83%|████████▎ | 271/328 [00:01<00:00, 137.89it/s]\u001b[A\n",
      " 87%|████████▋ | 285/328 [00:02<00:00, 135.03it/s]\u001b[A\n",
      " 91%|█████████ | 299/328 [00:02<00:00, 132.89it/s]\u001b[A\n",
      " 95%|█████████▌| 313/328 [00:02<00:00, 133.74it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 140.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 13%|█▎        | 502/3920 [00:12<09:53,  5.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.26969325153374235\n",
      "f1: 0.7334973220372082\n",
      "Test Loss: 0.008367, Acc: 0.743521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██▌       | 997/3920 [00:19<00:45, 64.37it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 17/328 [00:00<00:01, 164.29it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5023\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|█         | 34/328 [00:00<00:01, 165.00it/s]\u001b[A\n",
      " 16%|█▌        | 51/328 [00:00<00:01, 165.77it/s]\u001b[A\n",
      " 21%|██        | 68/328 [00:00<00:01, 165.06it/s]\u001b[A\n",
      " 26%|██▌       | 85/328 [00:00<00:01, 165.45it/s]\u001b[A\n",
      " 31%|███       | 102/328 [00:00<00:01, 165.91it/s]\u001b[A\n",
      " 36%|███▋      | 119/328 [00:00<00:01, 166.82it/s]\u001b[A\n",
      " 41%|████      | 135/328 [00:00<00:01, 152.33it/s]\u001b[A\n",
      " 46%|████▌     | 151/328 [00:00<00:01, 153.74it/s]\u001b[A\n",
      " 51%|█████     | 166/328 [00:01<00:01, 142.64it/s]\u001b[A\n",
      " 55%|█████▌    | 181/328 [00:01<00:01, 143.69it/s]\u001b[A\n",
      " 60%|█████▉    | 196/328 [00:01<00:00, 144.98it/s]\u001b[A\n",
      " 65%|██████▍   | 212/328 [00:01<00:00, 147.75it/s]\u001b[A\n",
      " 70%|██████▉   | 229/328 [00:01<00:00, 151.53it/s]\u001b[A\n",
      " 75%|███████▌  | 246/328 [00:01<00:00, 154.84it/s]\u001b[A\n",
      " 80%|███████▉  | 262/328 [00:01<00:00, 155.49it/s]\u001b[A\n",
      " 85%|████████▌ | 279/328 [00:01<00:00, 156.92it/s]\u001b[A\n",
      " 90%|████████▉ | 295/328 [00:01<00:00, 156.78it/s]\u001b[A\n",
      " 95%|█████████▍| 311/328 [00:01<00:00, 157.22it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 155.68it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 26%|██▌       | 1004/3920 [00:23<07:59,  6.08it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2809815950920245\n",
      "f1: 0.7204109455643286\n",
      "Test Loss: 0.008093, Acc: 0.751715\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|███▊      | 1493/3920 [00:31<00:41, 59.18it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 9/328 [00:00<00:03, 83.54it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4985\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 25/328 [00:00<00:03, 96.97it/s]\u001b[A\n",
      " 12%|█▏        | 40/328 [00:00<00:02, 108.13it/s]\u001b[A\n",
      " 17%|█▋        | 56/328 [00:00<00:02, 118.65it/s]\u001b[A\n",
      " 22%|██▏       | 71/328 [00:00<00:02, 126.23it/s]\u001b[A\n",
      " 26%|██▌       | 86/328 [00:00<00:01, 131.81it/s]\u001b[A\n",
      " 31%|███       | 102/328 [00:00<00:01, 137.46it/s]\u001b[A\n",
      " 36%|███▌      | 118/328 [00:00<00:01, 141.68it/s]\u001b[A\n",
      " 41%|████      | 134/328 [00:00<00:01, 145.92it/s]\u001b[A\n",
      " 46%|████▌     | 150/328 [00:01<00:01, 148.71it/s]\u001b[A\n",
      " 51%|█████     | 166/328 [00:01<00:01, 149.88it/s]\u001b[A\n",
      " 55%|█████▌    | 182/328 [00:01<00:00, 150.61it/s]\u001b[A\n",
      " 60%|██████    | 198/328 [00:01<00:00, 152.91it/s]\u001b[A\n",
      " 65%|██████▌   | 214/328 [00:01<00:00, 154.19it/s]\u001b[A\n",
      " 70%|███████   | 230/328 [00:01<00:00, 154.62it/s]\u001b[A\n",
      " 75%|███████▌  | 246/328 [00:01<00:00, 156.14it/s]\u001b[A\n",
      " 80%|████████  | 263/328 [00:01<00:00, 157.73it/s]\u001b[A\n",
      " 85%|████████▌ | 279/328 [00:01<00:00, 158.32it/s]\u001b[A\n",
      " 90%|████████▉ | 295/328 [00:01<00:00, 157.16it/s]\u001b[A\n",
      " 95%|█████████▍| 311/328 [00:02<00:00, 156.49it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 151.25it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 38%|███▊      | 1500/3920 [00:35<06:44,  5.98it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2878527607361963\n",
      "f1: 0.708590709903601\n",
      "Test Loss: 0.008097, Acc: 0.753239\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 51%|█████     | 1995/3920 [00:43<00:32, 59.61it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 16/328 [00:00<00:01, 156.33it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4557\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|▉         | 32/328 [00:00<00:01, 155.80it/s]\u001b[A\n",
      " 51%|█████     | 1995/3920 [00:54<00:32, 59.61it/s]A\n",
      " 13%|█▎        | 44/328 [00:11<01:20,  3.51it/s] \u001b[A\n",
      " 14%|█▎        | 45/328 [00:12<01:50,  2.56it/s]\u001b[A\n",
      " 17%|█▋        | 57/328 [00:12<01:14,  3.62it/s]\u001b[A\n",
      " 21%|██        | 69/328 [00:12<00:50,  5.11it/s]\u001b[A\n",
      " 25%|██▍       | 81/328 [00:12<00:34,  7.16it/s]\u001b[A\n",
      " 29%|██▉       | 95/328 [00:12<00:23, 10.01it/s]\u001b[A\n",
      " 33%|███▎      | 109/328 [00:12<00:15, 13.86it/s]\u001b[A\n",
      " 38%|███▊      | 123/328 [00:12<00:10, 19.00it/s]\u001b[A\n",
      " 42%|████▏     | 138/328 [00:12<00:07, 25.61it/s]\u001b[A\n",
      " 46%|████▌     | 151/328 [00:12<00:05, 33.36it/s]\u001b[A\n",
      " 50%|█████     | 165/328 [00:12<00:03, 43.22it/s]\u001b[A\n",
      " 55%|█████▍    | 179/328 [00:13<00:02, 54.36it/s]\u001b[A\n",
      " 59%|█████▉    | 193/328 [00:13<00:02, 66.53it/s]\u001b[A\n",
      " 63%|██████▎   | 208/328 [00:13<00:01, 79.17it/s]\u001b[A\n",
      " 68%|██████▊   | 223/328 [00:13<00:01, 91.71it/s]\u001b[A\n",
      " 73%|███████▎  | 239/328 [00:13<00:00, 105.14it/s]\u001b[A\n",
      " 78%|███████▊  | 255/328 [00:13<00:00, 116.41it/s]\u001b[A\n",
      " 82%|████████▏ | 270/328 [00:13<00:00, 123.94it/s]\u001b[A\n",
      " 87%|████████▋ | 285/328 [00:13<00:00, 129.11it/s]\u001b[A\n",
      " 91%|█████████▏| 300/328 [00:13<00:00, 133.98it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:14<00:00, 23.27it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 51%|█████     | 2000/3920 [00:59<30:42,  1.04it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28049079754601225\n",
      "f1: 0.6829144025708503\n",
      "Test Loss: 0.008187, Acc: 0.747237\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 64%|██████▎   | 2496/3920 [01:07<00:24, 58.16it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 15/328 [00:00<00:02, 145.04it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5742\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 30/328 [00:00<00:02, 144.81it/s]\u001b[A\n",
      " 14%|█▎        | 45/328 [00:00<00:01, 144.69it/s]\u001b[A\n",
      " 18%|█▊        | 60/328 [00:00<00:01, 145.21it/s]\u001b[A\n",
      " 23%|██▎       | 76/328 [00:00<00:01, 146.83it/s]\u001b[A\n",
      " 28%|██▊       | 91/328 [00:00<00:01, 147.57it/s]\u001b[A\n",
      " 33%|███▎      | 107/328 [00:00<00:01, 150.08it/s]\u001b[A\n",
      " 38%|███▊      | 123/328 [00:00<00:01, 151.77it/s]\u001b[A\n",
      " 42%|████▏     | 139/328 [00:00<00:01, 153.98it/s]\u001b[A\n",
      " 47%|████▋     | 155/328 [00:01<00:01, 155.26it/s]\u001b[A\n",
      " 52%|█████▏    | 171/328 [00:01<00:01, 155.54it/s]\u001b[A\n",
      " 57%|█████▋    | 187/328 [00:01<00:00, 155.62it/s]\u001b[A\n",
      " 62%|██████▏   | 203/328 [00:01<00:00, 156.24it/s]\u001b[A\n",
      " 67%|██████▋   | 219/328 [00:01<00:00, 156.02it/s]\u001b[A\n",
      " 72%|███████▏  | 236/328 [00:01<00:00, 158.38it/s]\u001b[A\n",
      " 77%|███████▋  | 253/328 [00:01<00:00, 160.92it/s]\u001b[A\n",
      " 82%|████████▏ | 270/328 [00:01<00:00, 161.20it/s]\u001b[A\n",
      " 88%|████████▊ | 287/328 [00:01<00:00, 161.81it/s]\u001b[A\n",
      " 93%|█████████▎| 304/328 [00:01<00:00, 162.06it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 155.31it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.28368098159509203\n",
      "f1: 0.7002668932975747\n",
      "Test Loss: 0.008141, Acc: 0.749714\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 76%|███████▋  | 2997/3920 [01:18<00:14, 65.41it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 14/328 [00:00<00:02, 137.97it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.6858\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 30/328 [00:00<00:02, 141.83it/s]\u001b[A\n",
      " 14%|█▍        | 47/328 [00:00<00:01, 147.31it/s]\u001b[A\n",
      " 19%|█▉        | 63/328 [00:00<00:01, 149.78it/s]\u001b[A\n",
      " 24%|██▍       | 80/328 [00:00<00:01, 153.57it/s]\u001b[A\n",
      " 29%|██▊       | 94/328 [00:00<00:01, 132.37it/s]\u001b[A\n",
      " 34%|███▎      | 110/328 [00:00<00:01, 138.52it/s]\u001b[A\n",
      " 39%|███▊      | 127/328 [00:00<00:01, 146.30it/s]\u001b[A\n",
      " 44%|████▍     | 144/328 [00:00<00:01, 152.06it/s]\u001b[A\n",
      " 49%|████▉     | 161/328 [00:01<00:01, 155.76it/s]\u001b[A\n",
      " 54%|█████▍    | 178/328 [00:01<00:00, 158.83it/s]\u001b[A\n",
      " 59%|█████▉    | 195/328 [00:01<00:00, 160.65it/s]\u001b[A\n",
      " 65%|██████▍   | 212/328 [00:01<00:00, 162.28it/s]\u001b[A\n",
      " 70%|██████▉   | 229/328 [00:01<00:00, 160.06it/s]\u001b[A\n",
      " 75%|███████▌  | 247/328 [00:01<00:00, 163.19it/s]\u001b[A\n",
      " 81%|████████  | 265/328 [00:01<00:00, 165.94it/s]\u001b[A\n",
      " 86%|████████▌ | 282/328 [00:01<00:00, 165.31it/s]\u001b[A\n",
      " 91%|█████████ | 299/328 [00:01<00:00, 165.50it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 157.73it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.2792638036809816\n",
      "f1: 0.6824354854416259\n",
      "Test Loss: 0.008314, Acc: 0.747618\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 89%|████████▉ | 3495/3920 [01:30<00:06, 62.21it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 11/328 [00:00<00:03, 105.56it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.5982\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 22/328 [00:00<00:02, 105.58it/s]\u001b[A\n",
      " 10%|▉         | 32/328 [00:00<00:02, 102.75it/s]\u001b[A\n",
      " 14%|█▎        | 45/328 [00:00<00:02, 108.97it/s]\u001b[A\n",
      " 18%|█▊        | 58/328 [00:00<00:02, 112.89it/s]\u001b[A\n",
      " 22%|██▏       | 71/328 [00:00<00:02, 116.16it/s]\u001b[A\n",
      " 26%|██▌       | 84/328 [00:00<00:02, 117.50it/s]\u001b[A\n",
      " 30%|██▉       | 98/328 [00:00<00:01, 121.88it/s]\u001b[A\n",
      " 34%|███▍      | 111/328 [00:00<00:01, 123.95it/s]\u001b[A\n",
      " 38%|███▊      | 124/328 [00:01<00:01, 125.07it/s]\u001b[A\n",
      " 42%|████▏     | 138/328 [00:01<00:01, 127.59it/s]\u001b[A\n",
      " 46%|████▌     | 151/328 [00:01<00:01, 127.10it/s]\u001b[A\n",
      " 50%|█████     | 164/328 [00:01<00:01, 127.17it/s]\u001b[A\n",
      " 54%|█████▍    | 177/328 [00:01<00:01, 108.97it/s]\u001b[A\n",
      " 58%|█████▊    | 191/328 [00:01<00:01, 114.96it/s]\u001b[A\n",
      " 62%|██████▎   | 205/328 [00:01<00:01, 119.46it/s]\u001b[A\n",
      " 67%|██████▋   | 219/328 [00:01<00:00, 121.70it/s]\u001b[A\n",
      " 71%|███████   | 232/328 [00:01<00:00, 123.78it/s]\u001b[A\n",
      " 75%|███████▍  | 245/328 [00:02<00:00, 114.90it/s]\u001b[A\n",
      " 79%|███████▊  | 258/328 [00:02<00:00, 118.54it/s]\u001b[A\n",
      " 83%|████████▎ | 272/328 [00:02<00:00, 122.13it/s]\u001b[A\n",
      " 87%|████████▋ | 285/328 [00:02<00:00, 122.29it/s]\u001b[A\n",
      " 91%|█████████ | 298/328 [00:02<00:00, 122.17it/s]\u001b[A\n",
      " 95%|█████████▍| 311/328 [00:02<00:00, 124.13it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 119.98it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.27680981595092025\n",
      "f1: 0.7409905540948597\n",
      "Test Loss: 0.008327, Acc: 0.750572\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3920/3920 [01:41<00:00, 38.58it/s]\n",
      "  4%|▎         | 12/328 [00:00<00:02, 119.69it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 328/328 [00:02<00:00, 130.45it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.2623312883435583\n",
      "f1: 0.6551243548544202\n",
      "Test Loss: 0.008261, Acc: 0.741139\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 13%|█▎        | 499/3920 [00:08<00:50, 68.02it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 12/328 [00:00<00:02, 117.02it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.3671\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 23/328 [00:00<00:02, 113.09it/s]\u001b[A\n",
      " 10%|█         | 34/328 [00:00<00:02, 110.08it/s]\u001b[A\n",
      " 15%|█▍        | 49/328 [00:00<00:02, 117.78it/s]\u001b[A\n",
      " 19%|█▉        | 63/328 [00:00<00:02, 122.10it/s]\u001b[A\n",
      " 23%|██▎       | 76/328 [00:00<00:02, 122.21it/s]\u001b[A\n",
      " 27%|██▋       | 90/328 [00:00<00:01, 126.07it/s]\u001b[A\n",
      " 32%|███▏      | 104/328 [00:00<00:01, 128.86it/s]\u001b[A\n",
      " 36%|███▌      | 117/328 [00:00<00:01, 128.97it/s]\u001b[A\n",
      " 40%|███▉      | 130/328 [00:01<00:01, 128.95it/s]\u001b[A\n",
      " 44%|████▍     | 144/328 [00:01<00:01, 129.57it/s]\u001b[A\n",
      " 48%|████▊     | 157/328 [00:01<00:01, 127.56it/s]\u001b[A\n",
      " 52%|█████▏    | 171/328 [00:01<00:01, 129.52it/s]\u001b[A\n",
      " 56%|█████▌    | 184/328 [00:01<00:01, 129.00it/s]\u001b[A\n",
      " 60%|██████    | 198/328 [00:01<00:00, 130.70it/s]\u001b[A\n",
      " 65%|██████▍   | 212/328 [00:01<00:00, 132.08it/s]\u001b[A\n",
      " 69%|██████▉   | 227/328 [00:01<00:00, 134.57it/s]\u001b[A\n",
      " 74%|███████▍  | 242/328 [00:01<00:00, 135.70it/s]\u001b[A\n",
      " 78%|███████▊  | 256/328 [00:01<00:00, 136.80it/s]\u001b[A\n",
      " 82%|████████▏ | 270/328 [00:02<00:00, 136.77it/s]\u001b[A\n",
      " 87%|████████▋ | 284/328 [00:02<00:00, 134.89it/s]\u001b[A\n",
      " 91%|█████████ | 298/328 [00:02<00:00, 134.47it/s]\u001b[A\n",
      " 95%|█████████▌| 312/328 [00:02<00:00, 135.12it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 129.82it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.29030674846625765\n",
      "f1: 0.7202898750874294\n",
      "Test Loss: 0.008003, Acc: 0.753668\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██▌       | 996/3920 [00:20<00:51, 56.55it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  3%|▎         | 9/328 [00:00<00:03, 83.74it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5041\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  5%|▌         | 18/328 [00:00<00:03, 84.41it/s]\u001b[A\n",
      "  8%|▊         | 27/328 [00:00<00:03, 84.92it/s]\u001b[A\n",
      " 11%|█         | 35/328 [00:00<00:03, 81.66it/s]\u001b[A\n",
      " 15%|█▍        | 49/328 [00:00<00:03, 92.45it/s]\u001b[A\n",
      " 19%|█▉        | 63/328 [00:00<00:02, 101.22it/s]\u001b[A\n",
      " 23%|██▎       | 77/328 [00:00<00:02, 109.46it/s]\u001b[A\n",
      " 28%|██▊       | 92/328 [00:00<00:02, 117.06it/s]\u001b[A\n",
      " 32%|███▏      | 106/328 [00:00<00:01, 111.44it/s]\u001b[A\n",
      " 36%|███▋      | 119/328 [00:01<00:01, 115.69it/s]\u001b[A\n",
      " 41%|████      | 134/328 [00:01<00:01, 122.40it/s]\u001b[A\n",
      " 45%|████▌     | 148/328 [00:01<00:01, 126.80it/s]\u001b[A\n",
      " 50%|████▉     | 163/328 [00:01<00:01, 130.89it/s]\u001b[A\n",
      " 54%|█████▍    | 178/328 [00:01<00:01, 135.98it/s]\u001b[A\n",
      " 59%|█████▉    | 194/328 [00:01<00:00, 140.12it/s]\u001b[A\n",
      " 64%|██████▎   | 209/328 [00:01<00:00, 141.57it/s]\u001b[A\n",
      " 69%|██████▊   | 225/328 [00:01<00:00, 144.56it/s]\u001b[A\n",
      " 73%|███████▎  | 241/328 [00:01<00:00, 147.02it/s]\u001b[A\n",
      " 78%|███████▊  | 257/328 [00:02<00:00, 150.12it/s]\u001b[A\n",
      " 83%|████████▎ | 273/328 [00:02<00:00, 149.34it/s]\u001b[A\n",
      " 88%|████████▊ | 288/328 [00:02<00:00, 147.36it/s]\u001b[A\n",
      " 92%|█████████▏| 303/328 [00:02<00:00, 145.97it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 129.49it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.28294478527607364\n",
      "f1: 0.7198815853539855\n",
      "Test Loss: 0.008097, Acc: 0.752906\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|███▊      | 1498/3920 [00:32<00:38, 62.99it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 12/328 [00:00<00:02, 115.94it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.512\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 24/328 [00:00<00:02, 115.17it/s]\u001b[A\n",
      " 11%|█▏        | 37/328 [00:00<00:02, 117.43it/s]\u001b[A\n",
      " 16%|█▌        | 53/328 [00:00<00:02, 126.18it/s]\u001b[A\n",
      " 20%|██        | 67/328 [00:00<00:02, 128.59it/s]\u001b[A\n",
      " 25%|██▌       | 82/328 [00:00<00:01, 132.57it/s]\u001b[A\n",
      " 30%|██▉       | 97/328 [00:00<00:01, 136.75it/s]\u001b[A\n",
      " 34%|███▍      | 112/328 [00:00<00:01, 139.18it/s]\u001b[A\n",
      " 39%|███▉      | 128/328 [00:00<00:01, 143.08it/s]\u001b[A\n",
      " 44%|████▍     | 144/328 [00:01<00:01, 147.04it/s]\u001b[A\n",
      " 48%|████▊     | 159/328 [00:01<00:01, 146.45it/s]\u001b[A\n",
      " 53%|█████▎    | 175/328 [00:01<00:01, 147.99it/s]\u001b[A\n",
      " 58%|█████▊    | 191/328 [00:01<00:00, 149.20it/s]\u001b[A\n",
      " 63%|██████▎   | 206/328 [00:01<00:00, 148.98it/s]\u001b[A\n",
      " 68%|██████▊   | 222/328 [00:01<00:00, 150.56it/s]\u001b[A\n",
      " 73%|███████▎  | 238/328 [00:01<00:00, 152.09it/s]\u001b[A\n",
      " 77%|███████▋  | 254/328 [00:01<00:00, 152.78it/s]\u001b[A\n",
      " 82%|████████▏ | 270/328 [00:01<00:00, 152.71it/s]\u001b[A\n",
      " 87%|████████▋ | 286/328 [00:01<00:00, 152.06it/s]\u001b[A\n",
      " 92%|█████████▏| 302/328 [00:02<00:00, 150.94it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 144.76it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.29226993865030676\n",
      "f1: 0.7138870386600517\n",
      "Test Loss: 0.008090, Acc: 0.754668\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 51%|█████     | 1993/3920 [00:44<00:30, 64.13it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 14/328 [00:00<00:02, 133.86it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.4514\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 26/328 [00:00<00:02, 123.11it/s]\u001b[A\n",
      " 11%|█         | 36/328 [00:00<00:02, 112.87it/s]\u001b[A\n",
      " 16%|█▌        | 52/328 [00:00<00:02, 122.05it/s]\u001b[A\n",
      " 20%|██        | 66/328 [00:00<00:02, 126.20it/s]\u001b[A\n",
      " 24%|██▍       | 80/328 [00:00<00:01, 128.40it/s]\u001b[A\n",
      " 29%|██▉       | 95/328 [00:00<00:01, 133.23it/s]\u001b[A\n",
      " 34%|███▎      | 110/328 [00:00<00:01, 136.25it/s]\u001b[A\n",
      " 38%|███▊      | 125/328 [00:00<00:01, 139.37it/s]\u001b[A\n",
      " 42%|████▏     | 139/328 [00:01<00:01, 139.16it/s]\u001b[A\n",
      " 47%|████▋     | 154/328 [00:01<00:01, 139.95it/s]\u001b[A\n",
      " 51%|█████     | 168/328 [00:01<00:01, 125.16it/s]\u001b[A\n",
      " 56%|█████▌    | 183/328 [00:01<00:01, 130.75it/s]\u001b[A\n",
      " 60%|██████    | 198/328 [00:01<00:00, 133.84it/s]\u001b[A\n",
      " 65%|██████▍   | 213/328 [00:01<00:00, 137.57it/s]\u001b[A\n",
      " 70%|██████▉   | 229/328 [00:01<00:00, 142.33it/s]\u001b[A\n",
      " 75%|███████▍  | 245/328 [00:01<00:00, 145.45it/s]\u001b[A\n",
      " 80%|███████▉  | 261/328 [00:01<00:00, 146.99it/s]\u001b[A\n",
      " 84%|████████▍ | 276/328 [00:02<00:00, 147.76it/s]\u001b[A\n",
      " 89%|████████▊ | 291/328 [00:02<00:00, 146.69it/s]\u001b[A\n",
      " 93%|█████████▎| 306/328 [00:02<00:00, 146.71it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 136.71it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 51%|█████     | 2000/3920 [00:48<05:47,  5.53it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2885889570552147\n",
      "f1: 0.7038147823546664\n",
      "Test Loss: 0.008093, Acc: 0.751000\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 64%|██████▎   | 2496/3920 [00:56<00:22, 61.96it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 12/328 [00:00<00:02, 118.76it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.5751\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 24/328 [00:00<00:02, 118.13it/s]\u001b[A\n",
      " 11%|█         | 35/328 [00:00<00:02, 113.31it/s]\u001b[A\n",
      " 15%|█▍        | 49/328 [00:00<00:02, 119.99it/s]\u001b[A\n",
      " 19%|█▉        | 62/328 [00:00<00:02, 121.53it/s]\u001b[A\n",
      " 23%|██▎       | 75/328 [00:00<00:02, 122.94it/s]\u001b[A\n",
      " 27%|██▋       | 89/328 [00:00<00:01, 125.17it/s]\u001b[A\n",
      " 31%|███▏      | 103/328 [00:00<00:01, 128.06it/s]\u001b[A\n",
      " 36%|███▌      | 117/328 [00:00<00:01, 129.05it/s]\u001b[A\n",
      " 40%|███▉      | 131/328 [00:01<00:01, 131.45it/s]\u001b[A\n",
      " 44%|████▍     | 145/328 [00:01<00:01, 132.36it/s]\u001b[A\n",
      " 48%|████▊     | 159/328 [00:01<00:01, 132.01it/s]\u001b[A\n",
      " 53%|█████▎    | 173/328 [00:01<00:01, 131.96it/s]\u001b[A\n",
      " 57%|█████▋    | 187/328 [00:01<00:01, 133.92it/s]\u001b[A\n",
      " 61%|██████▏   | 201/328 [00:01<00:00, 134.14it/s]\u001b[A\n",
      " 66%|██████▌   | 215/328 [00:01<00:00, 135.33it/s]\u001b[A\n",
      " 70%|███████   | 230/328 [00:01<00:00, 138.10it/s]\u001b[A\n",
      " 75%|███████▍  | 245/328 [00:01<00:00, 139.35it/s]\u001b[A\n",
      " 79%|███████▉  | 259/328 [00:01<00:00, 138.96it/s]\u001b[A\n",
      " 84%|████████▎ | 274/328 [00:02<00:00, 139.96it/s]\u001b[A\n",
      " 88%|████████▊ | 288/328 [00:02<00:00, 137.31it/s]\u001b[A\n",
      " 92%|█████████▏| 302/328 [00:02<00:00, 135.63it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 131.37it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.2917791411042945\n",
      "f1: 0.7125157977673384\n",
      "Test Loss: 0.008114, Acc: 0.753811\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 77%|███████▋  | 2999/3920 [01:08<00:14, 62.83it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▎         | 12/328 [00:00<00:02, 112.50it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3000, loss: 0.6572\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  7%|▋         | 23/328 [00:00<00:02, 109.95it/s]\u001b[A\n",
      " 10%|█         | 33/328 [00:00<00:02, 106.11it/s]\u001b[A\n",
      " 15%|█▍        | 48/328 [00:00<00:02, 115.08it/s]\u001b[A\n",
      " 19%|█▉        | 62/328 [00:00<00:02, 121.19it/s]\u001b[A\n",
      " 23%|██▎       | 76/328 [00:00<00:02, 124.13it/s]\u001b[A\n",
      " 27%|██▋       | 88/328 [00:00<00:02, 110.29it/s]\u001b[A\n",
      " 31%|███       | 102/328 [00:00<00:01, 117.60it/s]\u001b[A\n",
      " 36%|███▌      | 117/328 [00:00<00:01, 123.70it/s]\u001b[A\n",
      " 40%|███▉      | 131/328 [00:01<00:01, 126.93it/s]\u001b[A\n",
      " 44%|████▍     | 145/328 [00:01<00:01, 129.93it/s]\u001b[A\n",
      " 49%|████▉     | 160/328 [00:01<00:01, 133.67it/s]\u001b[A\n",
      " 53%|█████▎    | 174/328 [00:04<00:10, 14.37it/s] \u001b[A\n",
      " 58%|█████▊    | 190/328 [00:04<00:06, 19.73it/s]\u001b[A\n",
      " 63%|██████▎   | 206/328 [00:04<00:04, 26.71it/s]\u001b[A\n",
      " 68%|██████▊   | 222/328 [00:04<00:02, 35.51it/s]\u001b[A\n",
      " 73%|███████▎  | 238/328 [00:04<00:01, 46.28it/s]\u001b[A\n",
      " 77%|███████▋  | 254/328 [00:04<00:01, 58.80it/s]\u001b[A\n",
      " 82%|████████▏ | 270/328 [00:04<00:00, 72.11it/s]\u001b[A\n",
      " 88%|████████▊ | 287/328 [00:05<00:00, 86.34it/s]\u001b[A\n",
      " 92%|█████████▏| 303/328 [00:05<00:00, 99.53it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:05<00:00, 62.15it/s] \u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.28883435582822087\n",
      "f1: 0.7268974583698606\n",
      "Test Loss: 0.008030, Acc: 0.753763\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 89%|████████▉ | 3499/3920 [01:23<00:06, 68.12it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 15/328 [00:00<00:02, 148.88it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 3500, loss: 0.5852\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 30/328 [00:00<00:02, 147.25it/s]\u001b[A\n",
      " 14%|█▍        | 46/328 [00:00<00:01, 148.39it/s]\u001b[A\n",
      " 19%|█▉        | 62/328 [00:00<00:01, 150.00it/s]\u001b[A\n",
      " 24%|██▍       | 78/328 [00:00<00:01, 150.95it/s]\u001b[A\n",
      " 28%|██▊       | 93/328 [00:00<00:01, 150.23it/s]\u001b[A\n",
      " 33%|███▎      | 109/328 [00:00<00:01, 152.32it/s]\u001b[A\n",
      " 38%|███▊      | 124/328 [00:00<00:01, 151.42it/s]\u001b[A\n",
      " 43%|████▎     | 140/328 [00:00<00:01, 151.27it/s]\u001b[A\n",
      " 48%|████▊     | 156/328 [00:01<00:01, 152.28it/s]\u001b[A\n",
      " 53%|█████▎    | 173/328 [00:01<00:01, 154.64it/s]\u001b[A\n",
      " 58%|█████▊    | 189/328 [00:01<00:00, 155.63it/s]\u001b[A\n",
      " 62%|██████▎   | 205/328 [00:01<00:00, 155.17it/s]\u001b[A\n",
      " 67%|██████▋   | 221/328 [00:01<00:00, 155.29it/s]\u001b[A\n",
      " 72%|███████▏  | 237/328 [00:01<00:00, 155.91it/s]\u001b[A\n",
      " 77%|███████▋  | 253/328 [00:01<00:00, 153.99it/s]\u001b[A\n",
      " 82%|████████▏ | 269/328 [00:01<00:00, 154.35it/s]\u001b[A\n",
      " 87%|████████▋ | 285/328 [00:01<00:00, 155.02it/s]\u001b[A\n",
      " 92%|█████████▏| 301/328 [00:01<00:00, 153.42it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 152.39it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.2743558282208589\n",
      "f1: 0.736401862622732\n",
      "Test Loss: 0.008363, Acc: 0.748095\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3920/3920 [01:33<00:00, 42.13it/s]\n",
      "  4%|▍         | 13/328 [00:00<00:02, 129.70it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 328/328 [00:02<00:00, 157.15it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/3920 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.28\n",
      "f1: 0.6869775051124795\n",
      "Test Loss: 0.008122, Acc: 0.749190\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 13%|█▎        | 497/3920 [00:07<00:53, 64.19it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 14/328 [00:00<00:02, 131.98it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.3745\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 29/328 [00:00<00:02, 134.56it/s]\u001b[A\n",
      " 13%|█▎        | 44/328 [00:00<00:02, 137.56it/s]\u001b[A\n",
      " 18%|█▊        | 60/328 [00:00<00:01, 142.18it/s]\u001b[A\n",
      " 23%|██▎       | 76/328 [00:00<00:01, 144.95it/s]\u001b[A\n",
      " 27%|██▋       | 89/328 [00:00<00:01, 124.69it/s]\u001b[A\n",
      " 32%|███▏      | 105/328 [00:00<00:01, 132.23it/s]\u001b[A\n",
      " 37%|███▋      | 121/328 [00:00<00:01, 137.72it/s]\u001b[A\n",
      " 42%|████▏     | 137/328 [00:00<00:01, 142.17it/s]\u001b[A\n",
      " 47%|████▋     | 153/328 [00:01<00:01, 145.81it/s]\u001b[A\n",
      " 52%|█████▏    | 170/328 [00:01<00:01, 150.47it/s]\u001b[A\n",
      " 57%|█████▋    | 187/328 [00:01<00:00, 154.10it/s]\u001b[A\n",
      " 62%|██████▏   | 203/328 [00:01<00:00, 155.64it/s]\u001b[A\n",
      " 67%|██████▋   | 219/328 [00:01<00:00, 155.07it/s]\u001b[A\n",
      " 72%|███████▏  | 236/328 [00:01<00:00, 158.38it/s]\u001b[A\n",
      " 77%|███████▋  | 253/328 [00:01<00:00, 161.08it/s]\u001b[A\n",
      " 82%|████████▏ | 270/328 [00:01<00:00, 160.20it/s]\u001b[A\n",
      " 88%|████████▊ | 287/328 [00:01<00:00, 160.21it/s]\u001b[A\n",
      " 93%|█████████▎| 304/328 [00:02<00:00, 161.34it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 151.16it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.2880981595092025\n",
      "f1: 0.7284600961410865\n",
      "Test Loss: 0.008011, Acc: 0.752858\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██▌       | 999/3920 [00:18<00:37, 77.53it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 15/328 [00:00<00:02, 145.43it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5014\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▊         | 28/328 [00:00<00:02, 139.20it/s]\u001b[A\n",
      " 13%|█▎        | 42/328 [00:00<00:02, 139.41it/s]\u001b[A\n",
      " 18%|█▊        | 58/328 [00:00<00:01, 142.86it/s]\u001b[A\n",
      " 22%|██▏       | 73/328 [00:00<00:01, 143.97it/s]\u001b[A\n",
      " 27%|██▋       | 89/328 [00:00<00:01, 146.49it/s]\u001b[A\n",
      " 32%|███▏      | 105/328 [00:00<00:01, 148.07it/s]\u001b[A\n",
      " 37%|███▋      | 121/328 [00:00<00:01, 150.18it/s]\u001b[A\n",
      " 41%|████▏     | 136/328 [00:00<00:01, 143.01it/s]\u001b[A\n",
      " 46%|████▌     | 150/328 [00:01<00:01, 137.96it/s]\u001b[A\n",
      " 51%|█████     | 166/328 [00:01<00:01, 141.75it/s]\u001b[A\n",
      " 55%|█████▌    | 182/328 [00:01<00:01, 144.59it/s]\u001b[A\n",
      " 60%|██████    | 197/328 [00:01<00:01, 127.25it/s]\u001b[A\n",
      " 65%|██████▍   | 213/328 [00:01<00:00, 135.08it/s]\u001b[A\n",
      " 70%|██████▉   | 229/328 [00:01<00:00, 141.53it/s]\u001b[A\n",
      " 75%|███████▍  | 245/328 [00:01<00:00, 145.87it/s]\u001b[A\n",
      " 80%|███████▉  | 261/328 [00:01<00:00, 149.07it/s]\u001b[A\n",
      " 84%|████████▍ | 277/328 [00:01<00:00, 152.08it/s]\u001b[A\n",
      " 89%|████████▉ | 293/328 [00:02<00:00, 153.46it/s]\u001b[A\n",
      " 94%|█████████▍| 309/328 [00:02<00:00, 153.46it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 145.41it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.2878527607361963\n",
      "f1: 0.7159723439478121\n",
      "Test Loss: 0.008067, Acc: 0.753287\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|███▊      | 1498/3920 [00:29<00:37, 65.23it/s]\n",
      "  0%|          | 0/328 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 13/328 [00:00<00:02, 125.94it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5057\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  8%|▊         | 26/328 [00:00<00:02, 124.93it/s]\u001b[A\n",
      " 12%|█▏        | 39/328 [00:00<00:02, 124.44it/s]\u001b[A\n",
      " 16%|█▌        | 53/328 [00:00<00:02, 127.37it/s]\u001b[A\n",
      " 20%|██        | 67/328 [00:00<00:02, 129.64it/s]\u001b[A\n",
      " 25%|██▍       | 81/328 [00:00<00:01, 130.55it/s]\u001b[A\n",
      " 29%|██▉       | 95/328 [00:00<00:01, 132.82it/s]\u001b[A\n",
      " 33%|███▎      | 109/328 [00:00<00:01, 133.26it/s]\u001b[A\n",
      " 38%|███▊      | 123/328 [00:00<00:01, 133.80it/s]\u001b[A\n",
      " 42%|████▏     | 138/328 [00:01<00:01, 136.20it/s]\u001b[A\n",
      " 46%|████▋     | 152/328 [00:01<00:01, 136.70it/s]\u001b[A\n",
      " 51%|█████     | 166/328 [00:01<00:01, 137.11it/s]\u001b[A\n",
      " 55%|█████▍    | 180/328 [00:01<00:01, 136.09it/s]\u001b[A\n",
      " 59%|█████▉    | 194/328 [00:01<00:00, 135.24it/s]\u001b[A\n",
      " 64%|██████▎   | 209/328 [00:01<00:00, 137.13it/s]\u001b[A\n",
      " 68%|██████▊   | 224/328 [00:01<00:00, 138.92it/s]\u001b[A\n",
      " 73%|███████▎  | 239/328 [00:01<00:00, 141.60it/s]\u001b[A\n",
      " 77%|███████▋  | 254/328 [00:01<00:00, 143.67it/s]\u001b[A\n",
      " 82%|████████▏ | 269/328 [00:01<00:00, 143.35it/s]\u001b[A\n",
      " 87%|████████▋ | 284/328 [00:02<00:00, 142.47it/s]\u001b[A\n",
      " 91%|█████████ | 299/328 [00:02<00:00, 142.45it/s]\u001b[A\n",
      "100%|██████████| 328/328 [00:02<00:00, 136.90it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 20992 dev_label_list: 20992 example_id_list: 20992\n",
      "num: 20992\n",
      "n: 4075\n",
      "em: 0.2841717791411043\n",
      "f1: 0.707093972149193\n",
      "Test Loss: 0.008054, Acc: 0.752048\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 47%|████▋     | 1852/3920 [00:39<00:43, 47.10it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-93-3056bd8d464c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent7verb4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-90-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m                 \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     79\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m                 \u001b[0mepoch\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m     91\u001b[0m     Variable._execution_engine.run_backward(\n\u001b[1;32m     92\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m         allow_unreachable=True)  # allow_unreachable flag\n\u001b[0m\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#sent7verb4\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▉        | 498/2585 [00:08<00:33, 62.79it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 15/225 [00:00<00:01, 144.78it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.6687\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 29/225 [00:00<00:01, 143.00it/s]\u001b[A\n",
      " 20%|█▉        | 44/225 [00:00<00:01, 144.34it/s]\u001b[A\n",
      " 26%|██▌       | 59/225 [00:00<00:01, 144.57it/s]\u001b[A\n",
      " 33%|███▎      | 74/225 [00:00<00:01, 145.16it/s]\u001b[A\n",
      " 40%|███▉      | 89/225 [00:00<00:00, 145.64it/s]\u001b[A\n",
      " 46%|████▌     | 104/225 [00:00<00:00, 145.91it/s]\u001b[A\n",
      " 53%|█████▎    | 119/225 [00:00<00:00, 145.30it/s]\u001b[A\n",
      " 60%|██████    | 135/225 [00:00<00:00, 147.13it/s]\u001b[A\n",
      " 67%|██████▋   | 150/225 [00:01<00:00, 147.52it/s]\u001b[A\n",
      " 74%|███████▍  | 166/225 [00:01<00:00, 149.02it/s]\u001b[A\n",
      " 81%|████████  | 182/225 [00:01<00:00, 149.91it/s]\u001b[A\n",
      " 88%|████████▊ | 198/225 [00:01<00:00, 151.21it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 148.20it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 20%|█▉        | 505/2585 [00:10<04:06,  8.45it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29772329246935203\n",
      "f1: 0.7183009479331737\n",
      "Test Loss: 0.008121, Acc: 0.748681\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|███▊      | 993/2585 [00:18<00:25, 61.43it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 17/225 [00:00<00:01, 161.50it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4994\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 26/225 [00:00<00:01, 127.62it/s]\u001b[A\n",
      " 19%|█▉        | 43/225 [00:00<00:01, 136.18it/s]\u001b[A\n",
      " 27%|██▋       | 60/225 [00:00<00:01, 144.26it/s]\u001b[A\n",
      " 34%|███▍      | 77/225 [00:00<00:00, 149.82it/s]\u001b[A\n",
      " 42%|████▏     | 94/225 [00:00<00:00, 154.68it/s]\u001b[A\n",
      " 49%|████▉     | 111/225 [00:00<00:00, 157.45it/s]\u001b[A\n",
      " 57%|█████▋    | 128/225 [00:00<00:00, 159.96it/s]\u001b[A\n",
      " 64%|██████▍   | 144/225 [00:00<00:00, 159.75it/s]\u001b[A\n",
      " 72%|███████▏  | 161/225 [00:01<00:00, 161.64it/s]\u001b[A\n",
      " 79%|███████▊  | 177/225 [00:01<00:00, 161.00it/s]\u001b[A\n",
      " 86%|████████▌ | 194/225 [00:01<00:00, 161.43it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 157.72it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 39%|███▉      | 1007/2585 [00:20<02:17, 11.51it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.25499124343257445\n",
      "f1: 0.7484731225186594\n",
      "Test Loss: 0.008475, Acc: 0.730278\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 58%|█████▊    | 1494/2585 [00:28<00:18, 59.14it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/225 [00:00<00:01, 118.97it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5311\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 28/225 [00:00<00:01, 127.95it/s]\u001b[A\n",
      " 20%|█▉        | 44/225 [00:00<00:01, 134.95it/s]\u001b[A\n",
      " 27%|██▋       | 60/225 [00:00<00:01, 141.58it/s]\u001b[A\n",
      " 34%|███▍      | 76/225 [00:00<00:01, 144.32it/s]\u001b[A\n",
      " 41%|████▏     | 93/225 [00:00<00:00, 148.91it/s]\u001b[A\n",
      " 48%|████▊     | 109/225 [00:00<00:00, 151.72it/s]\u001b[A\n",
      " 56%|█████▌    | 125/225 [00:00<00:00, 152.83it/s]\u001b[A\n",
      " 63%|██████▎   | 141/225 [00:00<00:00, 152.80it/s]\u001b[A\n",
      " 70%|██████▉   | 157/225 [00:01<00:00, 154.12it/s]\u001b[A\n",
      " 77%|███████▋  | 174/225 [00:01<00:00, 157.18it/s]\u001b[A\n",
      " 85%|████████▍ | 191/225 [00:01<00:00, 159.01it/s]\u001b[A\n",
      " 92%|█████████▏| 207/225 [00:01<00:00, 158.39it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 155.28it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 58%|█████▊    | 1508/2585 [00:31<01:35, 11.24it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2991243432574431\n",
      "f1: 0.7318694576487941\n",
      "Test Loss: 0.008257, Acc: 0.752222\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 77%|███████▋  | 1999/2585 [00:39<00:10, 54.69it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 17/225 [00:00<00:01, 164.23it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.3787\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 34/225 [00:00<00:01, 164.87it/s]\u001b[A\n",
      " 23%|██▎       | 51/225 [00:00<00:01, 165.76it/s]\u001b[A\n",
      " 30%|███       | 68/225 [00:00<00:00, 166.44it/s]\u001b[A\n",
      " 38%|███▊      | 85/225 [00:00<00:00, 164.96it/s]\u001b[A\n",
      " 45%|████▍     | 101/225 [00:00<00:00, 163.41it/s]\u001b[A\n",
      " 52%|█████▏    | 118/225 [00:00<00:00, 162.85it/s]\u001b[A\n",
      " 60%|██████    | 135/225 [00:00<00:00, 163.18it/s]\u001b[A\n",
      " 68%|██████▊   | 152/225 [00:00<00:00, 163.39it/s]\u001b[A\n",
      " 75%|███████▍  | 168/225 [00:01<00:00, 161.08it/s]\u001b[A\n",
      " 82%|████████▏ | 184/225 [00:01<00:00, 160.71it/s]\u001b[A\n",
      " 89%|████████▉ | 201/225 [00:01<00:00, 160.67it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 162.58it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 78%|███████▊  | 2005/2585 [00:41<01:17,  7.49it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.31138353765323995\n",
      "f1: 0.7113576849303659\n",
      "Test Loss: 0.008038, Acc: 0.753403\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 96%|█████████▋| 2494/2585 [00:49<00:01, 61.84it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 17/225 [00:00<00:01, 165.06it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4046\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 34/225 [00:00<00:01, 164.57it/s]\u001b[A\n",
      " 20%|██        | 46/225 [00:00<00:01, 146.85it/s]\u001b[A\n",
      " 28%|██▊       | 64/225 [00:00<00:01, 153.22it/s]\u001b[A\n",
      " 36%|███▌      | 80/225 [00:00<00:00, 155.10it/s]\u001b[A\n",
      " 43%|████▎     | 97/225 [00:00<00:00, 158.68it/s]\u001b[A\n",
      " 51%|█████     | 114/225 [00:00<00:00, 161.36it/s]\u001b[A\n",
      " 58%|█████▊    | 131/225 [00:00<00:00, 162.39it/s]\u001b[A\n",
      " 66%|██████▌   | 148/225 [00:00<00:00, 163.58it/s]\u001b[A\n",
      " 73%|███████▎  | 165/225 [00:01<00:00, 164.10it/s]\u001b[A\n",
      " 81%|████████  | 182/225 [00:01<00:00, 164.02it/s]\u001b[A\n",
      " 88%|████████▊ | 199/225 [00:01<00:00, 164.55it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 161.91it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 97%|█████████▋| 2508/2585 [00:52<00:06, 11.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.30192644483362524\n",
      "f1: 0.6986031190059231\n",
      "Test Loss: 0.008102, Acc: 0.750069\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2585/2585 [00:53<00:00, 48.32it/s]\n",
      "  5%|▌         | 12/225 [00:00<00:01, 112.65it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 225/225 [00:01<00:00, 154.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 7/2585 [00:00<00:40, 62.91it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.30367775831873906\n",
      "f1: 0.7056806493759226\n",
      "Test Loss: 0.008054, Acc: 0.751458\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▉        | 498/2585 [00:07<00:34, 60.94it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 17/225 [00:00<00:01, 166.11it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.6237\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 34/225 [00:00<00:01, 165.37it/s]\u001b[A\n",
      " 22%|██▏       | 50/225 [00:00<00:01, 161.11it/s]\u001b[A\n",
      " 28%|██▊       | 63/225 [00:00<00:01, 149.23it/s]\u001b[A\n",
      " 36%|███▌      | 80/225 [00:00<00:00, 152.83it/s]\u001b[A\n",
      " 43%|████▎     | 96/225 [00:00<00:00, 154.54it/s]\u001b[A\n",
      " 50%|█████     | 113/225 [00:00<00:00, 157.23it/s]\u001b[A\n",
      " 58%|█████▊    | 130/225 [00:00<00:00, 158.70it/s]\u001b[A\n",
      " 65%|██████▍   | 146/225 [00:00<00:00, 159.07it/s]\u001b[A\n",
      " 72%|███████▏  | 163/225 [00:01<00:00, 159.52it/s]\u001b[A\n",
      " 80%|███████▉  | 179/225 [00:01<00:00, 158.41it/s]\u001b[A\n",
      " 87%|████████▋ | 196/225 [00:01<00:00, 161.67it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 159.14it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 20%|█▉        | 505/2585 [00:10<04:08,  8.36it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2991243432574431\n",
      "f1: 0.7399813749200818\n",
      "Test Loss: 0.008109, Acc: 0.752639\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 39%|███▊      | 998/2585 [00:18<00:25, 61.25it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 15/225 [00:00<00:01, 145.16it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.483\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 30/225 [00:00<00:01, 144.70it/s]\u001b[A\n",
      " 20%|██        | 45/225 [00:00<00:01, 143.79it/s]\u001b[A\n",
      " 27%|██▋       | 60/225 [00:00<00:01, 143.98it/s]\u001b[A\n",
      " 34%|███▍      | 76/225 [00:00<00:01, 146.29it/s]\u001b[A\n",
      " 40%|████      | 91/225 [00:00<00:00, 146.55it/s]\u001b[A\n",
      " 48%|████▊     | 107/225 [00:00<00:00, 148.60it/s]\u001b[A\n",
      " 54%|█████▍    | 122/225 [00:00<00:00, 148.24it/s]\u001b[A\n",
      " 61%|██████    | 137/225 [00:00<00:00, 148.12it/s]\u001b[A\n",
      " 68%|██████▊   | 153/225 [00:01<00:00, 149.70it/s]\u001b[A\n",
      " 75%|███████▍  | 168/225 [00:01<00:00, 149.50it/s]\u001b[A\n",
      " 81%|████████▏ | 183/225 [00:01<00:00, 148.68it/s]\u001b[A\n",
      " 88%|████████▊ | 198/225 [00:01<00:00, 148.66it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 148.33it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 39%|███▉      | 1005/2585 [00:21<03:12,  8.20it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2739054290718039\n",
      "f1: 0.7512382519212668\n",
      "Test Loss: 0.008252, Acc: 0.741111\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 58%|█████▊    | 1494/2585 [00:28<00:14, 72.92it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 15/225 [00:00<00:01, 143.69it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5003\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 30/225 [00:00<00:01, 142.96it/s]\u001b[A\n",
      " 20%|██        | 45/225 [00:00<00:01, 143.34it/s]\u001b[A\n",
      " 27%|██▋       | 60/225 [00:00<00:01, 144.96it/s]\u001b[A\n",
      " 34%|███▍      | 76/225 [00:00<00:01, 147.26it/s]\u001b[A\n",
      " 40%|████      | 91/225 [00:00<00:00, 146.29it/s]\u001b[A\n",
      " 47%|████▋     | 106/225 [00:00<00:00, 146.94it/s]\u001b[A\n",
      " 54%|█████▍    | 122/225 [00:00<00:00, 148.33it/s]\u001b[A\n",
      " 61%|██████▏   | 138/225 [00:00<00:00, 149.89it/s]\u001b[A\n",
      " 68%|██████▊   | 153/225 [00:01<00:00, 136.79it/s]\u001b[A\n",
      " 74%|███████▍  | 167/225 [00:01<00:00, 132.38it/s]\u001b[A\n",
      " 80%|████████  | 181/225 [00:01<00:00, 130.66it/s]\u001b[A\n",
      " 88%|████████▊ | 197/225 [00:01<00:00, 137.18it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 142.18it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 58%|█████▊    | 1502/2585 [00:31<01:59,  9.05it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.30017513134851137\n",
      "f1: 0.7311800517054474\n",
      "Test Loss: 0.008060, Acc: 0.754444\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 77%|███████▋  | 1999/2585 [00:38<00:08, 69.69it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 17/225 [00:00<00:01, 167.14it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.3674\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 26/225 [00:00<00:01, 130.28it/s]\u001b[A\n",
      " 19%|█▉        | 43/225 [00:00<00:01, 138.44it/s]\u001b[A\n",
      " 25%|██▌       | 57/225 [00:00<00:01, 138.65it/s]\u001b[A\n",
      " 32%|███▏      | 72/225 [00:00<00:01, 140.28it/s]\u001b[A\n",
      " 39%|███▊      | 87/225 [00:00<00:00, 140.84it/s]\u001b[A\n",
      " 45%|████▍     | 101/225 [00:00<00:00, 140.21it/s]\u001b[A\n",
      " 52%|█████▏    | 116/225 [00:00<00:00, 140.75it/s]\u001b[A\n",
      " 59%|█████▊    | 132/225 [00:00<00:00, 143.57it/s]\u001b[A\n",
      " 65%|██████▌   | 147/225 [00:01<00:00, 143.88it/s]\u001b[A\n",
      " 72%|███████▏  | 162/225 [00:01<00:00, 128.30it/s]\u001b[A\n",
      " 79%|███████▉  | 178/225 [00:01<00:00, 135.11it/s]\u001b[A\n",
      " 86%|████████▌ | 194/225 [00:01<00:00, 140.48it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 141.18it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 77%|███████▋  | 1999/2585 [00:49<00:08, 69.69it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 77%|███████▋  | 2000/2585 [00:51<38:08,  3.91s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.30718038528896674\n",
      "f1: 0.723354460289663\n",
      "Test Loss: 0.008004, Acc: 0.753819\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 97%|█████████▋| 2498/2585 [00:59<00:01, 61.15it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 14/225 [00:00<00:01, 132.54it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4025\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 29/225 [00:00<00:01, 136.04it/s]\u001b[A\n",
      " 20%|█▉        | 44/225 [00:00<00:01, 139.65it/s]\u001b[A\n",
      " 26%|██▌       | 59/225 [00:00<00:01, 142.25it/s]\u001b[A\n",
      " 33%|███▎      | 74/225 [00:00<00:01, 144.23it/s]\u001b[A\n",
      " 40%|███▉      | 89/225 [00:00<00:00, 144.26it/s]\u001b[A\n",
      " 46%|████▌     | 104/225 [00:00<00:00, 145.17it/s]\u001b[A\n",
      " 53%|█████▎    | 120/225 [00:00<00:00, 148.31it/s]\u001b[A\n",
      " 60%|██████    | 136/225 [00:00<00:00, 148.72it/s]\u001b[A\n",
      " 67%|██████▋   | 151/225 [00:01<00:00, 148.28it/s]\u001b[A\n",
      " 74%|███████▍  | 166/225 [00:01<00:00, 148.40it/s]\u001b[A\n",
      " 80%|████████  | 181/225 [00:01<00:00, 146.45it/s]\u001b[A\n",
      " 87%|████████▋ | 196/225 [00:01<00:00, 147.25it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 147.61it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 97%|█████████▋| 2505/2585 [01:01<00:09,  8.10it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.302276707530648\n",
      "f1: 0.7143229644455581\n",
      "Test Loss: 0.008034, Acc: 0.753056\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2585/2585 [01:03<00:00, 41.03it/s]\n",
      "  7%|▋         | 15/225 [00:00<00:01, 141.02it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 225/225 [00:01<00:00, 150.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2585 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.30542907180385287\n",
      "f1: 0.7195830205987839\n",
      "Test Loss: 0.008001, Acc: 0.754167\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▉        | 494/2585 [00:07<00:30, 68.75it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  4%|▍         | 10/225 [00:00<00:02, 88.94it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.6644\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  9%|▉         | 21/225 [00:00<00:02, 92.80it/s]\u001b[A\n",
      " 15%|█▌        | 34/225 [00:00<00:01, 100.50it/s]\u001b[A\n",
      " 21%|██        | 47/225 [00:00<00:01, 107.67it/s]\u001b[A\n",
      " 27%|██▋       | 60/225 [00:00<00:01, 113.41it/s]\u001b[A\n",
      " 32%|███▏      | 73/225 [00:00<00:01, 116.09it/s]\u001b[A\n",
      " 38%|███▊      | 86/225 [00:00<00:01, 119.52it/s]\u001b[A\n",
      " 44%|████▎     | 98/225 [00:00<00:01, 107.06it/s]\u001b[A\n",
      " 49%|████▉     | 111/225 [00:00<00:01, 112.68it/s]\u001b[A\n",
      " 55%|█████▌    | 124/225 [00:01<00:00, 117.29it/s]\u001b[A\n",
      " 61%|██████    | 137/225 [00:01<00:00, 120.71it/s]\u001b[A\n",
      " 67%|██████▋   | 150/225 [00:01<00:00, 123.22it/s]\u001b[A\n",
      " 72%|███████▏  | 163/225 [00:01<00:00, 124.26it/s]\u001b[A\n",
      " 78%|███████▊  | 176/225 [00:01<00:00, 124.13it/s]\u001b[A\n",
      " 84%|████████▍ | 190/225 [00:01<00:00, 128.17it/s]\u001b[A\n",
      " 91%|█████████ | 204/225 [00:01<00:00, 130.24it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 122.79it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 19%|█▉        | 502/2585 [00:10<04:09,  8.34it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29597197898423816\n",
      "f1: 0.7414400699514561\n",
      "Test Loss: 0.008075, Acc: 0.750903\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 39%|███▊      | 999/2585 [00:18<00:24, 64.51it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 14/225 [00:00<00:01, 131.39it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4589\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 30/225 [00:00<00:01, 136.84it/s]\u001b[A\n",
      " 20%|██        | 45/225 [00:00<00:01, 138.87it/s]\u001b[A\n",
      " 27%|██▋       | 60/225 [00:00<00:01, 141.74it/s]\u001b[A\n",
      " 34%|███▍      | 76/225 [00:00<00:01, 144.32it/s]\u001b[A\n",
      " 40%|████      | 91/225 [00:00<00:00, 142.88it/s]\u001b[A\n",
      " 48%|████▊     | 107/225 [00:00<00:00, 147.24it/s]\u001b[A\n",
      " 55%|█████▍    | 123/225 [00:00<00:00, 150.24it/s]\u001b[A\n",
      " 62%|██████▏   | 139/225 [00:00<00:00, 150.48it/s]\u001b[A\n",
      " 69%|██████▉   | 155/225 [00:01<00:00, 153.21it/s]\u001b[A\n",
      " 76%|███████▌  | 171/225 [00:01<00:00, 154.98it/s]\u001b[A\n",
      " 83%|████████▎ | 187/225 [00:01<00:00, 154.70it/s]\u001b[A\n",
      " 90%|█████████ | 203/225 [00:01<00:00, 156.15it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 151.86it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n",
      "em: 0.30542907180385287\n",
      "f1: 0.7378661774108383\n",
      "Test Loss: 0.008008, Acc: 0.754583\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 58%|█████▊    | 1494/2585 [00:28<00:17, 62.40it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 15/225 [00:00<00:01, 142.56it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.4986\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 31/225 [00:00<00:01, 147.24it/s]\u001b[A\n",
      " 21%|██▏       | 48/225 [00:00<00:01, 151.46it/s]\u001b[A\n",
      " 28%|██▊       | 64/225 [00:00<00:01, 153.78it/s]\u001b[A\n",
      " 36%|███▌      | 80/225 [00:00<00:00, 153.63it/s]\u001b[A\n",
      " 43%|████▎     | 96/225 [00:00<00:00, 153.96it/s]\u001b[A\n",
      " 49%|████▉     | 110/225 [00:00<00:00, 141.83it/s]\u001b[A\n",
      " 56%|█████▌    | 125/225 [00:00<00:00, 143.65it/s]\u001b[A\n",
      " 63%|██████▎   | 141/225 [00:00<00:00, 148.01it/s]\u001b[A\n",
      " 70%|███████   | 158/225 [00:01<00:00, 152.40it/s]\u001b[A\n",
      " 77%|███████▋  | 173/225 [00:01<00:00, 150.71it/s]\u001b[A\n",
      " 84%|████████▍ | 189/225 [00:01<00:00, 151.83it/s]\u001b[A\n",
      " 91%|█████████ | 205/225 [00:01<00:00, 153.62it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 152.74it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 58%|█████▊    | 1501/2585 [00:31<02:10,  8.31it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2987740805604203\n",
      "f1: 0.74098907513969\n",
      "Test Loss: 0.008048, Acc: 0.752361\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 77%|███████▋  | 1996/2585 [00:38<00:09, 62.41it/s]\n",
      "  0%|          | 0/225 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 14/225 [00:00<00:01, 134.30it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.3682\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 30/225 [00:00<00:01, 140.06it/s]\u001b[A\n",
      " 20%|██        | 46/225 [00:00<00:01, 143.13it/s]\u001b[A\n",
      " 27%|██▋       | 61/225 [00:00<00:01, 144.72it/s]\u001b[A\n",
      " 34%|███▍      | 76/225 [00:00<00:01, 145.21it/s]\u001b[A\n",
      " 40%|████      | 91/225 [00:00<00:00, 144.88it/s]\u001b[A\n",
      " 47%|████▋     | 106/225 [00:00<00:00, 145.96it/s]\u001b[A\n",
      " 54%|█████▍    | 122/225 [00:00<00:00, 148.24it/s]\u001b[A\n",
      " 61%|██████    | 137/225 [00:00<00:00, 147.73it/s]\u001b[A\n",
      " 68%|██████▊   | 153/225 [00:01<00:00, 150.68it/s]\u001b[A\n",
      " 75%|███████▍  | 168/225 [00:01<00:00, 148.28it/s]\u001b[A\n",
      " 81%|████████▏ | 183/225 [00:01<00:00, 145.25it/s]\u001b[A\n",
      " 88%|████████▊ | 198/225 [00:01<00:00, 146.49it/s]\u001b[A\n",
      "100%|██████████| 225/225 [00:01<00:00, 147.77it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 14400 dev_label_list: 14400 example_id_list: 14400\n",
      "num: 14400\n",
      "n: 2855\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 77%|███████▋  | 2003/2585 [00:41<01:12,  8.01it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3075306479859895\n",
      "f1: 0.7263216857087275\n",
      "Test Loss: 0.008010, Acc: 0.753750\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 78%|███████▊  | 2010/2585 [00:41<00:11, 48.19it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-80-6a701f163f8b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent7verb3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-77-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     47\u001b[0m                             \u001b[0mqu_article_power_sum\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     48\u001b[0m                             \u001b[0mcore_article_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m                             \u001b[0mqu_article_info_sum\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     50\u001b[0m \u001b[0;31m#                             data['flow_once_qu_article_power'].float().cuda(),\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m \u001b[0;31m#                             data['flow_once_qu_core_power'].float().cuda(),\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m    545\u001b[0m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    546\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    548\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-75-922d2b5614c2>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, core_graph, qu_graph, core_article_power, core_qu_info, core_qu_power, article_sum, qu_article_power_sum, core_article_info, qu_article_info_sum)\u001b[0m\n\u001b[1;32m     56\u001b[0m         \u001b[0mcore_sent_article_power\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcore_sent_article_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore_article_power_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcore_article_power\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     57\u001b[0m         \u001b[0mcontext_power\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcontext_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marticle_sum_power_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marticle_sum\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m         \u001b[0mqu_article_power\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mqu_article_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mqu_article_power_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqu_article_power_sum\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     59\u001b[0m         \u001b[0mcore_in_qu_power\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcore_in_qu_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcqp_power_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcore_qu_power\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m         \u001b[0mqu_in_core_power\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mqu_in_core_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcqi_info_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcore_qu_info\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m    575\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_load_state_dict_pre_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOrderedDict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    576\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 577\u001b[0;31m     \u001b[0;32mdef\u001b[0m \u001b[0m__getattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    578\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;34m'_parameters'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dict__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    579\u001b[0m             \u001b[0m_parameters\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dict__\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'_parameters'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Run label carried over from the original cell: sent7verb3\n",
    "# Start training; the recorded output of this cell shows periodic dev-set\n",
    "# evaluation (em / f1 / test loss / accuracy) while training progresses.\n",
    "epoch_num = 10  # kept as a notebook-level name; passed as the epoch count below\n",
    "train_code_confing(\n",
    "    train_loader,\n",
    "    dev_loader,\n",
    "    epoch_num,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 499/2877 [00:08<00:38, 62.44it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 15/239 [00:00<00:01, 148.56it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.455\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 30/239 [00:00<00:01, 148.91it/s]\u001b[A\n",
      " 19%|█▉        | 45/239 [00:00<00:01, 148.61it/s]\u001b[A\n",
      " 25%|██▌       | 60/239 [00:00<00:01, 147.17it/s]\u001b[A\n",
      " 32%|███▏      | 76/239 [00:00<00:01, 149.63it/s]\u001b[A\n",
      " 38%|███▊      | 92/239 [00:00<00:00, 151.10it/s]\u001b[A\n",
      " 45%|████▍     | 107/239 [00:00<00:00, 150.75it/s]\u001b[A\n",
      " 51%|█████▏    | 123/239 [00:00<00:00, 150.86it/s]\u001b[A\n",
      " 58%|█████▊    | 139/239 [00:00<00:00, 150.90it/s]\u001b[A\n",
      " 64%|██████▍   | 154/239 [00:01<00:00, 143.86it/s]\u001b[A\n",
      " 71%|███████   | 169/239 [00:01<00:00, 127.69it/s]\u001b[A\n",
      " 76%|███████▌  | 182/239 [00:01<00:00, 127.41it/s]\u001b[A\n",
      " 82%|████████▏ | 195/239 [00:01<00:00, 126.66it/s]\u001b[A\n",
      " 87%|████████▋ | 208/239 [00:01<00:00, 125.15it/s]\u001b[A\n",
      " 93%|█████████▎| 222/239 [00:01<00:00, 128.84it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 137.87it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 18%|█▊        | 506/2877 [00:11<05:11,  7.60it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2562844880441447\n",
      "f1: 0.7620648545541302\n",
      "Test Loss: 0.008521, Acc: 0.729668\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 997/2877 [00:18<00:30, 62.67it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 16/239 [00:00<00:01, 154.09it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5839\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 32/239 [00:00<00:01, 153.69it/s]\u001b[A\n",
      " 20%|█▉        | 47/239 [00:00<00:01, 151.66it/s]\u001b[A\n",
      " 26%|██▋       | 63/239 [00:00<00:01, 153.13it/s]\u001b[A\n",
      " 33%|███▎      | 80/239 [00:00<00:01, 155.43it/s]\u001b[A\n",
      " 41%|████      | 97/239 [00:00<00:00, 156.98it/s]\u001b[A\n",
      " 47%|████▋     | 112/239 [00:00<00:00, 154.10it/s]\u001b[A\n",
      " 54%|█████▍    | 129/239 [00:00<00:00, 155.96it/s]\u001b[A\n",
      " 61%|██████    | 145/239 [00:00<00:00, 155.94it/s]\u001b[A\n",
      " 67%|██████▋   | 161/239 [00:01<00:00, 155.35it/s]\u001b[A\n",
      " 74%|███████▍  | 177/239 [00:01<00:00, 154.16it/s]\u001b[A\n",
      " 81%|████████  | 193/239 [00:01<00:00, 155.08it/s]\u001b[A\n",
      " 87%|████████▋ | 209/239 [00:01<00:00, 154.39it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 153.98it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1004/2877 [00:21<03:56,  7.93it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29858982219497243\n",
      "f1: 0.6855558961782187\n",
      "Test Loss: 0.008503, Acc: 0.735356\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1494/2877 [00:29<00:20, 67.31it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 19/239 [00:00<00:01, 185.86it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5642\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 37/239 [00:00<00:01, 182.80it/s]\u001b[A\n",
      " 23%|██▎       | 55/239 [00:00<00:01, 180.96it/s]\u001b[A\n",
      " 31%|███       | 74/239 [00:00<00:00, 181.24it/s]\u001b[A\n",
      " 39%|███▉      | 93/239 [00:00<00:00, 179.05it/s]\u001b[A\n",
      " 45%|████▌     | 108/239 [00:00<00:00, 164.13it/s]\u001b[A\n",
      " 53%|█████▎    | 126/239 [00:00<00:00, 167.67it/s]\u001b[A\n",
      " 61%|██████    | 145/239 [00:00<00:00, 172.98it/s]\u001b[A\n",
      " 69%|██████▊   | 164/239 [00:00<00:00, 175.53it/s]\u001b[A\n",
      " 77%|███████▋  | 183/239 [00:01<00:00, 177.93it/s]\u001b[A\n",
      " 85%|████████▍ | 202/239 [00:01<00:00, 179.29it/s]\u001b[A\n",
      " 92%|█████████▏| 221/239 [00:01<00:00, 180.13it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 176.37it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1508/2877 [00:31<01:54, 11.99it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2792765174739424\n",
      "f1: 0.7634823314161204\n",
      "Test Loss: 0.008469, Acc: 0.739278\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1994/2877 [00:39<00:13, 64.26it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 17/239 [00:00<00:01, 168.47it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5739\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▍        | 35/239 [00:00<00:01, 169.75it/s]\u001b[A\n",
      " 22%|██▏       | 52/239 [00:00<00:01, 168.49it/s]\u001b[A\n",
      " 29%|██▉       | 70/239 [00:00<00:00, 171.02it/s]\u001b[A\n",
      " 36%|███▋      | 87/239 [00:00<00:00, 170.70it/s]\u001b[A\n",
      " 44%|████▍     | 105/239 [00:00<00:00, 171.05it/s]\u001b[A\n",
      " 51%|█████▏    | 123/239 [00:00<00:00, 172.12it/s]\u001b[A\n",
      " 59%|█████▉    | 141/239 [00:00<00:00, 172.76it/s]\u001b[A\n",
      " 67%|██████▋   | 159/239 [00:00<00:00, 173.82it/s]\u001b[A\n",
      " 74%|███████▎  | 176/239 [00:01<00:00, 153.90it/s]\u001b[A\n",
      " 81%|████████  | 194/239 [00:01<00:00, 158.68it/s]\u001b[A\n",
      " 88%|████████▊ | 211/239 [00:01<00:00, 160.72it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 166.49it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 70%|██████▉   | 2008/2877 [00:41<01:15, 11.51it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3120784794604537\n",
      "f1: 0.762197697390836\n",
      "Test Loss: 0.008195, Acc: 0.747646\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2497/2877 [00:49<00:05, 64.77it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 16/239 [00:00<00:01, 150.42it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.476\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 32/239 [00:00<00:01, 151.62it/s]\u001b[A\n",
      " 20%|██        | 48/239 [00:00<00:01, 152.40it/s]\u001b[A\n",
      " 27%|██▋       | 64/239 [00:00<00:01, 153.92it/s]\u001b[A\n",
      " 33%|███▎      | 79/239 [00:00<00:01, 152.09it/s]\u001b[A\n",
      " 40%|███▉      | 95/239 [00:00<00:00, 152.67it/s]\u001b[A\n",
      " 46%|████▋     | 111/239 [00:00<00:00, 154.63it/s]\u001b[A\n",
      " 53%|█████▎    | 127/239 [00:00<00:00, 156.00it/s]\u001b[A\n",
      " 60%|█████▉    | 143/239 [00:00<00:00, 155.19it/s]\u001b[A\n",
      " 67%|██████▋   | 159/239 [00:01<00:00, 155.18it/s]\u001b[A\n",
      " 73%|███████▎  | 175/239 [00:01<00:00, 155.89it/s]\u001b[A\n",
      " 80%|███████▉  | 191/239 [00:01<00:00, 156.99it/s]\u001b[A\n",
      " 87%|████████▋ | 207/239 [00:01<00:00, 154.58it/s]\u001b[A\n",
      " 93%|█████████▎| 223/239 [00:01<00:00, 152.46it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 153.68it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2504/2877 [00:51<00:46,  7.94it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.29245861434702636\n",
      "f1: 0.7633409730129598\n",
      "Test Loss: 0.008247, Acc: 0.745293\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2877/2877 [00:57<00:00, 49.93it/s]\n",
      "  8%|▊         | 19/239 [00:00<00:01, 183.71it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 239/239 [00:01<00:00, 186.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 7/2877 [00:00<00:42, 67.37it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3142244022072348\n",
      "f1: 0.7121867488053916\n",
      "Test Loss: 0.008229, Acc: 0.746535\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 499/2877 [00:07<00:36, 64.58it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 18/239 [00:00<00:01, 175.86it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.417\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 36/239 [00:00<00:01, 176.96it/s]\u001b[A\n",
      " 23%|██▎       | 55/239 [00:00<00:01, 179.17it/s]\u001b[A\n",
      " 31%|███       | 74/239 [00:00<00:00, 181.01it/s]\u001b[A\n",
      " 39%|███▉      | 93/239 [00:00<00:00, 181.89it/s]\u001b[A\n",
      " 46%|████▋     | 111/239 [00:00<00:00, 181.17it/s]\u001b[A\n",
      " 54%|█████▍    | 130/239 [00:00<00:00, 181.18it/s]\u001b[A\n",
      " 62%|██████▏   | 147/239 [00:00<00:00, 161.86it/s]\u001b[A\n",
      " 69%|██████▊   | 164/239 [00:00<00:00, 154.99it/s]\u001b[A\n",
      " 75%|███████▌  | 180/239 [00:01<00:00, 150.34it/s]\u001b[A\n",
      " 83%|████████▎ | 199/239 [00:01<00:00, 158.58it/s]\u001b[A\n",
      " 91%|█████████ | 218/239 [00:01<00:00, 164.75it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 168.84it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 18%|█▊        | 506/2877 [00:09<04:33,  8.66it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.2811158798283262\n",
      "f1: 0.7693692641577426\n",
      "Test Loss: 0.008453, Acc: 0.739997\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 997/2877 [00:17<00:28, 66.56it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 18/239 [00:00<00:01, 171.53it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5508\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 36/239 [00:00<00:01, 172.20it/s]\u001b[A\n",
      " 23%|██▎       | 54/239 [00:00<00:01, 172.35it/s]\u001b[A\n",
      " 30%|███       | 72/239 [00:00<00:00, 174.27it/s]\u001b[A\n",
      " 38%|███▊      | 90/239 [00:00<00:00, 173.67it/s]\u001b[A\n",
      " 45%|████▌     | 108/239 [00:00<00:00, 174.97it/s]\u001b[A\n",
      " 53%|█████▎    | 126/239 [00:00<00:00, 175.73it/s]\u001b[A\n",
      " 60%|██████    | 144/239 [00:00<00:00, 174.86it/s]\u001b[A\n",
      " 68%|██████▊   | 163/239 [00:00<00:00, 176.72it/s]\u001b[A\n",
      " 76%|███████▌  | 182/239 [00:01<00:00, 178.01it/s]\u001b[A\n",
      " 84%|████████▎ | 200/239 [00:01<00:00, 175.78it/s]\u001b[A\n",
      " 91%|█████████ | 218/239 [00:01<00:00, 176.04it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 175.20it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1004/2877 [00:19<03:31,  8.86it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3277130594727161\n",
      "f1: 0.7495773846019128\n",
      "Test Loss: 0.008060, Acc: 0.752811\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1494/2877 [00:27<00:20, 68.02it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|▊         | 18/239 [00:00<00:01, 179.62it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5565\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▌        | 36/239 [00:00<00:01, 178.33it/s]\u001b[A\n",
      " 23%|██▎       | 54/239 [00:00<00:01, 176.57it/s]\u001b[A\n",
      " 31%|███       | 73/239 [00:00<00:00, 177.74it/s]\u001b[A\n",
      " 38%|███▊      | 92/239 [00:00<00:00, 178.52it/s]\u001b[A\n",
      " 46%|████▌     | 110/239 [00:00<00:00, 178.56it/s]\u001b[A\n",
      " 53%|█████▎    | 126/239 [00:00<00:00, 161.75it/s]\u001b[A\n",
      " 61%|██████    | 145/239 [00:00<00:00, 167.58it/s]\u001b[A\n",
      " 68%|██████▊   | 163/239 [00:00<00:00, 171.08it/s]\u001b[A\n",
      " 76%|███████▌  | 182/239 [00:01<00:00, 174.34it/s]\u001b[A\n",
      " 84%|████████▎ | 200/239 [00:01<00:00, 174.26it/s]\u001b[A\n",
      " 91%|█████████ | 218/239 [00:01<00:00, 173.60it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 173.61it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 52%|█████▏    | 1501/2877 [00:29<02:35,  8.83it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.31851624770079706\n",
      "f1: 0.7564302258814872\n",
      "Test Loss: 0.008112, Acc: 0.751111\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1997/2877 [00:37<00:13, 63.04it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 17/239 [00:00<00:01, 167.57it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5594\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 34/239 [00:00<00:01, 167.29it/s]\u001b[A\n",
      " 21%|██▏       | 51/239 [00:00<00:01, 167.90it/s]\u001b[A\n",
      " 28%|██▊       | 68/239 [00:00<00:01, 168.01it/s]\u001b[A\n",
      " 36%|███▌      | 85/239 [00:00<00:00, 167.64it/s]\u001b[A\n",
      " 43%|████▎     | 102/239 [00:00<00:00, 167.92it/s]\u001b[A\n",
      " 50%|█████     | 120/239 [00:00<00:00, 168.69it/s]\u001b[A\n",
      " 57%|█████▋    | 136/239 [00:07<00:14,  7.29it/s] \u001b[A\n",
      " 64%|██████▎   | 152/239 [00:07<00:08, 10.21it/s]\u001b[A\n",
      " 70%|███████   | 168/239 [00:08<00:05, 14.20it/s]\u001b[A\n",
      " 77%|███████▋  | 184/239 [00:08<00:02, 19.53it/s]\u001b[A\n",
      " 83%|████████▎ | 199/239 [00:08<00:01, 26.42it/s]\u001b[A\n",
      " 90%|████████▉ | 214/239 [00:08<00:00, 35.03it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:08<00:00, 28.16it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n",
      "em: 0.3157572041692213\n",
      "f1: 0.7603060251282254\n",
      "Test Loss: 0.008127, Acc: 0.749869\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2493/2877 [00:54<00:05, 67.53it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 16/239 [00:00<00:01, 156.39it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4557\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 33/239 [00:00<00:01, 159.32it/s]\u001b[A\n",
      " 21%|██▏       | 51/239 [00:00<00:01, 162.62it/s]\u001b[A\n",
      " 28%|██▊       | 68/239 [00:00<00:01, 164.50it/s]\u001b[A\n",
      " 36%|███▌      | 85/239 [00:00<00:00, 166.04it/s]\u001b[A\n",
      " 42%|████▏     | 101/239 [00:00<00:00, 164.15it/s]\u001b[A\n",
      " 49%|████▉     | 118/239 [00:00<00:00, 164.42it/s]\u001b[A\n",
      " 56%|█████▋    | 135/239 [00:00<00:00, 163.71it/s]\u001b[A\n",
      " 64%|██████▎   | 152/239 [00:00<00:00, 164.06it/s]\u001b[A\n",
      " 71%|███████   | 169/239 [00:01<00:00, 163.58it/s]\u001b[A\n",
      " 78%|███████▊  | 186/239 [00:01<00:00, 163.77it/s]\u001b[A\n",
      " 85%|████████▍ | 203/239 [00:01<00:00, 163.98it/s]\u001b[A\n",
      " 92%|█████████▏| 221/239 [00:01<00:00, 166.02it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 164.80it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2500/2877 [00:57<00:46,  8.19it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.30073574494175354\n",
      "f1: 0.764783996574315\n",
      "Test Loss: 0.008219, Acc: 0.747450\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2877/2877 [01:02<00:00, 45.73it/s]\n",
      "  6%|▋         | 15/239 [00:00<00:01, 141.41it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 239/239 [00:01<00:00, 155.82it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2877 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3246474555487431\n",
      "f1: 0.7590089827061065\n",
      "Test Loss: 0.008110, Acc: 0.751438\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 497/2877 [00:07<00:36, 64.86it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 15/239 [00:00<00:01, 148.38it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.373\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 31/239 [00:00<00:01, 151.00it/s]\u001b[A\n",
      " 20%|█▉        | 47/239 [00:00<00:01, 151.47it/s]\u001b[A\n",
      " 27%|██▋       | 64/239 [00:00<00:01, 154.40it/s]\u001b[A\n",
      " 33%|███▎      | 79/239 [00:00<00:01, 152.83it/s]\u001b[A\n",
      " 40%|███▉      | 95/239 [00:00<00:00, 153.53it/s]\u001b[A\n",
      " 47%|████▋     | 112/239 [00:00<00:00, 156.68it/s]\u001b[A\n",
      " 54%|█████▍    | 129/239 [00:00<00:00, 158.01it/s]\u001b[A\n",
      " 61%|██████    | 146/239 [00:00<00:00, 159.27it/s]\u001b[A\n",
      " 68%|██████▊   | 163/239 [00:01<00:00, 160.78it/s]\u001b[A\n",
      " 75%|███████▍  | 179/239 [00:01<00:00, 159.29it/s]\u001b[A\n",
      " 82%|████████▏ | 195/239 [00:01<00:00, 159.28it/s]\u001b[A\n",
      " 89%|████████▊ | 212/239 [00:01<00:00, 161.17it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 158.28it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 18%|█▊        | 504/2877 [00:10<04:58,  7.95it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3038013488657266\n",
      "f1: 0.7659382207818802\n",
      "Test Loss: 0.008225, Acc: 0.748692\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 998/2877 [00:17<00:27, 68.63it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 17/239 [00:00<00:01, 165.89it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5455\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 34/239 [00:00<00:01, 166.20it/s]\u001b[A\n",
      " 22%|██▏       | 52/239 [00:00<00:01, 168.35it/s]\u001b[A\n",
      " 29%|██▉       | 70/239 [00:00<00:00, 171.25it/s]\u001b[A\n",
      " 37%|███▋      | 88/239 [00:00<00:00, 171.46it/s]\u001b[A\n",
      " 44%|████▍     | 106/239 [00:00<00:00, 171.19it/s]\u001b[A\n",
      " 52%|█████▏    | 124/239 [00:00<00:00, 172.15it/s]\u001b[A\n",
      " 59%|█████▉    | 142/239 [00:00<00:00, 171.99it/s]\u001b[A\n",
      " 67%|██████▋   | 161/239 [00:00<00:00, 174.37it/s]\u001b[A\n",
      " 75%|███████▍  | 179/239 [00:01<00:00, 174.33it/s]\u001b[A\n",
      " 82%|████████▏ | 196/239 [00:01<00:00, 158.14it/s]\u001b[A\n",
      " 90%|████████▉ | 214/239 [00:01<00:00, 161.79it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 168.69it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1005/2877 [00:20<03:48,  8.20it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3258736971183323\n",
      "f1: 0.7617064708573043\n",
      "Test Loss: 0.008117, Acc: 0.753334\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1493/2877 [00:27<00:19, 69.73it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 16/239 [00:00<00:01, 157.46it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5519\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 32/239 [00:00<00:01, 157.97it/s]\u001b[A\n",
      " 18%|█▊        | 44/239 [00:00<00:01, 142.88it/s]\u001b[A\n",
      " 26%|██▌       | 62/239 [00:00<00:01, 151.13it/s]\u001b[A\n",
      " 31%|███       | 74/239 [00:00<00:01, 139.23it/s]\u001b[A\n",
      " 38%|███▊      | 90/239 [00:00<00:01, 143.24it/s]\u001b[A\n",
      " 45%|████▍     | 107/239 [00:00<00:00, 149.74it/s]\u001b[A\n",
      " 52%|█████▏    | 124/239 [00:00<00:00, 155.16it/s]\u001b[A\n",
      " 59%|█████▉    | 142/239 [00:00<00:00, 160.20it/s]\u001b[A\n",
      " 67%|██████▋   | 160/239 [00:01<00:00, 164.88it/s]\u001b[A\n",
      " 74%|███████▍  | 178/239 [00:01<00:00, 167.46it/s]\u001b[A\n",
      " 82%|████████▏ | 196/239 [00:01<00:00, 168.99it/s]\u001b[A\n",
      " 90%|████████▉ | 214/239 [00:01<00:00, 170.10it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 161.62it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1505/2877 [00:30<02:03, 11.11it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.32893930104230534\n",
      "f1: 0.7504158029449303\n",
      "Test Loss: 0.008082, Acc: 0.753073\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1995/2877 [00:37<00:12, 68.35it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 15/239 [00:00<00:01, 148.73it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5571\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 32/239 [00:00<00:01, 152.75it/s]\u001b[A\n",
      " 20%|██        | 48/239 [00:00<00:01, 154.80it/s]\u001b[A\n",
      " 27%|██▋       | 65/239 [00:00<00:01, 158.44it/s]\u001b[A\n",
      " 34%|███▍      | 82/239 [00:00<00:00, 159.50it/s]\u001b[A\n",
      " 41%|████▏     | 99/239 [00:00<00:00, 160.97it/s]\u001b[A\n",
      " 49%|████▉     | 117/239 [00:00<00:00, 163.70it/s]\u001b[A\n",
      " 56%|█████▌    | 134/239 [00:00<00:00, 164.49it/s]\u001b[A\n",
      " 63%|██████▎   | 151/239 [00:00<00:00, 165.37it/s]\u001b[A\n",
      " 70%|███████   | 168/239 [00:01<00:00, 165.35it/s]\u001b[A\n",
      " 77%|███████▋  | 185/239 [00:01<00:00, 164.31it/s]\u001b[A\n",
      " 85%|████████▍ | 202/239 [00:01<00:00, 164.49it/s]\u001b[A\n",
      " 92%|█████████▏| 219/239 [00:01<00:00, 164.90it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 163.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 70%|██████▉   | 2002/2877 [00:40<01:46,  8.18it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3338442673206622\n",
      "f1: 0.7461249306589632\n",
      "Test Loss: 0.008082, Acc: 0.752288\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2499/2877 [00:47<00:05, 66.10it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▌         | 14/239 [00:00<00:01, 134.43it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4408\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 30/239 [00:00<00:01, 139.07it/s]\u001b[A\n",
      " 19%|█▉        | 46/239 [00:00<00:01, 143.64it/s]\u001b[A\n",
      " 26%|██▌       | 62/239 [00:00<00:01, 147.83it/s]\u001b[A\n",
      " 33%|███▎      | 78/239 [00:00<00:01, 142.49it/s]\u001b[A\n",
      " 38%|███▊      | 91/239 [00:00<00:01, 137.59it/s]\u001b[A\n",
      " 44%|████▍     | 106/239 [00:00<00:00, 140.39it/s]\u001b[A\n",
      " 51%|█████     | 122/239 [00:00<00:00, 144.14it/s]\u001b[A\n",
      " 58%|█████▊    | 138/239 [00:00<00:00, 147.52it/s]\u001b[A\n",
      " 64%|██████▍   | 154/239 [00:01<00:00, 150.43it/s]\u001b[A\n",
      " 71%|███████   | 170/239 [00:01<00:00, 152.61it/s]\u001b[A\n",
      " 78%|███████▊  | 186/239 [00:01<00:00, 151.80it/s]\u001b[A\n",
      " 85%|████████▍ | 202/239 [00:01<00:00, 148.37it/s]\u001b[A\n",
      " 91%|█████████ | 217/239 [00:01<00:00, 147.16it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 147.75it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 87%|████████▋ | 2506/2877 [00:50<00:47,  7.75it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.31882280809319435\n",
      "f1: 0.7596257530193816\n",
      "Test Loss: 0.008124, Acc: 0.752680\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2877/2877 [00:56<00:00, 51.29it/s]\n",
      "  6%|▋         | 15/239 [00:00<00:01, 145.48it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 239/239 [00:01<00:00, 162.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2877 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3237277743715512\n",
      "f1: 0.7554414469650571\n",
      "Test Loss: 0.008057, Acc: 0.753334\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 495/2877 [00:07<00:37, 63.28it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 13/239 [00:00<00:01, 122.22it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.3729\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 12%|█▏        | 28/239 [00:00<00:01, 128.93it/s]\u001b[A\n",
      " 18%|█▊        | 43/239 [00:00<00:01, 133.48it/s]\u001b[A\n",
      " 25%|██▌       | 60/239 [00:00<00:01, 141.02it/s]\u001b[A\n",
      " 32%|███▏      | 76/239 [00:00<00:01, 143.71it/s]\u001b[A\n",
      " 37%|███▋      | 89/239 [00:00<00:01, 126.62it/s]\u001b[A\n",
      " 44%|████▎     | 104/239 [00:00<00:01, 131.66it/s]\u001b[A\n",
      " 50%|████▉     | 119/239 [00:00<00:00, 136.40it/s]\u001b[A\n",
      " 56%|█████▋    | 135/239 [00:00<00:00, 142.23it/s]\u001b[A\n",
      " 63%|██████▎   | 151/239 [00:01<00:00, 146.09it/s]\u001b[A\n",
      " 70%|██████▉   | 167/239 [00:01<00:00, 149.56it/s]\u001b[A\n",
      " 76%|███████▌  | 182/239 [00:01<00:00, 147.26it/s]\u001b[A\n",
      " 83%|████████▎ | 198/239 [00:01<00:00, 149.16it/s]\u001b[A\n",
      " 90%|████████▉ | 215/239 [00:01<00:00, 152.60it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 146.88it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 17%|█▋        | 502/2877 [00:10<05:14,  7.54it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3157572041692213\n",
      "f1: 0.7611420104522553\n",
      "Test Loss: 0.008134, Acc: 0.751504\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 994/2877 [00:17<00:29, 64.30it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  6%|▋         | 15/239 [00:00<00:01, 145.88it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5325\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 13%|█▎        | 31/239 [00:00<00:01, 148.46it/s]\u001b[A\n",
      " 20%|█▉        | 47/239 [00:00<00:01, 151.23it/s]\u001b[A\n",
      " 27%|██▋       | 64/239 [00:00<00:01, 154.34it/s]\u001b[A\n",
      " 33%|███▎      | 80/239 [00:00<00:01, 155.44it/s]\u001b[A\n",
      " 41%|████      | 97/239 [00:00<00:00, 157.82it/s]\u001b[A\n",
      " 47%|████▋     | 113/239 [00:00<00:00, 157.85it/s]\u001b[A\n",
      " 54%|█████▍    | 129/239 [00:00<00:00, 158.27it/s]\u001b[A\n",
      " 61%|██████    | 145/239 [00:00<00:00, 158.11it/s]\u001b[A\n",
      " 68%|██████▊   | 162/239 [00:01<00:00, 159.95it/s]\u001b[A\n",
      " 75%|███████▍  | 179/239 [00:01<00:00, 159.01it/s]\u001b[A\n",
      " 82%|████████▏ | 195/239 [00:01<00:00, 157.30it/s]\u001b[A\n",
      " 89%|████████▊ | 212/239 [00:01<00:00, 160.02it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 158.51it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 35%|███▍      | 1001/2877 [00:22<06:40,  4.68it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.32924586143470264\n",
      "f1: 0.7607794906231495\n",
      "Test Loss: 0.008111, Acc: 0.754772\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 52%|█████▏    | 1498/2877 [00:30<00:20, 66.28it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 17/239 [00:00<00:01, 161.83it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.554\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 34/239 [00:00<00:01, 162.41it/s]\u001b[A\n",
      " 21%|██▏       | 51/239 [00:00<00:01, 164.13it/s]\u001b[A\n",
      " 29%|██▉       | 69/239 [00:00<00:01, 167.11it/s]\u001b[A\n",
      " 36%|███▌      | 86/239 [00:00<00:00, 167.32it/s]\u001b[A\n",
      " 43%|████▎     | 103/239 [00:00<00:00, 168.04it/s]\u001b[A\n",
      " 50%|█████     | 120/239 [00:00<00:00, 168.39it/s]\u001b[A\n",
      " 58%|█████▊    | 138/239 [00:00<00:00, 169.88it/s]\u001b[A\n",
      " 65%|██████▌   | 156/239 [00:00<00:00, 170.29it/s]\u001b[A\n",
      " 72%|███████▏  | 173/239 [00:01<00:00, 169.46it/s]\u001b[A\n",
      " 79%|███████▉  | 190/239 [00:01<00:00, 169.20it/s]\u001b[A\n",
      " 87%|████████▋ | 208/239 [00:01<00:00, 170.25it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 169.58it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 52%|█████▏    | 1505/2877 [00:32<02:44,  8.36it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3277130594727161\n",
      "f1: 0.7517211176316061\n",
      "Test Loss: 0.008130, Acc: 0.751765\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 69%|██████▉   | 1994/2877 [00:40<00:12, 68.50it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 17/239 [00:00<00:01, 163.90it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2000, loss: 0.5517\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 15%|█▍        | 35/239 [00:00<00:01, 166.00it/s]\u001b[A\n",
      " 22%|██▏       | 52/239 [00:00<00:01, 166.29it/s]\u001b[A\n",
      " 29%|██▉       | 70/239 [00:00<00:00, 169.12it/s]\u001b[A\n",
      " 35%|███▍      | 83/239 [00:00<00:01, 132.03it/s]\u001b[A\n",
      " 41%|████      | 97/239 [00:00<00:01, 133.07it/s]\u001b[A\n",
      " 47%|████▋     | 113/239 [00:00<00:00, 139.40it/s]\u001b[A\n",
      " 54%|█████▍    | 129/239 [00:00<00:00, 143.01it/s]\u001b[A\n",
      " 61%|██████    | 145/239 [00:00<00:00, 146.87it/s]\u001b[A\n",
      " 67%|██████▋   | 161/239 [00:01<00:00, 149.82it/s]\u001b[A\n",
      " 74%|███████▍  | 177/239 [00:01<00:00, 150.22it/s]\u001b[A\n",
      " 81%|████████  | 193/239 [00:01<00:00, 150.97it/s]\u001b[A\n",
      " 87%|████████▋ | 209/239 [00:01<00:00, 152.94it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 151.17it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      " 70%|██████▉   | 2001/2877 [00:43<01:54,  7.65it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.33108522378908645\n",
      "f1: 0.7543935456872355\n",
      "Test Loss: 0.008074, Acc: 0.753923\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 87%|████████▋ | 2498/2877 [00:51<00:06, 62.02it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▍         | 11/239 [00:00<00:02, 102.61it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 2500, loss: 0.4385\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|█         | 24/239 [00:00<00:01, 108.69it/s]\u001b[A\n",
      " 15%|█▌        | 37/239 [00:00<00:01, 111.97it/s]\u001b[A\n",
      " 21%|██▏       | 51/239 [00:00<00:01, 118.38it/s]\u001b[A\n",
      " 27%|██▋       | 65/239 [00:00<00:01, 123.96it/s]\u001b[A\n",
      " 33%|███▎      | 78/239 [00:00<00:01, 125.04it/s]\u001b[A\n",
      " 39%|███▉      | 93/239 [00:00<00:01, 129.29it/s]\u001b[A\n",
      " 44%|████▍     | 106/239 [00:00<00:01, 128.90it/s]\u001b[A\n",
      " 51%|█████     | 121/239 [00:00<00:00, 132.09it/s]\u001b[A\n",
      " 57%|█████▋    | 136/239 [00:01<00:00, 135.43it/s]\u001b[A\n",
      " 63%|██████▎   | 151/239 [00:01<00:00, 137.84it/s]\u001b[A\n",
      " 69%|██████▉   | 165/239 [00:01<00:00, 138.27it/s]\u001b[A\n",
      " 75%|███████▍  | 179/239 [00:01<00:00, 137.09it/s]\u001b[A\n",
      " 81%|████████  | 193/239 [00:01<00:00, 136.60it/s]\u001b[A\n",
      " 87%|████████▋ | 207/239 [00:01<00:00, 137.58it/s]\u001b[A\n",
      " 92%|█████████▏| 221/239 [00:01<00:00, 137.90it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 133.62it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n",
      "em: 0.31667688534641325\n",
      "f1: 0.7608082002472005\n",
      "Test Loss: 0.008159, Acc: 0.750785\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2877/2877 [01:00<00:00, 47.77it/s]\n",
      "  5%|▌         | 12/239 [00:00<00:01, 113.90it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 239/239 [00:01<00:00, 144.14it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "  0%|          | 0/2877 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.3234212139791539\n",
      "f1: 0.7345364612225481\n",
      "Test Loss: 0.008127, Acc: 0.751504\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 17%|█▋        | 495/2877 [00:07<00:40, 59.09it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 12/239 [00:00<00:01, 117.83it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.3893\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 11%|█▏        | 27/239 [00:00<00:01, 125.25it/s]\u001b[A\n",
      " 18%|█▊        | 42/239 [00:00<00:01, 130.68it/s]\u001b[A\n",
      " 25%|██▍       | 59/239 [00:00<00:01, 140.05it/s]\u001b[A\n",
      " 31%|███       | 74/239 [00:00<00:01, 142.24it/s]\u001b[A\n",
      " 37%|███▋      | 89/239 [00:00<00:01, 144.21it/s]\u001b[A\n",
      " 44%|████▎     | 104/239 [00:00<00:00, 143.79it/s]\u001b[A\n",
      " 50%|█████     | 120/239 [00:00<00:00, 146.75it/s]\u001b[A\n",
      " 57%|█████▋    | 137/239 [00:00<00:00, 150.07it/s]\u001b[A\n",
      " 64%|██████▍   | 154/239 [00:01<00:00, 153.73it/s]\u001b[A\n",
      " 71%|███████   | 170/239 [00:01<00:00, 155.52it/s]\u001b[A\n",
      " 78%|███████▊  | 186/239 [00:01<00:00, 155.65it/s]\u001b[A\n",
      " 85%|████████▍ | 202/239 [00:01<00:00, 156.62it/s]\u001b[A\n",
      " 91%|█████████ | 218/239 [00:01<00:00, 156.67it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 152.33it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 18%|█▊        | 509/2877 [00:10<03:46, 10.48it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.32127529123237275\n",
      "f1: 0.7555227097992328\n",
      "Test Loss: 0.008088, Acc: 0.752027\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 994/2877 [00:18<00:25, 74.57it/s]\n",
      "  0%|          | 0/239 [00:00<?, ?it/s]\u001b[A\n",
      "  5%|▌         | 13/239 [00:00<00:01, 121.59it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5298\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 10%|█         | 25/239 [00:00<00:01, 120.41it/s]\u001b[A\n",
      " 17%|█▋        | 40/239 [00:00<00:01, 126.29it/s]\u001b[A\n",
      " 24%|██▍       | 57/239 [00:00<00:01, 135.97it/s]\u001b[A\n",
      " 31%|███       | 73/239 [00:00<00:01, 140.18it/s]\u001b[A\n",
      " 37%|███▋      | 89/239 [00:00<00:01, 144.84it/s]\u001b[A\n",
      " 44%|████▍     | 105/239 [00:00<00:00, 146.73it/s]\u001b[A\n",
      " 51%|█████     | 121/239 [00:00<00:00, 149.68it/s]\u001b[A\n",
      " 57%|█████▋    | 137/239 [00:00<00:00, 152.12it/s]\u001b[A\n",
      " 64%|██████▍   | 153/239 [00:01<00:00, 151.81it/s]\u001b[A\n",
      " 71%|███████   | 169/239 [00:01<00:00, 152.00it/s]\u001b[A\n",
      " 77%|███████▋  | 185/239 [00:01<00:00, 152.82it/s]\u001b[A\n",
      " 84%|████████▍ | 201/239 [00:01<00:00, 150.57it/s]\u001b[A\n",
      " 91%|█████████ | 218/239 [00:01<00:00, 154.09it/s]\u001b[A\n",
      "100%|██████████| 239/239 [00:01<00:00, 149.61it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 15296 dev_label_list: 15296 example_id_list: 15296\n",
      "num: 15296\n",
      "n: 3262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 35%|███▍      | 1002/2877 [00:21<00:39, 47.55it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "em: 0.33016554261189457\n",
      "f1: 0.7619288487927407\n",
      "Test Loss: 0.008118, Acc: 0.754576\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-62-9182c0f793f3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent6verb4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-59-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m                 \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     79\u001b[0m                 \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m                 \u001b[0mepoch\u001b[0m\u001b[0;34m+=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m     91\u001b[0m     Variable._execution_engine.run_backward(\n\u001b[1;32m     92\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m         allow_unreachable=True)  # allow_unreachable flag\n\u001b[0m\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Run tag: sent6verb4 (presumably sent_num=6, verb_num=4; confirm against the model/dataset configuration)\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██▌       | 494/1967 [00:07<00:21, 67.63it/s]\n",
      "  0%|          | 0/168 [00:00<?, ?it/s]\u001b[A\n",
      " 10%|▉         | 16/168 [00:00<00:00, 159.44it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.5191\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 20%|█▉        | 33/168 [00:00<00:00, 161.11it/s]\u001b[A\n",
      " 29%|██▉       | 49/168 [00:00<00:00, 158.53it/s]\u001b[A\n",
      " 39%|███▉      | 66/168 [00:00<00:00, 156.61it/s]\u001b[A\n",
      " 48%|████▊     | 80/168 [00:00<00:00, 149.27it/s]\u001b[A\n",
      " 58%|█████▊    | 97/168 [00:00<00:00, 152.95it/s]\u001b[A\n",
      " 67%|██████▋   | 113/168 [00:00<00:00, 153.54it/s]\u001b[A\n",
      " 77%|███████▋  | 130/168 [00:00<00:00, 155.75it/s]\u001b[A\n",
      " 87%|████████▋ | 146/168 [00:00<00:00, 156.47it/s]\u001b[A\n",
      "100%|██████████| 168/168 [00:01<00:00, 155.39it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 10752 dev_label_list: 10752 example_id_list: 10752\n",
      "num: 10752\n",
      "n: 2336\n",
      "em: 0.3043664383561644\n",
      "f1: 0.6810267857142863\n",
      "Test Loss: 0.008754, Acc: 0.735026\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 51%|█████     | 998/1967 [00:17<00:16, 59.14it/s]\n",
      "  0%|          | 0/168 [00:00<?, ?it/s]\u001b[A\n",
      " 11%|█▏        | 19/168 [00:00<00:00, 180.65it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.5009\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 21%|██        | 35/168 [00:00<00:00, 172.53it/s]\u001b[A\n",
      " 30%|███       | 51/168 [00:00<00:00, 168.33it/s]\u001b[A\n",
      " 41%|████      | 69/168 [00:00<00:00, 169.59it/s]\u001b[A\n",
      " 52%|█████▏    | 87/168 [00:00<00:00, 171.26it/s]\u001b[A\n",
      " 62%|██████▎   | 105/168 [00:00<00:00, 171.39it/s]\u001b[A\n",
      " 73%|███████▎  | 122/168 [00:00<00:00, 170.77it/s]\u001b[A\n",
      " 83%|████████▎ | 140/168 [00:00<00:00, 171.35it/s]\u001b[A\n",
      "100%|██████████| 168/168 [00:00<00:00, 170.29it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 10752 dev_label_list: 10752 example_id_list: 10752\n",
      "num: 10752\n",
      "n: 2336\n",
      "em: 0.3377568493150685\n",
      "f1: 0.7485305908893223\n",
      "Test Loss: 0.008145, Acc: 0.751023\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 76%|███████▌  | 1499/1967 [00:26<00:07, 65.53it/s]\n",
      "  0%|          | 0/168 [00:00<?, ?it/s]\u001b[A\n",
      " 11%|█         | 18/168 [00:00<00:00, 171.34it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1500, loss: 0.5506\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 21%|██▏       | 36/168 [00:00<00:00, 172.85it/s]\u001b[A\n",
      " 32%|███▏      | 54/168 [00:00<00:00, 174.25it/s]\u001b[A\n",
      " 43%|████▎     | 72/168 [00:00<00:00, 175.57it/s]\u001b[A\n",
      " 54%|█████▍    | 91/168 [00:00<00:00, 177.22it/s]\u001b[A\n",
      " 65%|██████▌   | 110/168 [00:00<00:00, 179.46it/s]\u001b[A\n",
      " 77%|███████▋  | 129/168 [00:00<00:00, 181.39it/s]\u001b[A\n",
      " 88%|████████▊ | 148/168 [00:00<00:00, 182.31it/s]\u001b[A\n",
      "100%|██████████| 168/168 [00:00<00:00, 180.14it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 10752 dev_label_list: 10752 example_id_list: 10752\n",
      "num: 10752\n",
      "n: 2336\n",
      "em: 0.3116438356164384\n",
      "f1: 0.7665270303326787\n",
      "Test Loss: 0.008288, Acc: 0.745257\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1967/1967 [00:34<00:00, 56.48it/s]\n",
      " 11%|█▏        | 19/168 [00:00<00:00, 185.87it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 168/168 [00:00<00:00, 189.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 10752 dev_label_list: 10752 example_id_list: 10752\n",
      "num: 10752\n",
      "n: 2336\n",
      "em: 0.3261986301369863\n",
      "f1: 0.7145694036747116\n",
      "Test Loss: 0.008276, Acc: 0.745815\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/baiyq/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/serialization.py:256: UserWarning: Couldn't retrieve source code for container of type conv_pos_power_net. It won't be checked for correctness upon loading.\n",
      "  \"type \" + obj.__name__ + \". It won't be checked \"\n",
      "/home/baiyq/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/serialization.py:256: UserWarning: Couldn't retrieve source code for container of type conv_one_sent_layer. It won't be checked for correctness upon loading.\n",
      "  \"type \" + obj.__name__ + \". It won't be checked \"\n",
      "/home/baiyq/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/serialization.py:256: UserWarning: Couldn't retrieve source code for container of type conv_article_layer. It won't be checked for correctness upon loading.\n",
      "  \"type \" + obj.__name__ + \". It won't be checked \"\n",
      " 25%|██▌       | 499/1967 [00:07<00:21, 67.56it/s]\n",
      "  0%|          | 0/168 [00:00<?, ?it/s]\u001b[A\n",
      " 11%|█         | 18/168 [00:00<00:00, 176.64it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 500, loss: 0.4959\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 21%|██▏       | 36/168 [00:00<00:00, 176.45it/s]\u001b[A\n",
      " 32%|███▏      | 54/168 [00:00<00:00, 176.38it/s]\u001b[A\n",
      " 43%|████▎     | 72/168 [00:00<00:00, 175.97it/s]\u001b[A\n",
      " 54%|█████▍    | 91/168 [00:00<00:00, 177.26it/s]\u001b[A\n",
      " 65%|██████▌   | 110/168 [00:00<00:00, 179.79it/s]\u001b[A\n",
      " 76%|███████▌  | 128/168 [00:00<00:00, 179.75it/s]\u001b[A\n",
      " 87%|████████▋ | 146/168 [00:00<00:00, 177.42it/s]\u001b[A\n",
      "100%|██████████| 168/168 [00:00<00:00, 176.97it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 10752 dev_label_list: 10752 example_id_list: 10752\n",
      "num: 10752\n",
      "n: 2336\n",
      "em: 0.3257705479452055\n",
      "f1: 0.7426444607523355\n",
      "Test Loss: 0.008127, Acc: 0.747024\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 51%|█████     | 995/1967 [00:16<00:15, 62.70it/s]\n",
      "  0%|          | 0/168 [00:00<?, ?it/s]\u001b[A\n",
      "  7%|▋         | 11/168 [00:00<00:01, 105.06it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 1000, loss: 0.4791\n",
      "load_function\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 14%|█▍        | 24/168 [00:00<00:01, 109.76it/s]\u001b[A\n",
      " 24%|██▍       | 41/168 [00:00<00:01, 122.17it/s]\u001b[A\n",
      " 35%|███▍      | 58/168 [00:00<00:00, 132.15it/s]\u001b[A\n",
      " 43%|████▎     | 73/168 [00:00<00:00, 133.93it/s]\u001b[A\n",
      " 52%|█████▏    | 87/168 [00:00<00:00, 134.58it/s]\u001b[A\n",
      " 62%|██████▏   | 104/168 [00:00<00:00, 141.78it/s]\u001b[A\n",
      " 72%|███████▏  | 121/168 [00:00<00:00, 146.25it/s]\u001b[A\n",
      " 82%|████████▏ | 138/168 [00:00<00:00, 150.96it/s]\u001b[A\n",
      "100%|██████████| 168/168 [00:01<00:00, 148.31it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "predict_label_list: 10752 dev_label_list: 10752 example_id_list: 10752\n",
      "num: 10752\n",
      "n: 2336\n",
      "em: 0.336472602739726\n",
      "f1: 0.7529218308327883\n",
      "Test Loss: 0.008082, Acc: 0.750837\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 66%|██████▋   | 1306/1967 [00:23<00:11, 55.90it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-36-652b4a757c1f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#sent6verb3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mepoch_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtrain_code_confing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdev_loader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepoch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-33-36cf930af26f>\u001b[0m in \u001b[0;36mtrain_code_confing\u001b[0;34m(train_loader, dev_loader, epoch_num)\u001b[0m\n\u001b[1;32m     47\u001b[0m                             \u001b[0mqu_article_power_sum\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     48\u001b[0m                             \u001b[0mcore_article_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m                             \u001b[0mqu_article_info_sum\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     50\u001b[0m \u001b[0;31m#                             data['flow_once_qu_article_power'].float().cuda(),\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m \u001b[0;31m#                             data['flow_once_qu_core_power'].float().cuda(),\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m    545\u001b[0m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    546\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    548\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-17-974d6f1e6503>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, core_graph, qu_graph, core_article_power, core_qu_info, core_qu_power, article_sum, qu_article_power_sum, core_article_info, qu_article_info_sum)\u001b[0m\n\u001b[1;32m     59\u001b[0m         \u001b[0mcore_in_qu_power\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcore_in_qu_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcqp_power_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcore_qu_power\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m         \u001b[0mqu_in_core_power\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mqu_in_core_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcqi_info_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcore_qu_info\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m         \u001b[0mcore_sent_article_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcore_sent_article_pos_i_info\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore_article_info_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcore_article_info\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     62\u001b[0m         \u001b[0mqu_article_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mqu_article_pos_i_info\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mqu_article_info_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqu_article_info_sum\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m \u001b[0;31m#         flow_once_article,flow_once_article_pos = self.flow_once_qu_article_power_conv(flow_once_qu_article_power)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m    545\u001b[0m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    546\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    548\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-12-acaba342a1f2>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, sent_graph)\u001b[0m\n\u001b[1;32m     48\u001b[0m                 sent_graph,):\n\u001b[1;32m     49\u001b[0m         \u001b[0;31m#print(\"self.sent_size\",self.sent_size)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 50\u001b[0;31m         \u001b[0marticle_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marticle_power_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msent_graph\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msent_num\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msent_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     51\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     52\u001b[0m         \u001b[0ma_pos_i_power\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpos_power_conv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msent_graph\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msent_num\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverb_num\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m25\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m    545\u001b[0m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    546\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    548\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/container.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m     90\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     91\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_modules\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m             \u001b[0minput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     93\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m    545\u001b[0m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    546\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    548\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/conv.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m    341\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    342\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 343\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconv2d_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    344\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    345\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mConv3d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_ConvNd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.conda/envs/learn_py3/lib/python3.7/site-packages/torch/nn/modules/conv.py\u001b[0m in \u001b[0;36mconv2d_forward\u001b[0;34m(self, input, weight)\u001b[0m\n\u001b[1;32m    338\u001b[0m                             _pair(0), self.dilation, self.groups)\n\u001b[1;32m    339\u001b[0m         return F.conv2d(input, weight, self.bias, self.stride,\n\u001b[0;32m--> 340\u001b[0;31m                         self.padding, self.dilation, self.groups)\n\u001b[0m\u001b[1;32m    341\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    342\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#sent6verb3\n",
    "epoch_num = 10\n",
    "train_code_confing(train_loader,dev_loader,epoch_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "learn_py3",
   "language": "python",
   "name": "learn_py3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
