{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import math\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from torch.optim import SGD, RMSprop\n",
    "from torchvision import datasets, transforms\n",
    "dtype = torch.cuda.FloatTensor\n",
    "\n",
    "from src.components import *\n",
    "from src.optimizers import *\n",
    "from sklearn import preprocessing\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<torch._C.Generator at 0x7f08ad50be10>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pickle\n",
    "\n",
    "# One shared seed for the stdlib, NumPy and PyTorch random generators.\n",
    "SEED = 2\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "# Kept as the final expression: the cell displays the Generator it returns.\n",
    "torch.manual_seed(SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Files already downloaded and verified\n",
      "Files already downloaded and verified\n"
     ]
    }
   ],
   "source": [
    "#https://towardsdatascience.com/handwritten-digit-mnist-pytorch-977b5338e627\n",
    "# dev = torch.device('cpu')\n",
    "# Device choice: keep the original GPU 7 when the host has it; otherwise fall back\n",
    "# to the first GPU, or to the CPU, instead of failing on an invalid device index.\n",
    "if torch.cuda.device_count() > 7:\n",
    "    dev = torch.device('cuda:7')\n",
    "elif torch.cuda.is_available():\n",
    "    dev = torch.device('cuda:0')\n",
    "else:\n",
    "    dev = torch.device('cpu')\n",
    "\n",
    "# CIFAR-10 batches are 3-channel (see the printed shape in the next cell), so the\n",
    "# normalisation statistics are spelled out once per channel.\n",
    "transform = transforms.Compose([\n",
    "    transforms.ToTensor(),\n",
    "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n",
    "])\n",
    "\n",
    "trainset = datasets.CIFAR10('PATH_TO_STORE_TRAINSET', download=True, train=True, transform=transform)\n",
    "valset = datasets.CIFAR10('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)\n",
    "\n",
    "# Replace each dataset by a list of (image, label) pairs that already live on `dev`.\n",
    "trainset = [(x.to(dev), torch.tensor(y, device=dev)) for x,y in trainset]\n",
    "valset = [(x.to(dev), torch.tensor(y, device=dev)) for x,y in valset]\n",
    "\n",
    "trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)\n",
    "# NOTE(review): the evaluation loader is shuffled as well; kept as in the original run.\n",
    "testloader = torch.utils.data.DataLoader(valset, batch_size=128, shuffle=True)\n",
    "N = len(trainset) #for training\n",
    "# N = len(valset) #for testing\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([128, 3, 32, 32])\n",
      "torch.Size([128])\n"
     ]
    }
   ],
   "source": [
    "# Pull a single batch from the test loader to sanity-check the tensor shapes.\n",
    "dataiter = iter(testloader)\n",
    "# Use the built-in next(): it is the standard iterator protocol, whereas the\n",
    "# iterator's own .next() method is not available in newer PyTorch releases.\n",
    "images, labels = next(dataiter)\n",
    "\n",
    "print(images.shape)\n",
    "print(labels.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stage 1: run the model with SGD in order to find the MAP solution.\n",
    "# Stage 2: run it again with SGLD in order to find the posterior distribution.\n",
    "\n",
    "# Architecture arguments. Every entry below is commented out, so this dict is\n",
    "# empty and adds nothing to `sgd_model_args`.\n",
    "model_arch_args = {\n",
    "#     \"num_inputs\": iter(trainloader).next()[0].shape[-1]*iter(trainloader).next()[0].shape[-2]*iter(trainloader).next()[0].shape[1],\n",
    "#     \"num_outputs\": 10,\n",
    "#     \"num_layers\": 2,\n",
    "#     \"hidden_sizes\": [50,50],\n",
    "#     \"activation_func\": nn.ReLU,#nn.Tanh, #nn.ReLU,\n",
    "#     \"chain_length\": 4000,\n",
    "#     \"stochastic_biases\": False,\n",
    "#     \"prior_std\": 0.3,\n",
    "#     \"output_distribution\": \"categorical\",\n",
    "#     \"output_dist_const_params\": dict(), #scale=1.0),\n",
    "}\n",
    "\n",
    "# Keyword arguments for the model constructor: grouping and dropout switches,\n",
    "# followed by whatever the architecture dict contributes.\n",
    "sgd_model_args = {\n",
    "    \"group_by_layers\": False,\n",
    "    \"use_random_groups\": False,\n",
    "    \"use_permuted_groups\": False,\n",
    "    \"max_groups\": None,\n",
    "    \"dropout_prob\": None,\n",
    "    **model_arch_args,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the network from the argument dict assembled in the previous cell.\n",
    "sgd_model = BayesianResNet20(**sgd_model_args)\n",
    "\n",
    "# Optimizer settings: RMSprop is the only optimizer flag set to True.\n",
    "optimizer_config = {\n",
    "    \"update_determ\": True,\n",
    "    \"update_stoch\": True,\n",
    "    \"lr\": 1e-3,  # commented-out alternatives in earlier versions: 1e-8, 1e-5\n",
    "    \"rmsprop\": True,\n",
    "    \"sgd\": False,\n",
    "    \"sgld\": False,\n",
    "    \"psgld\": False,\n",
    "}\n",
    "sgd_model.initialize_optimizer(**optimizer_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import pickle\n",
    "# pickle.dump(sgd_model_args, open(\"./resnet20_sgd_model_params.pickle\", \"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# dev = torch.device('cpu')\n",
    "\n",
    "# for images, labels in trainloader:\n",
    "#     images = images.to(dev)\n",
    "#     labels = labels.to(dev)\n",
    "\n",
    "#trainset = [(x.to(dev), torch.tensor(y, device=dev).unsqueeze(0)) for x,y in trainloader]\n",
    "\n",
    "# Put the model on the selected device.\n",
    "sgd_model = sgd_model.to(dev)\n",
    "\n",
    "# Also move the prior's loc and scale of every StochasticTensor to `dev`.\n",
    "for _name, tensor in sgd_model.tensor_dict.items():\n",
    "    if not isinstance(tensor, StochasticTensor):\n",
    "        continue\n",
    "    prior = tensor.prior_dist\n",
    "    prior.loc = prior.loc.to(dev)\n",
    "    prior.scale = prior.scale.to(dev)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for SGD\n",
    "def evaluation(lvi_model, testloader):\n",
    "    \"\"\"Evaluate `lvi_model` on every batch of `testloader` and print a summary.\n",
    "\n",
    "    Reads the notebook-level globals `N` (forwarded to `lvi_model.evaluate`)\n",
    "    and `criterion` (applied to predictions and labels for the CrossEntropy figure).\n",
    "\n",
    "    Args:\n",
    "        lvi_model: object whose `evaluate(batch=, N=, num_samples=,\n",
    "            deterministic_weights=)` returns a `(loss, y_pred)` pair.\n",
    "        testloader: iterable yielding `(images, labels)` batches.\n",
    "\n",
    "    Returns:\n",
    "        The unweighted mean of the per-batch accuracies.\n",
    "    \"\"\"\n",
    "    losses = []\n",
    "    cross_losses = []\n",
    "    accuracy = []\n",
    "\n",
    "    # Evaluation only: without no_grad every stored metric would keep its\n",
    "    # autograd graph alive for the whole pass over the loader.\n",
    "    with torch.no_grad():\n",
    "        for images, labels in testloader:\n",
    "            # Evaluate the model that was passed in, not the global `sgd_model`.\n",
    "            loss, y_pred = lvi_model.evaluate(batch=(images, labels),\n",
    "                                              N=N,\n",
    "                                              num_samples=None,\n",
    "                                              deterministic_weights=True)\n",
    "\n",
    "            # presumably drops a leading sample dimension of size 1 - TODO confirm\n",
    "            preds = y_pred.squeeze(0)\n",
    "\n",
    "            losses.append(loss)\n",
    "            cross_losses.append(criterion(preds, labels))\n",
    "            hits = (torch.max(preds, -1).indices == labels).sum().item()\n",
    "            accuracy.append(hits / labels.size(0))\n",
    "\n",
    "    mean_accuracy = sum(accuracy)/len(accuracy)\n",
    "    print(\"EVALUATION with last weights -> Loss: {}, CrossEntropy: {}, Accuracy: {}\".format(\n",
    "        sum(losses)/len(losses), sum(cross_losses)/len(cross_losses), mean_accuracy))\n",
    "    return mean_accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 13.524899244308472\n",
      "Iter 1 / 2000, Loss: 4398190775.0, CrossEntropy: 1.7171968221664429, Accuracy: 0.3552030051150895\n",
      "EVALUATION with last weights -> Loss: 9929406.0, CrossEntropy: 1.5665708780288696, Accuracy: 0.42325949367088606\n",
      "Elapsed time for the training: 12.989330768585205\n",
      "Iter 2 / 2000, Loss: 3252694610.0, CrossEntropy: 1.2589025497436523, Accuracy: 0.5364849744245525\n",
      "EVALUATION with last weights -> Loss: 7275478.0, CrossEntropy: 1.1486332416534424, Accuracy: 0.5795094936708861\n",
      "Elapsed time for the training: 12.941123723983765\n",
      "Iter 3 / 2000, Loss: 2654937957.0, CrossEntropy: 1.019898533821106, Accuracy: 0.6322570332480818\n",
      "EVALUATION with last weights -> Loss: 6189925.5, CrossEntropy: 0.9818255305290222, Accuracy: 0.646064082278481\n",
      "Elapsed time for the training: 13.017444133758545\n",
      "Iter 4 / 2000, Loss: 2299312330.75, CrossEntropy: 0.877677321434021, Accuracy: 0.6852621483375959\n",
      "EVALUATION with last weights -> Loss: 7470824.0, CrossEntropy: 1.1775100231170654, Accuracy: 0.5976068037974683\n",
      "Elapsed time for the training: 12.973476886749268\n",
      "Iter 5 / 2000, Loss: 2058760002.25, CrossEntropy: 0.781343400478363, Accuracy: 0.7229379795396419\n",
      "EVALUATION with last weights -> Loss: 5137048.5, CrossEntropy: 0.8134243488311768, Accuracy: 0.7057950949367089\n",
      "Elapsed time for the training: 12.99532961845398\n",
      "Iter 6 / 2000, Loss: 1823519350.25, CrossEntropy: 0.6875643134117126, Accuracy: 0.7571970907928388\n",
      "EVALUATION with last weights -> Loss: 4797837.0, CrossEntropy: 0.759497880935669, Accuracy: 0.7383306962025317\n",
      "Elapsed time for the training: 12.934016942977905\n",
      "Iter 7 / 2000, Loss: 1639515603.0, CrossEntropy: 0.613893449306488, Accuracy: 0.7840273337595908\n",
      "EVALUATION with last weights -> Loss: 5052206.5, CrossEntropy: 0.7986828684806824, Accuracy: 0.7249802215189873\n",
      "Elapsed time for the training: 12.940274000167847\n",
      "Iter 8 / 2000, Loss: 1496198455.0, CrossEntropy: 0.5564942359924316, Accuracy: 0.8064817774936062\n",
      "EVALUATION with last weights -> Loss: 4295439.5, CrossEntropy: 0.6848605275154114, Accuracy: 0.7587025316455697\n",
      "Elapsed time for the training: 12.927564859390259\n",
      "Iter 9 / 2000, Loss: 1378438457.0, CrossEntropy: 0.5092350244522095, Accuracy: 0.8220068734015346\n",
      "EVALUATION with last weights -> Loss: 5243880.0, CrossEntropy: 0.8264549374580383, Accuracy: 0.721815664556962\n",
      "Elapsed time for the training: 12.94193959236145\n",
      "Iter 10 / 2000, Loss: 1262547055.25, CrossEntropy: 0.4630450904369354, Accuracy: 0.8386628836317136\n",
      "EVALUATION with last weights -> Loss: 4211485.5, CrossEntropy: 0.6655711531639099, Accuracy: 0.7729430379746836\n",
      "Elapsed time for the training: 12.92422866821289\n",
      "Iter 11 / 2000, Loss: 1156549138.125, CrossEntropy: 0.42051777243614197, Accuracy: 0.8531609654731458\n",
      "EVALUATION with last weights -> Loss: 5679021.5, CrossEntropy: 0.8989041447639465, Accuracy: 0.7201344936708861\n",
      "Elapsed time for the training: 12.969390392303467\n",
      "Iter 12 / 2000, Loss: 1066188540.0, CrossEntropy: 0.38440144062042236, Accuracy: 0.8664122442455242\n",
      "EVALUATION with last weights -> Loss: 4075403.75, CrossEntropy: 0.644809365272522, Accuracy: 0.790743670886076\n",
      "Elapsed time for the training: 12.9381742477417\n",
      "Iter 13 / 2000, Loss: 994884201.75, CrossEntropy: 0.35594192147254944, Accuracy: 0.8746203644501279\n",
      "EVALUATION with last weights -> Loss: 3944963.75, CrossEntropy: 0.6256103515625, Accuracy: 0.7971716772151899\n",
      "Elapsed time for the training: 13.009807825088501\n",
      "Iter 14 / 2000, Loss: 906387329.625, CrossEntropy: 0.3204457759857178, Accuracy: 0.8887547953964194\n",
      "EVALUATION with last weights -> Loss: 3954612.0, CrossEntropy: 0.621357798576355, Accuracy: 0.7955893987341772\n",
      "Elapsed time for the training: 12.863816022872925\n",
      "Iter 15 / 2000, Loss: 835513946.0, CrossEntropy: 0.29242417216300964, Accuracy: 0.899292679028133\n",
      "EVALUATION with last weights -> Loss: 6117730.5, CrossEntropy: 0.9615879654884338, Accuracy: 0.7217167721518988\n",
      "Elapsed time for the training: 12.957035541534424\n",
      "Iter 16 / 2000, Loss: 764721810.5625, CrossEntropy: 0.2639200687408447, Accuracy: 0.9079243925831203\n",
      "EVALUATION with last weights -> Loss: 4509737.5, CrossEntropy: 0.7116613984107971, Accuracy: 0.7763053797468354\n",
      "Elapsed time for the training: 13.674482107162476\n",
      "Iter 17 / 2000, Loss: 692792852.125, CrossEntropy: 0.23506776988506317, Accuracy: 0.9181945332480819\n",
      "EVALUATION with last weights -> Loss: 4516173.0, CrossEntropy: 0.7139465808868408, Accuracy: 0.7857990506329114\n",
      "Elapsed time for the training: 13.295588254928589\n",
      "Iter 18 / 2000, Loss: 657905828.75, CrossEntropy: 0.2210628241300583, Accuracy: 0.9215672953964195\n",
      "EVALUATION with last weights -> Loss: 3995800.0, CrossEntropy: 0.6312944293022156, Accuracy: 0.8102254746835443\n",
      "Elapsed time for the training: 12.954472541809082\n",
      "Iter 19 / 2000, Loss: 595965293.625, CrossEntropy: 0.19638484716415405, Accuracy: 0.93048273657289\n",
      "EVALUATION with last weights -> Loss: 4442553.5, CrossEntropy: 0.7000661492347717, Accuracy: 0.7953916139240507\n",
      "Elapsed time for the training: 12.92839241027832\n",
      "Iter 20 / 2000, Loss: 560442566.4375, CrossEntropy: 0.18203093111515045, Accuracy: 0.9358655690537084\n",
      "EVALUATION with last weights -> Loss: 4703949.0, CrossEntropy: 0.7394821643829346, Accuracy: 0.7885680379746836\n",
      "Elapsed time for the training: 12.934704303741455\n",
      "Iter 21 / 2000, Loss: 510538467.625, CrossEntropy: 0.16209712624549866, Accuracy: 0.9441935741687979\n",
      "EVALUATION with last weights -> Loss: 4861735.0, CrossEntropy: 0.7622621655464172, Accuracy: 0.794501582278481\n",
      "Elapsed time for the training: 12.944993257522583\n",
      "Iter 22 / 2000, Loss: 482754163.0625, CrossEntropy: 0.15109629929065704, Accuracy: 0.9465672953964195\n",
      "EVALUATION with last weights -> Loss: 5690417.5, CrossEntropy: 0.9112294316291809, Accuracy: 0.7782832278481012\n",
      "Elapsed time for the training: 13.243091583251953\n",
      "Iter 23 / 2000, Loss: 453333890.65625, CrossEntropy: 0.13922053575515747, Accuracy: 0.9499000959079283\n",
      "EVALUATION with last weights -> Loss: 4705546.0, CrossEntropy: 0.7418228387832642, Accuracy: 0.8021162974683544\n",
      "Elapsed time for the training: 13.622480869293213\n",
      "Iter 24 / 2000, Loss: 423727894.875, CrossEntropy: 0.1275118589401245, Accuracy: 0.9545476342710997\n",
      "EVALUATION with last weights -> Loss: 5278950.0, CrossEntropy: 0.8293377757072449, Accuracy: 0.8008306962025317\n",
      "Elapsed time for the training: 12.869224071502686\n",
      "Iter 25 / 2000, Loss: 400044551.875, CrossEntropy: 0.11788440495729446, Accuracy: 0.9596747122762149\n",
      "EVALUATION with last weights -> Loss: 5692100.5, CrossEntropy: 0.8971152305603027, Accuracy: 0.7902492088607594\n",
      "Elapsed time for the training: 12.919637441635132\n",
      "Iter 26 / 2000, Loss: 389730779.5, CrossEntropy: 0.11375484615564346, Accuracy: 0.959554827365729\n",
      "EVALUATION with last weights -> Loss: 5295546.5, CrossEntropy: 0.8323841691017151, Accuracy: 0.8008306962025317\n",
      "Elapsed time for the training: 12.876338481903076\n",
      "Iter 27 / 2000, Loss: 361424857.71875, CrossEntropy: 0.1025136187672615, Accuracy: 0.963766783887468\n",
      "EVALUATION with last weights -> Loss: 5391270.0, CrossEntropy: 0.8486900329589844, Accuracy: 0.7990506329113924\n",
      "Elapsed time for the training: 12.910828113555908\n",
      "Iter 28 / 2000, Loss: 355138634.65625, CrossEntropy: 0.09987287223339081, Accuracy: 0.9643222506393863\n",
      "EVALUATION with last weights -> Loss: 6155276.0, CrossEntropy: 0.9685182571411133, Accuracy: 0.7890625\n",
      "Elapsed time for the training: 13.537118434906006\n",
      "Iter 29 / 2000, Loss: 333099126.3125, CrossEntropy: 0.09112077206373215, Accuracy: 0.9684502877237852\n",
      "EVALUATION with last weights -> Loss: 5372224.5, CrossEntropy: 0.8501298427581787, Accuracy: 0.8024129746835443\n",
      "Elapsed time for the training: 13.722550392150879\n",
      "Iter 30 / 2000, Loss: 326674114.15625, CrossEntropy: 0.0885312482714653, Accuracy: 0.9687140345268541\n",
      "EVALUATION with last weights -> Loss: 6160373.0, CrossEntropy: 0.9708824157714844, Accuracy: 0.7851068037974683\n",
      "Elapsed time for the training: 12.894378185272217\n",
      "Iter 31 / 2000, Loss: 313347281.96875, CrossEntropy: 0.08316273242235184, Accuracy: 0.9704164002557545\n",
      "EVALUATION with last weights -> Loss: 6028483.5, CrossEntropy: 0.9459625482559204, Accuracy: 0.8010284810126582\n",
      "Elapsed time for the training: 12.927350759506226\n",
      "Iter 32 / 2000, Loss: 299728471.71875, CrossEntropy: 0.07771282643079758, Accuracy: 0.9722506393861892\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 5736510.5, CrossEntropy: 0.8966503143310547, Accuracy: 0.8115110759493671\n",
      "Elapsed time for the training: 12.981561660766602\n",
      "Iter 33 / 2000, Loss: 298973896.25, CrossEntropy: 0.07741603255271912, Accuracy: 0.9718110613810741\n",
      "EVALUATION with last weights -> Loss: 6016308.5, CrossEntropy: 0.9559258818626404, Accuracy: 0.7942049050632911\n",
      "Elapsed time for the training: 12.939810514450073\n",
      "Iter 34 / 2000, Loss: 282792805.90625, CrossEntropy: 0.07101266831159592, Accuracy: 0.9747562340153453\n",
      "EVALUATION with last weights -> Loss: 7068336.0, CrossEntropy: 1.1186822652816772, Accuracy: 0.7756131329113924\n",
      "Elapsed time for the training: 12.895395278930664\n",
      "Iter 35 / 2000, Loss: 291226208.40625, CrossEntropy: 0.07435604929924011, Accuracy: 0.9733296035805626\n",
      "EVALUATION with last weights -> Loss: 6300226.0, CrossEntropy: 1.001921534538269, Accuracy: 0.7946004746835443\n",
      "Elapsed time for the training: 12.917261123657227\n",
      "Iter 36 / 2000, Loss: 272469270.34375, CrossEntropy: 0.06677786260843277, Accuracy: 0.9764905690537085\n",
      "EVALUATION with last weights -> Loss: 5931762.5, CrossEntropy: 0.9385310411453247, Accuracy: 0.7891613924050633\n",
      "Elapsed time for the training: 12.821725130081177\n",
      "Iter 37 / 2000, Loss: 261904494.6875, CrossEntropy: 0.06250030547380447, Accuracy: 0.9780410805626598\n",
      "EVALUATION with last weights -> Loss: 6227696.5, CrossEntropy: 0.9925687909126282, Accuracy: 0.7935126582278481\n",
      "Elapsed time for the training: 12.919508218765259\n",
      "Iter 38 / 2000, Loss: 259218895.40625, CrossEntropy: 0.06142307072877884, Accuracy: 0.9778212915601023\n",
      "EVALUATION with last weights -> Loss: 6132781.5, CrossEntropy: 0.9622385501861572, Accuracy: 0.8027096518987342\n",
      "Elapsed time for the training: 12.937499284744263\n",
      "Iter 39 / 2000, Loss: 258209561.625, CrossEntropy: 0.0610923133790493, Accuracy: 0.9786724744245524\n",
      "EVALUATION with last weights -> Loss: 6468055.5, CrossEntropy: 1.0201213359832764, Accuracy: 0.7983583860759493\n",
      "Elapsed time for the training: 12.908489465713501\n",
      "Iter 40 / 2000, Loss: 244032700.46875, CrossEntropy: 0.05540495365858078, Accuracy: 0.9802949168797954\n",
      "EVALUATION with last weights -> Loss: 6514933.0, CrossEntropy: 1.0253021717071533, Accuracy: 0.7974683544303798\n",
      "Elapsed time for the training: 12.907408714294434\n",
      "Iter 41 / 2000, Loss: 249139429.8125, CrossEntropy: 0.057403624057769775, Accuracy: 0.9796875\n",
      "EVALUATION with last weights -> Loss: 6479228.0, CrossEntropy: 1.0319299697875977, Accuracy: 0.7846123417721519\n",
      "Elapsed time for the training: 12.945927858352661\n",
      "Iter 42 / 2000, Loss: 242636523.15625, CrossEntropy: 0.05488383769989014, Accuracy: 0.9815696930946292\n",
      "EVALUATION with last weights -> Loss: 6330954.0, CrossEntropy: 0.9898462891578674, Accuracy: 0.7977650316455697\n",
      "Elapsed time for the training: 12.906975984573364\n",
      "Iter 43 / 2000, Loss: 237467937.03125, CrossEntropy: 0.052697792649269104, Accuracy: 0.9811061381074169\n",
      "EVALUATION with last weights -> Loss: 5936957.5, CrossEntropy: 0.9290887713432312, Accuracy: 0.8045886075949367\n",
      "Elapsed time for the training: 12.916301012039185\n",
      "Iter 44 / 2000, Loss: 236121255.5, CrossEntropy: 0.05221284553408623, Accuracy: 0.9814697890025575\n",
      "EVALUATION with last weights -> Loss: 6091345.0, CrossEntropy: 0.9676854014396667, Accuracy: 0.7989517405063291\n",
      "Elapsed time for the training: 12.852419376373291\n",
      "Iter 45 / 2000, Loss: 229934005.09375, CrossEntropy: 0.04968918114900589, Accuracy: 0.9833519820971868\n",
      "EVALUATION with last weights -> Loss: 6555848.5, CrossEntropy: 1.036340355873108, Accuracy: 0.7993473101265823\n",
      "Elapsed time for the training: 12.929436206817627\n",
      "Iter 46 / 2000, Loss: 232180940.96875, CrossEntropy: 0.050567980855703354, Accuracy: 0.9822650255754476\n",
      "EVALUATION with last weights -> Loss: 6327697.5, CrossEntropy: 1.0178974866867065, Accuracy: 0.7947982594936709\n",
      "Elapsed time for the training: 12.735810279846191\n",
      "Iter 47 / 2000, Loss: 225371582.6875, CrossEntropy: 0.04786866158246994, Accuracy: 0.9829803388746803\n",
      "EVALUATION with last weights -> Loss: 6561829.5, CrossEntropy: 1.0281473398208618, Accuracy: 0.8048852848101266\n",
      "Elapsed time for the training: 12.90502667427063\n",
      "Iter 48 / 2000, Loss: 223811422.5625, CrossEntropy: 0.047270797193050385, Accuracy: 0.9833559782608695\n",
      "EVALUATION with last weights -> Loss: 7046005.0, CrossEntropy: 1.1170686483383179, Accuracy: 0.7934137658227848\n",
      "Elapsed time for the training: 12.976499795913696\n",
      "Iter 49 / 2000, Loss: 221016203.53125, CrossEntropy: 0.04605771601200104, Accuracy: 0.9840553069053708\n",
      "EVALUATION with last weights -> Loss: 6595486.5, CrossEntropy: 1.0357333421707153, Accuracy: 0.7991495253164557\n",
      "Elapsed time for the training: 12.920805215835571\n",
      "Iter 50 / 2000, Loss: 208464653.21875, CrossEntropy: 0.04106045514345169, Accuracy: 0.9858296035805627\n",
      "EVALUATION with last weights -> Loss: 6496047.5, CrossEntropy: 1.0202414989471436, Accuracy: 0.803995253164557\n",
      "Elapsed time for the training: 12.896712064743042\n",
      "Iter 51 / 2000, Loss: 215476139.4375, CrossEntropy: 0.043850481510162354, Accuracy: 0.983755594629156\n",
      "EVALUATION with last weights -> Loss: 6447414.0, CrossEntropy: 1.025043249130249, Accuracy: 0.8071598101265823\n",
      "Elapsed time for the training: 12.938037872314453\n",
      "Iter 52 / 2000, Loss: 209706671.125, CrossEntropy: 0.04157312214374542, Accuracy: 0.9854020140664962\n",
      "EVALUATION with last weights -> Loss: 7246150.0, CrossEntropy: 1.1491568088531494, Accuracy: 0.7929193037974683\n",
      "Elapsed time for the training: 12.904146671295166\n",
      "Iter 53 / 2000, Loss: 211810890.125, CrossEntropy: 0.042439330369234085, Accuracy: 0.9850583439897698\n",
      "EVALUATION with last weights -> Loss: 6445416.5, CrossEntropy: 1.0350688695907593, Accuracy: 0.8116099683544303\n",
      "Elapsed time for the training: 12.936681747436523\n",
      "Iter 54 / 2000, Loss: 208251815.46875, CrossEntropy: 0.040930237621068954, Accuracy: 0.9860014386189259\n",
      "EVALUATION with last weights -> Loss: 6647279.0, CrossEntropy: 1.0556639432907104, Accuracy: 0.8043908227848101\n",
      "Elapsed time for the training: 12.908207654953003\n",
      "Iter 55 / 2000, Loss: 208375755.03125, CrossEntropy: 0.040949251502752304, Accuracy: 0.985613810741688\n",
      "EVALUATION with last weights -> Loss: 7466334.0, CrossEntropy: 1.173837423324585, Accuracy: 0.7931170886075949\n",
      "Elapsed time for the training: 12.91148042678833\n",
      "Iter 56 / 2000, Loss: 205499419.375, CrossEntropy: 0.039821017533540726, Accuracy: 0.9862212276214835\n",
      "EVALUATION with last weights -> Loss: 6372686.0, CrossEntropy: 1.0063761472702026, Accuracy: 0.8151700949367089\n",
      "Elapsed time for the training: 12.590372800827026\n",
      "Iter 57 / 2000, Loss: 200153739.40625, CrossEntropy: 0.03771341219544411, Accuracy: 0.9867487212276216\n",
      "EVALUATION with last weights -> Loss: 6916080.0, CrossEntropy: 1.0913044214248657, Accuracy: 0.8072587025316456\n",
      "Elapsed time for the training: 11.805779218673706\n",
      "Iter 58 / 2000, Loss: 198703223.4375, CrossEntropy: 0.03707335889339447, Accuracy: 0.9871003836317136\n",
      "EVALUATION with last weights -> Loss: 7074716.5, CrossEntropy: 1.1196352243423462, Accuracy: 0.8029074367088608\n",
      "Elapsed time for the training: 12.907159090042114\n",
      "Iter 59 / 2000, Loss: 199392020.71875, CrossEntropy: 0.03735440969467163, Accuracy: 0.9870404411764706\n",
      "EVALUATION with last weights -> Loss: 6747388.5, CrossEntropy: 1.0817558765411377, Accuracy: 0.8075553797468354\n",
      "Elapsed time for the training: 13.200151205062866\n",
      "Iter 60 / 2000, Loss: 197261665.03125, CrossEntropy: 0.036465298384428024, Accuracy: 0.9874320652173914\n",
      "EVALUATION with last weights -> Loss: 6603910.0, CrossEntropy: 1.0501902103424072, Accuracy: 0.8128955696202531\n",
      "Elapsed time for the training: 12.905923128128052\n",
      "Iter 61 / 2000, Loss: 186587461.3125, CrossEntropy: 0.03219624236226082, Accuracy: 0.9881114130434783\n",
      "EVALUATION with last weights -> Loss: 6653408.0, CrossEntropy: 1.0493923425674438, Accuracy: 0.8168512658227848\n",
      "Elapsed time for the training: 12.929418802261353\n",
      "Iter 62 / 2000, Loss: 191320153.5625, CrossEntropy: 0.034071844071149826, Accuracy: 0.988531010230179\n",
      "EVALUATION with last weights -> Loss: 6618531.5, CrossEntropy: 1.0386695861816406, Accuracy: 0.8106210443037974\n",
      "Elapsed time for the training: 12.930685043334961\n",
      "Iter 63 / 2000, Loss: 190114194.96875, CrossEntropy: 0.03361929580569267, Accuracy: 0.9882792519181586\n",
      "EVALUATION with last weights -> Loss: 6973038.5, CrossEntropy: 1.0898734331130981, Accuracy: 0.8020174050632911\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.920931100845337\n",
      "Iter 64 / 2000, Loss: 187370851.84375, CrossEntropy: 0.03251371905207634, Accuracy: 0.9892143542199487\n",
      "EVALUATION with last weights -> Loss: 7707208.5, CrossEntropy: 1.2205740213394165, Accuracy: 0.802314082278481\n",
      "Elapsed time for the training: 12.920504570007324\n",
      "Iter 65 / 2000, Loss: 191438859.53125, CrossEntropy: 0.03416123986244202, Accuracy: 0.9881473785166242\n",
      "EVALUATION with last weights -> Loss: 7119917.5, CrossEntropy: 1.1213464736938477, Accuracy: 0.8095332278481012\n",
      "Elapsed time for the training: 12.968037605285645\n",
      "Iter 66 / 2000, Loss: 182837453.3125, CrossEntropy: 0.030646229162812233, Accuracy: 0.9892902813299232\n",
      "EVALUATION with last weights -> Loss: 7195739.5, CrossEntropy: 1.1355844736099243, Accuracy: 0.8033030063291139\n",
      "Elapsed time for the training: 12.90254259109497\n",
      "Iter 67 / 2000, Loss: 182160924.09375, CrossEntropy: 0.030428700149059296, Accuracy: 0.9895180626598465\n",
      "EVALUATION with last weights -> Loss: 6529725.5, CrossEntropy: 1.0212266445159912, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.898857593536377\n",
      "Iter 68 / 2000, Loss: 188091162.78125, CrossEntropy: 0.03273366019129753, Accuracy: 0.98909047314578\n",
      "EVALUATION with last weights -> Loss: 7015654.5, CrossEntropy: 1.0985063314437866, Accuracy: 0.8036985759493671\n",
      "Elapsed time for the training: 13.011576175689697\n",
      "Iter 69 / 2000, Loss: 173683593.5, CrossEntropy: 0.026972496882081032, Accuracy: 0.9906689578005116\n",
      "EVALUATION with last weights -> Loss: 7098435.5, CrossEntropy: 1.128913402557373, Accuracy: 0.8089398734177216\n",
      "Elapsed time for the training: 12.867676734924316\n",
      "Iter 70 / 2000, Loss: 184508366.1875, CrossEntropy: 0.031320393085479736, Accuracy: 0.9886668797953965\n",
      "EVALUATION with last weights -> Loss: 7127410.5, CrossEntropy: 1.121148943901062, Accuracy: 0.8087420886075949\n",
      "Elapsed time for the training: 12.89986538887024\n",
      "Iter 71 / 2000, Loss: 187455076.0, CrossEntropy: 0.03256634622812271, Accuracy: 0.9882432864450128\n",
      "EVALUATION with last weights -> Loss: 7328910.0, CrossEntropy: 1.1589410305023193, Accuracy: 0.8078520569620253\n",
      "Elapsed time for the training: 12.923496961593628\n",
      "Iter 72 / 2000, Loss: 178485998.0, CrossEntropy: 0.0288761705160141, Accuracy: 0.9901894181585678\n",
      "EVALUATION with last weights -> Loss: 6640841.0, CrossEntropy: 1.0482083559036255, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 12.914557218551636\n",
      "Iter 73 / 2000, Loss: 179333714.53125, CrossEntropy: 0.029193395748734474, Accuracy: 0.9901294757033248\n",
      "EVALUATION with last weights -> Loss: 7314984.0, CrossEntropy: 1.1570968627929688, Accuracy: 0.8073575949367089\n",
      "Elapsed time for the training: 12.980047941207886\n",
      "Iter 74 / 2000, Loss: 184724169.03125, CrossEntropy: 0.03134806081652641, Accuracy: 0.9896978900255755\n",
      "EVALUATION with last weights -> Loss: 7213087.0, CrossEntropy: 1.152820348739624, Accuracy: 0.7997428797468354\n",
      "Elapsed time for the training: 12.884161710739136\n",
      "Iter 75 / 2000, Loss: 175524060.25, CrossEntropy: 0.02773386612534523, Accuracy: 0.9904851342710999\n",
      "EVALUATION with last weights -> Loss: 7796021.5, CrossEntropy: 1.2387088537216187, Accuracy: 0.7910403481012658\n",
      "Elapsed time for the training: 12.93562936782837\n",
      "Iter 76 / 2000, Loss: 170365840.71875, CrossEntropy: 0.02558240294456482, Accuracy: 0.9913682864450127\n",
      "EVALUATION with last weights -> Loss: 7302135.5, CrossEntropy: 1.1534290313720703, Accuracy: 0.8068631329113924\n",
      "Elapsed time for the training: 13.724176168441772\n",
      "Iter 77 / 2000, Loss: 178020513.5625, CrossEntropy: 0.02866237610578537, Accuracy: 0.990005594629156\n",
      "EVALUATION with last weights -> Loss: 6813302.5, CrossEntropy: 1.0778921842575073, Accuracy: 0.8183346518987342\n",
      "Elapsed time for the training: 13.001993656158447\n",
      "Iter 78 / 2000, Loss: 166580524.78125, CrossEntropy: 0.0240732841193676, Accuracy: 0.9916160485933504\n",
      "EVALUATION with last weights -> Loss: 7581452.5, CrossEntropy: 1.210752010345459, Accuracy: 0.8036985759493671\n",
      "Elapsed time for the training: 12.860556602478027\n",
      "Iter 79 / 2000, Loss: 175111811.65625, CrossEntropy: 0.027465734630823135, Accuracy: 0.9909167199488491\n",
      "EVALUATION with last weights -> Loss: 7780382.0, CrossEntropy: 1.2289036512374878, Accuracy: 0.801621835443038\n",
      "Elapsed time for the training: 12.884096384048462\n",
      "Iter 80 / 2000, Loss: 173209207.53125, CrossEntropy: 0.02670164592564106, Accuracy: 0.9909766624040921\n",
      "EVALUATION with last weights -> Loss: 6803116.0, CrossEntropy: 1.0647633075714111, Accuracy: 0.813192246835443\n",
      "Elapsed time for the training: 13.007128238677979\n",
      "Iter 81 / 2000, Loss: 171401216.40625, CrossEntropy: 0.02595408447086811, Accuracy: 0.9912484015345269\n",
      "EVALUATION with last weights -> Loss: 8516194.0, CrossEntropy: 1.356778860092163, Accuracy: 0.7931170886075949\n",
      "Elapsed time for the training: 12.909030199050903\n",
      "Iter 82 / 2000, Loss: 171149214.125, CrossEntropy: 0.025872686877846718, Accuracy: 0.9907169117647059\n",
      "EVALUATION with last weights -> Loss: 7031084.5, CrossEntropy: 1.0993174314498901, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.899860620498657\n",
      "Iter 83 / 2000, Loss: 164884011.25, CrossEntropy: 0.02333449386060238, Accuracy: 0.9919677109974424\n",
      "EVALUATION with last weights -> Loss: 7051787.5, CrossEntropy: 1.105797529220581, Accuracy: 0.8160601265822784\n",
      "Elapsed time for the training: 12.728193283081055\n",
      "Iter 84 / 2000, Loss: 164681311.625, CrossEntropy: 0.023243241012096405, Accuracy: 0.9917679028132992\n",
      "EVALUATION with last weights -> Loss: 7972665.5, CrossEntropy: 1.2563531398773193, Accuracy: 0.8043908227848101\n",
      "Elapsed time for the training: 11.773301124572754\n",
      "Iter 85 / 2000, Loss: 165695663.5, CrossEntropy: 0.02369077317416668, Accuracy: 0.9916959718670078\n",
      "EVALUATION with last weights -> Loss: 9054350.0, CrossEntropy: 1.4310039281845093, Accuracy: 0.7787776898734177\n",
      "Elapsed time for the training: 12.763100624084473\n",
      "Iter 86 / 2000, Loss: 171327839.375, CrossEntropy: 0.02590172551572323, Accuracy: 0.9908168158567775\n",
      "EVALUATION with last weights -> Loss: 8033195.0, CrossEntropy: 1.259863257408142, Accuracy: 0.8005340189873418\n",
      "Elapsed time for the training: 12.9098060131073\n",
      "Iter 87 / 2000, Loss: 161697701.53125, CrossEntropy: 0.022053705528378487, Accuracy: 0.992663043478261\n",
      "EVALUATION with last weights -> Loss: 7536659.0, CrossEntropy: 1.1779265403747559, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 14.57376480102539\n",
      "Iter 88 / 2000, Loss: 164371993.09375, CrossEntropy: 0.02308657206594944, Accuracy: 0.992247442455243\n",
      "EVALUATION with last weights -> Loss: 7244068.5, CrossEntropy: 1.1381529569625854, Accuracy: 0.8154667721518988\n",
      "Elapsed time for the training: 15.616447448730469\n",
      "Iter 89 / 2000, Loss: 157797986.0625, CrossEntropy: 0.02045263536274433, Accuracy: 0.9930666560102301\n",
      "EVALUATION with last weights -> Loss: 7872659.0, CrossEntropy: 1.2420117855072021, Accuracy: 0.8068631329113924\n",
      "Elapsed time for the training: 15.723249673843384\n",
      "Iter 90 / 2000, Loss: 165004794.90625, CrossEntropy: 0.02334360033273697, Accuracy: 0.991715952685422\n",
      "EVALUATION with last weights -> Loss: 7609056.5, CrossEntropy: 1.2089614868164062, Accuracy: 0.8069620253164557\n",
      "Elapsed time for the training: 15.233774900436401\n",
      "Iter 91 / 2000, Loss: 160312177.0625, CrossEntropy: 0.02144668810069561, Accuracy: 0.9925471547314578\n",
      "EVALUATION with last weights -> Loss: 7849343.5, CrossEntropy: 1.2477189302444458, Accuracy: 0.8059731012658228\n",
      "Elapsed time for the training: 15.615856885910034\n",
      "Iter 92 / 2000, Loss: 166444722.84375, CrossEntropy: 0.02388930693268776, Accuracy: 0.9916360294117648\n",
      "EVALUATION with last weights -> Loss: 7263254.0, CrossEntropy: 1.134957194328308, Accuracy: 0.8180379746835443\n",
      "Elapsed time for the training: 15.623974561691284\n",
      "Iter 93 / 2000, Loss: 156775041.34375, CrossEntropy: 0.02003628760576248, Accuracy: 0.993394341432225\n",
      "EVALUATION with last weights -> Loss: 7808127.5, CrossEntropy: 1.2244619131088257, Accuracy: 0.8065664556962026\n",
      "Elapsed time for the training: 15.496641159057617\n",
      "Iter 94 / 2000, Loss: 166932451.6875, CrossEntropy: 0.02412763610482216, Accuracy: 0.9915161445012788\n",
      "EVALUATION with last weights -> Loss: 8387788.5, CrossEntropy: 1.311470627784729, Accuracy: 0.8069620253164557\n",
      "Elapsed time for the training: 14.855451583862305\n",
      "Iter 95 / 2000, Loss: 162500585.625, CrossEntropy: 0.02229379303753376, Accuracy: 0.9924072890025576\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 7263891.5, CrossEntropy: 1.1540197134017944, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.922096014022827\n",
      "Iter 96 / 2000, Loss: 161821970.6875, CrossEntropy: 0.022027937695384026, Accuracy: 0.9924352621483377\n",
      "EVALUATION with last weights -> Loss: 8408996.0, CrossEntropy: 1.3245928287506104, Accuracy: 0.8067642405063291\n",
      "Elapsed time for the training: 12.843571662902832\n",
      "Iter 97 / 2000, Loss: 160014880.78125, CrossEntropy: 0.021268784999847412, Accuracy: 0.9926470588235294\n",
      "EVALUATION with last weights -> Loss: 7031946.0, CrossEntropy: 1.1121902465820312, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.896942853927612\n",
      "Iter 98 / 2000, Loss: 156376644.75, CrossEntropy: 0.01985589787364006, Accuracy: 0.993394341432225\n",
      "EVALUATION with last weights -> Loss: 7419030.0, CrossEntropy: 1.1684918403625488, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.929293632507324\n",
      "Iter 99 / 2000, Loss: 156203489.625, CrossEntropy: 0.019727611914277077, Accuracy: 0.9929467710997443\n",
      "EVALUATION with last weights -> Loss: 7392026.0, CrossEntropy: 1.1690102815628052, Accuracy: 0.8162579113924051\n",
      "Elapsed time for the training: 12.971511363983154\n",
      "Iter 100 / 2000, Loss: 165542879.90625, CrossEntropy: 0.023457719013094902, Accuracy: 0.9919477301790282\n",
      "EVALUATION with last weights -> Loss: 7689568.5, CrossEntropy: 1.2047955989837646, Accuracy: 0.8161590189873418\n",
      "Elapsed time for the training: 12.902860164642334\n",
      "Iter 101 / 2000, Loss: 151986395.71875, CrossEntropy: 0.018029892817139626, Accuracy: 0.9938259271099744\n",
      "EVALUATION with last weights -> Loss: 7211296.5, CrossEntropy: 1.1400901079177856, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.90959620475769\n",
      "Iter 102 / 2000, Loss: 164320365.90625, CrossEntropy: 0.022969331592321396, Accuracy: 0.9920756074168798\n",
      "EVALUATION with last weights -> Loss: 7831903.0, CrossEntropy: 1.2455806732177734, Accuracy: 0.8025118670886076\n",
      "Elapsed time for the training: 12.896220922470093\n",
      "Iter 103 / 2000, Loss: 157459261.0625, CrossEntropy: 0.02022687904536724, Accuracy: 0.9928748401534527\n",
      "EVALUATION with last weights -> Loss: 8427010.0, CrossEntropy: 1.3272526264190674, Accuracy: 0.8041930379746836\n",
      "Elapsed time for the training: 12.880128145217896\n",
      "Iter 104 / 2000, Loss: 155788414.53125, CrossEntropy: 0.01952200010418892, Accuracy: 0.9933863491048593\n",
      "EVALUATION with last weights -> Loss: 7392770.5, CrossEntropy: 1.1689411401748657, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.948541402816772\n",
      "Iter 105 / 2000, Loss: 160460299.8125, CrossEntropy: 0.021396927535533905, Accuracy: 0.9929467710997443\n",
      "EVALUATION with last weights -> Loss: 7427411.5, CrossEntropy: 1.1743605136871338, Accuracy: 0.815565664556962\n",
      "Elapsed time for the training: 12.925816297531128\n",
      "Iter 106 / 2000, Loss: 154091989.59375, CrossEntropy: 0.018831031396985054, Accuracy: 0.9932664641943734\n",
      "EVALUATION with last weights -> Loss: 7403100.5, CrossEntropy: 1.1651825904846191, Accuracy: 0.8078520569620253\n",
      "Elapsed time for the training: 12.833741664886475\n",
      "Iter 107 / 2000, Loss: 152420320.15625, CrossEntropy: 0.018296491354703903, Accuracy: 0.9939218350383632\n",
      "EVALUATION with last weights -> Loss: 7883290.0, CrossEntropy: 1.2354552745819092, Accuracy: 0.8096321202531646\n",
      "Elapsed time for the training: 14.328346252441406\n",
      "Iter 108 / 2000, Loss: 155475378.53125, CrossEntropy: 0.01936044730246067, Accuracy: 0.9937060421994884\n",
      "EVALUATION with last weights -> Loss: 7369506.0, CrossEntropy: 1.1644067764282227, Accuracy: 0.8169501582278481\n",
      "Elapsed time for the training: 12.905969142913818\n",
      "Iter 109 / 2000, Loss: 155322559.5, CrossEntropy: 0.019297756254673004, Accuracy: 0.9937460038363172\n",
      "EVALUATION with last weights -> Loss: 7709503.5, CrossEntropy: 1.2250547409057617, Accuracy: 0.8091376582278481\n",
      "Elapsed time for the training: 12.924440622329712\n",
      "Iter 110 / 2000, Loss: 153669058.59375, CrossEntropy: 0.01863880828022957, Accuracy: 0.993394341432225\n",
      "EVALUATION with last weights -> Loss: 7878985.0, CrossEntropy: 1.2478153705596924, Accuracy: 0.8079509493670886\n",
      "Elapsed time for the training: 12.930346727371216\n",
      "Iter 111 / 2000, Loss: 149105974.59375, CrossEntropy: 0.016843192279338837, Accuracy: 0.9939617966751919\n",
      "EVALUATION with last weights -> Loss: 7756343.5, CrossEntropy: 1.2260514497756958, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.89767861366272\n",
      "Iter 112 / 2000, Loss: 152898521.40625, CrossEntropy: 0.018322253599762917, Accuracy: 0.9937140345268543\n",
      "EVALUATION with last weights -> Loss: 7802641.0, CrossEntropy: 1.2253940105438232, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.93696665763855\n",
      "Iter 113 / 2000, Loss: 152623550.34375, CrossEntropy: 0.01818181946873665, Accuracy: 0.9935262148337596\n",
      "EVALUATION with last weights -> Loss: 7443504.0, CrossEntropy: 1.16927170753479, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.894842624664307\n",
      "Iter 114 / 2000, Loss: 152571700.46875, CrossEntropy: 0.018170414492487907, Accuracy: 0.9936940537084399\n",
      "EVALUATION with last weights -> Loss: 7830675.5, CrossEntropy: 1.2283873558044434, Accuracy: 0.8181368670886076\n",
      "Elapsed time for the training: 12.90676736831665\n",
      "Iter 115 / 2000, Loss: 151761904.71875, CrossEntropy: 0.01782056875526905, Accuracy: 0.9938858695652174\n",
      "EVALUATION with last weights -> Loss: 7462261.5, CrossEntropy: 1.1868103742599487, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 13.106267929077148\n",
      "Iter 116 / 2000, Loss: 155536278.0, CrossEntropy: 0.019610736519098282, Accuracy: 0.9932904411764706\n",
      "EVALUATION with last weights -> Loss: 8113387.0, CrossEntropy: 1.284891128540039, Accuracy: 0.7964794303797469\n",
      "Elapsed time for the training: 13.510184049606323\n",
      "Iter 117 / 2000, Loss: 151834268.625, CrossEntropy: 0.017842287197709084, Accuracy: 0.993514226342711\n",
      "EVALUATION with last weights -> Loss: 7873953.5, CrossEntropy: 1.2813189029693604, Accuracy: 0.8139833860759493\n",
      "Elapsed time for the training: 12.953968286514282\n",
      "Iter 118 / 2000, Loss: 151121547.8125, CrossEntropy: 0.017543243244290352, Accuracy: 0.993865888746803\n",
      "EVALUATION with last weights -> Loss: 8282096.0, CrossEntropy: 1.3377742767333984, Accuracy: 0.8136867088607594\n",
      "Elapsed time for the training: 12.895989418029785\n",
      "Iter 119 / 2000, Loss: 148529617.78125, CrossEntropy: 0.016499489545822144, Accuracy: 0.9943254475703325\n",
      "EVALUATION with last weights -> Loss: 8490829.0, CrossEntropy: 1.335041880607605, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 12.921299695968628\n",
      "Iter 120 / 2000, Loss: 153329081.53125, CrossEntropy: 0.01845732145011425, Accuracy: 0.9936021419437341\n",
      "EVALUATION with last weights -> Loss: 7994626.0, CrossEntropy: 1.2556350231170654, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 13.128778219223022\n",
      "Iter 121 / 2000, Loss: 145688619.90625, CrossEntropy: 0.015362348407506943, Accuracy: 0.9949328644501279\n",
      "EVALUATION with last weights -> Loss: 8064394.5, CrossEntropy: 1.2645201683044434, Accuracy: 0.8138844936708861\n",
      "Elapsed time for the training: 12.885806798934937\n",
      "Iter 122 / 2000, Loss: 144894153.5, CrossEntropy: 0.015037749893963337, Accuracy: 0.994840952685422\n",
      "EVALUATION with last weights -> Loss: 8065841.5, CrossEntropy: 1.2917062044143677, Accuracy: 0.8110166139240507\n",
      "Elapsed time for the training: 13.032548427581787\n",
      "Iter 123 / 2000, Loss: 147370581.875, CrossEntropy: 0.016017848625779152, Accuracy: 0.9948929028132992\n",
      "EVALUATION with last weights -> Loss: 8406551.0, CrossEntropy: 1.3253026008605957, Accuracy: 0.8146756329113924\n",
      "Elapsed time for the training: 12.93404483795166\n",
      "Iter 124 / 2000, Loss: 151255913.84375, CrossEntropy: 0.017552366480231285, Accuracy: 0.993546195652174\n",
      "EVALUATION with last weights -> Loss: 7827306.5, CrossEntropy: 1.2308449745178223, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 12.913092374801636\n",
      "Iter 125 / 2000, Loss: 146974371.9375, CrossEntropy: 0.015840591862797737, Accuracy: 0.9945332480818415\n",
      "EVALUATION with last weights -> Loss: 7918475.5, CrossEntropy: 1.2378605604171753, Accuracy: 0.815565664556962\n",
      "Elapsed time for the training: 12.92602276802063\n",
      "Iter 126 / 2000, Loss: 148365691.25, CrossEntropy: 0.016376342624425888, Accuracy: 0.9942655051150895\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 7909694.0, CrossEntropy: 1.238834023475647, Accuracy: 0.8147745253164557\n",
      "Elapsed time for the training: 12.900666952133179\n",
      "Iter 127 / 2000, Loss: 145986075.0, CrossEntropy: 0.015439904294908047, Accuracy: 0.9943534207161125\n",
      "EVALUATION with last weights -> Loss: 8139232.5, CrossEntropy: 1.2833738327026367, Accuracy: 0.810818829113924\n",
      "Elapsed time for the training: 12.861001968383789\n",
      "Iter 128 / 2000, Loss: 146838953.34375, CrossEntropy: 0.01575331948697567, Accuracy: 0.9943054667519181\n",
      "EVALUATION with last weights -> Loss: 7674873.5, CrossEntropy: 1.2183470726013184, Accuracy: 0.8160601265822784\n",
      "Elapsed time for the training: 12.883777379989624\n",
      "Iter 129 / 2000, Loss: 147054730.84375, CrossEntropy: 0.015832260251045227, Accuracy: 0.9945252557544757\n",
      "EVALUATION with last weights -> Loss: 8019370.5, CrossEntropy: 1.2585890293121338, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 13.018559694290161\n",
      "Iter 130 / 2000, Loss: 146795751.96875, CrossEntropy: 0.015721378847956657, Accuracy: 0.9943454283887468\n",
      "EVALUATION with last weights -> Loss: 8035398.0, CrossEntropy: 1.2583004236221313, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 13.158730268478394\n",
      "Iter 131 / 2000, Loss: 148599903.71875, CrossEntropy: 0.01642582193017006, Accuracy: 0.9942455242966752\n",
      "EVALUATION with last weights -> Loss: 7706320.5, CrossEntropy: 1.2315351963043213, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.939144611358643\n",
      "Iter 132 / 2000, Loss: 144809943.21875, CrossEntropy: 0.014909452758729458, Accuracy: 0.9948849104859335\n",
      "EVALUATION with last weights -> Loss: 8861333.0, CrossEntropy: 1.4070539474487305, Accuracy: 0.8051819620253164\n",
      "Elapsed time for the training: 12.92007040977478\n",
      "Iter 133 / 2000, Loss: 147789239.875, CrossEntropy: 0.016092518344521523, Accuracy: 0.9947250639386189\n",
      "EVALUATION with last weights -> Loss: 7709133.0, CrossEntropy: 1.2333556413650513, Accuracy: 0.8129944620253164\n",
      "Elapsed time for the training: 15.177940368652344\n",
      "Iter 134 / 2000, Loss: 141686774.59375, CrossEntropy: 0.013640341348946095, Accuracy: 0.99528452685422\n",
      "EVALUATION with last weights -> Loss: 7586192.5, CrossEntropy: 1.2013040781021118, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 14.114486455917358\n",
      "Iter 135 / 2000, Loss: 148245626.84375, CrossEntropy: 0.016267763450741768, Accuracy: 0.9940537084398977\n",
      "EVALUATION with last weights -> Loss: 8084623.5, CrossEntropy: 1.276789665222168, Accuracy: 0.8143789556962026\n",
      "Elapsed time for the training: 12.935781002044678\n",
      "Iter 136 / 2000, Loss: 144938238.03125, CrossEntropy: 0.014934561215341091, Accuracy: 0.9949648337595908\n",
      "EVALUATION with last weights -> Loss: 8402232.0, CrossEntropy: 1.3201123476028442, Accuracy: 0.8107199367088608\n",
      "Elapsed time for the training: 13.561007022857666\n",
      "Iter 137 / 2000, Loss: 142657522.78125, CrossEntropy: 0.014008372090756893, Accuracy: 0.9956441815856778\n",
      "EVALUATION with last weights -> Loss: 8385662.0, CrossEntropy: 1.3275963068008423, Accuracy: 0.8106210443037974\n",
      "Elapsed time for the training: 13.515555143356323\n",
      "Iter 138 / 2000, Loss: 148034866.125, CrossEntropy: 0.016154859215021133, Accuracy: 0.9942655051150895\n",
      "EVALUATION with last weights -> Loss: 8209782.5, CrossEntropy: 1.310009241104126, Accuracy: 0.8121044303797469\n",
      "Elapsed time for the training: 12.886218786239624\n",
      "Iter 139 / 2000, Loss: 148926684.9375, CrossEntropy: 0.016587944701313972, Accuracy: 0.9944333439897699\n",
      "EVALUATION with last weights -> Loss: 9186204.0, CrossEntropy: 1.4420114755630493, Accuracy: 0.8105221518987342\n",
      "Elapsed time for the training: 12.918554544448853\n",
      "Iter 140 / 2000, Loss: 141674036.71875, CrossEntropy: 0.013586134649813175, Accuracy: 0.9953644501278772\n",
      "EVALUATION with last weights -> Loss: 7917864.5, CrossEntropy: 1.2502508163452148, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 13.67568039894104\n",
      "Iter 141 / 2000, Loss: 143567760.96875, CrossEntropy: 0.014356409199535847, Accuracy: 0.9949928069053708\n",
      "EVALUATION with last weights -> Loss: 8429470.0, CrossEntropy: 1.3227880001068115, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 12.885100603103638\n",
      "Iter 142 / 2000, Loss: 147720197.4375, CrossEntropy: 0.016141025349497795, Accuracy: 0.9940417199488492\n",
      "EVALUATION with last weights -> Loss: 8750460.0, CrossEntropy: 1.3789687156677246, Accuracy: 0.7978639240506329\n",
      "Elapsed time for the training: 12.909797191619873\n",
      "Iter 143 / 2000, Loss: 141291139.03125, CrossEntropy: 0.013431922532618046, Accuracy: 0.9953924232736573\n",
      "EVALUATION with last weights -> Loss: 10498849.0, CrossEntropy: 1.665459156036377, Accuracy: 0.7886669303797469\n",
      "Elapsed time for the training: 12.910035133361816\n",
      "Iter 144 / 2000, Loss: 142876994.96875, CrossEntropy: 0.014040961861610413, Accuracy: 0.9950247762148338\n",
      "EVALUATION with last weights -> Loss: 8051142.0, CrossEntropy: 1.2648296356201172, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.887070417404175\n",
      "Iter 145 / 2000, Loss: 141116371.65625, CrossEntropy: 0.013363739475607872, Accuracy: 0.9953524616368287\n",
      "EVALUATION with last weights -> Loss: 8616194.0, CrossEntropy: 1.3482853174209595, Accuracy: 0.8125988924050633\n",
      "Elapsed time for the training: 12.919800996780396\n",
      "Iter 146 / 2000, Loss: 141968242.0, CrossEntropy: 0.013816137798130512, Accuracy: 0.9953524616368287\n",
      "EVALUATION with last weights -> Loss: 8739651.0, CrossEntropy: 1.3858381509780884, Accuracy: 0.8105221518987342\n",
      "Elapsed time for the training: 12.929198265075684\n",
      "Iter 147 / 2000, Loss: 144664189.09375, CrossEntropy: 0.014736725017428398, Accuracy: 0.9947330562659847\n",
      "EVALUATION with last weights -> Loss: 8716708.0, CrossEntropy: 1.3733409643173218, Accuracy: 0.8068631329113924\n",
      "Elapsed time for the training: 12.933275938034058\n",
      "Iter 148 / 2000, Loss: 138965409.4375, CrossEntropy: 0.012443115934729576, Accuracy: 0.9956441815856778\n",
      "EVALUATION with last weights -> Loss: 8339776.0, CrossEntropy: 1.3039960861206055, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.95374059677124\n",
      "Iter 149 / 2000, Loss: 143208648.03125, CrossEntropy: 0.014134623110294342, Accuracy: 0.9950247762148338\n",
      "EVALUATION with last weights -> Loss: 8210807.5, CrossEntropy: 1.2916020154953003, Accuracy: 0.8102254746835443\n",
      "Elapsed time for the training: 12.976101160049438\n",
      "Iter 150 / 2000, Loss: 141575598.03125, CrossEntropy: 0.013525682501494884, Accuracy: 0.9957800511508952\n",
      "EVALUATION with last weights -> Loss: 8460765.0, CrossEntropy: 1.3485630750656128, Accuracy: 0.8078520569620253\n",
      "Elapsed time for the training: 13.65252947807312\n",
      "Iter 151 / 2000, Loss: 143160407.4375, CrossEntropy: 0.014120610430836678, Accuracy: 0.9948929028132992\n",
      "EVALUATION with last weights -> Loss: 8859094.0, CrossEntropy: 1.3925211429595947, Accuracy: 0.806368670886076\n",
      "Elapsed time for the training: 12.960208654403687\n",
      "Iter 152 / 2000, Loss: 140068629.375, CrossEntropy: 0.012850387021899223, Accuracy: 0.9955242966751918\n",
      "EVALUATION with last weights -> Loss: 8871016.0, CrossEntropy: 1.4143691062927246, Accuracy: 0.8089398734177216\n",
      "Elapsed time for the training: 12.92099928855896\n",
      "Iter 153 / 2000, Loss: 144676327.21875, CrossEntropy: 0.014701955020427704, Accuracy: 0.9952125959079284\n",
      "EVALUATION with last weights -> Loss: 8579888.0, CrossEntropy: 1.3447421789169312, Accuracy: 0.8100276898734177\n",
      "Elapsed time for the training: 12.9509596824646\n",
      "Iter 154 / 2000, Loss: 138481157.96875, CrossEntropy: 0.01219986379146576, Accuracy: 0.9958639705882353\n",
      "EVALUATION with last weights -> Loss: 8429870.0, CrossEntropy: 1.3219059705734253, Accuracy: 0.8176424050632911\n",
      "Elapsed time for the training: 12.954567670822144\n",
      "Iter 155 / 2000, Loss: 139944670.40625, CrossEntropy: 0.012788662686944008, Accuracy: 0.9954323849104859\n",
      "EVALUATION with last weights -> Loss: 8423168.0, CrossEntropy: 1.3168613910675049, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 12.912476778030396\n",
      "Iter 156 / 2000, Loss: 147031894.1875, CrossEntropy: 0.015702905133366585, Accuracy: 0.9947730179028134\n",
      "EVALUATION with last weights -> Loss: 8356913.5, CrossEntropy: 1.3482295274734497, Accuracy: 0.810818829113924\n",
      "Elapsed time for the training: 12.936316728591919\n",
      "Iter 157 / 2000, Loss: 137641384.03125, CrossEntropy: 0.01184816099703312, Accuracy: 0.9959119245524297\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 8208882.5, CrossEntropy: 1.2827078104019165, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.95939040184021\n",
      "Iter 158 / 2000, Loss: 141549941.1875, CrossEntropy: 0.013423706404864788, Accuracy: 0.9956921355498721\n",
      "EVALUATION with last weights -> Loss: 9175243.0, CrossEntropy: 1.4446552991867065, Accuracy: 0.8066653481012658\n",
      "Elapsed time for the training: 12.947038888931274\n",
      "Iter 159 / 2000, Loss: 136604726.96875, CrossEntropy: 0.011442059651017189, Accuracy: 0.99607976342711\n",
      "EVALUATION with last weights -> Loss: 8163905.0, CrossEntropy: 1.2892682552337646, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.929786682128906\n",
      "Iter 160 / 2000, Loss: 142082786.25, CrossEntropy: 0.013592436909675598, Accuracy: 0.995344469309463\n",
      "EVALUATION with last weights -> Loss: 8501442.0, CrossEntropy: 1.3452426195144653, Accuracy: 0.8151700949367089\n",
      "Elapsed time for the training: 13.404889822006226\n",
      "Iter 161 / 2000, Loss: 137743621.625, CrossEntropy: 0.011862451210618019, Accuracy: 0.9956521739130435\n",
      "EVALUATION with last weights -> Loss: 8698862.0, CrossEntropy: 1.3742634057998657, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 15.654345989227295\n",
      "Iter 162 / 2000, Loss: 141278145.6875, CrossEntropy: 0.013254756107926369, Accuracy: 0.9955043158567775\n",
      "EVALUATION with last weights -> Loss: 8641965.0, CrossEntropy: 1.3674824237823486, Accuracy: 0.8135878164556962\n",
      "Elapsed time for the training: 14.296353101730347\n",
      "Iter 163 / 2000, Loss: 138948579.40625, CrossEntropy: 0.012313046492636204, Accuracy: 0.9955442774936062\n",
      "EVALUATION with last weights -> Loss: 8352058.5, CrossEntropy: 1.3093537092208862, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.9031982421875\n",
      "Iter 164 / 2000, Loss: 145226746.8125, CrossEntropy: 0.014815591275691986, Accuracy: 0.9953045076726342\n",
      "EVALUATION with last weights -> Loss: 8303677.0, CrossEntropy: 1.2974529266357422, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.964705467224121\n",
      "Iter 165 / 2000, Loss: 138477188.125, CrossEntropy: 0.012111717835068703, Accuracy: 0.9956042199488491\n",
      "EVALUATION with last weights -> Loss: 8431680.0, CrossEntropy: 1.3292886018753052, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.892111539840698\n",
      "Iter 166 / 2000, Loss: 140429590.03125, CrossEntropy: 0.012881203554570675, Accuracy: 0.9957041240409207\n",
      "EVALUATION with last weights -> Loss: 8440624.0, CrossEntropy: 1.3197567462921143, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 12.899235486984253\n",
      "Iter 167 / 2000, Loss: 142186102.96875, CrossEntropy: 0.013575929217040539, Accuracy: 0.9955442774936062\n",
      "EVALUATION with last weights -> Loss: 8434576.0, CrossEntropy: 1.3198153972625732, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 13.603637456893921\n",
      "Iter 168 / 2000, Loss: 136268796.3125, CrossEntropy: 0.011205757036805153, Accuracy: 0.9963834718670077\n",
      "EVALUATION with last weights -> Loss: 8090333.5, CrossEntropy: 1.2779520750045776, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 13.620530128479004\n",
      "Iter 169 / 2000, Loss: 145252014.1875, CrossEntropy: 0.014809142798185349, Accuracy: 0.9954124040920717\n",
      "EVALUATION with last weights -> Loss: 9006171.0, CrossEntropy: 1.443068504333496, Accuracy: 0.8125\n",
      "Elapsed time for the training: 12.919329643249512\n",
      "Iter 170 / 2000, Loss: 135671296.8125, CrossEntropy: 0.010949089191854, Accuracy: 0.9963834718670077\n",
      "EVALUATION with last weights -> Loss: 8316455.0, CrossEntropy: 1.3183244466781616, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.963820934295654\n",
      "Iter 171 / 2000, Loss: 137474416.1875, CrossEntropy: 0.01167027372866869, Accuracy: 0.9959438938618926\n",
      "EVALUATION with last weights -> Loss: 9010909.0, CrossEntropy: 1.4382208585739136, Accuracy: 0.8097310126582279\n",
      "Elapsed time for the training: 12.95885968208313\n",
      "Iter 172 / 2000, Loss: 136268805.6875, CrossEntropy: 0.011203000321984291, Accuracy: 0.9958919437340154\n",
      "EVALUATION with last weights -> Loss: 8822851.0, CrossEntropy: 1.3958989381790161, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 12.917904376983643\n",
      "Iter 173 / 2000, Loss: 139891892.6875, CrossEntropy: 0.012612299993634224, Accuracy: 0.9958040281329923\n",
      "EVALUATION with last weights -> Loss: 8958119.0, CrossEntropy: 1.4052385091781616, Accuracy: 0.8151700949367089\n",
      "Elapsed time for the training: 12.921010494232178\n",
      "Iter 174 / 2000, Loss: 138017277.84375, CrossEntropy: 0.011856131255626678, Accuracy: 0.9956441815856778\n",
      "EVALUATION with last weights -> Loss: 8223036.0, CrossEntropy: 1.2853997945785522, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 12.919860124588013\n",
      "Iter 175 / 2000, Loss: 139273038.90625, CrossEntropy: 0.012354083359241486, Accuracy: 0.9957640664961637\n",
      "EVALUATION with last weights -> Loss: 8608639.0, CrossEntropy: 1.3536698818206787, Accuracy: 0.8097310126582279\n",
      "Elapsed time for the training: 13.752944469451904\n",
      "Iter 176 / 2000, Loss: 138300232.46875, CrossEntropy: 0.011954677291214466, Accuracy: 0.9957041240409207\n",
      "EVALUATION with last weights -> Loss: 8718443.0, CrossEntropy: 1.3938369750976562, Accuracy: 0.8149723101265823\n",
      "Elapsed time for the training: 15.47879409790039\n",
      "Iter 177 / 2000, Loss: 136404615.0, CrossEntropy: 0.011189724318683147, Accuracy: 0.9964234335038363\n",
      "EVALUATION with last weights -> Loss: 8507813.0, CrossEntropy: 1.330365538597107, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 15.647583484649658\n",
      "Iter 178 / 2000, Loss: 138095265.09375, CrossEntropy: 0.011856058612465858, Accuracy: 0.9959239130434783\n",
      "EVALUATION with last weights -> Loss: 8174031.5, CrossEntropy: 1.3024389743804932, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 15.612653017044067\n",
      "Iter 179 / 2000, Loss: 133018603.625, CrossEntropy: 0.00982133112847805, Accuracy: 0.9964234335038363\n",
      "EVALUATION with last weights -> Loss: 8798297.0, CrossEntropy: 1.3898391723632812, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 15.502247095108032\n",
      "Iter 180 / 2000, Loss: 141184405.53125, CrossEntropy: 0.013107388280332088, Accuracy: 0.9957800511508952\n",
      "EVALUATION with last weights -> Loss: 8939353.0, CrossEntropy: 1.4101808071136475, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 15.625800132751465\n",
      "Iter 181 / 2000, Loss: 138408210.9375, CrossEntropy: 0.011981652118265629, Accuracy: 0.9960917519181586\n",
      "EVALUATION with last weights -> Loss: 8563187.0, CrossEntropy: 1.3616018295288086, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 14.272108554840088\n",
      "Iter 182 / 2000, Loss: 137315182.40625, CrossEntropy: 0.011531814001500607, Accuracy: 0.9955322890025575\n",
      "EVALUATION with last weights -> Loss: 9003753.0, CrossEntropy: 1.4413846731185913, Accuracy: 0.8164556962025317\n",
      "Elapsed time for the training: 12.681077480316162\n",
      "Iter 183 / 2000, Loss: 137394747.21875, CrossEntropy: 0.011537070386111736, Accuracy: 0.9960437979539642\n",
      "EVALUATION with last weights -> Loss: 8963770.0, CrossEntropy: 1.4106180667877197, Accuracy: 0.814181170886076\n",
      "Elapsed time for the training: 13.102530002593994\n",
      "Iter 184 / 2000, Loss: 134141813.4375, CrossEntropy: 0.010324534960091114, Accuracy: 0.9963554987212276\n",
      "EVALUATION with last weights -> Loss: 9060391.0, CrossEntropy: 1.4297897815704346, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 13.788577318191528\n",
      "Iter 185 / 2000, Loss: 138818374.78125, CrossEntropy: 0.012098493054509163, Accuracy: 0.9956841432225064\n",
      "EVALUATION with last weights -> Loss: 8776616.0, CrossEntropy: 1.3878310918807983, Accuracy: 0.8107199367088608\n",
      "Elapsed time for the training: 13.751885414123535\n",
      "Iter 186 / 2000, Loss: 137079787.21875, CrossEntropy: 0.011445017531514168, Accuracy: 0.9959598785166242\n",
      "EVALUATION with last weights -> Loss: 8870315.0, CrossEntropy: 1.3934316635131836, Accuracy: 0.8188291139240507\n",
      "Elapsed time for the training: 14.290517091751099\n",
      "Iter 187 / 2000, Loss: 140063783.75, CrossEntropy: 0.01261451467871666, Accuracy: 0.9955123081841433\n",
      "EVALUATION with last weights -> Loss: 8198723.5, CrossEntropy: 1.2981363534927368, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 14.45381474494934\n",
      "Iter 188 / 2000, Loss: 134499301.96875, CrossEntropy: 0.010382232256233692, Accuracy: 0.996431425831202\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 8766301.0, CrossEntropy: 1.3796747922897339, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.913655996322632\n",
      "Iter 189 / 2000, Loss: 138323437.28125, CrossEntropy: 0.011862605810165405, Accuracy: 0.9959239130434783\n",
      "EVALUATION with last weights -> Loss: 8784443.0, CrossEntropy: 1.3834222555160522, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 12.911372900009155\n",
      "Iter 190 / 2000, Loss: 137336666.53125, CrossEntropy: 0.01157291978597641, Accuracy: 0.9962795716112532\n",
      "EVALUATION with last weights -> Loss: 9714815.0, CrossEntropy: 1.5496454238891602, Accuracy: 0.8028085443037974\n",
      "Elapsed time for the training: 12.885945558547974\n",
      "Iter 191 / 2000, Loss: 137651699.5, CrossEntropy: 0.011577090248465538, Accuracy: 0.9962635869565217\n",
      "EVALUATION with last weights -> Loss: 8231008.5, CrossEntropy: 1.2917532920837402, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.908347129821777\n",
      "Iter 192 / 2000, Loss: 133017857.25, CrossEntropy: 0.009718290530145168, Accuracy: 0.9967631074168798\n",
      "EVALUATION with last weights -> Loss: 8628603.0, CrossEntropy: 1.3538458347320557, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 12.92464303970337\n",
      "Iter 193 / 2000, Loss: 134862085.34375, CrossEntropy: 0.0104477284476161, Accuracy: 0.9962835677749361\n",
      "EVALUATION with last weights -> Loss: 8501176.0, CrossEntropy: 1.332027554512024, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.96515679359436\n",
      "Iter 194 / 2000, Loss: 133128003.125, CrossEntropy: 0.00982845388352871, Accuracy: 0.996611253196931\n",
      "EVALUATION with last weights -> Loss: 9383265.0, CrossEntropy: 1.4719586372375488, Accuracy: 0.8126977848101266\n",
      "Elapsed time for the training: 12.971973896026611\n",
      "Iter 195 / 2000, Loss: 140353295.28125, CrossEntropy: 0.012625930830836296, Accuracy: 0.9958040281329923\n",
      "EVALUATION with last weights -> Loss: 9033387.0, CrossEntropy: 1.4470974206924438, Accuracy: 0.8049841772151899\n",
      "Elapsed time for the training: 12.889958381652832\n",
      "Iter 196 / 2000, Loss: 135480130.90625, CrossEntropy: 0.010680584236979485, Accuracy: 0.9965832800511509\n",
      "EVALUATION with last weights -> Loss: 8811838.0, CrossEntropy: 1.394155740737915, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 13.111273765563965\n",
      "Iter 197 / 2000, Loss: 131749104.8125, CrossEntropy: 0.009180359542369843, Accuracy: 0.9967431265984654\n",
      "EVALUATION with last weights -> Loss: 8857027.0, CrossEntropy: 1.4087395668029785, Accuracy: 0.8169501582278481\n",
      "Elapsed time for the training: 15.486401081085205\n",
      "Iter 198 / 2000, Loss: 141316134.125, CrossEntropy: 0.012996667064726353, Accuracy: 0.9956441815856778\n",
      "EVALUATION with last weights -> Loss: 8611660.0, CrossEntropy: 1.3587778806686401, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 13.843653440475464\n",
      "Iter 199 / 2000, Loss: 136245963.9375, CrossEntropy: 0.010955866426229477, Accuracy: 0.9964833759590793\n",
      "EVALUATION with last weights -> Loss: 9299526.0, CrossEntropy: 1.4597842693328857, Accuracy: 0.8122033227848101\n",
      "Elapsed time for the training: 13.745110750198364\n",
      "Iter 200 / 2000, Loss: 130182081.96875, CrossEntropy: 0.008526262827217579, Accuracy: 0.9969429347826086\n",
      "EVALUATION with last weights -> Loss: 9138960.0, CrossEntropy: 1.4345954656600952, Accuracy: 0.8173457278481012\n",
      "Elapsed time for the training: 13.742698907852173\n",
      "Iter 201 / 2000, Loss: 135872557.90625, CrossEntropy: 0.010789267718791962, Accuracy: 0.9962835677749361\n",
      "EVALUATION with last weights -> Loss: 9020548.0, CrossEntropy: 1.4305812120437622, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 13.714482545852661\n",
      "Iter 202 / 2000, Loss: 135288602.4375, CrossEntropy: 0.010549526661634445, Accuracy: 0.996463395140665\n",
      "EVALUATION with last weights -> Loss: 8500808.0, CrossEntropy: 1.3384307622909546, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 13.689576864242554\n",
      "Iter 203 / 2000, Loss: 135496885.40625, CrossEntropy: 0.010649296455085278, Accuracy: 0.9962915601023018\n",
      "EVALUATION with last weights -> Loss: 9187587.0, CrossEntropy: 1.4355660676956177, Accuracy: 0.8136867088607594\n",
      "Elapsed time for the training: 13.76699686050415\n",
      "Iter 204 / 2000, Loss: 133178128.9375, CrossEntropy: 0.009694266133010387, Accuracy: 0.9966632033248082\n",
      "EVALUATION with last weights -> Loss: 8726416.0, CrossEntropy: 1.3817428350448608, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 13.733572483062744\n",
      "Iter 205 / 2000, Loss: 138452829.96875, CrossEntropy: 0.011791526339948177, Accuracy: 0.9960038363171355\n",
      "EVALUATION with last weights -> Loss: 8736323.0, CrossEntropy: 1.3810409307479858, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 13.709348678588867\n",
      "Iter 206 / 2000, Loss: 134482214.71875, CrossEntropy: 0.010204371996223927, Accuracy: 0.9967031649616368\n",
      "EVALUATION with last weights -> Loss: 8685332.0, CrossEntropy: 1.3630681037902832, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 13.6709725856781\n",
      "Iter 207 / 2000, Loss: 135691356.90625, CrossEntropy: 0.010672219097614288, Accuracy: 0.9962036445012787\n",
      "EVALUATION with last weights -> Loss: 9419080.0, CrossEntropy: 1.4754109382629395, Accuracy: 0.8140822784810127\n",
      "Elapsed time for the training: 12.913306951522827\n",
      "Iter 208 / 2000, Loss: 133714276.375, CrossEntropy: 0.009875915944576263, Accuracy: 0.9967231457800512\n",
      "EVALUATION with last weights -> Loss: 8923175.0, CrossEntropy: 1.3971773386001587, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.910859823226929\n",
      "Iter 209 / 2000, Loss: 134979334.78125, CrossEntropy: 0.010372922755777836, Accuracy: 0.9962236253196931\n",
      "EVALUATION with last weights -> Loss: 8830979.0, CrossEntropy: 1.3821008205413818, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.911298990249634\n",
      "Iter 210 / 2000, Loss: 130759615.09375, CrossEntropy: 0.008678276091814041, Accuracy: 0.997022858056266\n",
      "EVALUATION with last weights -> Loss: 8775053.0, CrossEntropy: 1.4016027450561523, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 13.338135242462158\n",
      "Iter 211 / 2000, Loss: 134706528.84375, CrossEntropy: 0.010248180478811264, Accuracy: 0.9964234335038363\n",
      "EVALUATION with last weights -> Loss: 9383565.0, CrossEntropy: 1.4726210832595825, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 13.76715087890625\n",
      "Iter 212 / 2000, Loss: 134361356.875, CrossEntropy: 0.01010201871395111, Accuracy: 0.9963634910485933\n",
      "EVALUATION with last weights -> Loss: 8699545.0, CrossEntropy: 1.3646819591522217, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 13.766491889953613\n",
      "Iter 213 / 2000, Loss: 134321951.90625, CrossEntropy: 0.010083988308906555, Accuracy: 0.9965033567774936\n",
      "EVALUATION with last weights -> Loss: 9110563.0, CrossEntropy: 1.441542625427246, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 13.709830522537231\n",
      "Iter 214 / 2000, Loss: 134569536.03125, CrossEntropy: 0.010173316113650799, Accuracy: 0.9962835677749361\n",
      "EVALUATION with last weights -> Loss: 8898730.0, CrossEntropy: 1.4140019416809082, Accuracy: 0.8161590189873418\n",
      "Elapsed time for the training: 13.743354558944702\n",
      "Iter 215 / 2000, Loss: 131570156.0625, CrossEntropy: 0.008978018537163734, Accuracy: 0.9970308503836317\n",
      "EVALUATION with last weights -> Loss: 8679601.0, CrossEntropy: 1.3816524744033813, Accuracy: 0.8195213607594937\n",
      "Elapsed time for the training: 13.795594930648804\n",
      "Iter 216 / 2000, Loss: 138430754.3125, CrossEntropy: 0.011700613424181938, Accuracy: 0.9964234335038363\n",
      "EVALUATION with last weights -> Loss: 8803669.0, CrossEntropy: 1.4007084369659424, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 13.533988952636719\n",
      "Iter 217 / 2000, Loss: 131738593.375, CrossEntropy: 0.009029693901538849, Accuracy: 0.9967910805626599\n",
      "EVALUATION with last weights -> Loss: 9298187.0, CrossEntropy: 1.4690479040145874, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.885900497436523\n",
      "Iter 218 / 2000, Loss: 130708511.375, CrossEntropy: 0.008597057312726974, Accuracy: 0.9969429347826086\n",
      "EVALUATION with last weights -> Loss: 9266385.0, CrossEntropy: 1.4519944190979004, Accuracy: 0.8072587025316456\n",
      "Elapsed time for the training: 12.863354682922363\n",
      "Iter 219 / 2000, Loss: 136795119.40625, CrossEntropy: 0.011050081811845303, Accuracy: 0.9961716751918159\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 9357598.0, CrossEntropy: 1.474682092666626, Accuracy: 0.8162579113924051\n",
      "Elapsed time for the training: 12.921977043151855\n",
      "Iter 220 / 2000, Loss: 132096514.0, CrossEntropy: 0.009137862361967564, Accuracy: 0.996962915601023\n",
      "EVALUATION with last weights -> Loss: 9249302.0, CrossEntropy: 1.4732969999313354, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.90089225769043\n",
      "Iter 221 / 2000, Loss: 137894934.875, CrossEntropy: 0.011448484845459461, Accuracy: 0.9963634910485933\n",
      "EVALUATION with last weights -> Loss: 8473339.0, CrossEntropy: 1.3518013954162598, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.903600454330444\n",
      "Iter 222 / 2000, Loss: 130711581.3125, CrossEntropy: 0.008572789840400219, Accuracy: 0.9970028772378516\n",
      "EVALUATION with last weights -> Loss: 9357674.0, CrossEntropy: 1.4627834558486938, Accuracy: 0.8110166139240507\n",
      "Elapsed time for the training: 12.932862997055054\n",
      "Iter 223 / 2000, Loss: 136873858.03125, CrossEntropy: 0.011023691855370998, Accuracy: 0.9966632033248082\n",
      "EVALUATION with last weights -> Loss: 8961906.0, CrossEntropy: 1.4019064903259277, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.959144830703735\n",
      "Iter 224 / 2000, Loss: 133230230.0625, CrossEntropy: 0.00956149771809578, Accuracy: 0.9967031649616368\n",
      "EVALUATION with last weights -> Loss: 8848553.0, CrossEntropy: 1.4103443622589111, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.912817001342773\n",
      "Iter 225 / 2000, Loss: 132172348.78125, CrossEntropy: 0.009133542887866497, Accuracy: 0.99690297314578\n",
      "EVALUATION with last weights -> Loss: 9198436.0, CrossEntropy: 1.446742057800293, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.884179592132568\n",
      "Iter 226 / 2000, Loss: 131261939.40625, CrossEntropy: 0.008758451789617538, Accuracy: 0.9965033567774936\n",
      "EVALUATION with last weights -> Loss: 9533599.0, CrossEntropy: 1.511147141456604, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 12.906741380691528\n",
      "Iter 227 / 2000, Loss: 131541724.125, CrossEntropy: 0.008873697370290756, Accuracy: 0.9966432225063938\n",
      "EVALUATION with last weights -> Loss: 9273223.0, CrossEntropy: 1.4647300243377686, Accuracy: 0.8145767405063291\n",
      "Elapsed time for the training: 12.915999174118042\n",
      "Iter 228 / 2000, Loss: 133549254.90625, CrossEntropy: 0.009663322009146214, Accuracy: 0.9967631074168798\n",
      "EVALUATION with last weights -> Loss: 9099096.0, CrossEntropy: 1.4570229053497314, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 13.295172691345215\n",
      "Iter 229 / 2000, Loss: 135000082.78125, CrossEntropy: 0.010258852504193783, Accuracy: 0.9964114450127878\n",
      "EVALUATION with last weights -> Loss: 9005144.0, CrossEntropy: 1.4232648611068726, Accuracy: 0.8157634493670886\n",
      "Elapsed time for the training: 12.873943567276001\n",
      "Iter 230 / 2000, Loss: 134806575.90625, CrossEntropy: 0.010145172476768494, Accuracy: 0.9965033567774936\n",
      "EVALUATION with last weights -> Loss: 8750544.0, CrossEntropy: 1.3724907636642456, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.911991119384766\n",
      "Iter 231 / 2000, Loss: 131222194.03125, CrossEntropy: 0.008777199313044548, Accuracy: 0.9968510230179028\n",
      "EVALUATION with last weights -> Loss: 8984161.0, CrossEntropy: 1.4230952262878418, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 12.894280195236206\n",
      "Iter 232 / 2000, Loss: 134766670.53125, CrossEntropy: 0.010116090066730976, Accuracy: 0.9967231457800512\n",
      "EVALUATION with last weights -> Loss: 9161236.0, CrossEntropy: 1.448509693145752, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.946869850158691\n",
      "Iter 233 / 2000, Loss: 130013886.34375, CrossEntropy: 0.008236197754740715, Accuracy: 0.9969709079283888\n",
      "EVALUATION with last weights -> Loss: 9016315.0, CrossEntropy: 1.4098076820373535, Accuracy: 0.8171479430379747\n",
      "Elapsed time for the training: 12.980574131011963\n",
      "Iter 234 / 2000, Loss: 131191262.3125, CrossEntropy: 0.008676416240632534, Accuracy: 0.9972826086956522\n",
      "EVALUATION with last weights -> Loss: 9202383.0, CrossEntropy: 1.4378849267959595, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.961609363555908\n",
      "Iter 235 / 2000, Loss: 131344319.59375, CrossEntropy: 0.008733627386391163, Accuracy: 0.997082800511509\n",
      "EVALUATION with last weights -> Loss: 9602329.0, CrossEntropy: 1.5112502574920654, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 12.962826251983643\n",
      "Iter 236 / 2000, Loss: 133175937.4375, CrossEntropy: 0.009488552808761597, Accuracy: 0.9968909846547315\n",
      "EVALUATION with last weights -> Loss: 9510432.0, CrossEntropy: 1.5145776271820068, Accuracy: 0.8088409810126582\n",
      "Elapsed time for the training: 12.914003610610962\n",
      "Iter 237 / 2000, Loss: 131717235.125, CrossEntropy: 0.008860639296472073, Accuracy: 0.996962915601023\n",
      "EVALUATION with last weights -> Loss: 9127789.0, CrossEntropy: 1.441502332687378, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.917543888092041\n",
      "Iter 238 / 2000, Loss: 133627314.5625, CrossEntropy: 0.009615842252969742, Accuracy: 0.9961836636828645\n",
      "EVALUATION with last weights -> Loss: 9176464.0, CrossEntropy: 1.4363315105438232, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 12.984456539154053\n",
      "Iter 239 / 2000, Loss: 131127667.875, CrossEntropy: 0.008611726574599743, Accuracy: 0.9966232416879796\n",
      "EVALUATION with last weights -> Loss: 9733451.0, CrossEntropy: 1.52090322971344, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 12.91324496269226\n",
      "Iter 240 / 2000, Loss: 131438110.0625, CrossEntropy: 0.008727217093110085, Accuracy: 0.996962915601023\n",
      "EVALUATION with last weights -> Loss: 9136174.0, CrossEntropy: 1.4423736333847046, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.908149242401123\n",
      "Iter 241 / 2000, Loss: 132955463.15625, CrossEntropy: 0.00932579766958952, Accuracy: 0.9968829923273658\n",
      "EVALUATION with last weights -> Loss: 9684776.0, CrossEntropy: 1.533732533454895, Accuracy: 0.8125\n",
      "Elapsed time for the training: 12.958529233932495\n",
      "Iter 242 / 2000, Loss: 133220149.03125, CrossEntropy: 0.009423729963600636, Accuracy: 0.9967631074168798\n",
      "EVALUATION with last weights -> Loss: 9472141.0, CrossEntropy: 1.4835363626480103, Accuracy: 0.8126977848101266\n",
      "Elapsed time for the training: 13.363643407821655\n",
      "Iter 243 / 2000, Loss: 129350802.46875, CrossEntropy: 0.007871005684137344, Accuracy: 0.9973825127877238\n",
      "EVALUATION with last weights -> Loss: 10645499.0, CrossEntropy: 1.6756348609924316, Accuracy: 0.8033030063291139\n",
      "Elapsed time for the training: 13.809519290924072\n",
      "Iter 244 / 2000, Loss: 130319409.0, CrossEntropy: 0.008264441043138504, Accuracy: 0.9972506393861893\n",
      "EVALUATION with last weights -> Loss: 9216524.0, CrossEntropy: 1.4410549402236938, Accuracy: 0.8133900316455697\n",
      "Elapsed time for the training: 13.427075862884521\n",
      "Iter 245 / 2000, Loss: 135164685.875, CrossEntropy: 0.01018854882568121, Accuracy: 0.9965233375959079\n",
      "EVALUATION with last weights -> Loss: 9412988.0, CrossEntropy: 1.4903578758239746, Accuracy: 0.810126582278481\n",
      "Elapsed time for the training: 12.901845932006836\n",
      "Iter 246 / 2000, Loss: 131386185.625, CrossEntropy: 0.008661328814923763, Accuracy: 0.997082800511509\n",
      "EVALUATION with last weights -> Loss: 8960110.0, CrossEntropy: 1.4200323820114136, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.905919790267944\n",
      "Iter 247 / 2000, Loss: 131747314.78125, CrossEntropy: 0.008797899819910526, Accuracy: 0.996843030690537\n",
      "EVALUATION with last weights -> Loss: 9195369.0, CrossEntropy: 1.460431694984436, Accuracy: 0.8132911392405063\n",
      "Elapsed time for the training: 12.9797945022583\n",
      "Iter 248 / 2000, Loss: 131296414.21875, CrossEntropy: 0.008610964752733707, Accuracy: 0.997022858056266\n",
      "EVALUATION with last weights -> Loss: 9120723.0, CrossEntropy: 1.429121494293213, Accuracy: 0.814181170886076\n",
      "Elapsed time for the training: 12.925064325332642\n",
      "Iter 249 / 2000, Loss: 130493482.15625, CrossEntropy: 0.008290800265967846, Accuracy: 0.9973425511508951\n",
      "EVALUATION with last weights -> Loss: 9221452.0, CrossEntropy: 1.4462846517562866, Accuracy: 0.8160601265822784\n",
      "Elapsed time for the training: 12.878683090209961\n",
      "Iter 250 / 2000, Loss: 130914135.8125, CrossEntropy: 0.008443623781204224, Accuracy: 0.996962915601023\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 9069824.0, CrossEntropy: 1.427810549736023, Accuracy: 0.8166534810126582\n",
      "Elapsed time for the training: 12.918680191040039\n",
      "Iter 251 / 2000, Loss: 132638570.4375, CrossEntropy: 0.009328343905508518, Accuracy: 0.9968710038363172\n",
      "EVALUATION with last weights -> Loss: 9590563.0, CrossEntropy: 1.50098717212677, Accuracy: 0.8129944620253164\n",
      "Elapsed time for the training: 12.93363618850708\n",
      "Iter 252 / 2000, Loss: 134478920.5625, CrossEntropy: 0.009853893890976906, Accuracy: 0.9968630115089514\n",
      "EVALUATION with last weights -> Loss: 9216189.0, CrossEntropy: 1.4471427202224731, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 12.906074285507202\n",
      "Iter 253 / 2000, Loss: 130388289.4375, CrossEntropy: 0.008220442570745945, Accuracy: 0.9971107736572891\n",
      "EVALUATION with last weights -> Loss: 9577536.0, CrossEntropy: 1.4976091384887695, Accuracy: 0.8117088607594937\n",
      "Elapsed time for the training: 12.955293893814087\n",
      "Iter 254 / 2000, Loss: 129531570.75, CrossEntropy: 0.0078669972717762, Accuracy: 0.9972826086956522\n",
      "EVALUATION with last weights -> Loss: 9660931.0, CrossEntropy: 1.525744080543518, Accuracy: 0.8147745253164557\n",
      "Elapsed time for the training: 12.992123126983643\n",
      "Iter 255 / 2000, Loss: 131756469.53125, CrossEntropy: 0.008744503371417522, Accuracy: 0.997022858056266\n",
      "EVALUATION with last weights -> Loss: 9538728.0, CrossEntropy: 1.4958187341690063, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.941290616989136\n",
      "Iter 256 / 2000, Loss: 134794741.375, CrossEntropy: 0.009950978681445122, Accuracy: 0.9965233375959079\n",
      "EVALUATION with last weights -> Loss: 9632591.0, CrossEntropy: 1.5157326459884644, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.913728475570679\n",
      "Iter 257 / 2000, Loss: 130930248.96875, CrossEntropy: 0.00840018130838871, Accuracy: 0.997022858056266\n",
      "EVALUATION with last weights -> Loss: 9056140.0, CrossEntropy: 1.4550795555114746, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.841135740280151\n",
      "Iter 258 / 2000, Loss: 132079098.1875, CrossEntropy: 0.008851424790918827, Accuracy: 0.9971227621483376\n",
      "EVALUATION with last weights -> Loss: 9154815.0, CrossEntropy: 1.4657588005065918, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.916098356246948\n",
      "Iter 259 / 2000, Loss: 131370707.4375, CrossEntropy: 0.008561252616345882, Accuracy: 0.9972226662404092\n",
      "EVALUATION with last weights -> Loss: 9246570.0, CrossEntropy: 1.468483328819275, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 13.002938270568848\n",
      "Iter 260 / 2000, Loss: 132665711.375, CrossEntropy: 0.009073766879737377, Accuracy: 0.9971227621483376\n",
      "EVALUATION with last weights -> Loss: 9152862.0, CrossEntropy: 1.4428141117095947, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 13.002553462982178\n",
      "Iter 261 / 2000, Loss: 129942021.9375, CrossEntropy: 0.007976280525326729, Accuracy: 0.9972626278772379\n",
      "EVALUATION with last weights -> Loss: 9612734.0, CrossEntropy: 1.5229519605636597, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.923367023468018\n",
      "Iter 262 / 2000, Loss: 133952331.03125, CrossEntropy: 0.009591903537511826, Accuracy: 0.9969389386189259\n",
      "EVALUATION with last weights -> Loss: 9295662.0, CrossEntropy: 1.4632658958435059, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 13.010895252227783\n",
      "Iter 263 / 2000, Loss: 129657844.625, CrossEntropy: 0.00785059854388237, Accuracy: 0.9972826086956522\n",
      "EVALUATION with last weights -> Loss: 9384496.0, CrossEntropy: 1.4914723634719849, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.931002140045166\n",
      "Iter 264 / 2000, Loss: 130227210.0, CrossEntropy: 0.008068366907536983, Accuracy: 0.9971227621483376\n",
      "EVALUATION with last weights -> Loss: 9250116.0, CrossEntropy: 1.4667799472808838, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.903663396835327\n",
      "Iter 265 / 2000, Loss: 130410127.625, CrossEntropy: 0.0081533994525671, Accuracy: 0.9971906969309463\n",
      "EVALUATION with last weights -> Loss: 9288623.0, CrossEntropy: 1.4926196336746216, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.939745664596558\n",
      "Iter 266 / 2000, Loss: 131428396.9375, CrossEntropy: 0.008534871973097324, Accuracy: 0.996962915601023\n",
      "EVALUATION with last weights -> Loss: 8961228.0, CrossEntropy: 1.438854455947876, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.902365922927856\n",
      "Iter 267 / 2000, Loss: 126688096.0, CrossEntropy: 0.006722635589540005, Accuracy: 0.997730179028133\n",
      "EVALUATION with last weights -> Loss: 10611585.0, CrossEntropy: 1.6899298429489136, Accuracy: 0.8003362341772152\n",
      "Elapsed time for the training: 12.884337902069092\n",
      "Iter 268 / 2000, Loss: 129267899.0625, CrossEntropy: 0.007675350643694401, Accuracy: 0.9974904092071611\n",
      "EVALUATION with last weights -> Loss: 9746348.0, CrossEntropy: 1.5307029485702515, Accuracy: 0.8161590189873418\n",
      "Elapsed time for the training: 12.894802808761597\n",
      "Iter 269 / 2000, Loss: 133888765.28125, CrossEntropy: 0.009518136270344257, Accuracy: 0.9967511189258312\n",
      "EVALUATION with last weights -> Loss: 9423777.0, CrossEntropy: 1.5004891157150269, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.892165184020996\n",
      "Iter 270 / 2000, Loss: 128978919.625, CrossEntropy: 0.0075392452999949455, Accuracy: 0.9974504475703325\n",
      "EVALUATION with last weights -> Loss: 9930789.0, CrossEntropy: 1.5630719661712646, Accuracy: 0.8143789556962026\n",
      "Elapsed time for the training: 12.92359471321106\n",
      "Iter 271 / 2000, Loss: 132546336.4375, CrossEntropy: 0.008953378535807133, Accuracy: 0.9969828964194374\n",
      "EVALUATION with last weights -> Loss: 9409206.0, CrossEntropy: 1.4778938293457031, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.91689682006836\n",
      "Iter 272 / 2000, Loss: 131376125.875, CrossEntropy: 0.008490893989801407, Accuracy: 0.9969908887468031\n",
      "EVALUATION with last weights -> Loss: 9354714.0, CrossEntropy: 1.4871336221694946, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 12.957075595855713\n",
      "Iter 273 / 2000, Loss: 128187282.9375, CrossEntropy: 0.0071892463602125645, Accuracy: 0.9973825127877238\n",
      "EVALUATION with last weights -> Loss: 9356505.0, CrossEntropy: 1.483304500579834, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.91068172454834\n",
      "Iter 274 / 2000, Loss: 131147762.125, CrossEntropy: 0.008370386436581612, Accuracy: 0.9970428388746803\n",
      "EVALUATION with last weights -> Loss: 9233097.0, CrossEntropy: 1.4524033069610596, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 12.962023258209229\n",
      "Iter 283 / 2000, Loss: 132621801.53125, CrossEntropy: 0.008890346623957157, Accuracy: 0.9970428388746803\n",
      "EVALUATION with last weights -> Loss: 9167690.0, CrossEntropy: 1.4617726802825928, Accuracy: 0.8174446202531646\n",
      "Elapsed time for the training: 12.904738664627075\n",
      "Iter 284 / 2000, Loss: 127975874.15625, CrossEntropy: 0.007035121787339449, Accuracy: 0.9974824168797954\n",
      "EVALUATION with last weights -> Loss: 8935016.0, CrossEntropy: 1.4057772159576416, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.95238709449768\n",
      "Iter 285 / 2000, Loss: 129862271.0625, CrossEntropy: 0.007773151155561209, Accuracy: 0.9972226662404092\n",
      "EVALUATION with last weights -> Loss: 9176118.0, CrossEntropy: 1.4576338529586792, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 12.920693397521973\n",
      "Iter 286 / 2000, Loss: 129658340.96875, CrossEntropy: 0.007698483299463987, Accuracy: 0.9972506393861893\n",
      "EVALUATION with last weights -> Loss: 9037840.0, CrossEntropy: 1.4317829608917236, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.869200944900513\n",
      "Iter 287 / 2000, Loss: 128071474.78125, CrossEntropy: 0.007050994783639908, Accuracy: 0.9975023976982097\n",
      "EVALUATION with last weights -> Loss: 9567048.0, CrossEntropy: 1.540414571762085, Accuracy: 0.8149723101265823\n",
      "Elapsed time for the training: 12.774121522903442\n",
      "Iter 288 / 2000, Loss: 126533821.3125, CrossEntropy: 0.006421955768018961, Accuracy: 0.9977621483375959\n",
      "EVALUATION with last weights -> Loss: 9115519.0, CrossEntropy: 1.4339888095855713, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 13.72122073173523\n",
      "Iter 289 / 2000, Loss: 130965687.09375, CrossEntropy: 0.008186251856386662, Accuracy: 0.9972626278772379\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 9375432.0, CrossEntropy: 1.4859099388122559, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 13.00073766708374\n",
      "Iter 290 / 2000, Loss: 128109461.4375, CrossEntropy: 0.007038835436105728, Accuracy: 0.9976822250639387\n",
      "EVALUATION with last weights -> Loss: 9186470.0, CrossEntropy: 1.4457753896713257, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 15.147976398468018\n",
      "Iter 291 / 2000, Loss: 131383159.46875, CrossEntropy: 0.008338775485754013, Accuracy: 0.9971027813299232\n",
      "EVALUATION with last weights -> Loss: 9473822.0, CrossEntropy: 1.487909197807312, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 15.627595663070679\n",
      "Iter 292 / 2000, Loss: 126961373.9375, CrossEntropy: 0.00657313410192728, Accuracy: 0.997790121483376\n",
      "EVALUATION with last weights -> Loss: 9343525.0, CrossEntropy: 1.468070387840271, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 13.104212760925293\n",
      "Iter 293 / 2000, Loss: 129772812.5, CrossEntropy: 0.007699060719460249, Accuracy: 0.9973505434782609\n",
      "EVALUATION with last weights -> Loss: 9833352.0, CrossEntropy: 1.5406482219696045, Accuracy: 0.8140822784810127\n",
      "Elapsed time for the training: 12.885719060897827\n",
      "Iter 294 / 2000, Loss: 125182980.5625, CrossEntropy: 0.005861968267709017, Accuracy: 0.9978700447570332\n",
      "EVALUATION with last weights -> Loss: 10392342.0, CrossEntropy: 1.6322057247161865, Accuracy: 0.8135878164556962\n",
      "Elapsed time for the training: 12.92676305770874\n",
      "Iter 295 / 2000, Loss: 132908531.5625, CrossEntropy: 0.008920843712985516, Accuracy: 0.9967431265984654\n",
      "EVALUATION with last weights -> Loss: 10214529.0, CrossEntropy: 1.667484998703003, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 12.928923606872559\n",
      "Iter 296 / 2000, Loss: 127310411.46875, CrossEntropy: 0.006678313948214054, Accuracy: 0.9975623401534527\n",
      "EVALUATION with last weights -> Loss: 10500036.0, CrossEntropy: 1.6615703105926514, Accuracy: 0.7984572784810127\n",
      "Elapsed time for the training: 12.936721801757812\n",
      "Iter 297 / 2000, Loss: 130647963.71875, CrossEntropy: 0.008003748953342438, Accuracy: 0.9973425511508951\n",
      "EVALUATION with last weights -> Loss: 10053010.0, CrossEntropy: 1.5839512348175049, Accuracy: 0.8112143987341772\n",
      "Elapsed time for the training: 12.938900470733643\n",
      "Iter 298 / 2000, Loss: 127063732.40625, CrossEntropy: 0.00656420411542058, Accuracy: 0.9979219948849105\n",
      "EVALUATION with last weights -> Loss: 9328233.0, CrossEntropy: 1.4968234300613403, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 13.04664158821106\n",
      "Iter 299 / 2000, Loss: 130985502.46875, CrossEntropy: 0.008126051165163517, Accuracy: 0.9972426470588235\n",
      "EVALUATION with last weights -> Loss: 9970001.0, CrossEntropy: 1.5863027572631836, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 12.936011791229248\n",
      "Iter 300 / 2000, Loss: 131129195.0625, CrossEntropy: 0.008175200782716274, Accuracy: 0.9976023017902813\n",
      "EVALUATION with last weights -> Loss: 9958238.0, CrossEntropy: 1.5755424499511719, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 12.949319839477539\n",
      "Iter 301 / 2000, Loss: 127594189.0, CrossEntropy: 0.006755196489393711, Accuracy: 0.9977022058823529\n",
      "EVALUATION with last weights -> Loss: 9534799.0, CrossEntropy: 1.5337849855422974, Accuracy: 0.8105221518987342\n",
      "Elapsed time for the training: 12.924317598342896\n",
      "Iter 302 / 2000, Loss: 132177223.15625, CrossEntropy: 0.00858228001743555, Accuracy: 0.9971627237851662\n",
      "EVALUATION with last weights -> Loss: 9787558.0, CrossEntropy: 1.5435612201690674, Accuracy: 0.8129944620253164\n",
      "Elapsed time for the training: 12.941032886505127\n",
      "Iter 303 / 2000, Loss: 130974178.71875, CrossEntropy: 0.008139034733176231, Accuracy: 0.9971107736572891\n",
      "EVALUATION with last weights -> Loss: 9260549.0, CrossEntropy: 1.4555561542510986, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 14.893044233322144\n",
      "Iter 304 / 2000, Loss: 129235882.6875, CrossEntropy: 0.007390867918729782, Accuracy: 0.9973425511508951\n",
      "EVALUATION with last weights -> Loss: 9653523.0, CrossEntropy: 1.5235613584518433, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 15.45216965675354\n",
      "Iter 305 / 2000, Loss: 130447303.375, CrossEntropy: 0.007880432531237602, Accuracy: 0.9973505434782609\n",
      "EVALUATION with last weights -> Loss: 9879742.0, CrossEntropy: 1.5698133707046509, Accuracy: 0.8113132911392406\n",
      "Elapsed time for the training: 12.877238988876343\n",
      "Iter 306 / 2000, Loss: 129918096.5, CrossEntropy: 0.00765238469466567, Accuracy: 0.9972426470588235\n",
      "EVALUATION with last weights -> Loss: 9424400.0, CrossEntropy: 1.4972110986709595, Accuracy: 0.8161590189873418\n",
      "Elapsed time for the training: 12.903746604919434\n",
      "Iter 307 / 2000, Loss: 124903061.4375, CrossEntropy: 0.005638536065816879, Accuracy: 0.9980418797953964\n",
      "EVALUATION with last weights -> Loss: 9467522.0, CrossEntropy: 1.4838225841522217, Accuracy: 0.8195213607594937\n",
      "Elapsed time for the training: 12.932996034622192\n",
      "Iter 308 / 2000, Loss: 130178035.8125, CrossEntropy: 0.007739221677184105, Accuracy: 0.9971827046035806\n",
      "EVALUATION with last weights -> Loss: 9554905.0, CrossEntropy: 1.494025707244873, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.873274803161621\n",
      "Iter 309 / 2000, Loss: 131991316.71875, CrossEntropy: 0.008456566371023655, Accuracy: 0.9972626278772379\n",
      "EVALUATION with last weights -> Loss: 9457902.0, CrossEntropy: 1.4843934774398804, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.91862940788269\n",
      "Iter 310 / 2000, Loss: 127061367.34375, CrossEntropy: 0.006480157840996981, Accuracy: 0.9979819373401535\n",
      "EVALUATION with last weights -> Loss: 10006659.0, CrossEntropy: 1.5762782096862793, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.925524711608887\n",
      "Iter 311 / 2000, Loss: 130116706.125, CrossEntropy: 0.007716989144682884, Accuracy: 0.9975703324808184\n",
      "EVALUATION with last weights -> Loss: 10492500.0, CrossEntropy: 1.6555885076522827, Accuracy: 0.8044897151898734\n",
      "Elapsed time for the training: 12.917169570922852\n",
      "Iter 312 / 2000, Loss: 130796801.96875, CrossEntropy: 0.007963555864989758, Accuracy: 0.9974824168797954\n",
      "EVALUATION with last weights -> Loss: 9629058.0, CrossEntropy: 1.5297627449035645, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.909830570220947\n",
      "Iter 313 / 2000, Loss: 127891165.9375, CrossEntropy: 0.006791795138269663, Accuracy: 0.9978220907928389\n",
      "EVALUATION with last weights -> Loss: 9483475.0, CrossEntropy: 1.4922678470611572, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 13.36081075668335\n",
      "Iter 314 / 2000, Loss: 129306980.375, CrossEntropy: 0.0073792580515146255, Accuracy: 0.9973705242966753\n",
      "EVALUATION with last weights -> Loss: 10002568.0, CrossEntropy: 1.610047698020935, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 13.697514772415161\n",
      "Iter 315 / 2000, Loss: 128299465.75, CrossEntropy: 0.006997847929596901, Accuracy: 0.9977501598465474\n",
      "EVALUATION with last weights -> Loss: 9416228.0, CrossEntropy: 1.48311448097229, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 13.743489980697632\n",
      "Iter 316 / 2000, Loss: 127561007.59375, CrossEntropy: 0.00664207199588418, Accuracy: 0.9977022058823529\n",
      "EVALUATION with last weights -> Loss: 10241839.0, CrossEntropy: 1.6209983825683594, Accuracy: 0.8146756329113924\n",
      "Elapsed time for the training: 13.763982772827148\n",
      "Iter 317 / 2000, Loss: 129172985.96875, CrossEntropy: 0.007276910822838545, Accuracy: 0.9974624360613811\n",
      "EVALUATION with last weights -> Loss: 9452893.0, CrossEntropy: 1.4947361946105957, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 13.714274168014526\n",
      "Iter 318 / 2000, Loss: 127216674.3125, CrossEntropy: 0.006491407752037048, Accuracy: 0.9977821291560103\n",
      "EVALUATION with last weights -> Loss: 9313740.0, CrossEntropy: 1.479337453842163, Accuracy: 0.8283227848101266\n",
      "Elapsed time for the training: 12.895864486694336\n",
      "Iter 319 / 2000, Loss: 128572083.9375, CrossEntropy: 0.007083119358867407, Accuracy: 0.9975103900255755\n",
      "EVALUATION with last weights -> Loss: 10603963.0, CrossEntropy: 1.6719225645065308, Accuracy: 0.8087420886075949\n",
      "Elapsed time for the training: 12.901501655578613\n",
      "Iter 320 / 2000, Loss: 130824622.71875, CrossEntropy: 0.007916596718132496, Accuracy: 0.9973825127877238\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 10264341.0, CrossEntropy: 1.6169955730438232, Accuracy: 0.8138844936708861\n",
      "Elapsed time for the training: 12.890974760055542\n",
      "Iter 321 / 2000, Loss: 129642512.3125, CrossEntropy: 0.007436698768287897, Accuracy: 0.9976222826086957\n",
      "EVALUATION with last weights -> Loss: 9377457.0, CrossEntropy: 1.4919509887695312, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.879258394241333\n",
      "Iter 322 / 2000, Loss: 128545954.28125, CrossEntropy: 0.00699407234787941, Accuracy: 0.9976222826086957\n",
      "EVALUATION with last weights -> Loss: 9386333.0, CrossEntropy: 1.475111484527588, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.89163064956665\n",
      "Iter 323 / 2000, Loss: 127831581.90625, CrossEntropy: 0.0067006247118115425, Accuracy: 0.997582320971867\n",
      "EVALUATION with last weights -> Loss: 9479713.0, CrossEntropy: 1.504172682762146, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.901890754699707\n",
      "Iter 324 / 2000, Loss: 129545396.1875, CrossEntropy: 0.007377563044428825, Accuracy: 0.997582320971867\n",
      "EVALUATION with last weights -> Loss: 9354606.0, CrossEntropy: 1.479835033416748, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.957808017730713\n",
      "Iter 325 / 2000, Loss: 130259148.5, CrossEntropy: 0.007657900918275118, Accuracy: 0.9975423593350383\n",
      "EVALUATION with last weights -> Loss: 9345926.0, CrossEntropy: 1.484581470489502, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.942137718200684\n",
      "Iter 326 / 2000, Loss: 130766444.9375, CrossEntropy: 0.007851781323552132, Accuracy: 0.9974024936061381\n",
      "EVALUATION with last weights -> Loss: 9646701.0, CrossEntropy: 1.5114712715148926, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.95819616317749\n",
      "Iter 327 / 2000, Loss: 127033849.1875, CrossEntropy: 0.006353838834911585, Accuracy: 0.9980418797953964\n",
      "EVALUATION with last weights -> Loss: 9585448.0, CrossEntropy: 1.5259249210357666, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 12.871651887893677\n",
      "Iter 328 / 2000, Loss: 128277741.84375, CrossEntropy: 0.006844537798315287, Accuracy: 0.9976222826086957\n",
      "EVALUATION with last weights -> Loss: 9908642.0, CrossEntropy: 1.5594843626022339, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 12.883202314376831\n",
      "Iter 329 / 2000, Loss: 129662023.90625, CrossEntropy: 0.0073924595490098, Accuracy: 0.9975623401534527\n",
      "EVALUATION with last weights -> Loss: 9490894.0, CrossEntropy: 1.5008738040924072, Accuracy: 0.8308939873417721\n",
      "Elapsed time for the training: 12.918488264083862\n",
      "Iter 330 / 2000, Loss: 125237448.78125, CrossEntropy: 0.005629207938909531, Accuracy: 0.9980898337595908\n",
      "EVALUATION with last weights -> Loss: 10843366.0, CrossEntropy: 1.712775707244873, Accuracy: 0.8124011075949367\n",
      "Elapsed time for the training: 12.902617931365967\n",
      "Iter 331 / 2000, Loss: 130553656.1875, CrossEntropy: 0.00773686682805419, Accuracy: 0.9973825127877238\n",
      "EVALUATION with last weights -> Loss: 9684047.0, CrossEntropy: 1.5132317543029785, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 12.913904190063477\n",
      "Iter 332 / 2000, Loss: 129443704.84375, CrossEntropy: 0.007286848966032267, Accuracy: 0.9976222826086957\n",
      "EVALUATION with last weights -> Loss: 9829056.0, CrossEntropy: 1.5578973293304443, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.840777158737183\n",
      "Iter 333 / 2000, Loss: 127678132.5625, CrossEntropy: 0.0065726484172046185, Accuracy: 0.9978220907928389\n",
      "EVALUATION with last weights -> Loss: 9703977.0, CrossEntropy: 1.5257872343063354, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 13.261230230331421\n",
      "Iter 334 / 2000, Loss: 127963898.78125, CrossEntropy: 0.0066796597093343735, Accuracy: 0.9976023017902813\n",
      "EVALUATION with last weights -> Loss: 9318608.0, CrossEntropy: 1.4878219366073608, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.941148042678833\n",
      "Iter 335 / 2000, Loss: 127222881.3125, CrossEntropy: 0.006377364508807659, Accuracy: 0.9979419757033248\n",
      "EVALUATION with last weights -> Loss: 9940812.0, CrossEntropy: 1.5756293535232544, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.898920059204102\n",
      "Iter 336 / 2000, Loss: 127193399.40625, CrossEntropy: 0.006370733492076397, Accuracy: 0.9978300831202046\n",
      "EVALUATION with last weights -> Loss: 9968120.0, CrossEntropy: 1.5916825532913208, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.886255025863647\n",
      "Iter 337 / 2000, Loss: 127104961.6875, CrossEntropy: 0.006326466798782349, Accuracy: 0.9979499680306906\n",
      "EVALUATION with last weights -> Loss: 9975790.0, CrossEntropy: 1.580238699913025, Accuracy: 0.8168512658227848\n",
      "Elapsed time for the training: 12.913516759872437\n",
      "Iter 338 / 2000, Loss: 129284703.875, CrossEntropy: 0.007180511485785246, Accuracy: 0.9973425511508951\n",
      "EVALUATION with last weights -> Loss: 9690195.0, CrossEntropy: 1.5150043964385986, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.941089153289795\n",
      "Iter 339 / 2000, Loss: 129496435.5, CrossEntropy: 0.007258433848619461, Accuracy: 0.9974024936061381\n",
      "EVALUATION with last weights -> Loss: 9884702.0, CrossEntropy: 1.554919719696045, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.92382550239563\n",
      "Iter 340 / 2000, Loss: 124015429.375, CrossEntropy: 0.0050620026886463165, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 10093233.0, CrossEntropy: 1.5816564559936523, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.879083156585693\n",
      "Iter 341 / 2000, Loss: 126913474.46875, CrossEntropy: 0.0062206825241446495, Accuracy: 0.9976822250639387\n",
      "EVALUATION with last weights -> Loss: 9666954.0, CrossEntropy: 1.5206395387649536, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 13.703478574752808\n",
      "Iter 342 / 2000, Loss: 130998727.5, CrossEntropy: 0.007840776816010475, Accuracy: 0.997582320971867\n",
      "EVALUATION with last weights -> Loss: 10120694.0, CrossEntropy: 1.5982238054275513, Accuracy: 0.8137856012658228\n",
      "Elapsed time for the training: 13.72253966331482\n",
      "Iter 343 / 2000, Loss: 127318387.6875, CrossEntropy: 0.006362654734402895, Accuracy: 0.9980418797953964\n",
      "EVALUATION with last weights -> Loss: 9899625.0, CrossEntropy: 1.5468794107437134, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 13.301363706588745\n",
      "Iter 344 / 2000, Loss: 130396494.75, CrossEntropy: 0.00758755300194025, Accuracy: 0.9977821291560103\n",
      "EVALUATION with last weights -> Loss: 10180706.0, CrossEntropy: 1.5913277864456177, Accuracy: 0.817246835443038\n",
      "Elapsed time for the training: 12.9401216506958\n",
      "Iter 345 / 2000, Loss: 129618794.65625, CrossEntropy: 0.007268066983669996, Accuracy: 0.9974824168797954\n",
      "EVALUATION with last weights -> Loss: 9787111.0, CrossEntropy: 1.5410587787628174, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.868703365325928\n",
      "Iter 346 / 2000, Loss: 128232766.90625, CrossEntropy: 0.006707593332976103, Accuracy: 0.9978620524296675\n",
      "EVALUATION with last weights -> Loss: 10188229.0, CrossEntropy: 1.6024022102355957, Accuracy: 0.810818829113924\n",
      "Elapsed time for the training: 12.926656246185303\n",
      "Iter 347 / 2000, Loss: 130039621.34375, CrossEntropy: 0.007437289692461491, Accuracy: 0.9976102941176471\n",
      "EVALUATION with last weights -> Loss: 9808371.0, CrossEntropy: 1.5361205339431763, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.920756816864014\n",
      "Iter 348 / 2000, Loss: 128009552.3125, CrossEntropy: 0.0066175744868814945, Accuracy: 0.997790121483376\n",
      "EVALUATION with last weights -> Loss: 10023946.0, CrossEntropy: 1.5892375707626343, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.905712366104126\n",
      "Iter 349 / 2000, Loss: 125817593.75, CrossEntropy: 0.005729936063289642, Accuracy: 0.9980218989769821\n",
      "EVALUATION with last weights -> Loss: 10003139.0, CrossEntropy: 1.5638642311096191, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.935478448867798\n",
      "Iter 350 / 2000, Loss: 127747206.875, CrossEntropy: 0.006524983327835798, Accuracy: 0.9978900255754476\n",
      "EVALUATION with last weights -> Loss: 9953182.0, CrossEntropy: 1.555293321609497, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.865553379058838\n",
      "Iter 351 / 2000, Loss: 128255969.125, CrossEntropy: 0.006703865248709917, Accuracy: 0.9977701406649616\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 10009249.0, CrossEntropy: 1.5714915990829468, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.870743036270142\n",
      "Iter 352 / 2000, Loss: 128533390.09375, CrossEntropy: 0.00678919767960906, Accuracy: 0.9978220907928389\n",
      "EVALUATION with last weights -> Loss: 10161385.0, CrossEntropy: 1.6092904806137085, Accuracy: 0.8199169303797469\n",
      "Elapsed time for the training: 12.849300384521484\n",
      "Iter 353 / 2000, Loss: 125531894.84375, CrossEntropy: 0.00559941865503788, Accuracy: 0.9980498721227622\n",
      "EVALUATION with last weights -> Loss: 10751108.0, CrossEntropy: 1.6979578733444214, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.868665218353271\n",
      "Iter 354 / 2000, Loss: 127414506.3125, CrossEntropy: 0.006328632589429617, Accuracy: 0.9978820332480819\n",
      "EVALUATION with last weights -> Loss: 9501694.0, CrossEntropy: 1.4980194568634033, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.911387920379639\n",
      "Iter 355 / 2000, Loss: 127731859.46875, CrossEntropy: 0.006449076347053051, Accuracy: 0.9978420716112532\n",
      "EVALUATION with last weights -> Loss: 9756685.0, CrossEntropy: 1.542198657989502, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.91837477684021\n",
      "Iter 356 / 2000, Loss: 127286992.1875, CrossEntropy: 0.0062650046311318874, Accuracy: 0.9976622442455243\n",
      "EVALUATION with last weights -> Loss: 10160052.0, CrossEntropy: 1.6205228567123413, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.826627492904663\n",
      "Iter 357 / 2000, Loss: 126350252.40625, CrossEntropy: 0.005884513724595308, Accuracy: 0.9978420716112532\n",
      "EVALUATION with last weights -> Loss: 10206134.0, CrossEntropy: 1.6032609939575195, Accuracy: 0.8147745253164557\n",
      "Elapsed time for the training: 12.918511629104614\n",
      "Iter 358 / 2000, Loss: 128822019.90625, CrossEntropy: 0.006871846038848162, Accuracy: 0.9978620524296675\n",
      "EVALUATION with last weights -> Loss: 10645444.0, CrossEntropy: 1.674417495727539, Accuracy: 0.8128955696202531\n",
      "Elapsed time for the training: 13.237932443618774\n",
      "Iter 359 / 2000, Loss: 125788787.71875, CrossEntropy: 0.005647319834679365, Accuracy: 0.9980818414322251\n",
      "EVALUATION with last weights -> Loss: 11231314.0, CrossEntropy: 1.7724729776382446, Accuracy: 0.8072587025316456\n",
      "Elapsed time for the training: 12.896206855773926\n",
      "Iter 360 / 2000, Loss: 129683383.84375, CrossEntropy: 0.007196149323135614, Accuracy: 0.9977022058823529\n",
      "EVALUATION with last weights -> Loss: 10424096.0, CrossEntropy: 1.6438379287719727, Accuracy: 0.8120055379746836\n",
      "Elapsed time for the training: 12.915871381759644\n",
      "Iter 361 / 2000, Loss: 125969932.125, CrossEntropy: 0.005705615039914846, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 10646908.0, CrossEntropy: 1.6820358037948608, Accuracy: 0.8136867088607594\n",
      "Elapsed time for the training: 12.882551670074463\n",
      "Iter 362 / 2000, Loss: 130226095.40625, CrossEntropy: 0.007400318514555693, Accuracy: 0.9976023017902813\n",
      "EVALUATION with last weights -> Loss: 9736660.0, CrossEntropy: 1.5213639736175537, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.9349365234375\n",
      "Iter 363 / 2000, Loss: 123603879.15625, CrossEntropy: 0.00477063562721014, Accuracy: 0.9983895460358057\n",
      "EVALUATION with last weights -> Loss: 10395984.0, CrossEntropy: 1.6606214046478271, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 12.887778282165527\n",
      "Iter 364 / 2000, Loss: 132410130.46875, CrossEntropy: 0.008280777372419834, Accuracy: 0.9974704283887468\n",
      "EVALUATION with last weights -> Loss: 9949406.0, CrossEntropy: 1.5946604013442993, Accuracy: 0.8099287974683544\n",
      "Elapsed time for the training: 12.877750873565674\n",
      "Iter 365 / 2000, Loss: 122664084.5625, CrossEntropy: 0.00436421250924468, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 10223210.0, CrossEntropy: 1.6161162853240967, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.893802404403687\n",
      "Iter 366 / 2000, Loss: 130397602.21875, CrossEntropy: 0.007442339323461056, Accuracy: 0.9976023017902813\n",
      "EVALUATION with last weights -> Loss: 10348503.0, CrossEntropy: 1.6318937540054321, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 13.18391489982605\n",
      "Iter 367 / 2000, Loss: 125759983.875, CrossEntropy: 0.005583017133176327, Accuracy: 0.9982217071611253\n",
      "EVALUATION with last weights -> Loss: 9818314.0, CrossEntropy: 1.5651865005493164, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 13.139641523361206\n",
      "Iter 368 / 2000, Loss: 125345302.375, CrossEntropy: 0.0054138824343681335, Accuracy: 0.9981018222506394\n",
      "EVALUATION with last weights -> Loss: 9989149.0, CrossEntropy: 1.5654373168945312, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.903356552124023\n",
      "Iter 369 / 2000, Loss: 128454173.90625, CrossEntropy: 0.006645543966442347, Accuracy: 0.9976222826086957\n",
      "EVALUATION with last weights -> Loss: 10408443.0, CrossEntropy: 1.644415259361267, Accuracy: 0.8157634493670886\n",
      "Elapsed time for the training: 12.897983312606812\n",
      "Iter 370 / 2000, Loss: 129357852.34375, CrossEntropy: 0.0070124524645507336, Accuracy: 0.9977701406649616\n",
      "EVALUATION with last weights -> Loss: 10300080.0, CrossEntropy: 1.6158974170684814, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.902213335037231\n",
      "Iter 371 / 2000, Loss: 126013666.28125, CrossEntropy: 0.005657332018017769, Accuracy: 0.9980218989769821\n",
      "EVALUATION with last weights -> Loss: 10243701.0, CrossEntropy: 1.6238305568695068, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.868656396865845\n",
      "Iter 372 / 2000, Loss: 130585522.8125, CrossEntropy: 0.00749992486089468, Accuracy: 0.9975103900255755\n",
      "EVALUATION with last weights -> Loss: 10305289.0, CrossEntropy: 1.6194941997528076, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.917139768600464\n",
      "Iter 373 / 2000, Loss: 129596371.3125, CrossEntropy: 0.007076582871377468, Accuracy: 0.9979619565217391\n",
      "EVALUATION with last weights -> Loss: 9765917.0, CrossEntropy: 1.5772180557250977, Accuracy: 0.8161590189873418\n",
      "Elapsed time for the training: 12.965441942214966\n",
      "Iter 374 / 2000, Loss: 124555159.46875, CrossEntropy: 0.0050778863951563835, Accuracy: 0.998229699488491\n",
      "EVALUATION with last weights -> Loss: 10215077.0, CrossEntropy: 1.6097874641418457, Accuracy: 0.8160601265822784\n",
      "Elapsed time for the training: 12.90359878540039\n",
      "Iter 375 / 2000, Loss: 129194120.03125, CrossEntropy: 0.00692541990429163, Accuracy: 0.9977501598465474\n",
      "EVALUATION with last weights -> Loss: 10036284.0, CrossEntropy: 1.5796366930007935, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.911117553710938\n",
      "Iter 376 / 2000, Loss: 126793454.6875, CrossEntropy: 0.0059503293596208096, Accuracy: 0.9977101982097187\n",
      "EVALUATION with last weights -> Loss: 10398361.0, CrossEntropy: 1.639751672744751, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.909155368804932\n",
      "Iter 377 / 2000, Loss: 129337568.875, CrossEntropy: 0.006957958452403545, Accuracy: 0.9977022058823529\n",
      "EVALUATION with last weights -> Loss: 9791368.0, CrossEntropy: 1.5648857355117798, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 12.9928457736969\n",
      "Iter 378 / 2000, Loss: 126457004.53125, CrossEntropy: 0.005790227558463812, Accuracy: 0.9980818414322251\n",
      "EVALUATION with last weights -> Loss: 10120695.0, CrossEntropy: 1.5890880823135376, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 12.880090713500977\n",
      "Iter 379 / 2000, Loss: 126930854.625, CrossEntropy: 0.0059729465283453465, Accuracy: 0.9980019181585678\n",
      "EVALUATION with last weights -> Loss: 10276851.0, CrossEntropy: 1.618215799331665, Accuracy: 0.8180379746835443\n",
      "Elapsed time for the training: 12.936304330825806\n",
      "Iter 380 / 2000, Loss: 126149314.0, CrossEntropy: 0.00565381720662117, Accuracy: 0.9978620524296675\n",
      "EVALUATION with last weights -> Loss: 10144395.0, CrossEntropy: 1.6047545671463013, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.881048202514648\n",
      "Iter 381 / 2000, Loss: 126640258.46875, CrossEntropy: 0.005897744558751583, Accuracy: 0.9981297953964194\n",
      "EVALUATION with last weights -> Loss: 9918063.0, CrossEntropy: 1.5631966590881348, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.923047542572021\n",
      "Iter 382 / 2000, Loss: 129738206.625, CrossEntropy: 0.007075686007738113, Accuracy: 0.9975423593350383\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 10060689.0, CrossEntropy: 1.583447813987732, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.925147771835327\n",
      "Iter 383 / 2000, Loss: 124828103.0, CrossEntropy: 0.005106810480356216, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 10057600.0, CrossEntropy: 1.5941742658615112, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.893893480300903\n",
      "Iter 384 / 2000, Loss: 128162871.28125, CrossEntropy: 0.0064329709857702255, Accuracy: 0.9977022058823529\n",
      "EVALUATION with last weights -> Loss: 9755256.0, CrossEntropy: 1.530897617340088, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 12.993441343307495\n",
      "Iter 385 / 2000, Loss: 124593470.28125, CrossEntropy: 0.005002209451049566, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 10218348.0, CrossEntropy: 1.6061084270477295, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 12.88495421409607\n",
      "Iter 386 / 2000, Loss: 128695057.53125, CrossEntropy: 0.0066395592875778675, Accuracy: 0.9978820332480819\n",
      "EVALUATION with last weights -> Loss: 9932611.0, CrossEntropy: 1.570091724395752, Accuracy: 0.8151700949367089\n",
      "Elapsed time for the training: 12.87815809249878\n",
      "Iter 387 / 2000, Loss: 127049467.1875, CrossEntropy: 0.005991217214614153, Accuracy: 0.9980099104859336\n",
      "EVALUATION with last weights -> Loss: 9765152.0, CrossEntropy: 1.5258543491363525, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 12.863625288009644\n",
      "Iter 388 / 2000, Loss: 125301179.0625, CrossEntropy: 0.005264745093882084, Accuracy: 0.9981218030690537\n",
      "EVALUATION with last weights -> Loss: 9997350.0, CrossEntropy: 1.568246841430664, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.870049476623535\n",
      "Iter 389 / 2000, Loss: 129791853.71875, CrossEntropy: 0.007054712623357773, Accuracy: 0.9979020140664961\n",
      "EVALUATION with last weights -> Loss: 10871298.0, CrossEntropy: 1.7215416431427002, Accuracy: 0.8116099683544303\n",
      "Elapsed time for the training: 15.069823265075684\n",
      "Iter 390 / 2000, Loss: 125846601.375, CrossEntropy: 0.005469162482768297, Accuracy: 0.998141783887468\n",
      "EVALUATION with last weights -> Loss: 10188087.0, CrossEntropy: 1.6527491807937622, Accuracy: 0.8132911392405063\n",
      "Elapsed time for the training: 15.728931903839111\n",
      "Iter 391 / 2000, Loss: 126131750.5, CrossEntropy: 0.005605083424597979, Accuracy: 0.9979499680306906\n",
      "EVALUATION with last weights -> Loss: 11302641.0, CrossEntropy: 1.7812029123306274, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 15.618761777877808\n",
      "Iter 392 / 2000, Loss: 125325928.5, CrossEntropy: 0.005248573608696461, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 10388029.0, CrossEntropy: 1.6458369493484497, Accuracy: 0.8181368670886076\n",
      "Elapsed time for the training: 15.587686777114868\n",
      "Iter 393 / 2000, Loss: 127702481.8125, CrossEntropy: 0.006191676016896963, Accuracy: 0.9979419757033248\n",
      "EVALUATION with last weights -> Loss: 9908714.0, CrossEntropy: 1.5626249313354492, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 15.013793706893921\n",
      "Iter 394 / 2000, Loss: 129335137.34375, CrossEntropy: 0.00685748178511858, Accuracy: 0.9979699488491048\n",
      "EVALUATION with last weights -> Loss: 10438959.0, CrossEntropy: 1.6436527967453003, Accuracy: 0.8100276898734177\n",
      "Elapsed time for the training: 14.97872018814087\n",
      "Iter 395 / 2000, Loss: 124404374.84375, CrossEntropy: 0.004862012807279825, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 10319081.0, CrossEntropy: 1.6444464921951294, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 13.262312889099121\n",
      "Iter 396 / 2000, Loss: 128289648.625, CrossEntropy: 0.0064124902710318565, Accuracy: 0.9977421675191815\n",
      "EVALUATION with last weights -> Loss: 10654485.0, CrossEntropy: 1.676444172859192, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 13.753372192382812\n",
      "Iter 397 / 2000, Loss: 127886739.0625, CrossEntropy: 0.006261087022721767, Accuracy: 0.9978300831202046\n",
      "EVALUATION with last weights -> Loss: 11243174.0, CrossEntropy: 1.7843751907348633, Accuracy: 0.8085443037974683\n",
      "Elapsed time for the training: 13.716473817825317\n",
      "Iter 398 / 2000, Loss: 124472527.875, CrossEntropy: 0.004871899727731943, Accuracy: 0.9981218030690537\n",
      "EVALUATION with last weights -> Loss: 10343049.0, CrossEntropy: 1.6266930103302002, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 13.701019525527954\n",
      "Iter 399 / 2000, Loss: 127716463.96875, CrossEntropy: 0.006160645745694637, Accuracy: 0.9980019181585678\n",
      "EVALUATION with last weights -> Loss: 10185821.0, CrossEntropy: 1.591801643371582, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.956377983093262\n",
      "Iter 400 / 2000, Loss: 129347363.78125, CrossEntropy: 0.006849770434200764, Accuracy: 0.997730179028133\n",
      "EVALUATION with last weights -> Loss: 10549466.0, CrossEntropy: 1.659842848777771, Accuracy: 0.8114121835443038\n",
      "Elapsed time for the training: 12.877092599868774\n",
      "Iter 401 / 2000, Loss: 125880949.28125, CrossEntropy: 0.0054871742613613605, Accuracy: 0.9980978260869566\n",
      "EVALUATION with last weights -> Loss: 11888767.0, CrossEntropy: 1.8716989755630493, Accuracy: 0.8017207278481012\n",
      "Elapsed time for the training: 12.89314866065979\n",
      "Iter 402 / 2000, Loss: 128333609.34375, CrossEntropy: 0.006388964131474495, Accuracy: 0.9977821291560103\n",
      "EVALUATION with last weights -> Loss: 10102839.0, CrossEntropy: 1.6152713298797607, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.898398637771606\n",
      "Iter 403 / 2000, Loss: 126103789.875, CrossEntropy: 0.005491205491125584, Accuracy: 0.9981218030690537\n",
      "EVALUATION with last weights -> Loss: 9949777.0, CrossEntropy: 1.5826407670974731, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 13.348853826522827\n",
      "Iter 404 / 2000, Loss: 123757972.5, CrossEntropy: 0.004548217635601759, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 10014695.0, CrossEntropy: 1.581870675086975, Accuracy: 0.8270371835443038\n",
      "Elapsed time for the training: 12.895086526870728\n",
      "Iter 405 / 2000, Loss: 127620601.03125, CrossEntropy: 0.006085785571485758, Accuracy: 0.9979219948849105\n",
      "EVALUATION with last weights -> Loss: 10774811.0, CrossEntropy: 1.6836817264556885, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.956045389175415\n",
      "Iter 406 / 2000, Loss: 129536600.53125, CrossEntropy: 0.006844594143331051, Accuracy: 0.9978420716112532\n",
      "EVALUATION with last weights -> Loss: 10545557.0, CrossEntropy: 1.6602798700332642, Accuracy: 0.817246835443038\n",
      "EVALUATION with last weights -> Loss: 9989349.0, CrossEntropy: 1.57053804397583, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.941846132278442\n",
      "Iter 416 / 2000, Loss: 123568298.25, CrossEntropy: 0.004398960620164871, Accuracy: 0.9984015345268542\n",
      "EVALUATION with last weights -> Loss: 10264745.0, CrossEntropy: 1.6073544025421143, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 12.901713132858276\n",
      "Iter 417 / 2000, Loss: 126127899.6875, CrossEntropy: 0.005420568864792585, Accuracy: 0.9982416879795396\n",
      "EVALUATION with last weights -> Loss: 9979412.0, CrossEntropy: 1.5695613622665405, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.863025903701782\n",
      "Iter 418 / 2000, Loss: 130369706.75, CrossEntropy: 0.007104484364390373, Accuracy: 0.9975423593350383\n",
      "EVALUATION with last weights -> Loss: 10102725.0, CrossEntropy: 1.5859071016311646, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.86748456954956\n",
      "Iter 419 / 2000, Loss: 125735581.0, CrossEntropy: 0.005251000635325909, Accuracy: 0.9981817455242967\n",
      "EVALUATION with last weights -> Loss: 10816737.0, CrossEntropy: 1.7202558517456055, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.863267660140991\n",
      "Iter 420 / 2000, Loss: 125057528.28125, CrossEntropy: 0.0049856496043503284, Accuracy: 0.9982696611253197\n",
      "EVALUATION with last weights -> Loss: 10157926.0, CrossEntropy: 1.6027069091796875, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.892170906066895\n",
      "Iter 421 / 2000, Loss: 126326038.09375, CrossEntropy: 0.005469788797199726, Accuracy: 0.9980618606138107\n",
      "EVALUATION with last weights -> Loss: 10248261.0, CrossEntropy: 1.642777681350708, Accuracy: 0.8235759493670886\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.968297004699707\n",
      "Iter 422 / 2000, Loss: 126855452.125, CrossEntropy: 0.005675681866705418, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 10300001.0, CrossEntropy: 1.6640965938568115, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 13.124780178070068\n",
      "Iter 423 / 2000, Loss: 128509872.1875, CrossEntropy: 0.006334033329039812, Accuracy: 0.9979619565217391\n",
      "EVALUATION with last weights -> Loss: 10068380.0, CrossEntropy: 1.5937856435775757, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 12.94884443283081\n",
      "Iter 424 / 2000, Loss: 130227521.28125, CrossEntropy: 0.007011170964688063, Accuracy: 0.9978220907928389\n",
      "EVALUATION with last weights -> Loss: 10382852.0, CrossEntropy: 1.6412749290466309, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.864009618759155\n",
      "Iter 425 / 2000, Loss: 124631632.65625, CrossEntropy: 0.004768440965563059, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 10753355.0, CrossEntropy: 1.717538833618164, Accuracy: 0.8183346518987342\n",
      "Elapsed time for the training: 14.306735754013062\n",
      "Iter 426 / 2000, Loss: 123644750.4375, CrossEntropy: 0.00436909357085824, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 11088460.0, CrossEntropy: 1.7685060501098633, Accuracy: 0.8183346518987342\n",
      "Elapsed time for the training: 15.66141128540039\n",
      "Iter 427 / 2000, Loss: 127725384.125, CrossEntropy: 0.005994105711579323, Accuracy: 0.9978820332480819\n",
      "EVALUATION with last weights -> Loss: 10695114.0, CrossEntropy: 1.6720309257507324, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 15.610352277755737\n",
      "Iter 428 / 2000, Loss: 129679114.71875, CrossEntropy: 0.006766890175640583, Accuracy: 0.9976622442455243\n",
      "EVALUATION with last weights -> Loss: 10725558.0, CrossEntropy: 1.685563564300537, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 15.602608680725098\n",
      "Iter 429 / 2000, Loss: 127813952.75, CrossEntropy: 0.0060164835304021835, Accuracy: 0.9979020140664961\n",
      "EVALUATION with last weights -> Loss: 10809616.0, CrossEntropy: 1.7056728601455688, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 15.435593605041504\n",
      "Iter 430 / 2000, Loss: 125321534.90625, CrossEntropy: 0.00501441303640604, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 10592902.0, CrossEntropy: 1.6651619672775269, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 14.238465785980225\n",
      "Iter 431 / 2000, Loss: 129654597.25, CrossEntropy: 0.0067447153851389885, Accuracy: 0.9976822250639387\n",
      "EVALUATION with last weights -> Loss: 11100601.0, CrossEntropy: 1.7566969394683838, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 13.784281492233276\n",
      "Iter 432 / 2000, Loss: 125952734.46875, CrossEntropy: 0.005254547111690044, Accuracy: 0.9981817455242967\n",
      "EVALUATION with last weights -> Loss: 10736645.0, CrossEntropy: 1.6870498657226562, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 13.694016456604004\n",
      "Iter 433 / 2000, Loss: 128065765.90625, CrossEntropy: 0.006096780300140381, Accuracy: 0.9981617647058824\n",
      "EVALUATION with last weights -> Loss: 10696155.0, CrossEntropy: 1.6850470304489136, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 13.743466854095459\n",
      "Iter 434 / 2000, Loss: 124409323.5, CrossEntropy: 0.004624700639396906, Accuracy: 0.998201726342711\n",
      "EVALUATION with last weights -> Loss: 11047812.0, CrossEntropy: 1.7618569135665894, Accuracy: 0.817246835443038\n",
      "Elapsed time for the training: 13.6849524974823\n",
      "Iter 435 / 2000, Loss: 126190758.96875, CrossEntropy: 0.005331362597644329, Accuracy: 0.998261668797954\n",
      "EVALUATION with last weights -> Loss: 10228904.0, CrossEntropy: 1.6549068689346313, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 14.112903356552124\n",
      "Iter 436 / 2000, Loss: 125301155.59375, CrossEntropy: 0.004977303557097912, Accuracy: 0.9984215153452686\n",
      "EVALUATION with last weights -> Loss: 10471158.0, CrossEntropy: 1.6679692268371582, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 11.815425872802734\n",
      "Iter 437 / 2000, Loss: 124321038.8125, CrossEntropy: 0.004654074553400278, Accuracy: 0.9984494884910486\n",
      "EVALUATION with last weights -> Loss: 10811818.0, CrossEntropy: 1.72618567943573, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 11.847947120666504\n",
      "Iter 438 / 2000, Loss: 127739720.46875, CrossEntropy: 0.005932426545768976, Accuracy: 0.9978620524296675\n",
      "EVALUATION with last weights -> Loss: 11455197.0, CrossEntropy: 1.8162059783935547, Accuracy: 0.8120055379746836\n",
      "Elapsed time for the training: 11.814680337905884\n",
      "Iter 439 / 2000, Loss: 126047469.28125, CrossEntropy: 0.005252836737781763, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 10860125.0, CrossEntropy: 1.7040174007415771, Accuracy: 0.8162579113924051\n",
      "Elapsed time for the training: 11.802057981491089\n",
      "Iter 440 / 2000, Loss: 125105493.34375, CrossEntropy: 0.0048683155328035355, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 10280185.0, CrossEntropy: 1.638291358947754, Accuracy: 0.8188291139240507\n",
      "Elapsed time for the training: 11.817022323608398\n",
      "Iter 441 / 2000, Loss: 125321006.78125, CrossEntropy: 0.0049483999609947205, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11116477.0, CrossEntropy: 1.7527954578399658, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 11.808303117752075\n",
      "Iter 442 / 2000, Loss: 130058469.59375, CrossEntropy: 0.006835045292973518, Accuracy: 0.9976622442455243\n",
      "EVALUATION with last weights -> Loss: 10753505.0, CrossEntropy: 1.7285224199295044, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 11.801682472229004\n",
      "Iter 443 / 2000, Loss: 125589896.6875, CrossEntropy: 0.0050447010435163975, Accuracy: 0.9981018222506394\n",
      "EVALUATION with last weights -> Loss: 10840864.0, CrossEntropy: 1.6938939094543457, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.219557523727417\n",
      "Iter 444 / 2000, Loss: 130088814.21875, CrossEntropy: 0.007037715055048466, Accuracy: 0.997850063938619\n",
      "EVALUATION with last weights -> Loss: 10841374.0, CrossEntropy: 1.7109053134918213, Accuracy: 0.8130933544303798\n",
      "Elapsed time for the training: 12.9941565990448\n",
      "Iter 445 / 2000, Loss: 125792915.75, CrossEntropy: 0.005113230086863041, Accuracy: 0.9982217071611253\n",
      "EVALUATION with last weights -> Loss: 10735691.0, CrossEntropy: 1.6905120611190796, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.873948574066162\n",
      "Iter 446 / 2000, Loss: 124380869.21875, CrossEntropy: 0.004572367295622826, Accuracy: 0.9983096227621484\n",
      "EVALUATION with last weights -> Loss: 11599539.0, CrossEntropy: 1.8323677778244019, Accuracy: 0.8117088607594937\n",
      "Elapsed time for the training: 12.884608507156372\n",
      "Iter 447 / 2000, Loss: 127941232.875, CrossEntropy: 0.00595916947349906, Accuracy: 0.9980818414322251\n",
      "EVALUATION with last weights -> Loss: 10283511.0, CrossEntropy: 1.617601990699768, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 13.063814401626587\n",
      "Iter 448 / 2000, Loss: 127964525.84375, CrossEntropy: 0.005963865201920271, Accuracy: 0.9979819373401535\n",
      "EVALUATION with last weights -> Loss: 10454939.0, CrossEntropy: 1.6389477252960205, Accuracy: 0.8246637658227848\n",
      "Elapsed time for the training: 12.977356433868408\n",
      "Iter 449 / 2000, Loss: 127416297.625, CrossEntropy: 0.005738919135183096, Accuracy: 0.9980019181585678\n",
      "EVALUATION with last weights -> Loss: 10571737.0, CrossEntropy: 1.67263662815094, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.837071657180786\n",
      "Iter 450 / 2000, Loss: 124238093.375, CrossEntropy: 0.004463233985006809, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 10536608.0, CrossEntropy: 1.6486469507217407, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.87366795539856\n",
      "Iter 451 / 2000, Loss: 125810561.90625, CrossEntropy: 0.005083767231553793, Accuracy: 0.998201726342711\n",
      "EVALUATION with last weights -> Loss: 10302259.0, CrossEntropy: 1.6456059217453003, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.87723970413208\n",
      "Iter 452 / 2000, Loss: 128143958.59375, CrossEntropy: 0.006064623594284058, Accuracy: 0.9980898337595908\n",
      "EVALUATION with last weights -> Loss: 10710661.0, CrossEntropy: 1.6783250570297241, Accuracy: 0.8199169303797469\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.87016224861145\n",
      "Iter 453 / 2000, Loss: 127490160.65625, CrossEntropy: 0.00574223417788744, Accuracy: 0.9980019181585678\n",
      "EVALUATION with last weights -> Loss: 10862854.0, CrossEntropy: 1.7214452028274536, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 13.682962894439697\n",
      "Iter 454 / 2000, Loss: 125736777.625, CrossEntropy: 0.0050360895693302155, Accuracy: 0.9982217071611253\n",
      "EVALUATION with last weights -> Loss: 10515708.0, CrossEntropy: 1.6589025259017944, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 13.696977138519287\n",
      "Iter 455 / 2000, Loss: 127464176.09375, CrossEntropy: 0.005721345078200102, Accuracy: 0.9979419757033248\n",
      "EVALUATION with last weights -> Loss: 10886515.0, CrossEntropy: 1.7017884254455566, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 13.062915563583374\n",
      "Iter 456 / 2000, Loss: 126928740.625, CrossEntropy: 0.005500740837305784, Accuracy: 0.9982416879795396\n",
      "EVALUATION with last weights -> Loss: 11051813.0, CrossEntropy: 1.7473772764205933, Accuracy: 0.8083465189873418\n",
      "Elapsed time for the training: 12.914026260375977\n",
      "Iter 457 / 2000, Loss: 123324956.15625, CrossEntropy: 0.004057030193507671, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11003140.0, CrossEntropy: 1.7441976070404053, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 12.876944065093994\n",
      "Iter 458 / 2000, Loss: 129663643.03125, CrossEntropy: 0.0066315047442913055, Accuracy: 0.99767023657289\n",
      "EVALUATION with last weights -> Loss: 11126109.0, CrossEntropy: 1.7527552843093872, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 12.869139432907104\n",
      "Iter 459 / 2000, Loss: 128389761.59375, CrossEntropy: 0.0060666450299322605, Accuracy: 0.9981218030690537\n",
      "EVALUATION with last weights -> Loss: 11037380.0, CrossEntropy: 1.7282373905181885, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.89473843574524\n",
      "Iter 460 / 2000, Loss: 128122482.46875, CrossEntropy: 0.005954334512352943, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 10500668.0, CrossEntropy: 1.6772892475128174, Accuracy: 0.8166534810126582\n",
      "Elapsed time for the training: 12.892167806625366\n",
      "Iter 461 / 2000, Loss: 126374170.84375, CrossEntropy: 0.005249840673059225, Accuracy: 0.998201726342711\n",
      "EVALUATION with last weights -> Loss: 10504114.0, CrossEntropy: 1.6514191627502441, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 12.88094711303711\n",
      "Iter 462 / 2000, Loss: 126243001.03125, CrossEntropy: 0.005193795543164015, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 10436041.0, CrossEntropy: 1.6312377452850342, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 12.916781187057495\n",
      "Iter 463 / 2000, Loss: 125949418.34375, CrossEntropy: 0.005071437917649746, Accuracy: 0.9983615728900256\n",
      "EVALUATION with last weights -> Loss: 10951621.0, CrossEntropy: 1.7314101457595825, Accuracy: 0.814181170886076\n",
      "Elapsed time for the training: 12.895413875579834\n",
      "Iter 464 / 2000, Loss: 125872663.9375, CrossEntropy: 0.005034070461988449, Accuracy: 0.9984215153452686\n",
      "EVALUATION with last weights -> Loss: 11094617.0, CrossEntropy: 1.7543390989303589, Accuracy: 0.8111155063291139\n",
      "Elapsed time for the training: 12.906501770019531\n",
      "Iter 465 / 2000, Loss: 125900941.15625, CrossEntropy: 0.005038029048591852, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 11088474.0, CrossEntropy: 1.738835096359253, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.901342391967773\n",
      "Iter 466 / 2000, Loss: 127503508.46875, CrossEntropy: 0.005671747960150242, Accuracy: 0.9980818414322251\n",
      "EVALUATION with last weights -> Loss: 10408922.0, CrossEntropy: 1.6398122310638428, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 14.760433673858643\n",
      "Iter 467 / 2000, Loss: 124874430.53125, CrossEntropy: 0.0046182251535356045, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 10390025.0, CrossEntropy: 1.6444907188415527, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 15.507444620132446\n",
      "Iter 468 / 2000, Loss: 127540380.28125, CrossEntropy: 0.005675656720995903, Accuracy: 0.9979419757033248\n",
      "EVALUATION with last weights -> Loss: 10386039.0, CrossEntropy: 1.6519896984100342, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 15.490190267562866\n",
      "Iter 469 / 2000, Loss: 128224533.3125, CrossEntropy: 0.005948791746050119, Accuracy: 0.9981617647058824\n",
      "EVALUATION with last weights -> Loss: 10222391.0, CrossEntropy: 1.6297816038131714, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 15.604079484939575\n",
      "Iter 470 / 2000, Loss: 124468770.9375, CrossEntropy: 0.004436379298567772, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 10458136.0, CrossEntropy: 1.6421267986297607, Accuracy: 0.8246637658227848\n",
      "Elapsed time for the training: 14.727097749710083\n",
      "Iter 471 / 2000, Loss: 127121384.0, CrossEntropy: 0.005508091300725937, Accuracy: 0.9981297953964194\n",
      "EVALUATION with last weights -> Loss: 10624070.0, CrossEntropy: 1.6658868789672852, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.902337789535522\n",
      "Iter 472 / 2000, Loss: 126409924.71875, CrossEntropy: 0.005200242158025503, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 10408717.0, CrossEntropy: 1.6522680521011353, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.939221858978271\n",
      "Iter 473 / 2000, Loss: 125875866.125, CrossEntropy: 0.004981455393135548, Accuracy: 0.9982217071611253\n",
      "EVALUATION with last weights -> Loss: 10776460.0, CrossEntropy: 1.6991581916809082, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.93349575996399\n",
      "Iter 474 / 2000, Loss: 127117793.53125, CrossEntropy: 0.0055383737199008465, Accuracy: 0.9981098145780052\n",
      "EVALUATION with last weights -> Loss: 10285228.0, CrossEntropy: 1.6225582361221313, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.736414670944214\n",
      "Iter 475 / 2000, Loss: 125657235.125, CrossEntropy: 0.004884927533566952, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 10489983.0, CrossEntropy: 1.6436651945114136, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.900095224380493\n",
      "Iter 476 / 2000, Loss: 128357387.75, CrossEntropy: 0.005956065375357866, Accuracy: 0.9980019181585678\n",
      "EVALUATION with last weights -> Loss: 10488941.0, CrossEntropy: 1.6444658041000366, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.926604270935059\n",
      "Iter 477 / 2000, Loss: 127231707.25, CrossEntropy: 0.005499671213328838, Accuracy: 0.9980218989769821\n",
      "EVALUATION with last weights -> Loss: 10869613.0, CrossEntropy: 1.702929139137268, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.916317462921143\n",
      "Iter 478 / 2000, Loss: 123474522.59375, CrossEntropy: 0.004004941321909428, Accuracy: 0.9984694693094629\n",
      "EVALUATION with last weights -> Loss: 11588092.0, CrossEntropy: 1.8216817378997803, Accuracy: 0.8076542721518988\n",
      "Elapsed time for the training: 12.880545377731323\n",
      "Iter 479 / 2000, Loss: 128940299.6875, CrossEntropy: 0.006170686334371567, Accuracy: 0.9981817455242967\n",
      "EVALUATION with last weights -> Loss: 10736789.0, CrossEntropy: 1.697069764137268, Accuracy: 0.8188291139240507\n",
      "Elapsed time for the training: 12.816324949264526\n",
      "Iter 480 / 2000, Loss: 126359512.40625, CrossEntropy: 0.0051342579536139965, Accuracy: 0.9984215153452686\n",
      "EVALUATION with last weights -> Loss: 10837582.0, CrossEntropy: 1.7014579772949219, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 12.87842583656311\n",
      "Iter 481 / 2000, Loss: 126567347.09375, CrossEntropy: 0.005210659466683865, Accuracy: 0.9981617647058824\n",
      "EVALUATION with last weights -> Loss: 10923553.0, CrossEntropy: 1.716972827911377, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.875175952911377\n",
      "Iter 482 / 2000, Loss: 126032120.59375, CrossEntropy: 0.004991420079022646, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 11633853.0, CrossEntropy: 1.8377975225448608, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.903285503387451\n",
      "Iter 483 / 2000, Loss: 126234007.78125, CrossEntropy: 0.005066966637969017, Accuracy: 0.9984015345268542\n",
      "EVALUATION with last weights -> Loss: 10682031.0, CrossEntropy: 1.6836864948272705, Accuracy: 0.8209058544303798\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 14.814616680145264\n",
      "Iter 484 / 2000, Loss: 126568086.09375, CrossEntropy: 0.005193972494453192, Accuracy: 0.9982816496163683\n",
      "EVALUATION with last weights -> Loss: 10665386.0, CrossEntropy: 1.668777346611023, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 15.48533034324646\n",
      "Iter 485 / 2000, Loss: 127239788.46875, CrossEntropy: 0.005456087179481983, Accuracy: 0.9982217071611253\n",
      "EVALUATION with last weights -> Loss: 11230508.0, CrossEntropy: 1.7845063209533691, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 15.718070030212402\n",
      "Iter 486 / 2000, Loss: 127993815.15625, CrossEntropy: 0.005751878023147583, Accuracy: 0.998141783887468\n",
      "EVALUATION with last weights -> Loss: 11053779.0, CrossEntropy: 1.7310152053833008, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 15.677590131759644\n",
      "Iter 487 / 2000, Loss: 128225451.65625, CrossEntropy: 0.005846079438924789, Accuracy: 0.998201726342711\n",
      "EVALUATION with last weights -> Loss: 11474471.0, CrossEntropy: 1.806534767150879, Accuracy: 0.8121044303797469\n",
      "Elapsed time for the training: 14.31819462776184\n",
      "Iter 488 / 2000, Loss: 123585228.0625, CrossEntropy: 0.003979812376201153, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 10716350.0, CrossEntropy: 1.6777663230895996, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 11.762028932571411\n",
      "Iter 489 / 2000, Loss: 127420322.0625, CrossEntropy: 0.005522018764168024, Accuracy: 0.9982696611253197\n",
      "EVALUATION with last weights -> Loss: 10782115.0, CrossEntropy: 1.6973297595977783, Accuracy: 0.8151700949367089\n",
      "Elapsed time for the training: 12.858889818191528\n",
      "Iter 490 / 2000, Loss: 124485529.0, CrossEntropy: 0.004327957984060049, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 10672542.0, CrossEntropy: 1.6960642337799072, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.901976823806763\n",
      "Iter 491 / 2000, Loss: 126056115.78125, CrossEntropy: 0.004952045623213053, Accuracy: 0.9981218030690537\n",
      "EVALUATION with last weights -> Loss: 10943513.0, CrossEntropy: 1.7157390117645264, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.860944032669067\n",
      "Iter 492 / 2000, Loss: 126180851.5, CrossEntropy: 0.004994159564375877, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 11231018.0, CrossEntropy: 1.7658756971359253, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.914180278778076\n",
      "Iter 493 / 2000, Loss: 128540354.71875, CrossEntropy: 0.005931015592068434, Accuracy: 0.9980618606138107\n",
      "EVALUATION with last weights -> Loss: 10813194.0, CrossEntropy: 1.6933059692382812, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.889009475708008\n",
      "Iter 494 / 2000, Loss: 126926279.96875, CrossEntropy: 0.005285828374326229, Accuracy: 0.998201726342711\n",
      "EVALUATION with last weights -> Loss: 11137347.0, CrossEntropy: 1.7652002573013306, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.912646770477295\n",
      "Iter 495 / 2000, Loss: 126505418.25, CrossEntropy: 0.00511048873886466, Accuracy: 0.9982217071611253\n",
      "EVALUATION with last weights -> Loss: 10944153.0, CrossEntropy: 1.71558678150177, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.869484901428223\n",
      "Iter 496 / 2000, Loss: 125244716.875, CrossEntropy: 0.004603905603289604, Accuracy: 0.99838155370844\n",
      "EVALUATION with last weights -> Loss: 11015034.0, CrossEntropy: 1.725353479385376, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.921608924865723\n",
      "Iter 497 / 2000, Loss: 125818489.15625, CrossEntropy: 0.0048210714012384415, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 11055089.0, CrossEntropy: 1.7515257596969604, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.806738138198853\n",
      "Iter 498 / 2000, Loss: 126667016.0625, CrossEntropy: 0.005161408334970474, Accuracy: 0.9982416879795396\n",
      "EVALUATION with last weights -> Loss: 11290427.0, CrossEntropy: 1.7978153228759766, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 14.093435049057007\n",
      "Iter 499 / 2000, Loss: 124558614.0, CrossEntropy: 0.004307280760258436, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 10618988.0, CrossEntropy: 1.6742278337478638, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 14.740222454071045\n",
      "Iter 500 / 2000, Loss: 130586165.71875, CrossEntropy: 0.006709438282996416, Accuracy: 0.9980618606138107\n",
      "EVALUATION with last weights -> Loss: 13518805.0, CrossEntropy: 2.1361589431762695, Accuracy: 0.8033030063291139\n",
      "Elapsed time for the training: 12.884475708007812\n",
      "Iter 501 / 2000, Loss: 125462529.0625, CrossEntropy: 0.004655567463487387, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 11398958.0, CrossEntropy: 1.7969403266906738, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 12.844841480255127\n",
      "Iter 502 / 2000, Loss: 128903398.125, CrossEntropy: 0.006059200037270784, Accuracy: 0.9978101023017903\n",
      "EVALUATION with last weights -> Loss: 11229397.0, CrossEntropy: 1.7693145275115967, Accuracy: 0.8145767405063291\n",
      "Elapsed time for the training: 12.868113994598389\n",
      "Iter 503 / 2000, Loss: 127229343.90625, CrossEntropy: 0.005350083112716675, Accuracy: 0.9982816496163683\n",
      "EVALUATION with last weights -> Loss: 10776529.0, CrossEntropy: 1.6918284893035889, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 12.913429260253906\n",
      "Iter 504 / 2000, Loss: 125337850.03125, CrossEntropy: 0.004592644050717354, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 10793112.0, CrossEntropy: 1.7074131965637207, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.890191078186035\n",
      "Iter 505 / 2000, Loss: 125622691.09375, CrossEntropy: 0.004697682801634073, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 10901363.0, CrossEntropy: 1.7404968738555908, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.884177207946777\n",
      "Iter 506 / 2000, Loss: 128123238.9375, CrossEntropy: 0.005703025031834841, Accuracy: 0.9981697570332481\n",
      "EVALUATION with last weights -> Loss: 10412305.0, CrossEntropy: 1.6288970708847046, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.888580083847046\n",
      "Iter 507 / 2000, Loss: 126275307.09375, CrossEntropy: 0.0049471426755189896, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 10912254.0, CrossEntropy: 1.710697054862976, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.929112911224365\n",
      "Iter 508 / 2000, Loss: 128249634.65625, CrossEntropy: 0.005730734206736088, Accuracy: 0.9981817455242967\n",
      "EVALUATION with last weights -> Loss: 11259248.0, CrossEntropy: 1.7656059265136719, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.940427541732788\n",
      "Iter 509 / 2000, Loss: 125615831.96875, CrossEntropy: 0.0046785613521933556, Accuracy: 0.9981817455242967\n",
      "EVALUATION with last weights -> Loss: 10678923.0, CrossEntropy: 1.6891093254089355, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 13.733673334121704\n",
      "Iter 510 / 2000, Loss: 126134070.0625, CrossEntropy: 0.004882727283984423, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 10791530.0, CrossEntropy: 1.6960320472717285, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 14.356105089187622\n",
      "Iter 511 / 2000, Loss: 126633837.46875, CrossEntropy: 0.005069141276180744, Accuracy: 0.9982816496163683\n",
      "EVALUATION with last weights -> Loss: 10881778.0, CrossEntropy: 1.7002805471420288, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 13.978974342346191\n",
      "Iter 512 / 2000, Loss: 127277918.21875, CrossEntropy: 0.005320862866938114, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 10681682.0, CrossEntropy: 1.7060545682907104, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 13.697757005691528\n",
      "Iter 513 / 2000, Loss: 129690378.03125, CrossEntropy: 0.0062789092771708965, Accuracy: 0.9978220907928389\n",
      "EVALUATION with last weights -> Loss: 10843841.0, CrossEntropy: 1.698676586151123, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 13.091465950012207\n",
      "Iter 514 / 2000, Loss: 124864984.8125, CrossEntropy: 0.004345335066318512, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 10863455.0, CrossEntropy: 1.7307566404342651, Accuracy: 0.8234770569620253\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.906405210494995\n",
      "Iter 515 / 2000, Loss: 127472477.5, CrossEntropy: 0.005384586751461029, Accuracy: 0.9980218989769821\n",
      "EVALUATION with last weights -> Loss: 10603937.0, CrossEntropy: 1.6749669313430786, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 14.417077541351318\n",
      "Iter 516 / 2000, Loss: 124794798.71875, CrossEntropy: 0.004310874734073877, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 10767217.0, CrossEntropy: 1.6935794353485107, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 13.867947340011597\n",
      "Iter 517 / 2000, Loss: 126193130.0, CrossEntropy: 0.004859353881329298, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 11492613.0, CrossEntropy: 1.7962620258331299, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.886042594909668\n",
      "Iter 518 / 2000, Loss: 125029183.28125, CrossEntropy: 0.004388516303151846, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 10455761.0, CrossEntropy: 1.6421663761138916, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 12.90579080581665\n",
      "Iter 519 / 2000, Loss: 126711796.9375, CrossEntropy: 0.005055422428995371, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 10747314.0, CrossEntropy: 1.6916730403900146, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 13.658041000366211\n",
      "Iter 520 / 2000, Loss: 124464890.8125, CrossEntropy: 0.004153187852352858, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 10668825.0, CrossEntropy: 1.6792446374893188, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 15.622922420501709\n",
      "Iter 521 / 2000, Loss: 128057320.78125, CrossEntropy: 0.005581964273005724, Accuracy: 0.9982816496163683\n",
      "EVALUATION with last weights -> Loss: 10963012.0, CrossEntropy: 1.7355529069900513, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 15.615773677825928\n",
      "Iter 522 / 2000, Loss: 128803890.375, CrossEntropy: 0.0059072524309158325, Accuracy: 0.9980498721227622\n",
      "EVALUATION with last weights -> Loss: 10600665.0, CrossEntropy: 1.6826261281967163, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 15.605931758880615\n",
      "Iter 523 / 2000, Loss: 127431961.53125, CrossEntropy: 0.005321123171597719, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 10692548.0, CrossEntropy: 1.6970499753952026, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.908634185791016\n",
      "Iter 524 / 2000, Loss: 125706817.25, CrossEntropy: 0.004630770534276962, Accuracy: 0.9984215153452686\n",
      "EVALUATION with last weights -> Loss: 11095658.0, CrossEntropy: 1.7443760633468628, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.87993597984314\n",
      "Iter 525 / 2000, Loss: 124792079.71875, CrossEntropy: 0.004255441948771477, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 10485422.0, CrossEntropy: 1.676020622253418, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.898053884506226\n",
      "Iter 526 / 2000, Loss: 126340080.375, CrossEntropy: 0.004869924858212471, Accuracy: 0.9984015345268542\n",
      "EVALUATION with last weights -> Loss: 10876083.0, CrossEntropy: 1.7385798692703247, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.878298282623291\n",
      "Iter 527 / 2000, Loss: 128236410.5625, CrossEntropy: 0.0056820702739059925, Accuracy: 0.998349584398977\n",
      "EVALUATION with last weights -> Loss: 11521246.0, CrossEntropy: 1.823879599571228, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.80942964553833\n",
      "Iter 528 / 2000, Loss: 125688656.375, CrossEntropy: 0.004612083546817303, Accuracy: 0.9983895460358057\n",
      "EVALUATION with last weights -> Loss: 10721161.0, CrossEntropy: 1.6877726316452026, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 12.828638792037964\n",
      "Iter 529 / 2000, Loss: 126533558.875, CrossEntropy: 0.004930406808853149, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 10839448.0, CrossEntropy: 1.7285326719284058, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.871011734008789\n",
      "Iter 530 / 2000, Loss: 128233955.65625, CrossEntropy: 0.005608074367046356, Accuracy: 0.9981617647058824\n",
      "EVALUATION with last weights -> Loss: 10872810.0, CrossEntropy: 1.7191822528839111, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 11.81889295578003\n",
      "Iter 531 / 2000, Loss: 127130142.0625, CrossEntropy: 0.0052603441290557384, Accuracy: 0.9983895460358057\n",
      "EVALUATION with last weights -> Loss: 10730124.0, CrossEntropy: 1.6925679445266724, Accuracy: 0.8136867088607594\n",
      "Elapsed time for the training: 11.789412498474121\n",
      "Iter 532 / 2000, Loss: 127085662.84375, CrossEntropy: 0.005133629310876131, Accuracy: 0.998201726342711\n",
      "EVALUATION with last weights -> Loss: 10458682.0, CrossEntropy: 1.6552733182907104, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.773690223693848\n",
      "Iter 533 / 2000, Loss: 122481415.125, CrossEntropy: 0.0032918076030910015, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 10868551.0, CrossEntropy: 1.742455244064331, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.905873537063599\n",
      "Iter 534 / 2000, Loss: 127313286.25, CrossEntropy: 0.005219897720962763, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 10794251.0, CrossEntropy: 1.6917153596878052, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.91683053970337\n",
      "Iter 535 / 2000, Loss: 124910421.125, CrossEntropy: 0.004249763675034046, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 10621342.0, CrossEntropy: 1.6733779907226562, Accuracy: 0.8263449367088608\n",
      "Elapsed time for the training: 12.923502922058105\n",
      "Iter 536 / 2000, Loss: 126184837.875, CrossEntropy: 0.004753832705318928, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 11035374.0, CrossEntropy: 1.7480463981628418, Accuracy: 0.8269382911392406\n",
      "Elapsed time for the training: 13.039386510848999\n",
      "Iter 537 / 2000, Loss: 128237446.875, CrossEntropy: 0.005568088032305241, Accuracy: 0.998261668797954\n",
      "EVALUATION with last weights -> Loss: 11007293.0, CrossEntropy: 1.7440617084503174, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.902315139770508\n",
      "Iter 538 / 2000, Loss: 125542562.71875, CrossEntropy: 0.004489115905016661, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 11147865.0, CrossEntropy: 1.7839500904083252, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.925899028778076\n",
      "Iter 539 / 2000, Loss: 126242852.75, CrossEntropy: 0.004760362673550844, Accuracy: 0.9984015345268542\n",
      "EVALUATION with last weights -> Loss: 10861845.0, CrossEntropy: 1.7340965270996094, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.90643572807312\n",
      "Iter 540 / 2000, Loss: 126152820.0, CrossEntropy: 0.0047186873853206635, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 11046132.0, CrossEntropy: 1.7259703874588013, Accuracy: 0.8261471518987342\n",
      "Elapsed time for the training: 12.930834531784058\n",
      "Iter 541 / 2000, Loss: 125301857.28125, CrossEntropy: 0.004373340401798487, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11163495.0, CrossEntropy: 1.7578233480453491, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.884477138519287\n",
      "Iter 542 / 2000, Loss: 127051816.34375, CrossEntropy: 0.005110483150929213, Accuracy: 0.9982696611253197\n",
      "EVALUATION with last weights -> Loss: 11609766.0, CrossEntropy: 1.8310654163360596, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.905202388763428\n",
      "Iter 543 / 2000, Loss: 127804717.0625, CrossEntropy: 0.0053629265166819096, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11605242.0, CrossEntropy: 1.8218955993652344, Accuracy: 0.8183346518987342\n",
      "Elapsed time for the training: 12.930426359176636\n",
      "Iter 544 / 2000, Loss: 124571069.375, CrossEntropy: 0.004065353889018297, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 11075553.0, CrossEntropy: 1.7517074346542358, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.898439884185791\n",
      "Iter 545 / 2000, Loss: 127689450.5625, CrossEntropy: 0.00530715798959136, Accuracy: 0.9983615728900256\n",
      "EVALUATION with last weights -> Loss: 11060470.0, CrossEntropy: 1.7550920248031616, Accuracy: 0.8165545886075949\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 13.327840328216553\n",
      "Iter 546 / 2000, Loss: 125388086.9375, CrossEntropy: 0.004382697399705648, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 10984711.0, CrossEntropy: 1.740936279296875, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 13.351647853851318\n",
      "Iter 547 / 2000, Loss: 124771768.1875, CrossEntropy: 0.004131274297833443, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 11138429.0, CrossEntropy: 1.7677730321884155, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.886411190032959\n",
      "Iter 548 / 2000, Loss: 123830870.5625, CrossEntropy: 0.0037485964130610228, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11139177.0, CrossEntropy: 1.7682170867919922, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.885928869247437\n",
      "Iter 549 / 2000, Loss: 125726833.59375, CrossEntropy: 0.004501477349549532, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 11618003.0, CrossEntropy: 1.8407303094863892, Accuracy: 0.8163568037974683\n",
      "Elapsed time for the training: 12.939892530441284\n",
      "Iter 550 / 2000, Loss: 126113971.8125, CrossEntropy: 0.004673006944358349, Accuracy: 0.998557384910486\n",
      "EVALUATION with last weights -> Loss: 11539710.0, CrossEntropy: 1.8119823932647705, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.882078170776367\n",
      "Iter 551 / 2000, Loss: 125432967.84375, CrossEntropy: 0.0044545638374984264, Accuracy: 0.9985893542199489\n",
      "EVALUATION with last weights -> Loss: 12112113.0, CrossEntropy: 1.8939422369003296, Accuracy: 0.8085443037974683\n",
      "Elapsed time for the training: 12.910249948501587\n",
      "Iter 552 / 2000, Loss: 125671286.28125, CrossEntropy: 0.0044634644873440266, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 11338309.0, CrossEntropy: 1.7902370691299438, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.903658151626587\n",
      "Iter 553 / 2000, Loss: 125346618.3125, CrossEntropy: 0.0044074407778680325, Accuracy: 0.99840952685422\n",
      "EVALUATION with last weights -> Loss: 12286682.0, CrossEntropy: 1.9249049425125122, Accuracy: 0.8117088607594937\n",
      "Elapsed time for the training: 12.875697374343872\n",
      "Iter 554 / 2000, Loss: 124623872.96875, CrossEntropy: 0.004043750464916229, Accuracy: 0.9986892583120205\n",
      "EVALUATION with last weights -> Loss: 11867031.0, CrossEntropy: 1.8684420585632324, Accuracy: 0.814181170886076\n",
      "Elapsed time for the training: 12.883889436721802\n",
      "Iter 555 / 2000, Loss: 127844305.90625, CrossEntropy: 0.005323381163179874, Accuracy: 0.998141783887468\n",
      "EVALUATION with last weights -> Loss: 11362499.0, CrossEntropy: 1.8013852834701538, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 12.929149866104126\n",
      "Iter 556 / 2000, Loss: 127136167.78125, CrossEntropy: 0.00505153788253665, Accuracy: 0.9982496803069054\n",
      "EVALUATION with last weights -> Loss: 11836701.0, CrossEntropy: 1.8754788637161255, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.932955741882324\n",
      "Iter 557 / 2000, Loss: 124640724.78125, CrossEntropy: 0.004024800844490528, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 11498846.0, CrossEntropy: 1.802794337272644, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.935572862625122\n",
      "Iter 558 / 2000, Loss: 127537017.28125, CrossEntropy: 0.0051761711947619915, Accuracy: 0.9982816496163683\n",
      "EVALUATION with last weights -> Loss: 11285793.0, CrossEntropy: 1.790207862854004, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.001810073852539\n",
      "Iter 559 / 2000, Loss: 127272874.5, CrossEntropy: 0.005065602250397205, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11179263.0, CrossEntropy: 1.750643253326416, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 11.773702144622803\n",
      "Iter 560 / 2000, Loss: 127585139.65625, CrossEntropy: 0.00518571212887764, Accuracy: 0.9980618606138107\n",
      "EVALUATION with last weights -> Loss: 10956784.0, CrossEntropy: 1.7392007112503052, Accuracy: 0.8166534810126582\n",
      "Elapsed time for the training: 11.785318851470947\n",
      "Iter 561 / 2000, Loss: 127630875.0625, CrossEntropy: 0.005198418162763119, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 10944055.0, CrossEntropy: 1.712318778038025, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 11.835484981536865\n",
      "Iter 562 / 2000, Loss: 125009019.375, CrossEntropy: 0.00414586067199707, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11537650.0, CrossEntropy: 1.810241937637329, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 11.909821510314941\n",
      "Iter 563 / 2000, Loss: 126030315.40625, CrossEntropy: 0.004551671911031008, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11420732.0, CrossEntropy: 1.805041790008545, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.903080940246582\n",
      "Iter 564 / 2000, Loss: 125758674.125, CrossEntropy: 0.0044348458759486675, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 11651003.0, CrossEntropy: 1.8217147588729858, Accuracy: 0.8144778481012658\n",
      "Elapsed time for the training: 12.881038665771484\n",
      "Iter 565 / 2000, Loss: 127120494.8125, CrossEntropy: 0.004973992705345154, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 11443728.0, CrossEntropy: 1.825675368309021, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.877559185028076\n",
      "Iter 566 / 2000, Loss: 125134802.90625, CrossEntropy: 0.00417545298114419, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 11359847.0, CrossEntropy: 1.7913564443588257, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.866678714752197\n",
      "Iter 567 / 2000, Loss: 127483516.96875, CrossEntropy: 0.005108312703669071, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11510568.0, CrossEntropy: 1.813397765159607, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 13.705268859863281\n",
      "Iter 568 / 2000, Loss: 125685121.5625, CrossEntropy: 0.004384293686598539, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 11346961.0, CrossEntropy: 1.7946739196777344, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 13.722069263458252\n",
      "Iter 569 / 2000, Loss: 125250885.4375, CrossEntropy: 0.0042058504186570644, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11520710.0, CrossEntropy: 1.8077921867370605, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 13.717477321624756\n",
      "Iter 570 / 2000, Loss: 124785181.9375, CrossEntropy: 0.004014340229332447, Accuracy: 0.9984015345268542\n",
      "EVALUATION with last weights -> Loss: 11807974.0, CrossEntropy: 1.8565442562103271, Accuracy: 0.8174446202531646\n",
      "Elapsed time for the training: 15.232630491256714\n",
      "Iter 571 / 2000, Loss: 125885990.9375, CrossEntropy: 0.004448935855180025, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11283636.0, CrossEntropy: 1.7841796875, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 15.413758039474487\n",
      "Iter 572 / 2000, Loss: 125767971.78125, CrossEntropy: 0.0043960195034742355, Accuracy: 0.9984215153452686\n",
      "EVALUATION with last weights -> Loss: 11501060.0, CrossEntropy: 1.8027504682540894, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 15.639560461044312\n",
      "Iter 573 / 2000, Loss: 125899673.90625, CrossEntropy: 0.00445041386410594, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11298110.0, CrossEntropy: 1.787697196006775, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 15.434547424316406\n",
      "Iter 574 / 2000, Loss: 127974244.40625, CrossEntropy: 0.005324983038008213, Accuracy: 0.9981897378516624\n",
      "EVALUATION with last weights -> Loss: 11636839.0, CrossEntropy: 1.839699149131775, Accuracy: 0.8112143987341772\n",
      "Elapsed time for the training: 12.885270833969116\n",
      "Iter 575 / 2000, Loss: 125828747.21875, CrossEntropy: 0.004404671490192413, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11788228.0, CrossEntropy: 1.869585633277893, Accuracy: 0.8123022151898734\n",
      "Elapsed time for the training: 12.912915706634521\n",
      "Iter 576 / 2000, Loss: 124849853.84375, CrossEntropy: 0.0040094684809446335, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 12115886.0, CrossEntropy: 1.914685606956482, Accuracy: 0.8149723101265823\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.92397427558899\n",
      "Iter 577 / 2000, Loss: 125623087.96875, CrossEntropy: 0.004351832438260317, Accuracy: 0.9985294117647059\n",
      "EVALUATION with last weights -> Loss: 11548834.0, CrossEntropy: 1.8171958923339844, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 12.886386156082153\n",
      "Iter 578 / 2000, Loss: 125202865.5625, CrossEntropy: 0.004138643387705088, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 11381462.0, CrossEntropy: 1.808475375175476, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 12.94166874885559\n",
      "Iter 579 / 2000, Loss: 126408948.84375, CrossEntropy: 0.004616362974047661, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 11444891.0, CrossEntropy: 1.7983412742614746, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.902518033981323\n",
      "Iter 580 / 2000, Loss: 124161584.53125, CrossEntropy: 0.0037187901325523853, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12221962.0, CrossEntropy: 1.9493074417114258, Accuracy: 0.8161590189873418\n",
      "Elapsed time for the training: 13.048021793365479\n",
      "Iter 581 / 2000, Loss: 129931430.0, CrossEntropy: 0.006020320113748312, Accuracy: 0.998141783887468\n",
      "EVALUATION with last weights -> Loss: 11758792.0, CrossEntropy: 1.8482609987258911, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.856156349182129\n",
      "Iter 582 / 2000, Loss: 125687680.71875, CrossEntropy: 0.00431246729567647, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12355717.0, CrossEntropy: 1.9384634494781494, Accuracy: 0.8103243670886076\n",
      "Elapsed time for the training: 12.896021127700806\n",
      "Iter 583 / 2000, Loss: 127416365.40625, CrossEntropy: 0.005007109604775906, Accuracy: 0.9984694693094629\n",
      "EVALUATION with last weights -> Loss: 11457581.0, CrossEntropy: 1.7934634685516357, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.852558374404907\n",
      "Iter 584 / 2000, Loss: 125985109.3125, CrossEntropy: 0.0044199931435287, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12132823.0, CrossEntropy: 1.9217790365219116, Accuracy: 0.813192246835443\n",
      "Elapsed time for the training: 12.918991565704346\n",
      "Iter 585 / 2000, Loss: 125683913.875, CrossEntropy: 0.004315211903303862, Accuracy: 0.9986293158567775\n",
      "EVALUATION with last weights -> Loss: 12000688.0, CrossEntropy: 1.8850561380386353, Accuracy: 0.8157634493670886\n",
      "Elapsed time for the training: 12.914952278137207\n",
      "Iter 586 / 2000, Loss: 124054638.46875, CrossEntropy: 0.003638991853222251, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 11254395.0, CrossEntropy: 1.7625713348388672, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.870140552520752\n",
      "Iter 587 / 2000, Loss: 130286467.03125, CrossEntropy: 0.006134166847914457, Accuracy: 0.9981817455242967\n",
      "EVALUATION with last weights -> Loss: 11402130.0, CrossEntropy: 1.8255891799926758, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.791783332824707\n",
      "Iter 588 / 2000, Loss: 125491121.375, CrossEntropy: 0.0042022052221000195, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 11313745.0, CrossEntropy: 1.7735748291015625, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 13.128659009933472\n",
      "Iter 589 / 2000, Loss: 124851025.21875, CrossEntropy: 0.003941152710467577, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 11951493.0, CrossEntropy: 1.8934533596038818, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 14.54507827758789\n",
      "Iter 590 / 2000, Loss: 126628262.90625, CrossEntropy: 0.004646803252398968, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 11352534.0, CrossEntropy: 1.7901906967163086, Accuracy: 0.8184335443037974\n",
      "Elapsed time for the training: 12.907362937927246\n",
      "Iter 591 / 2000, Loss: 122146233.75, CrossEntropy: 0.002851593540981412, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 11279803.0, CrossEntropy: 1.7805144786834717, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.85575819015503\n",
      "Iter 592 / 2000, Loss: 126876371.03125, CrossEntropy: 0.004736284725368023, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11449172.0, CrossEntropy: 1.803783655166626, Accuracy: 0.8273338607594937\n",
      "Elapsed time for the training: 12.858771562576294\n",
      "Iter 593 / 2000, Loss: 127134444.09375, CrossEntropy: 0.0048576886765658855, Accuracy: 0.9983296035805627\n",
      "EVALUATION with last weights -> Loss: 11495376.0, CrossEntropy: 1.809870719909668, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.874144077301025\n",
      "Iter 594 / 2000, Loss: 124735567.03125, CrossEntropy: 0.0038773538544774055, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 11666807.0, CrossEntropy: 1.8312243223190308, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.890698432922363\n",
      "Iter 595 / 2000, Loss: 123487692.21875, CrossEntropy: 0.0033675821032375097, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 11518084.0, CrossEntropy: 1.8002742528915405, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.887501001358032\n",
      "Iter 596 / 2000, Loss: 129198123.9375, CrossEntropy: 0.005646625533699989, Accuracy: 0.9980818414322251\n",
      "EVALUATION with last weights -> Loss: 11699767.0, CrossEntropy: 1.8457332849502563, Accuracy: 0.8171479430379747\n",
      "Elapsed time for the training: 13.292420148849487\n",
      "Iter 597 / 2000, Loss: 125650077.34375, CrossEntropy: 0.00422122934833169, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11552720.0, CrossEntropy: 1.825664758682251, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 13.19465970993042\n",
      "Iter 598 / 2000, Loss: 128769147.40625, CrossEntropy: 0.005485481582581997, Accuracy: 0.9983296035805627\n",
      "EVALUATION with last weights -> Loss: 11810107.0, CrossEntropy: 1.8700133562088013, Accuracy: 0.8125988924050633\n",
      "Elapsed time for the training: 12.946041107177734\n",
      "Iter 599 / 2000, Loss: 128217009.375, CrossEntropy: 0.005238180048763752, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 11202237.0, CrossEntropy: 1.7504910230636597, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.866098880767822\n",
      "Iter 600 / 2000, Loss: 123156101.1875, CrossEntropy: 0.003209746442735195, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 11164081.0, CrossEntropy: 1.7822455167770386, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.81716775894165\n",
      "Iter 601 / 2000, Loss: 127896868.21875, CrossEntropy: 0.005099060945212841, Accuracy: 0.9983615728900256\n",
      "EVALUATION with last weights -> Loss: 11732586.0, CrossEntropy: 1.8558859825134277, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.894023895263672\n",
      "Iter 602 / 2000, Loss: 125266748.71875, CrossEntropy: 0.004043058026582003, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11433116.0, CrossEntropy: 1.8048337697982788, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 12.994145154953003\n",
      "Iter 603 / 2000, Loss: 127712842.21875, CrossEntropy: 0.005014961119741201, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11413414.0, CrossEntropy: 1.791488528251648, Accuracy: 0.8261471518987342\n",
      "Elapsed time for the training: 12.911738157272339\n",
      "Iter 604 / 2000, Loss: 124511378.75, CrossEntropy: 0.003730969037860632, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 12214457.0, CrossEntropy: 1.9357759952545166, Accuracy: 0.8130933544303798\n",
      "Elapsed time for the training: 12.921356201171875\n",
      "Iter 605 / 2000, Loss: 126253687.78125, CrossEntropy: 0.004422952886670828, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11827619.0, CrossEntropy: 1.8564835786819458, Accuracy: 0.8163568037974683\n",
      "Elapsed time for the training: 12.78951644897461\n",
      "Iter 606 / 2000, Loss: 126859669.8125, CrossEntropy: 0.004660750739276409, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11173903.0, CrossEntropy: 1.7531195878982544, Accuracy: 0.8300039556962026\n",
      "Elapsed time for the training: 14.513442754745483\n",
      "Iter 607 / 2000, Loss: 123930420.40625, CrossEntropy: 0.0034845031332224607, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 11636924.0, CrossEntropy: 1.8351261615753174, Accuracy: 0.8197191455696202\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 13.784772872924805\n",
      "Iter 608 / 2000, Loss: 127368218.4375, CrossEntropy: 0.004853024613112211, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 11239982.0, CrossEntropy: 1.7693654298782349, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.900996685028076\n",
      "Iter 609 / 2000, Loss: 125930438.3125, CrossEntropy: 0.004273772239685059, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11396684.0, CrossEntropy: 1.7807673215866089, Accuracy: 0.8246637658227848\n",
      "Elapsed time for the training: 12.938861608505249\n",
      "Iter 610 / 2000, Loss: 128313968.03125, CrossEntropy: 0.005220729857683182, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 12227259.0, CrossEntropy: 1.9463316202163696, Accuracy: 0.8145767405063291\n",
      "Elapsed time for the training: 12.912224054336548\n",
      "Iter 611 / 2000, Loss: 125724700.96875, CrossEntropy: 0.004185870289802551, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 11576282.0, CrossEntropy: 1.839349389076233, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.887364625930786\n",
      "Iter 612 / 2000, Loss: 124804248.3125, CrossEntropy: 0.003808531677350402, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 11611038.0, CrossEntropy: 1.832524061203003, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.88208818435669\n",
      "Iter 613 / 2000, Loss: 128148957.59375, CrossEntropy: 0.005139049608260393, Accuracy: 0.998261668797954\n",
      "EVALUATION with last weights -> Loss: 11463870.0, CrossEntropy: 1.8431264162063599, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.886467933654785\n",
      "Iter 614 / 2000, Loss: 127036379.90625, CrossEntropy: 0.004689020104706287, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 12551275.0, CrossEntropy: 1.9899836778640747, Accuracy: 0.8130933544303798\n",
      "Elapsed time for the training: 12.901984691619873\n",
      "Iter 615 / 2000, Loss: 128300130.90625, CrossEntropy: 0.005188317503780127, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11631750.0, CrossEntropy: 1.854325294494629, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.920384645462036\n",
      "Iter 616 / 2000, Loss: 123563307.46875, CrossEntropy: 0.0033113909885287285, Accuracy: 0.9986293158567775\n",
      "EVALUATION with last weights -> Loss: 11382775.0, CrossEntropy: 1.7889145612716675, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 12.907824277877808\n",
      "Iter 617 / 2000, Loss: 127895127.65625, CrossEntropy: 0.005041114054620266, Accuracy: 0.9984494884910486\n",
      "EVALUATION with last weights -> Loss: 11079447.0, CrossEntropy: 1.7533574104309082, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.914047241210938\n",
      "Iter 618 / 2000, Loss: 126356233.0625, CrossEntropy: 0.004397165030241013, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 11352323.0, CrossEntropy: 1.7750884294509888, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.891142129898071\n",
      "Iter 619 / 2000, Loss: 128848443.8125, CrossEntropy: 0.005387002602219582, Accuracy: 0.998201726342711\n",
      "EVALUATION with last weights -> Loss: 11305123.0, CrossEntropy: 1.7901298999786377, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.83741807937622\n",
      "Iter 620 / 2000, Loss: 124500928.59375, CrossEntropy: 0.0036822734400629997, Accuracy: 0.9985693734015345\n",
      "EVALUATION with last weights -> Loss: 13087270.0, CrossEntropy: 2.068700075149536, Accuracy: 0.8100276898734177\n",
      "Elapsed time for the training: 12.94731068611145\n",
      "Iter 621 / 2000, Loss: 127677916.96875, CrossEntropy: 0.004909710958600044, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11316204.0, CrossEntropy: 1.7998671531677246, Accuracy: 0.8263449367088608\n",
      "Elapsed time for the training: 12.962168455123901\n",
      "Iter 622 / 2000, Loss: 127215682.4375, CrossEntropy: 0.004720182158052921, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 11644443.0, CrossEntropy: 1.8460906744003296, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.93299126625061\n",
      "Iter 623 / 2000, Loss: 125633634.25, CrossEntropy: 0.004083219449967146, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 11499469.0, CrossEntropy: 1.823933720588684, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 13.275816202163696\n",
      "Iter 624 / 2000, Loss: 127275221.09375, CrossEntropy: 0.004734012298285961, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 11262012.0, CrossEntropy: 1.7622926235198975, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.89290189743042\n",
      "Iter 625 / 2000, Loss: 124922973.03125, CrossEntropy: 0.00378987193107605, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11535281.0, CrossEntropy: 1.8123953342437744, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.909722805023193\n",
      "Iter 626 / 2000, Loss: 124483353.125, CrossEntropy: 0.003610356943681836, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 11701113.0, CrossEntropy: 1.849651575088501, Accuracy: 0.8176424050632911\n",
      "Elapsed time for the training: 12.925820350646973\n",
      "Iter 627 / 2000, Loss: 127674621.0, CrossEntropy: 0.004879472311586142, Accuracy: 0.9982816496163683\n",
      "EVALUATION with last weights -> Loss: 11676326.0, CrossEntropy: 1.8544087409973145, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 12.851051092147827\n",
      "Iter 628 / 2000, Loss: 124832333.09375, CrossEntropy: 0.003739016829058528, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 11924375.0, CrossEntropy: 1.8779782056808472, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.932380676269531\n",
      "Iter 629 / 2000, Loss: 127092038.8125, CrossEntropy: 0.004636771976947784, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11856326.0, CrossEntropy: 1.8994317054748535, Accuracy: 0.8144778481012658\n",
      "Elapsed time for the training: 12.859328746795654\n",
      "Iter 630 / 2000, Loss: 128215207.875, CrossEntropy: 0.005080684553831816, Accuracy: 0.9984015345268542\n",
      "EVALUATION with last weights -> Loss: 11478396.0, CrossEntropy: 1.8173574209213257, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.888839721679688\n",
      "Iter 631 / 2000, Loss: 129452191.0625, CrossEntropy: 0.005574404262006283, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 11364999.0, CrossEntropy: 1.779975414276123, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 15.003554582595825\n",
      "Iter 632 / 2000, Loss: 126352306.0, CrossEntropy: 0.004326396156102419, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 11467965.0, CrossEntropy: 1.7975273132324219, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 15.53899884223938\n",
      "Iter 633 / 2000, Loss: 127411078.59375, CrossEntropy: 0.004744499456137419, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 12232751.0, CrossEntropy: 1.934906005859375, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 13.785037994384766\n",
      "Iter 634 / 2000, Loss: 125941998.65625, CrossEntropy: 0.00415197154507041, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 11193288.0, CrossEntropy: 1.7568572759628296, Accuracy: 0.8267405063291139\n",
      "Elapsed time for the training: 12.883965730667114\n",
      "Iter 635 / 2000, Loss: 126743939.65625, CrossEntropy: 0.004467963241040707, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 12440405.0, CrossEntropy: 1.971824049949646, Accuracy: 0.8130933544303798\n",
      "Elapsed time for the training: 12.91232442855835\n",
      "Iter 636 / 2000, Loss: 125613238.9375, CrossEntropy: 0.004064725246280432, Accuracy: 0.9987092391304349\n",
      "EVALUATION with last weights -> Loss: 11922131.0, CrossEntropy: 1.8654457330703735, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.46002459526062\n",
      "Iter 637 / 2000, Loss: 129712350.625, CrossEntropy: 0.005645737051963806, Accuracy: 0.998261668797954\n",
      "EVALUATION with last weights -> Loss: 11652637.0, CrossEntropy: 1.8470629453659058, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.888699531555176\n",
      "Iter 638 / 2000, Loss: 124966118.1875, CrossEntropy: 0.003741609398275614, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11687913.0, CrossEntropy: 1.8438349962234497, Accuracy: 0.8234770569620253\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.930759191513062\n",
      "Iter 639 / 2000, Loss: 125384550.53125, CrossEntropy: 0.003911592531949282, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 12159910.0, CrossEntropy: 1.9202678203582764, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.911046266555786\n",
      "Iter 640 / 2000, Loss: 126467922.0, CrossEntropy: 0.004332116339355707, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 12133412.0, CrossEntropy: 1.901238203048706, Accuracy: 0.8188291139240507\n",
      "Elapsed time for the training: 12.906813859939575\n",
      "Iter 641 / 2000, Loss: 126588578.75, CrossEntropy: 0.004375953692942858, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 11753044.0, CrossEntropy: 1.8535526990890503, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 12.890879392623901\n",
      "Iter 642 / 2000, Loss: 126545312.5, CrossEntropy: 0.004353297408670187, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 12498005.0, CrossEntropy: 1.9627550840377808, Accuracy: 0.8145767405063291\n",
      "Elapsed time for the training: 12.889503002166748\n",
      "Iter 643 / 2000, Loss: 126947937.78125, CrossEntropy: 0.004508631303906441, Accuracy: 0.9983615728900256\n",
      "EVALUATION with last weights -> Loss: 11980418.0, CrossEntropy: 1.8948973417282104, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.880142211914062\n",
      "Iter 644 / 2000, Loss: 123284812.78125, CrossEntropy: 0.0030421828851103783, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 11587017.0, CrossEntropy: 1.8147917985916138, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.972814321517944\n",
      "Iter 645 / 2000, Loss: 125888810.78125, CrossEntropy: 0.00407614978030324, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 11840038.0, CrossEntropy: 1.9000056982040405, Accuracy: 0.8143789556962026\n",
      "Elapsed time for the training: 12.862710952758789\n",
      "Iter 646 / 2000, Loss: 123799942.9375, CrossEntropy: 0.0032364921644330025, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12659377.0, CrossEntropy: 1.9811614751815796, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.931000232696533\n",
      "Iter 647 / 2000, Loss: 126067598.1875, CrossEntropy: 0.004137417301535606, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 11806724.0, CrossEntropy: 1.857696771621704, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.927454233169556\n",
      "Iter 648 / 2000, Loss: 127018454.625, CrossEntropy: 0.0045126741752028465, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11597419.0, CrossEntropy: 1.8223336935043335, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.910016536712646\n",
      "Iter 649 / 2000, Loss: 125401460.5625, CrossEntropy: 0.003862076671794057, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11938594.0, CrossEntropy: 1.8771955966949463, Accuracy: 0.8183346518987342\n",
      "Elapsed time for the training: 12.889899492263794\n",
      "Iter 650 / 2000, Loss: 127022139.75, CrossEntropy: 0.004515181295573711, Accuracy: 0.9986093350383632\n",
      "EVALUATION with last weights -> Loss: 12409133.0, CrossEntropy: 1.9429429769515991, Accuracy: 0.8176424050632911\n",
      "Elapsed time for the training: 12.900434017181396\n",
      "Iter 651 / 2000, Loss: 129081741.15625, CrossEntropy: 0.005322644952684641, Accuracy: 0.99838155370844\n",
      "EVALUATION with last weights -> Loss: 11416089.0, CrossEntropy: 1.8089221715927124, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 13.351153135299683\n",
      "Iter 652 / 2000, Loss: 128232066.375, CrossEntropy: 0.004977616481482983, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 11704015.0, CrossEntropy: 1.8484395742416382, Accuracy: 0.8164556962025317\n",
      "Elapsed time for the training: 13.681380987167358\n",
      "Iter 653 / 2000, Loss: 128256103.21875, CrossEntropy: 0.004982766695320606, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11084488.0, CrossEntropy: 1.7389949560165405, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 13.72440242767334\n",
      "Iter 654 / 2000, Loss: 124890864.71875, CrossEntropy: 0.0036329366266727448, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 11355339.0, CrossEntropy: 1.796704649925232, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 13.613470792770386\n",
      "Iter 655 / 2000, Loss: 126636447.3125, CrossEntropy: 0.004325787536799908, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 11353339.0, CrossEntropy: 1.7818169593811035, Accuracy: 0.825751582278481\n",
      "Elapsed time for the training: 13.238894939422607\n",
      "Iter 656 / 2000, Loss: 126250771.96875, CrossEntropy: 0.004171312786638737, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 11183036.0, CrossEntropy: 1.7545623779296875, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.886862993240356\n",
      "Iter 657 / 2000, Loss: 126331330.5, CrossEntropy: 0.004194637294858694, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 11778183.0, CrossEntropy: 1.8664194345474243, Accuracy: 0.8171479430379747\n",
      "Elapsed time for the training: 12.933523416519165\n",
      "Iter 658 / 2000, Loss: 128440432.125, CrossEntropy: 0.005075989756733179, Accuracy: 0.99840952685422\n",
      "EVALUATION with last weights -> Loss: 11965854.0, CrossEntropy: 1.8800394535064697, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 12.877121686935425\n",
      "Iter 659 / 2000, Loss: 125603792.0, CrossEntropy: 0.0039030236657708883, Accuracy: 0.9985294117647059\n",
      "EVALUATION with last weights -> Loss: 11652778.0, CrossEntropy: 1.8475091457366943, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.86693000793457\n",
      "Iter 660 / 2000, Loss: 126922752.78125, CrossEntropy: 0.004415932111442089, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11998412.0, CrossEntropy: 1.8748310804367065, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.87220573425293\n",
      "Iter 661 / 2000, Loss: 128556647.5625, CrossEntropy: 0.005064512602984905, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 11202350.0, CrossEntropy: 1.7812891006469727, Accuracy: 0.8271360759493671\n",
      "Elapsed time for the training: 12.865483045578003\n",
      "Iter 662 / 2000, Loss: 127245415.1875, CrossEntropy: 0.004535573534667492, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 11981760.0, CrossEntropy: 1.9170650243759155, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.858614206314087\n",
      "Iter 663 / 2000, Loss: 126898508.53125, CrossEntropy: 0.004407444968819618, Accuracy: 0.9987092391304349\n",
      "EVALUATION with last weights -> Loss: 11500710.0, CrossEntropy: 1.806596279144287, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.912689924240112\n",
      "Iter 664 / 2000, Loss: 127138629.34375, CrossEntropy: 0.004516007844358683, Accuracy: 0.9985893542199489\n",
      "EVALUATION with last weights -> Loss: 12002489.0, CrossEntropy: 1.8901435136795044, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.868624210357666\n",
      "Iter 665 / 2000, Loss: 126347734.34375, CrossEntropy: 0.004176028538495302, Accuracy: 0.9987492007672635\n",
      "EVALUATION with last weights -> Loss: 12105779.0, CrossEntropy: 1.9226337671279907, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.923444032669067\n",
      "Iter 666 / 2000, Loss: 125184674.96875, CrossEntropy: 0.003692838829010725, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 11528897.0, CrossEntropy: 1.810320496559143, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.86483645439148\n",
      "Iter 667 / 2000, Loss: 127709273.34375, CrossEntropy: 0.004696615040302277, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11446116.0, CrossEntropy: 1.7999235391616821, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.88778305053711\n",
      "Iter 668 / 2000, Loss: 124855101.71875, CrossEntropy: 0.0036177486181259155, Accuracy: 0.9986892583120205\n",
      "EVALUATION with last weights -> Loss: 12056020.0, CrossEntropy: 1.9122519493103027, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.883122205734253\n",
      "Iter 669 / 2000, Loss: 126940514.34375, CrossEntropy: 0.00437944894656539, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 11227613.0, CrossEntropy: 1.7736961841583252, Accuracy: 0.8229825949367089\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.96513032913208\n",
      "Iter 670 / 2000, Loss: 125568151.0625, CrossEntropy: 0.0038261348381638527, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 12149149.0, CrossEntropy: 1.942201852798462, Accuracy: 0.8135878164556962\n",
      "Elapsed time for the training: 12.906188488006592\n",
      "Iter 671 / 2000, Loss: 126978396.375, CrossEntropy: 0.004394231364130974, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 12965626.0, CrossEntropy: 2.028911590576172, Accuracy: 0.8123022151898734\n",
      "Elapsed time for the training: 13.16148328781128\n",
      "Iter 672 / 2000, Loss: 128164344.03125, CrossEntropy: 0.0048541962169110775, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11867752.0, CrossEntropy: 1.8720709085464478, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.927017211914062\n",
      "Iter 673 / 2000, Loss: 124308763.375, CrossEntropy: 0.0033343564718961716, Accuracy: 0.9988890664961637\n",
      "EVALUATION with last weights -> Loss: 12284518.0, CrossEntropy: 1.9211785793304443, Accuracy: 0.8169501582278481\n",
      "Elapsed time for the training: 12.90964961051941\n",
      "Iter 674 / 2000, Loss: 129174421.71875, CrossEntropy: 0.005248518660664558, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 11907437.0, CrossEntropy: 1.8667309284210205, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.865962028503418\n",
      "Iter 675 / 2000, Loss: 127009896.90625, CrossEntropy: 0.004380102269351482, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11737716.0, CrossEntropy: 1.8476215600967407, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.912227153778076\n",
      "Iter 676 / 2000, Loss: 124430059.59375, CrossEntropy: 0.003343890653923154, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 11724656.0, CrossEntropy: 1.841271996498108, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.886597633361816\n",
      "Iter 677 / 2000, Loss: 127850655.4375, CrossEntropy: 0.004706250503659248, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 12234657.0, CrossEntropy: 1.929638147354126, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 12.883690595626831\n",
      "Iter 678 / 2000, Loss: 127466614.6875, CrossEntropy: 0.004547799006104469, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 11854071.0, CrossEntropy: 1.868443250656128, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.89511251449585\n",
      "Iter 679 / 2000, Loss: 128616286.5625, CrossEntropy: 0.005087509751319885, Accuracy: 0.9981577685421995\n",
      "EVALUATION with last weights -> Loss: 12065764.0, CrossEntropy: 1.9137036800384521, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 15.104135036468506\n",
      "Iter 680 / 2000, Loss: 126842064.0625, CrossEntropy: 0.004348997492343187, Accuracy: 0.9986492966751919\n",
      "EVALUATION with last weights -> Loss: 11845478.0, CrossEntropy: 1.8532423973083496, Accuracy: 0.815565664556962\n",
      "Elapsed time for the training: 14.733135461807251\n",
      "Iter 681 / 2000, Loss: 127029037.71875, CrossEntropy: 0.004357334692031145, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 12260177.0, CrossEntropy: 1.9370299577713013, Accuracy: 0.8147745253164557\n",
      "Elapsed time for the training: 12.956918716430664\n",
      "Iter 682 / 2000, Loss: 126178689.25, CrossEntropy: 0.004012824036180973, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 12178872.0, CrossEntropy: 1.9067870378494263, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.8916494846344\n",
      "Iter 683 / 2000, Loss: 126312316.25, CrossEntropy: 0.004062260035425425, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 11692493.0, CrossEntropy: 1.8571476936340332, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 13.056830406188965\n",
      "Iter 684 / 2000, Loss: 129548513.53125, CrossEntropy: 0.005357886664569378, Accuracy: 0.9981218030690537\n",
      "EVALUATION with last weights -> Loss: 11382811.0, CrossEntropy: 1.7946178913116455, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 13.21422290802002\n",
      "Iter 685 / 2000, Loss: 125229579.28125, CrossEntropy: 0.003618980059400201, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 11913076.0, CrossEntropy: 1.8675050735473633, Accuracy: 0.8181368670886076\n",
      "Elapsed time for the training: 12.031250715255737\n",
      "Iter 686 / 2000, Loss: 126629247.375, CrossEntropy: 0.004176498390734196, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11614235.0, CrossEntropy: 1.8462047576904297, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.017998933792114\n",
      "Iter 687 / 2000, Loss: 128052939.75, CrossEntropy: 0.0047374083660542965, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 11446341.0, CrossEntropy: 1.789824366569519, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.829400300979614\n",
      "Iter 688 / 2000, Loss: 125052860.65625, CrossEntropy: 0.0035339724272489548, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12044641.0, CrossEntropy: 1.8912779092788696, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.84623122215271\n",
      "Iter 689 / 2000, Loss: 126509994.28125, CrossEntropy: 0.004111710470169783, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 11946097.0, CrossEntropy: 1.9081772565841675, Accuracy: 0.8181368670886076\n",
      "Elapsed time for the training: 12.866772651672363\n",
      "Iter 690 / 2000, Loss: 126106684.15625, CrossEntropy: 0.003946016076952219, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 11703178.0, CrossEntropy: 1.831870436668396, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.88440990447998\n",
      "Iter 691 / 2000, Loss: 126353733.03125, CrossEntropy: 0.0040400163270533085, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11778014.0, CrossEntropy: 1.8531512022018433, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 13.437167406082153\n",
      "Iter 692 / 2000, Loss: 129426766.375, CrossEntropy: 0.005290383938699961, Accuracy: 0.9983096227621484\n",
      "EVALUATION with last weights -> Loss: 13243885.0, CrossEntropy: 2.0900237560272217, Accuracy: 0.8071598101265823\n",
      "Elapsed time for the training: 13.69633436203003\n",
      "Iter 693 / 2000, Loss: 125334887.84375, CrossEntropy: 0.0036237789317965508, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 12087231.0, CrossEntropy: 1.8955460786819458, Accuracy: 0.8144778481012658\n",
      "Elapsed time for the training: 12.943591117858887\n",
      "Iter 694 / 2000, Loss: 125179270.65625, CrossEntropy: 0.003559214761480689, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11233296.0, CrossEntropy: 1.7796121835708618, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 12.994324922561646\n",
      "Iter 695 / 2000, Loss: 127897389.84375, CrossEntropy: 0.004638273268938065, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 11470122.0, CrossEntropy: 1.807838797569275, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 13.03252625465393\n",
      "Iter 696 / 2000, Loss: 127914399.75, CrossEntropy: 0.004645293578505516, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 11898922.0, CrossEntropy: 1.8792526721954346, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.91169786453247\n",
      "Iter 697 / 2000, Loss: 127217008.5, CrossEntropy: 0.004356143996119499, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 11689610.0, CrossEntropy: 1.8434242010116577, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.901638746261597\n",
      "Iter 698 / 2000, Loss: 124491385.4375, CrossEntropy: 0.003262682119384408, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 11901073.0, CrossEntropy: 1.8678531646728516, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.866736650466919\n",
      "Iter 699 / 2000, Loss: 128027998.78125, CrossEntropy: 0.0046742819249629974, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 12121080.0, CrossEntropy: 1.8995230197906494, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 12.944406986236572\n",
      "Iter 700 / 2000, Loss: 124772594.3125, CrossEntropy: 0.003366530407220125, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11970505.0, CrossEntropy: 1.8853700160980225, Accuracy: 0.8171479430379747\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.932741403579712\n",
      "Iter 701 / 2000, Loss: 125535008.78125, CrossEntropy: 0.003666335018351674, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12472012.0, CrossEntropy: 1.9772615432739258, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.929481744766235\n",
      "Iter 702 / 2000, Loss: 129872956.84375, CrossEntropy: 0.005399657413363457, Accuracy: 0.9982816496163683\n",
      "EVALUATION with last weights -> Loss: 13354824.0, CrossEntropy: 2.1144497394561768, Accuracy: 0.8129944620253164\n",
      "Elapsed time for the training: 12.89887523651123\n",
      "Iter 703 / 2000, Loss: 125835472.84375, CrossEntropy: 0.0037796918768435717, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11678904.0, CrossEntropy: 1.84328031539917, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.912660598754883\n",
      "Iter 704 / 2000, Loss: 126576953.1875, CrossEntropy: 0.004137453623116016, Accuracy: 0.9986692774936061\n",
      "EVALUATION with last weights -> Loss: 11967339.0, CrossEntropy: 1.917310118675232, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 12.930649280548096\n",
      "Iter 705 / 2000, Loss: 126830837.6875, CrossEntropy: 0.004165231250226498, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11444175.0, CrossEntropy: 1.7953248023986816, Accuracy: 0.828817246835443\n",
      "Elapsed time for the training: 12.936914205551147\n",
      "Iter 706 / 2000, Loss: 126220083.96875, CrossEntropy: 0.003916602116078138, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 12000393.0, CrossEntropy: 1.9131319522857666, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.888909101486206\n",
      "Iter 707 / 2000, Loss: 127030193.25, CrossEntropy: 0.004235618747770786, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11942728.0, CrossEntropy: 1.8715890645980835, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.915547609329224\n",
      "Iter 708 / 2000, Loss: 126120847.9375, CrossEntropy: 0.0038698685821145773, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 11584386.0, CrossEntropy: 1.8692692518234253, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.912156343460083\n",
      "Iter 709 / 2000, Loss: 123276450.5625, CrossEntropy: 0.002743414370343089, Accuracy: 0.9990089514066497\n",
      "EVALUATION with last weights -> Loss: 11537729.0, CrossEntropy: 1.8036412000656128, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.896533012390137\n",
      "Iter 710 / 2000, Loss: 126653308.5625, CrossEntropy: 0.004074108321219683, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 11649817.0, CrossEntropy: 1.843957543373108, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.890743970870972\n",
      "Iter 711 / 2000, Loss: 124106330.59375, CrossEntropy: 0.0030525883194059134, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 11480936.0, CrossEntropy: 1.795982837677002, Accuracy: 0.8295094936708861\n",
      "Elapsed time for the training: 12.893454790115356\n",
      "Iter 712 / 2000, Loss: 125934961.15625, CrossEntropy: 0.003778405487537384, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 10990352.0, CrossEntropy: 1.734086513519287, Accuracy: 0.8301028481012658\n",
      "Elapsed time for the training: 12.875542640686035\n",
      "Iter 713 / 2000, Loss: 129676555.09375, CrossEntropy: 0.00527392141520977, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 12014935.0, CrossEntropy: 1.9179197549819946, Accuracy: 0.8130933544303798\n",
      "Elapsed time for the training: 12.952508926391602\n",
      "Iter 714 / 2000, Loss: 125842481.84375, CrossEntropy: 0.00373286847025156, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11642478.0, CrossEntropy: 1.8280850648880005, Accuracy: 0.8279272151898734\n",
      "Elapsed time for the training: 12.896944761276245\n",
      "Iter 715 / 2000, Loss: 127482737.78125, CrossEntropy: 0.0043827686458826065, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 11891599.0, CrossEntropy: 1.8677791357040405, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 12.897871255874634\n",
      "Iter 716 / 2000, Loss: 129496241.65625, CrossEntropy: 0.0051823691464960575, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 12174843.0, CrossEntropy: 1.9369431734085083, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 12.806523084640503\n",
      "Iter 717 / 2000, Loss: 123595738.90625, CrossEntropy: 0.002820883644744754, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 11712327.0, CrossEntropy: 1.8645000457763672, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.83808946609497\n",
      "Iter 718 / 2000, Loss: 126106755.78125, CrossEntropy: 0.0038809471298009157, Accuracy: 0.9986892583120205\n",
      "EVALUATION with last weights -> Loss: 12570916.0, CrossEntropy: 1.9751996994018555, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.960114240646362\n",
      "Iter 719 / 2000, Loss: 126585730.40625, CrossEntropy: 0.00400758208706975, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11681766.0, CrossEntropy: 1.835100769996643, Accuracy: 0.8277294303797469\n",
      "Elapsed time for the training: 12.926921129226685\n",
      "Iter 720 / 2000, Loss: 127872256.875, CrossEntropy: 0.004517298191785812, Accuracy: 0.9984215153452686\n",
      "EVALUATION with last weights -> Loss: 12047986.0, CrossEntropy: 1.9003204107284546, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 13.486264944076538\n",
      "Iter 721 / 2000, Loss: 126798489.0, CrossEntropy: 0.0040831719525158405, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12963132.0, CrossEntropy: 2.057851791381836, Accuracy: 0.8117088607594937\n",
      "Elapsed time for the training: 12.542586088180542\n",
      "Iter 722 / 2000, Loss: 127794890.25, CrossEntropy: 0.004476808477193117, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 12250726.0, CrossEntropy: 1.9586628675460815, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.874334573745728\n",
      "Iter 723 / 2000, Loss: 127434611.15625, CrossEntropy: 0.00432765856385231, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11515652.0, CrossEntropy: 1.8066736459732056, Accuracy: 0.8271360759493671\n",
      "Elapsed time for the training: 12.359862327575684\n",
      "Iter 724 / 2000, Loss: 129120202.875, CrossEntropy: 0.0049971784465014935, Accuracy: 0.9984215153452686\n",
      "EVALUATION with last weights -> Loss: 12022843.0, CrossEntropy: 1.9094688892364502, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.16029691696167\n",
      "Iter 725 / 2000, Loss: 128113550.0, CrossEntropy: 0.004589342512190342, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 11912922.0, CrossEntropy: 1.882866621017456, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.904070615768433\n",
      "Iter 726 / 2000, Loss: 127079602.84375, CrossEntropy: 0.004172191023826599, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11930831.0, CrossEntropy: 1.8890676498413086, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.88789701461792\n",
      "Iter 727 / 2000, Loss: 123832146.71875, CrossEntropy: 0.002880292711779475, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 11624554.0, CrossEntropy: 1.8214147090911865, Accuracy: 0.8300039556962026\n",
      "Elapsed time for the training: 12.87208890914917\n",
      "Iter 728 / 2000, Loss: 129126649.46875, CrossEntropy: 0.004982172045856714, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11646138.0, CrossEntropy: 1.8621461391448975, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.883501291275024\n",
      "Iter 729 / 2000, Loss: 124584980.40625, CrossEntropy: 0.003162970067933202, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 11963323.0, CrossEntropy: 1.8921432495117188, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.881493330001831\n",
      "Iter 730 / 2000, Loss: 125842409.875, CrossEntropy: 0.0036617135629057884, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 12262862.0, CrossEntropy: 1.9160722494125366, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.937341928482056\n",
      "Iter 731 / 2000, Loss: 128239303.0625, CrossEntropy: 0.004614864010363817, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11940494.0, CrossEntropy: 1.8693628311157227, Accuracy: 0.8234770569620253\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.888445377349854\n",
      "Iter 732 / 2000, Loss: 126834832.21875, CrossEntropy: 0.004054530989378691, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 12206121.0, CrossEntropy: 1.917378544807434, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.98190188407898\n",
      "Iter 733 / 2000, Loss: 130079898.15625, CrossEntropy: 0.0053408509120345116, Accuracy: 0.9983016304347826\n",
      "EVALUATION with last weights -> Loss: 11437049.0, CrossEntropy: 1.7943024635314941, Accuracy: 0.8278283227848101\n",
      "Elapsed time for the training: 13.706898212432861\n",
      "Iter 734 / 2000, Loss: 124144243.375, CrossEntropy: 0.0029800559859722853, Accuracy: 0.9989290281329923\n",
      "EVALUATION with last weights -> Loss: 11435383.0, CrossEntropy: 1.8029781579971313, Accuracy: 0.8256526898734177\n",
      "Elapsed time for the training: 14.773590087890625\n",
      "Iter 735 / 2000, Loss: 128134322.125, CrossEntropy: 0.004561082925647497, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11879802.0, CrossEntropy: 1.8818321228027344, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.888787269592285\n",
      "Iter 736 / 2000, Loss: 126636128.9375, CrossEntropy: 0.0039521255530416965, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 11979511.0, CrossEntropy: 1.8791782855987549, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.936903238296509\n",
      "Iter 737 / 2000, Loss: 126155415.4375, CrossEntropy: 0.0037559038028120995, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12169753.0, CrossEntropy: 1.9221141338348389, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.920943021774292\n",
      "Iter 738 / 2000, Loss: 125663775.0625, CrossEntropy: 0.0035552496556192636, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 11884135.0, CrossEntropy: 1.8649197816848755, Accuracy: 0.8291139240506329\n",
      "Elapsed time for the training: 12.867103099822998\n",
      "Iter 739 / 2000, Loss: 126730569.0, CrossEntropy: 0.003977288957685232, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12436771.0, CrossEntropy: 1.9541468620300293, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.867620468139648\n",
      "Iter 740 / 2000, Loss: 124467623.40625, CrossEntropy: 0.003069092519581318, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 11790074.0, CrossEntropy: 1.8712284564971924, Accuracy: 0.8263449367088608\n",
      "Elapsed time for the training: 12.088857173919678\n",
      "Iter 741 / 2000, Loss: 125851209.0625, CrossEntropy: 0.0036174992565065622, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11799003.0, CrossEntropy: 1.8701417446136475, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.368539571762085\n",
      "Iter 742 / 2000, Loss: 127114670.625, CrossEntropy: 0.004118234384804964, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 11780303.0, CrossEntropy: 1.840685486793518, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 12.943852424621582\n",
      "Iter 743 / 2000, Loss: 124761108.75, CrossEntropy: 0.003173300065100193, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11784744.0, CrossEntropy: 1.843739628791809, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.814894199371338\n",
      "Iter 744 / 2000, Loss: 126790689.625, CrossEntropy: 0.003981004469096661, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12195852.0, CrossEntropy: 1.9459065198898315, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 12.155287265777588\n",
      "Iter 745 / 2000, Loss: 126197166.65625, CrossEntropy: 0.0037399944849312305, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 11806290.0, CrossEntropy: 1.8829610347747803, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 11.754679203033447\n",
      "Iter 746 / 2000, Loss: 129735148.8125, CrossEntropy: 0.005148373544216156, Accuracy: 0.9982416879795396\n",
      "EVALUATION with last weights -> Loss: 12881676.0, CrossEntropy: 2.0344185829162598, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 11.76407504081726\n",
      "Iter 747 / 2000, Loss: 125175322.40625, CrossEntropy: 0.0033405963331460953, Accuracy: 0.9989889705882353\n",
      "EVALUATION with last weights -> Loss: 14127689.0, CrossEntropy: 2.2086410522460938, Accuracy: 0.8035996835443038\n",
      "Elapsed time for the training: 11.77333950996399\n",
      "Iter 748 / 2000, Loss: 126087597.25, CrossEntropy: 0.00370571156963706, Accuracy: 0.9987891624040921\n",
      "EVALUATION with last weights -> Loss: 12264108.0, CrossEntropy: 1.928039789199829, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.51553201675415\n",
      "Iter 749 / 2000, Loss: 128350738.6875, CrossEntropy: 0.004581913817673922, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 11982079.0, CrossEntropy: 1.8930678367614746, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 13.076327800750732\n",
      "Iter 750 / 2000, Loss: 123630649.6875, CrossEntropy: 0.002691982313990593, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 11652074.0, CrossEntropy: 1.8442133665084839, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 13.105431318283081\n",
      "Iter 751 / 2000, Loss: 126621011.46875, CrossEntropy: 0.003886555088683963, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 11954362.0, CrossEntropy: 1.8849070072174072, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 13.181625127792358\n",
      "Iter 752 / 2000, Loss: 124219571.75, CrossEntropy: 0.0029179914854466915, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12116019.0, CrossEntropy: 1.9145528078079224, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 12.858749628067017\n",
      "Iter 753 / 2000, Loss: 129147654.59375, CrossEntropy: 0.004979327321052551, Accuracy: 0.9984894501278773\n",
      "EVALUATION with last weights -> Loss: 11608157.0, CrossEntropy: 1.8310246467590332, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.867862701416016\n",
      "Iter 754 / 2000, Loss: 127161344.09375, CrossEntropy: 0.004085130523890257, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12242282.0, CrossEntropy: 1.9163235425949097, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 13.657714605331421\n",
      "Iter 755 / 2000, Loss: 128286817.125, CrossEntropy: 0.00453025521710515, Accuracy: 0.9984614769820972\n",
      "EVALUATION with last weights -> Loss: 11606276.0, CrossEntropy: 1.8690394163131714, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 15.069874048233032\n",
      "Iter 756 / 2000, Loss: 126077768.59375, CrossEntropy: 0.0036426959559321404, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 11418913.0, CrossEntropy: 1.7855215072631836, Accuracy: 0.8279272151898734\n",
      "Elapsed time for the training: 12.88955283164978\n",
      "Iter 757 / 2000, Loss: 125143675.625, CrossEntropy: 0.0033073867671191692, Accuracy: 0.9989490089514067\n",
      "EVALUATION with last weights -> Loss: 12278508.0, CrossEntropy: 1.9316682815551758, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.856171607971191\n",
      "Iter 758 / 2000, Loss: 129364331.15625, CrossEntropy: 0.004948765505105257, Accuracy: 0.99838155370844\n",
      "EVALUATION with last weights -> Loss: 11947547.0, CrossEntropy: 1.8692907094955444, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.876834630966187\n",
      "Iter 759 / 2000, Loss: 126167423.875, CrossEntropy: 0.0036777863278985023, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 11801183.0, CrossEntropy: 1.8573923110961914, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.884891748428345\n",
      "Iter 760 / 2000, Loss: 125385760.59375, CrossEntropy: 0.0033534234389662743, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 12290820.0, CrossEntropy: 1.972809910774231, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 12.917896509170532\n",
      "Iter 761 / 2000, Loss: 130132268.71875, CrossEntropy: 0.005242332816123962, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 12245183.0, CrossEntropy: 1.9232429265975952, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.898440599441528\n",
      "Iter 762 / 2000, Loss: 125961289.75, CrossEntropy: 0.003575763665139675, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 11960977.0, CrossEntropy: 1.8941776752471924, Accuracy: 0.8216969936708861\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.955370664596558\n",
      "Iter 763 / 2000, Loss: 126842824.0, CrossEntropy: 0.003919424954801798, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12477286.0, CrossEntropy: 1.9854798316955566, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.862076759338379\n",
      "Iter 764 / 2000, Loss: 128642371.375, CrossEntropy: 0.004634075332432985, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 12722571.0, CrossEntropy: 2.010819911956787, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.910157680511475\n",
      "Iter 765 / 2000, Loss: 128724286.0625, CrossEntropy: 0.004662859253585339, Accuracy: 0.9984414961636828\n",
      "EVALUATION with last weights -> Loss: 11889265.0, CrossEntropy: 1.865147352218628, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 12.908439874649048\n",
      "Iter 766 / 2000, Loss: 126678624.03125, CrossEntropy: 0.00385273271240294, Accuracy: 0.9986892583120205\n",
      "EVALUATION with last weights -> Loss: 11594819.0, CrossEntropy: 1.8251986503601074, Accuracy: 0.8271360759493671\n",
      "Elapsed time for the training: 13.746599435806274\n",
      "Iter 767 / 2000, Loss: 125333821.875, CrossEntropy: 0.0033161297906190157, Accuracy: 0.9988491048593351\n",
      "EVALUATION with last weights -> Loss: 12168836.0, CrossEntropy: 1.9241325855255127, Accuracy: 0.813192246835443\n",
      "Elapsed time for the training: 13.027383804321289\n",
      "Iter 768 / 2000, Loss: 126475787.65625, CrossEntropy: 0.0037513626739382744, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12247253.0, CrossEntropy: 1.9313864707946777, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.906970977783203\n",
      "Iter 769 / 2000, Loss: 126505903.34375, CrossEntropy: 0.003815355245023966, Accuracy: 0.9989090473145781\n",
      "EVALUATION with last weights -> Loss: 12440843.0, CrossEntropy: 1.9635686874389648, Accuracy: 0.8168512658227848\n",
      "Elapsed time for the training: 12.905131816864014\n",
      "Iter 770 / 2000, Loss: 127335727.25, CrossEntropy: 0.004086664877831936, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11929981.0, CrossEntropy: 1.8912386894226074, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 14.200092315673828\n",
      "Iter 771 / 2000, Loss: 127087886.40625, CrossEntropy: 0.003989154007285833, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 12229374.0, CrossEntropy: 1.9325975179672241, Accuracy: 0.8151700949367089\n",
      "Elapsed time for the training: 15.598488569259644\n",
      "Iter 772 / 2000, Loss: 126165985.25, CrossEntropy: 0.0036106929183006287, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12086987.0, CrossEntropy: 1.9139821529388428, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.941306352615356\n",
      "Iter 773 / 2000, Loss: 128406683.0, CrossEntropy: 0.004500962793827057, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 12801354.0, CrossEntropy: 2.025935411453247, Accuracy: 0.8169501582278481\n",
      "Elapsed time for the training: 12.891548871994019\n",
      "Iter 774 / 2000, Loss: 125615723.46875, CrossEntropy: 0.003398210508748889, Accuracy: 0.9989290281329923\n",
      "EVALUATION with last weights -> Loss: 12110593.0, CrossEntropy: 1.9086415767669678, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.88990068435669\n",
      "Iter 775 / 2000, Loss: 125818493.34375, CrossEntropy: 0.0034636182244867086, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 12301781.0, CrossEntropy: 1.9296839237213135, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.8804771900177\n",
      "Iter 776 / 2000, Loss: 127828040.75, CrossEntropy: 0.0042636338621377945, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12133098.0, CrossEntropy: 1.9311507940292358, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.882840394973755\n",
      "Iter 777 / 2000, Loss: 128891635.375, CrossEntropy: 0.004678206518292427, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 12175347.0, CrossEntropy: 1.9136316776275635, Accuracy: 0.8278283227848101\n",
      "Elapsed time for the training: 12.878162622451782\n",
      "Iter 778 / 2000, Loss: 126705175.75, CrossEntropy: 0.0038130718749016523, Accuracy: 0.9987092391304349\n",
      "EVALUATION with last weights -> Loss: 11986838.0, CrossEntropy: 1.8983246088027954, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.96839451789856\n",
      "Iter 779 / 2000, Loss: 123308107.0, CrossEntropy: 0.0024416069500148296, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 12969957.0, CrossEntropy: 2.0460634231567383, Accuracy: 0.814873417721519\n",
      "Elapsed time for the training: 12.897940635681152\n",
      "Iter 780 / 2000, Loss: 129145934.375, CrossEntropy: 0.004767491947859526, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12133747.0, CrossEntropy: 1.9126611948013306, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.912555932998657\n",
      "Iter 781 / 2000, Loss: 126891345.71875, CrossEntropy: 0.003861289471387863, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12305185.0, CrossEntropy: 1.9391942024230957, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.996130466461182\n",
      "Iter 782 / 2000, Loss: 126759867.03125, CrossEntropy: 0.0038047980051487684, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 11743834.0, CrossEntropy: 1.845982313156128, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.656459093093872\n",
      "Iter 783 / 2000, Loss: 126345040.6875, CrossEntropy: 0.0036350851878523827, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 12165638.0, CrossEntropy: 1.9263432025909424, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.915562391281128\n",
      "Iter 784 / 2000, Loss: 130255304.53125, CrossEntropy: 0.0051932851783931255, Accuracy: 0.998321611253197\n",
      "EVALUATION with last weights -> Loss: 11775901.0, CrossEntropy: 1.8544591665267944, Accuracy: 0.8171479430379747\n",
      "Elapsed time for the training: 12.96773076057434\n",
      "Iter 785 / 2000, Loss: 126235160.34375, CrossEntropy: 0.0035817704629153013, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 11789208.0, CrossEntropy: 1.8460006713867188, Accuracy: 0.8269382911392406\n",
      "Elapsed time for the training: 12.945523262023926\n",
      "Iter 786 / 2000, Loss: 126984014.90625, CrossEntropy: 0.003876923583447933, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 11981113.0, CrossEntropy: 1.9013444185256958, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.851496696472168\n",
      "Iter 787 / 2000, Loss: 127239159.75, CrossEntropy: 0.0039743990637362, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 12091268.0, CrossEntropy: 1.8907948732376099, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.200953245162964\n",
      "Iter 788 / 2000, Loss: 123396590.6875, CrossEntropy: 0.0024591872934252024, Accuracy: 0.9991368286445014\n",
      "EVALUATION with last weights -> Loss: 13520375.0, CrossEntropy: 2.1346945762634277, Accuracy: 0.8164556962025317\n",
      "Elapsed time for the training: 12.985320091247559\n",
      "Iter 789 / 2000, Loss: 127661268.875, CrossEntropy: 0.004142004530876875, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 12172959.0, CrossEntropy: 1.9231635332107544, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.920825719833374\n",
      "Iter 790 / 2000, Loss: 125043248.03125, CrossEntropy: 0.0030854090582579374, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 11889068.0, CrossEntropy: 1.864317536354065, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.956162691116333\n",
      "Iter 791 / 2000, Loss: 127951391.84375, CrossEntropy: 0.004242636729031801, Accuracy: 0.9985214194373402\n",
      "EVALUATION with last weights -> Loss: 12036136.0, CrossEntropy: 1.8806787729263306, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.918896675109863\n",
      "Iter 792 / 2000, Loss: 127713404.03125, CrossEntropy: 0.00414347043260932, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 11941964.0, CrossEntropy: 1.868830680847168, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.88506555557251\n",
      "Iter 793 / 2000, Loss: 128268048.0, CrossEntropy: 0.004361231345683336, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 11916975.0, CrossEntropy: 1.8964049816131592, Accuracy: 0.8237737341772152\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.884479522705078\n",
      "Iter 794 / 2000, Loss: 128350129.40625, CrossEntropy: 0.004390512127429247, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 12113662.0, CrossEntropy: 1.9143747091293335, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.863529682159424\n",
      "Iter 795 / 2000, Loss: 126859887.75, CrossEntropy: 0.0037900512106716633, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 12303941.0, CrossEntropy: 1.959233045578003, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 12.869559288024902\n",
      "Iter 796 / 2000, Loss: 124972657.46875, CrossEntropy: 0.0030324235558509827, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12064343.0, CrossEntropy: 1.8916887044906616, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.904685735702515\n",
      "Iter 797 / 2000, Loss: 128790135.625, CrossEntropy: 0.004552869126200676, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 12658065.0, CrossEntropy: 2.015626907348633, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.86070966720581\n",
      "Iter 798 / 2000, Loss: 125688949.0, CrossEntropy: 0.0033348044380545616, Accuracy: 0.9989290281329923\n",
      "EVALUATION with last weights -> Loss: 12223169.0, CrossEntropy: 1.9217702150344849, Accuracy: 0.8188291139240507\n",
      "Elapsed time for the training: 12.91127634048462\n",
      "Iter 799 / 2000, Loss: 126910390.09375, CrossEntropy: 0.003793512936681509, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 12170811.0, CrossEntropy: 1.9021016359329224, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.87924599647522\n",
      "Iter 800 / 2000, Loss: 128293330.90625, CrossEntropy: 0.004342048428952694, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 11669243.0, CrossEntropy: 1.8438730239868164, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.893892288208008\n",
      "Iter 801 / 2000, Loss: 127032230.625, CrossEntropy: 0.003849851666018367, Accuracy: 0.9986692774936061\n",
      "EVALUATION with last weights -> Loss: 12304584.0, CrossEntropy: 1.9423075914382935, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.919857501983643\n",
      "Iter 802 / 2000, Loss: 125532050.90625, CrossEntropy: 0.0032312676776200533, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12130465.0, CrossEntropy: 1.9133179187774658, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.88464093208313\n",
      "Iter 803 / 2000, Loss: 127225168.625, CrossEntropy: 0.00390366418287158, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12008038.0, CrossEntropy: 1.88638174533844, Accuracy: 0.8266416139240507\n",
      "Elapsed time for the training: 12.869542360305786\n",
      "Iter 804 / 2000, Loss: 126515936.8125, CrossEntropy: 0.0036183358170092106, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13006932.0, CrossEntropy: 2.0386314392089844, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.871107339859009\n",
      "Iter 805 / 2000, Loss: 125463888.71875, CrossEntropy: 0.0031923255883157253, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 11800036.0, CrossEntropy: 1.883660912513733, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.796992301940918\n",
      "Iter 806 / 2000, Loss: 128415227.53125, CrossEntropy: 0.004424309358000755, Accuracy: 0.9986093350383632\n",
      "EVALUATION with last weights -> Loss: 12792787.0, CrossEntropy: 2.0556912422180176, Accuracy: 0.8158623417721519\n",
      "Elapsed time for the training: 12.883115768432617\n",
      "Iter 807 / 2000, Loss: 126862668.15625, CrossEntropy: 0.003788137575611472, Accuracy: 0.9989370204603581\n",
      "EVALUATION with last weights -> Loss: 12473589.0, CrossEntropy: 1.9790863990783691, Accuracy: 0.8107199367088608\n",
      "Elapsed time for the training: 12.88348913192749\n",
      "Iter 808 / 2000, Loss: 126497030.125, CrossEntropy: 0.0035933954641222954, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 11917510.0, CrossEntropy: 1.8741921186447144, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.938444375991821\n",
      "Iter 809 / 2000, Loss: 127344546.65625, CrossEntropy: 0.003930721897631884, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12287571.0, CrossEntropy: 1.9369449615478516, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.921330690383911\n",
      "Iter 810 / 2000, Loss: 126604947.09375, CrossEntropy: 0.003628783393651247, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 11967364.0, CrossEntropy: 1.8952306509017944, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.908627033233643\n",
      "Iter 811 / 2000, Loss: 126516219.125, CrossEntropy: 0.0035885327961295843, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12030216.0, CrossEntropy: 1.8979811668395996, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 12.929833173751831\n",
      "Iter 812 / 2000, Loss: 127665179.65625, CrossEntropy: 0.004043931141495705, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 12738965.0, CrossEntropy: 2.002017021179199, Accuracy: 0.8136867088607594\n",
      "Elapsed time for the training: 12.909550428390503\n",
      "Iter 813 / 2000, Loss: 124091949.875, CrossEntropy: 0.0026118855457752943, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 12089072.0, CrossEntropy: 1.9264014959335327, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 13.82929801940918\n",
      "Iter 814 / 2000, Loss: 127160614.625, CrossEntropy: 0.0038388140965253115, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12765860.0, CrossEntropy: 2.0026097297668457, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 13.902004957199097\n",
      "Iter 815 / 2000, Loss: 127695372.34375, CrossEntropy: 0.004045259673148394, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12153091.0, CrossEntropy: 1.9223968982696533, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 13.153138637542725\n",
      "Iter 816 / 2000, Loss: 125247240.03125, CrossEntropy: 0.0030629585962742567, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13289214.0, CrossEntropy: 2.1048455238342285, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.926145076751709\n",
      "Iter 817 / 2000, Loss: 128008117.65625, CrossEntropy: 0.004162628669291735, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12035499.0, CrossEntropy: 1.9063761234283447, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 15.497516870498657\n",
      "Iter 818 / 2000, Loss: 125139587.125, CrossEntropy: 0.0030166644137352705, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12714873.0, CrossEntropy: 1.9926592111587524, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 15.635068655014038\n",
      "Iter 819 / 2000, Loss: 128500852.34375, CrossEntropy: 0.004350417293608189, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 12211364.0, CrossEntropy: 1.9356448650360107, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 15.621973037719727\n",
      "Iter 820 / 2000, Loss: 124310775.90625, CrossEntropy: 0.0026719800662249327, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 12146637.0, CrossEntropy: 1.9268097877502441, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 13.99243712425232\n",
      "Iter 821 / 2000, Loss: 125823306.84375, CrossEntropy: 0.0032730770763009787, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12219935.0, CrossEntropy: 1.916801929473877, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.912142276763916\n",
      "Iter 822 / 2000, Loss: 126260777.40625, CrossEntropy: 0.003445475362241268, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 12629102.0, CrossEntropy: 1.9870872497558594, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.901221990585327\n",
      "Iter 823 / 2000, Loss: 125646794.90625, CrossEntropy: 0.0031944678630679846, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12038074.0, CrossEntropy: 1.8844538927078247, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.88072419166565\n",
      "Iter 824 / 2000, Loss: 127402916.71875, CrossEntropy: 0.003891459433361888, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 12184454.0, CrossEntropy: 1.9489368200302124, Accuracy: 0.8189280063291139\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.814366579055786\n",
      "Iter 825 / 2000, Loss: 127105388.15625, CrossEntropy: 0.003769249888136983, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 13395643.0, CrossEntropy: 2.117314338684082, Accuracy: 0.8123022151898734\n",
      "Elapsed time for the training: 12.862043857574463\n",
      "Iter 826 / 2000, Loss: 127743087.59375, CrossEntropy: 0.004019101615995169, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12060294.0, CrossEntropy: 1.9348175525665283, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.916615962982178\n",
      "Iter 827 / 2000, Loss: 127462719.375, CrossEntropy: 0.0039687673561275005, Accuracy: 0.9986692774936061\n",
      "EVALUATION with last weights -> Loss: 12116854.0, CrossEntropy: 1.9122848510742188, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 13.016661405563354\n",
      "Iter 828 / 2000, Loss: 124451394.75, CrossEntropy: 0.0026968892198055983, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 11757739.0, CrossEntropy: 1.8449974060058594, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 12.933317422866821\n",
      "Iter 829 / 2000, Loss: 134262040.59375, CrossEntropy: 0.006655547302216291, Accuracy: 0.9979899296675192\n",
      "EVALUATION with last weights -> Loss: 12373213.0, CrossEntropy: 1.9574207067489624, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 12.9156334400177\n",
      "Iter 830 / 2000, Loss: 124515941.25, CrossEntropy: 0.0027136376593261957, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 11970672.0, CrossEntropy: 1.872120976448059, Accuracy: 0.8266416139240507\n",
      "Elapsed time for the training: 12.91188645362854\n",
      "Iter 831 / 2000, Loss: 127257836.53125, CrossEntropy: 0.0038252202793955803, Accuracy: 0.9987292199488491\n",
      "EVALUATION with last weights -> Loss: 12823530.0, CrossEntropy: 2.0061960220336914, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 13.245946407318115\n",
      "Iter 832 / 2000, Loss: 126843094.21875, CrossEntropy: 0.0036383296828716993, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12308322.0, CrossEntropy: 1.9232017993927002, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.94297170639038\n",
      "Iter 833 / 2000, Loss: 128580032.8125, CrossEntropy: 0.004326422233134508, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 12577587.0, CrossEntropy: 2.0098624229431152, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 12.69847846031189\n",
      "Iter 834 / 2000, Loss: 127170851.4375, CrossEntropy: 0.003759371116757393, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 11700820.0, CrossEntropy: 1.8336960077285767, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 12.401989459991455\n",
      "Iter 835 / 2000, Loss: 126690967.0625, CrossEntropy: 0.0036208494566380978, Accuracy: 0.9989090473145781\n",
      "EVALUATION with last weights -> Loss: 12375549.0, CrossEntropy: 1.9545342922210693, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 14.767226934432983\n",
      "Iter 836 / 2000, Loss: 126107341.8125, CrossEntropy: 0.003325776429846883, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12003733.0, CrossEntropy: 1.8926692008972168, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 13.663556098937988\n",
      "Iter 837 / 2000, Loss: 130103631.875, CrossEntropy: 0.004919830709695816, Accuracy: 0.9983415920716112\n",
      "EVALUATION with last weights -> Loss: 12385667.0, CrossEntropy: 1.9851394891738892, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 13.695361852645874\n",
      "Iter 838 / 2000, Loss: 125549698.40625, CrossEntropy: 0.003094507846981287, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12371982.0, CrossEntropy: 1.9695943593978882, Accuracy: 0.8163568037974683\n",
      "Elapsed time for the training: 13.787898540496826\n",
      "Iter 839 / 2000, Loss: 126632061.0625, CrossEntropy: 0.0035243912134319544, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12859537.0, CrossEntropy: 2.0477135181427, Accuracy: 0.8146756329113924\n",
      "Elapsed time for the training: 12.918684244155884\n",
      "Iter 840 / 2000, Loss: 126705707.125, CrossEntropy: 0.003550422377884388, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 12586780.0, CrossEntropy: 1.9864293336868286, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.888389587402344\n",
      "Iter 841 / 2000, Loss: 127003395.3125, CrossEntropy: 0.0037013490218669176, Accuracy: 0.9988770780051152\n",
      "EVALUATION with last weights -> Loss: 13020142.0, CrossEntropy: 2.0482888221740723, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.900573253631592\n",
      "Iter 842 / 2000, Loss: 126167345.46875, CrossEntropy: 0.0033258474431931973, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12190581.0, CrossEntropy: 1.928652286529541, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.899906635284424\n",
      "Iter 843 / 2000, Loss: 127046597.59375, CrossEntropy: 0.0036723867524415255, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12108004.0, CrossEntropy: 1.9120498895645142, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.943853855133057\n",
      "Iter 844 / 2000, Loss: 126795304.59375, CrossEntropy: 0.0035682092420756817, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12683435.0, CrossEntropy: 1.9992821216583252, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.923596620559692\n",
      "Iter 845 / 2000, Loss: 128925729.46875, CrossEntropy: 0.004415260627865791, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12404738.0, CrossEntropy: 1.9700185060501099, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 14.975237131118774\n",
      "Iter 846 / 2000, Loss: 124441608.34375, CrossEntropy: 0.002654057927429676, Accuracy: 0.9989090473145781\n",
      "EVALUATION with last weights -> Loss: 12183539.0, CrossEntropy: 1.9449635744094849, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 13.31549596786499\n",
      "Iter 847 / 2000, Loss: 128677445.28125, CrossEntropy: 0.004359875805675983, Accuracy: 0.9987571930946292\n",
      "EVALUATION with last weights -> Loss: 13048088.0, CrossEntropy: 2.063403606414795, Accuracy: 0.8128955696202531\n",
      "Elapsed time for the training: 12.84921145439148\n",
      "Iter 848 / 2000, Loss: 126863030.28125, CrossEntropy: 0.003578896401450038, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 12206783.0, CrossEntropy: 1.9076430797576904, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 13.473555564880371\n",
      "Iter 849 / 2000, Loss: 127407739.78125, CrossEntropy: 0.0038463319651782513, Accuracy: 0.9988690856777493\n",
      "EVALUATION with last weights -> Loss: 13226061.0, CrossEntropy: 2.1023449897766113, Accuracy: 0.8092365506329114\n",
      "Elapsed time for the training: 12.933034181594849\n",
      "Iter 850 / 2000, Loss: 128998598.03125, CrossEntropy: 0.004424256272614002, Accuracy: 0.9984015345268542\n",
      "EVALUATION with last weights -> Loss: 12150937.0, CrossEntropy: 1.948937177658081, Accuracy: 0.8174446202531646\n",
      "Elapsed time for the training: 12.891316413879395\n",
      "Iter 851 / 2000, Loss: 124398761.59375, CrossEntropy: 0.002582352375611663, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12609039.0, CrossEntropy: 1.980096459388733, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.934551239013672\n",
      "Iter 852 / 2000, Loss: 128683728.6875, CrossEntropy: 0.004292303696274757, Accuracy: 0.9985414002557544\n",
      "EVALUATION with last weights -> Loss: 12658353.0, CrossEntropy: 1.98142409324646, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 12.919766902923584\n",
      "Iter 853 / 2000, Loss: 128403607.4375, CrossEntropy: 0.004174669738858938, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 12704690.0, CrossEntropy: 1.995874047279358, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 12.945780277252197\n",
      "Iter 854 / 2000, Loss: 126236954.375, CrossEntropy: 0.0033045385498553514, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12213991.0, CrossEntropy: 1.9675896167755127, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 14.309255123138428\n",
      "Iter 855 / 2000, Loss: 124637259.6875, CrossEntropy: 0.0026640561409294605, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12973521.0, CrossEntropy: 2.0761663913726807, Accuracy: 0.815565664556962\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 15.524774312973022\n",
      "Iter 856 / 2000, Loss: 129358811.9375, CrossEntropy: 0.004546657204627991, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 12693875.0, CrossEntropy: 2.001777410507202, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 12.91403865814209\n",
      "Iter 857 / 2000, Loss: 127584421.875, CrossEntropy: 0.003832894144579768, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 12497023.0, CrossEntropy: 1.9840785264968872, Accuracy: 0.8176424050632911\n",
      "Elapsed time for the training: 12.895430326461792\n",
      "Iter 858 / 2000, Loss: 125787300.125, CrossEntropy: 0.003111214842647314, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12844869.0, CrossEntropy: 2.0699188709259033, Accuracy: 0.8168512658227848\n",
      "Elapsed time for the training: 12.849819421768188\n",
      "Iter 859 / 2000, Loss: 127529279.78125, CrossEntropy: 0.0038043998647481203, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 12311353.0, CrossEntropy: 1.9474265575408936, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.806324481964111\n",
      "Iter 860 / 2000, Loss: 124836732.4375, CrossEntropy: 0.0027260619681328535, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 12064953.0, CrossEntropy: 1.919893503189087, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 12.888488054275513\n",
      "Iter 861 / 2000, Loss: 128298995.15625, CrossEntropy: 0.004103890620172024, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 12801891.0, CrossEntropy: 2.007232666015625, Accuracy: 0.8163568037974683\n",
      "Elapsed time for the training: 12.949081182479858\n",
      "Iter 862 / 2000, Loss: 125135620.46875, CrossEntropy: 0.0029145353473722935, Accuracy: 0.9991288363171356\n",
      "EVALUATION with last weights -> Loss: 12450730.0, CrossEntropy: 1.9486151933670044, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.894959688186646\n",
      "Iter 863 / 2000, Loss: 128918815.5625, CrossEntropy: 0.004355122335255146, Accuracy: 0.9984694693094629\n",
      "EVALUATION with last weights -> Loss: 12348219.0, CrossEntropy: 1.9316951036453247, Accuracy: 0.8173457278481012\n",
      "Elapsed time for the training: 12.891575574874878\n",
      "Iter 864 / 2000, Loss: 128483483.0, CrossEntropy: 0.00416571693494916, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12385522.0, CrossEntropy: 1.9441148042678833, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.910870552062988\n",
      "Iter 865 / 2000, Loss: 126266192.53125, CrossEntropy: 0.0032770647667348385, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 12415176.0, CrossEntropy: 1.97579824924469, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.916677713394165\n",
      "Iter 866 / 2000, Loss: 126097071.6875, CrossEntropy: 0.003205498680472374, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12266289.0, CrossEntropy: 1.919745922088623, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 12.89905571937561\n",
      "Iter 867 / 2000, Loss: 127470566.78125, CrossEntropy: 0.0037483766209334135, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 11706030.0, CrossEntropy: 1.8396739959716797, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.883656740188599\n",
      "Iter 868 / 2000, Loss: 127814490.46875, CrossEntropy: 0.0038878275081515312, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12581359.0, CrossEntropy: 1.9775402545928955, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.861234188079834\n",
      "Iter 869 / 2000, Loss: 127692977.75, CrossEntropy: 0.0038323046173900366, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12146333.0, CrossEntropy: 1.9036580324172974, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 12.945682764053345\n",
      "Iter 870 / 2000, Loss: 126899850.625, CrossEntropy: 0.003509037895128131, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 12330933.0, CrossEntropy: 1.935594081878662, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.877363443374634\n",
      "Iter 871 / 2000, Loss: 127061884.84375, CrossEntropy: 0.00356943323276937, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12724134.0, CrossEntropy: 1.9886606931686401, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 13.552875280380249\n",
      "Iter 872 / 2000, Loss: 128261915.15625, CrossEntropy: 0.004052481148391962, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 12347752.0, CrossEntropy: 1.9524565935134888, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 13.000685453414917\n",
      "Iter 873 / 2000, Loss: 126945073.90625, CrossEntropy: 0.003514517331495881, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12403095.0, CrossEntropy: 1.9449059963226318, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 12.937227487564087\n",
      "Iter 874 / 2000, Loss: 127246168.9375, CrossEntropy: 0.0036491944920271635, Accuracy: 0.9989090473145781\n",
      "EVALUATION with last weights -> Loss: 12697885.0, CrossEntropy: 1.9840455055236816, Accuracy: 0.8151700949367089\n",
      "Elapsed time for the training: 12.995941162109375\n",
      "Iter 875 / 2000, Loss: 128712772.3125, CrossEntropy: 0.004214181564748287, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 13288804.0, CrossEntropy: 2.089066505432129, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 12.926090478897095\n",
      "Iter 876 / 2000, Loss: 124467614.875, CrossEntropy: 0.002513354178518057, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 12364938.0, CrossEntropy: 1.9456589221954346, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.902676582336426\n",
      "Iter 877 / 2000, Loss: 128880243.25, CrossEntropy: 0.0042823078110814095, Accuracy: 0.9985014386189258\n",
      "EVALUATION with last weights -> Loss: 12141051.0, CrossEntropy: 1.897375226020813, Accuracy: 0.8272349683544303\n",
      "Elapsed time for the training: 13.064948797225952\n",
      "Iter 878 / 2000, Loss: 123755460.71875, CrossEntropy: 0.0022218097001314163, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 12476236.0, CrossEntropy: 1.9495245218276978, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.985156297683716\n",
      "Iter 879 / 2000, Loss: 128742583.1875, CrossEntropy: 0.004210155922919512, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12330790.0, CrossEntropy: 1.957417607307434, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.906929731369019\n",
      "Iter 880 / 2000, Loss: 126359522.125, CrossEntropy: 0.0032577465754002333, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 12812680.0, CrossEntropy: 2.0501444339752197, Accuracy: 0.8176424050632911\n",
      "Elapsed time for the training: 12.921855926513672\n",
      "Iter 881 / 2000, Loss: 125196674.625, CrossEntropy: 0.0027862864080816507, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12443026.0, CrossEntropy: 1.9538323879241943, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 13.073364734649658\n",
      "Iter 882 / 2000, Loss: 129228002.90625, CrossEntropy: 0.004393479321151972, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 12759519.0, CrossEntropy: 2.038081645965576, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.945658206939697\n",
      "Iter 883 / 2000, Loss: 128608302.40625, CrossEntropy: 0.00414116308093071, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 12673735.0, CrossEntropy: 2.024141311645508, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 13.376274108886719\n",
      "Iter 884 / 2000, Loss: 125112650.125, CrossEntropy: 0.00274069351144135, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 12441975.0, CrossEntropy: 1.947986364364624, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 13.70121693611145\n",
      "Iter 885 / 2000, Loss: 128736831.0625, CrossEntropy: 0.0041845254600048065, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12482781.0, CrossEntropy: 1.987191915512085, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 13.76175594329834\n",
      "Iter 886 / 2000, Loss: 127275709.65625, CrossEntropy: 0.003597115632146597, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 12912877.0, CrossEntropy: 2.0734829902648926, Accuracy: 0.8160601265822784\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 13.295135021209717\n",
      "Iter 887 / 2000, Loss: 127426882.96875, CrossEntropy: 0.0036557225976139307, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12485123.0, CrossEntropy: 1.9680445194244385, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.92310118675232\n",
      "Iter 888 / 2000, Loss: 125423585.1875, CrossEntropy: 0.002849464537575841, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 12299163.0, CrossEntropy: 1.9305312633514404, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 12.92723798751831\n",
      "Iter 889 / 2000, Loss: 127944098.4375, CrossEntropy: 0.003855370217934251, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 12187963.0, CrossEntropy: 1.925352692604065, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.894392967224121\n",
      "Iter 890 / 2000, Loss: 127703056.84375, CrossEntropy: 0.003754622070118785, Accuracy: 0.9986812659846548\n",
      "EVALUATION with last weights -> Loss: 12379987.0, CrossEntropy: 1.9494792222976685, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 12.861855745315552\n",
      "Iter 891 / 2000, Loss: 125199028.125, CrossEntropy: 0.002747699385508895, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13429137.0, CrossEntropy: 2.1112804412841797, Accuracy: 0.809434335443038\n",
      "Elapsed time for the training: 12.895501375198364\n",
      "Iter 892 / 2000, Loss: 126656588.375, CrossEntropy: 0.0033265678212046623, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 13545949.0, CrossEntropy: 2.142306327819824, Accuracy: 0.8144778481012658\n",
      "Elapsed time for the training: 12.896696090698242\n",
      "Iter 893 / 2000, Loss: 127568742.34375, CrossEntropy: 0.0036877491511404514, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 12680250.0, CrossEntropy: 2.002284526824951, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.89036774635315\n",
      "Iter 894 / 2000, Loss: 126241362.3125, CrossEntropy: 0.0031611360609531403, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 12203161.0, CrossEntropy: 1.936171531677246, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 15.04318356513977\n",
      "Iter 895 / 2000, Loss: 127001329.96875, CrossEntropy: 0.0034582382068037987, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12359948.0, CrossEntropy: 1.9442046880722046, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 14.735949754714966\n",
      "Iter 896 / 2000, Loss: 127988363.1875, CrossEntropy: 0.003843245329335332, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 12338409.0, CrossEntropy: 1.9782464504241943, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.896557569503784\n",
      "Iter 897 / 2000, Loss: 126060502.9375, CrossEntropy: 0.00306890532374382, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13259024.0, CrossEntropy: 2.1352460384368896, Accuracy: 0.8149723101265823\n",
      "Elapsed time for the training: 12.92510437965393\n",
      "Iter 898 / 2000, Loss: 126323926.90625, CrossEntropy: 0.0031711850315332413, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12534355.0, CrossEntropy: 1.970662236213684, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.900439023971558\n",
      "Iter 899 / 2000, Loss: 128266937.46875, CrossEntropy: 0.003944042604416609, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12464183.0, CrossEntropy: 1.9739786386489868, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.916354179382324\n",
      "Iter 900 / 2000, Loss: 126875107.8125, CrossEntropy: 0.003388168290257454, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12307466.0, CrossEntropy: 1.9317874908447266, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.781615972518921\n",
      "Iter 901 / 2000, Loss: 126481274.0625, CrossEntropy: 0.00322307413443923, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 12484327.0, CrossEntropy: 1.9704111814498901, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.826751232147217\n",
      "Iter 902 / 2000, Loss: 127776954.65625, CrossEntropy: 0.0037412543315440416, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 12529476.0, CrossEntropy: 1.9717150926589966, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.938469648361206\n",
      "Iter 903 / 2000, Loss: 128838899.59375, CrossEntropy: 0.004155890084803104, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 12214424.0, CrossEntropy: 1.916718602180481, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.914446115493774\n",
      "Iter 904 / 2000, Loss: 127392509.03125, CrossEntropy: 0.0035749278031289577, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 12498517.0, CrossEntropy: 1.9919936656951904, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.875641107559204\n",
      "Iter 905 / 2000, Loss: 124610243.46875, CrossEntropy: 0.002459966577589512, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 12595195.0, CrossEntropy: 1.9741777181625366, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 13.38778042793274\n",
      "Iter 906 / 2000, Loss: 129330047.65625, CrossEntropy: 0.004342658445239067, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 12936158.0, CrossEntropy: 2.0418922901153564, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.910440444946289\n",
      "Iter 907 / 2000, Loss: 125623179.65625, CrossEntropy: 0.0028567796107381582, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12017518.0, CrossEntropy: 1.9186546802520752, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.925861358642578\n",
      "Iter 908 / 2000, Loss: 128584867.84375, CrossEntropy: 0.004036725498735905, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 13368440.0, CrossEntropy: 2.1064043045043945, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.906611442565918\n",
      "Iter 909 / 2000, Loss: 125044133.71875, CrossEntropy: 0.0026382107753306627, Accuracy: 0.9990489130434783\n",
      "EVALUATION with last weights -> Loss: 12608850.0, CrossEntropy: 1.9771846532821655, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.913240432739258\n",
      "Iter 910 / 2000, Loss: 127744394.25, CrossEntropy: 0.003750685602426529, Accuracy: 0.9988890664961637\n",
      "EVALUATION with last weights -> Loss: 13235328.0, CrossEntropy: 2.1034791469573975, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 12.928046464920044\n",
      "Iter 911 / 2000, Loss: 127924709.21875, CrossEntropy: 0.00377725251019001, Accuracy: 0.9986892583120205\n",
      "EVALUATION with last weights -> Loss: 12500836.0, CrossEntropy: 1.983142375946045, Accuracy: 0.8246637658227848\n",
      "Elapsed time for the training: 12.89703917503357\n",
      "Iter 912 / 2000, Loss: 125727475.9375, CrossEntropy: 0.0028811597730964422, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 12827888.0, CrossEntropy: 2.040231704711914, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 12.913425922393799\n",
      "Iter 913 / 2000, Loss: 127075761.5625, CrossEntropy: 0.0034259152598679066, Accuracy: 0.9988291240409207\n",
      "EVALUATION with last weights -> Loss: 12500417.0, CrossEntropy: 1.9904130697250366, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.909983396530151\n",
      "Iter 914 / 2000, Loss: 126129941.78125, CrossEntropy: 0.0030341860838234425, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12179045.0, CrossEntropy: 1.9274060726165771, Accuracy: 0.8277294303797469\n",
      "Elapsed time for the training: 15.382538795471191\n",
      "Iter 915 / 2000, Loss: 128538795.53125, CrossEntropy: 0.004069637972861528, Accuracy: 0.9988291240409207\n",
      "EVALUATION with last weights -> Loss: 14468566.0, CrossEntropy: 2.2650389671325684, Accuracy: 0.8048852848101266\n",
      "Elapsed time for the training: 15.560862064361572\n",
      "Iter 916 / 2000, Loss: 128606288.5, CrossEntropy: 0.004020740743726492, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 12642167.0, CrossEntropy: 1.9945281744003296, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.861072540283203\n",
      "Iter 917 / 2000, Loss: 127242323.15625, CrossEntropy: 0.0034680659882724285, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12421538.0, CrossEntropy: 1.9536712169647217, Accuracy: 0.8232792721518988\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.878360033035278\n",
      "Iter 918 / 2000, Loss: 129406119.75, CrossEntropy: 0.004327920265495777, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 12211270.0, CrossEntropy: 1.9299988746643066, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.910812854766846\n",
      "Iter 919 / 2000, Loss: 126681730.03125, CrossEntropy: 0.0032358586322516203, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12781779.0, CrossEntropy: 2.0355780124664307, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.887634038925171\n",
      "Iter 920 / 2000, Loss: 127576530.21875, CrossEntropy: 0.0035889395512640476, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 11902143.0, CrossEntropy: 1.859876275062561, Accuracy: 0.827432753164557\n",
      "Elapsed time for the training: 12.892094612121582\n",
      "Iter 921 / 2000, Loss: 127235252.90625, CrossEntropy: 0.003449807409197092, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 12626024.0, CrossEntropy: 1.9728169441223145, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 15.159468650817871\n",
      "Iter 922 / 2000, Loss: 127444110.90625, CrossEntropy: 0.0035290243104100227, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 12214610.0, CrossEntropy: 1.9230595827102661, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 13.65663743019104\n",
      "Iter 923 / 2000, Loss: 127090386.375, CrossEntropy: 0.0033905047457665205, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12660969.0, CrossEntropy: 2.008840322494507, Accuracy: 0.8147745253164557\n",
      "Elapsed time for the training: 12.233107805252075\n",
      "Iter 924 / 2000, Loss: 128700088.375, CrossEntropy: 0.004023426212370396, Accuracy: 0.9984814578005116\n",
      "EVALUATION with last weights -> Loss: 12655543.0, CrossEntropy: 1.994624376296997, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 11.788039922714233\n",
      "Iter 925 / 2000, Loss: 126924107.3125, CrossEntropy: 0.0033106557093560696, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 12956490.0, CrossEntropy: 2.044506311416626, Accuracy: 0.8183346518987342\n",
      "Elapsed time for the training: 11.797725915908813\n",
      "Iter 926 / 2000, Loss: 128088861.6875, CrossEntropy: 0.0037713812198489904, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12546309.0, CrossEntropy: 1.9927594661712646, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 11.875754356384277\n",
      "Iter 927 / 2000, Loss: 126527929.53125, CrossEntropy: 0.0031441766768693924, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12581249.0, CrossEntropy: 1.9774894714355469, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.929006099700928\n",
      "Iter 928 / 2000, Loss: 128541690.5625, CrossEntropy: 0.004033575300127268, Accuracy: 0.9984774616368287\n",
      "EVALUATION with last weights -> Loss: 13629640.0, CrossEntropy: 2.1463468074798584, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.967585563659668\n",
      "Iter 929 / 2000, Loss: 124377913.59375, CrossEntropy: 0.002277685794979334, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13436074.0, CrossEntropy: 2.1118087768554688, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 12.85237169265747\n",
      "Iter 930 / 2000, Loss: 130169348.71875, CrossEntropy: 0.004588591866195202, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 12863353.0, CrossEntropy: 2.059539318084717, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.87502145767212\n",
      "Iter 931 / 2000, Loss: 126237866.09375, CrossEntropy: 0.003014699323102832, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13140480.0, CrossEntropy: 2.118877649307251, Accuracy: 0.8137856012658228\n",
      "Elapsed time for the training: 12.944279193878174\n",
      "Iter 932 / 2000, Loss: 126058309.78125, CrossEntropy: 0.0029390447307378054, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12975836.0, CrossEntropy: 2.027508497238159, Accuracy: 0.8266416139240507\n",
      "Elapsed time for the training: 12.930444478988647\n",
      "Iter 933 / 2000, Loss: 129568235.875, CrossEntropy: 0.004337862133979797, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 12664166.0, CrossEntropy: 2.005751132965088, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.930370330810547\n",
      "Iter 934 / 2000, Loss: 124271533.125, CrossEntropy: 0.0022176383063197136, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 12574055.0, CrossEntropy: 1.9788612127304077, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.891473293304443\n",
      "Iter 935 / 2000, Loss: 130118967.1875, CrossEntropy: 0.004567838739603758, Accuracy: 0.9986093350383632\n",
      "EVALUATION with last weights -> Loss: 12634683.0, CrossEntropy: 2.01755428314209, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.870871305465698\n",
      "Iter 936 / 2000, Loss: 127275889.0, CrossEntropy: 0.00341252563521266, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12455829.0, CrossEntropy: 1.9889808893203735, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 12.9214928150177\n",
      "Iter 937 / 2000, Loss: 126807646.1875, CrossEntropy: 0.0032232701778411865, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 12826120.0, CrossEntropy: 2.015798568725586, Accuracy: 0.8143789556962026\n",
      "Elapsed time for the training: 12.899806261062622\n",
      "Iter 938 / 2000, Loss: 129040538.34375, CrossEntropy: 0.004110289271920919, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 12821447.0, CrossEntropy: 2.0587666034698486, Accuracy: 0.8188291139240507\n",
      "Elapsed time for the training: 12.883925914764404\n",
      "Iter 939 / 2000, Loss: 128552674.53125, CrossEntropy: 0.003914844244718552, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 12794068.0, CrossEntropy: 2.004286527633667, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.887272834777832\n",
      "Iter 940 / 2000, Loss: 127699512.96875, CrossEntropy: 0.0035665882751345634, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12394859.0, CrossEntropy: 1.9381896257400513, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.893900156021118\n",
      "Iter 941 / 2000, Loss: 127940163.75, CrossEntropy: 0.0037078752648085356, Accuracy: 0.9988890664961637\n",
      "EVALUATION with last weights -> Loss: 14294208.0, CrossEntropy: 2.244594097137451, Accuracy: 0.8073575949367089\n",
      "Elapsed time for the training: 13.729581356048584\n",
      "Iter 942 / 2000, Loss: 128622023.0, CrossEntropy: 0.003929407801479101, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12204375.0, CrossEntropy: 1.9276962280273438, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 15.610632181167603\n",
      "Iter 943 / 2000, Loss: 126962965.40625, CrossEntropy: 0.003262156620621681, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12487364.0, CrossEntropy: 1.975677490234375, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 15.46822190284729\n",
      "Iter 944 / 2000, Loss: 127611320.625, CrossEntropy: 0.0035169655457139015, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12461137.0, CrossEntropy: 1.9639408588409424, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 15.631806373596191\n",
      "Iter 945 / 2000, Loss: 129425811.1875, CrossEntropy: 0.004238223657011986, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12748312.0, CrossEntropy: 2.0285305976867676, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 15.6245436668396\n",
      "Iter 946 / 2000, Loss: 127360532.9375, CrossEntropy: 0.0034491796977818012, Accuracy: 0.9989889705882353\n",
      "EVALUATION with last weights -> Loss: 13190733.0, CrossEntropy: 2.0611140727996826, Accuracy: 0.8171479430379747\n",
      "Elapsed time for the training: 12.908109426498413\n",
      "Iter 947 / 2000, Loss: 128472271.53125, CrossEntropy: 0.003860540920868516, Accuracy: 0.9986492966751919\n",
      "EVALUATION with last weights -> Loss: 12757775.0, CrossEntropy: 2.0404629707336426, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 12.940174341201782\n",
      "Iter 948 / 2000, Loss: 127980006.71875, CrossEntropy: 0.0036514780949801207, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12666824.0, CrossEntropy: 2.0196752548217773, Accuracy: 0.8185324367088608\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.883954286575317\n",
      "Iter 949 / 2000, Loss: 126546545.40625, CrossEntropy: 0.00307296309620142, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 12347955.0, CrossEntropy: 1.9460422992706299, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.86870813369751\n",
      "Iter 950 / 2000, Loss: 128230956.875, CrossEntropy: 0.003742762841284275, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12758841.0, CrossEntropy: 2.0080785751342773, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.1960608959198\n",
      "Iter 951 / 2000, Loss: 128494885.5625, CrossEntropy: 0.0038575120270252228, Accuracy: 0.9987292199488491\n",
      "EVALUATION with last weights -> Loss: 12910568.0, CrossEntropy: 2.0444839000701904, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 11.805437564849854\n",
      "Iter 952 / 2000, Loss: 127439421.4375, CrossEntropy: 0.003427022835239768, Accuracy: 0.9990489130434783\n",
      "EVALUATION with last weights -> Loss: 12459672.0, CrossEntropy: 1.9965890645980835, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 11.779772758483887\n",
      "Iter 953 / 2000, Loss: 126457300.34375, CrossEntropy: 0.0030229485128074884, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12793848.0, CrossEntropy: 2.0004515647888184, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 11.783721446990967\n",
      "Iter 954 / 2000, Loss: 126655315.15625, CrossEntropy: 0.003098307643085718, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12621158.0, CrossEntropy: 1.982924461364746, Accuracy: 0.8266416139240507\n",
      "Elapsed time for the training: 11.788403749465942\n",
      "Iter 955 / 2000, Loss: 127807039.46875, CrossEntropy: 0.0035549383610486984, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12822683.0, CrossEntropy: 2.031914472579956, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.66138482093811\n",
      "Iter 956 / 2000, Loss: 127549755.9375, CrossEntropy: 0.00344921019859612, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 13180215.0, CrossEntropy: 2.076120615005493, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 12.920594692230225\n",
      "Iter 957 / 2000, Loss: 125812269.6875, CrossEntropy: 0.002751412568613887, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12788499.0, CrossEntropy: 2.012040376663208, Accuracy: 0.8246637658227848\n",
      "Elapsed time for the training: 12.906450510025024\n",
      "Iter 958 / 2000, Loss: 128584324.125, CrossEntropy: 0.0038551283068954945, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 13119109.0, CrossEntropy: 2.0549232959747314, Accuracy: 0.814873417721519\n",
      "Elapsed time for the training: 12.767584323883057\n",
      "Iter 959 / 2000, Loss: 128394179.40625, CrossEntropy: 0.0037947329692542553, Accuracy: 0.9987492007672635\n",
      "EVALUATION with last weights -> Loss: 12511101.0, CrossEntropy: 1.9641813039779663, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.703634023666382\n",
      "Iter 960 / 2000, Loss: 128576238.53125, CrossEntropy: 0.0038443924859166145, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 12857951.0, CrossEntropy: 2.041745185852051, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.87514352798462\n",
      "Iter 961 / 2000, Loss: 128226454.53125, CrossEntropy: 0.0036997185088694096, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 12469751.0, CrossEntropy: 1.9737389087677002, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 15.023151874542236\n",
      "Iter 962 / 2000, Loss: 129879686.34375, CrossEntropy: 0.004357419442385435, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12516930.0, CrossEntropy: 1.9778193235397339, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 15.523618459701538\n",
      "Iter 963 / 2000, Loss: 126312183.40625, CrossEntropy: 0.002928955713286996, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12876108.0, CrossEntropy: 2.033348798751831, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 15.477221250534058\n",
      "Iter 964 / 2000, Loss: 126758529.9375, CrossEntropy: 0.003103541675955057, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12798627.0, CrossEntropy: 2.035223960876465, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 15.311099767684937\n",
      "Iter 965 / 2000, Loss: 125782473.375, CrossEntropy: 0.0027110802475363016, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13000820.0, CrossEntropy: 2.044102668762207, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 14.032380819320679\n",
      "Iter 966 / 2000, Loss: 129133754.28125, CrossEntropy: 0.004045327194035053, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 13422090.0, CrossEntropy: 2.117371082305908, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 13.702873229980469\n",
      "Iter 967 / 2000, Loss: 130500455.53125, CrossEntropy: 0.004664702340960503, Accuracy: 0.998557384910486\n",
      "EVALUATION with last weights -> Loss: 13199440.0, CrossEntropy: 2.100184202194214, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 13.509040594100952\n",
      "Iter 968 / 2000, Loss: 127646160.9375, CrossEntropy: 0.00344571634195745, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12736491.0, CrossEntropy: 1.994126796722412, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.870683431625366\n",
      "Iter 969 / 2000, Loss: 128631031.34375, CrossEntropy: 0.003837233642116189, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12393252.0, CrossEntropy: 1.9364728927612305, Accuracy: 0.8269382911392406\n",
      "Elapsed time for the training: 12.892187118530273\n",
      "Iter 970 / 2000, Loss: 125820916.1875, CrossEntropy: 0.0027069482021033764, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13217076.0, CrossEntropy: 2.14253830909729, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 12.88875937461853\n",
      "Iter 971 / 2000, Loss: 129046990.0, CrossEntropy: 0.003992305602878332, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 12380152.0, CrossEntropy: 1.9825491905212402, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.91997504234314\n",
      "Iter 972 / 2000, Loss: 128426075.96875, CrossEntropy: 0.003740477142855525, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12395395.0, CrossEntropy: 1.9708776473999023, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.880118370056152\n",
      "Iter 973 / 2000, Loss: 126798276.25, CrossEntropy: 0.003087516874074936, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 12718611.0, CrossEntropy: 2.009451150894165, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 14.9080491065979\n",
      "Iter 974 / 2000, Loss: 128953656.09375, CrossEntropy: 0.003945083823055029, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12719971.0, CrossEntropy: 2.0033671855926514, Accuracy: 0.8128955696202531\n",
      "Elapsed time for the training: 15.623357772827148\n",
      "Iter 975 / 2000, Loss: 125792798.78125, CrossEntropy: 0.002677601994946599, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 12640912.0, CrossEntropy: 1.9859172105789185, Accuracy: 0.8195213607594937\n",
      "Elapsed time for the training: 13.963975429534912\n",
      "Iter 976 / 2000, Loss: 127133847.1875, CrossEntropy: 0.0032098765950649977, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12036131.0, CrossEntropy: 1.8984062671661377, Accuracy: 0.8276305379746836\n",
      "Elapsed time for the training: 12.883239507675171\n",
      "Iter 977 / 2000, Loss: 126528626.53125, CrossEntropy: 0.0029646093025803566, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13386571.0, CrossEntropy: 2.1241188049316406, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 12.89090895652771\n",
      "Iter 978 / 2000, Loss: 129023254.0625, CrossEntropy: 0.00395989092066884, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 12590022.0, CrossEntropy: 1.9923067092895508, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.903849363327026\n",
      "Iter 979 / 2000, Loss: 126964140.0, CrossEntropy: 0.0031316441018134356, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12363433.0, CrossEntropy: 1.9793506860733032, Accuracy: 0.8256526898734177\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.947110891342163\n",
      "Iter 980 / 2000, Loss: 128430702.90625, CrossEntropy: 0.0037144820671528578, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 12216401.0, CrossEntropy: 1.9319674968719482, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.950016021728516\n",
      "Iter 981 / 2000, Loss: 128569192.65625, CrossEntropy: 0.003767122747376561, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12671534.0, CrossEntropy: 2.001013994216919, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.89040470123291\n",
      "Iter 982 / 2000, Loss: 127860490.34375, CrossEntropy: 0.003479905193671584, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13233544.0, CrossEntropy: 2.076122522354126, Accuracy: 0.8169501582278481\n",
      "Elapsed time for the training: 12.93233036994934\n",
      "Iter 983 / 2000, Loss: 125615458.9375, CrossEntropy: 0.0025795649271458387, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 12573149.0, CrossEntropy: 1.9865238666534424, Accuracy: 0.8270371835443038\n",
      "Elapsed time for the training: 12.879128217697144\n",
      "Iter 984 / 2000, Loss: 129804054.1875, CrossEntropy: 0.004250671248883009, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 13398860.0, CrossEntropy: 2.1158316135406494, Accuracy: 0.8125\n",
      "Elapsed time for the training: 12.88413143157959\n",
      "Iter 985 / 2000, Loss: 128372022.125, CrossEntropy: 0.0036837062798440456, Accuracy: 0.9987292199488491\n",
      "EVALUATION with last weights -> Loss: 12698281.0, CrossEntropy: 2.0124917030334473, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.90609097480774\n",
      "Iter 986 / 2000, Loss: 128560038.15625, CrossEntropy: 0.0037453542463481426, Accuracy: 0.9987212276214834\n",
      "EVALUATION with last weights -> Loss: 12614907.0, CrossEntropy: 1.9925819635391235, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.918426990509033\n",
      "Iter 987 / 2000, Loss: 126813365.625, CrossEntropy: 0.0030439975671470165, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12209553.0, CrossEntropy: 1.9197421073913574, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 12.904734134674072\n",
      "Iter 988 / 2000, Loss: 124206396.59375, CrossEntropy: 0.002000638050958514, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 12285697.0, CrossEntropy: 1.9327738285064697, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 12.89933443069458\n",
      "Iter 989 / 2000, Loss: 129559775.0, CrossEntropy: 0.0041353474371135235, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 12237062.0, CrossEntropy: 1.9489926099777222, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 12.897281885147095\n",
      "Iter 990 / 2000, Loss: 126273196.53125, CrossEntropy: 0.002853153273463249, Accuracy: 0.9991288363171356\n",
      "EVALUATION with last weights -> Loss: 13234902.0, CrossEntropy: 2.0758309364318848, Accuracy: 0.814873417721519\n",
      "Elapsed time for the training: 12.867030143737793\n",
      "Iter 991 / 2000, Loss: 128112824.46875, CrossEntropy: 0.003551222151145339, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 12890531.0, CrossEntropy: 2.024707555770874, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.940836191177368\n",
      "Iter 992 / 2000, Loss: 124803577.09375, CrossEntropy: 0.0022262544371187687, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 12546712.0, CrossEntropy: 1.9874643087387085, Accuracy: 0.8273338607594937\n",
      "Elapsed time for the training: 12.915448188781738\n",
      "Iter 993 / 2000, Loss: 125831505.8125, CrossEntropy: 0.0026369334664195776, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 12566422.0, CrossEntropy: 1.9845081567764282, Accuracy: 0.8273338607594937\n",
      "Elapsed time for the training: 12.889402866363525\n",
      "Iter 994 / 2000, Loss: 129149055.5, CrossEntropy: 0.003957593347877264, Accuracy: 0.9986013427109974\n",
      "EVALUATION with last weights -> Loss: 12247395.0, CrossEntropy: 1.9197677373886108, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.918466567993164\n",
      "Iter 995 / 2000, Loss: 126595541.375, CrossEntropy: 0.0029347436502575874, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12553416.0, CrossEntropy: 2.0196399688720703, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.879100561141968\n",
      "Iter 996 / 2000, Loss: 126873507.5, CrossEntropy: 0.0030540702864527702, Accuracy: 0.9990489130434783\n",
      "EVALUATION with last weights -> Loss: 13600352.0, CrossEntropy: 2.151731252670288, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 12.879817485809326\n",
      "Iter 997 / 2000, Loss: 126345661.46875, CrossEntropy: 0.0028259549289941788, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13057928.0, CrossEntropy: 2.0459868907928467, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 12.902302503585815\n",
      "Iter 998 / 2000, Loss: 125153937.8125, CrossEntropy: 0.002349213929846883, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13829851.0, CrossEntropy: 2.1846840381622314, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 12.732634544372559\n",
      "Iter 999 / 2000, Loss: 125772052.6875, CrossEntropy: 0.002592866774648428, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12910304.0, CrossEntropy: 2.0488550662994385, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.888725280761719\n",
      "Iter 1000 / 2000, Loss: 129223195.875, CrossEntropy: 0.003967931028455496, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13227497.0, CrossEntropy: 2.073256254196167, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.858002424240112\n",
      "Iter 1001 / 2000, Loss: 130037043.6875, CrossEntropy: 0.004289539996534586, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12857755.0, CrossEntropy: 2.0155036449432373, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 15.037075281143188\n",
      "Iter 1002 / 2000, Loss: 128495742.875, CrossEntropy: 0.0036701252683997154, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 12943372.0, CrossEntropy: 2.0310614109039307, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 12.892163276672363\n",
      "Iter 1003 / 2000, Loss: 127706321.90625, CrossEntropy: 0.0033651706762611866, Accuracy: 0.9987891624040921\n",
      "EVALUATION with last weights -> Loss: 13000345.0, CrossEntropy: 2.054497003555298, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.92710542678833\n",
      "Iter 1004 / 2000, Loss: 127774071.5, CrossEntropy: 0.0033763605169951916, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 12996859.0, CrossEntropy: 2.039468288421631, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.871233940124512\n",
      "Iter 1005 / 2000, Loss: 125873844.8125, CrossEntropy: 0.002612526062875986, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13353740.0, CrossEntropy: 2.1081042289733887, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.880859375\n",
      "Iter 1006 / 2000, Loss: 127358828.25, CrossEntropy: 0.0032027307897806168, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 12698198.0, CrossEntropy: 2.0530076026916504, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.8892662525177\n",
      "Iter 1007 / 2000, Loss: 128657071.21875, CrossEntropy: 0.003718284424394369, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13338152.0, CrossEntropy: 2.1132988929748535, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.91908049583435\n",
      "Iter 1008 / 2000, Loss: 130812477.25, CrossEntropy: 0.004597916267812252, Accuracy: 0.9984894501278773\n",
      "EVALUATION with last weights -> Loss: 13171835.0, CrossEntropy: 2.078881025314331, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 13.113395929336548\n",
      "Iter 1009 / 2000, Loss: 128398259.65625, CrossEntropy: 0.003638593014329672, Accuracy: 0.9987691815856777\n",
      "EVALUATION with last weights -> Loss: 13533986.0, CrossEntropy: 2.124579429626465, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 13.604471683502197\n",
      "Iter 1010 / 2000, Loss: 127781253.03125, CrossEntropy: 0.0033577694557607174, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13998422.0, CrossEntropy: 2.2000114917755127, Accuracy: 0.8147745253164557\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.80956768989563\n",
      "Iter 1011 / 2000, Loss: 127863179.0, CrossEntropy: 0.0033872174099087715, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13258698.0, CrossEntropy: 2.078446626663208, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.902496337890625\n",
      "Iter 1012 / 2000, Loss: 126402933.75, CrossEntropy: 0.002800720976665616, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13184473.0, CrossEntropy: 2.079380989074707, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 15.217523097991943\n",
      "Iter 1013 / 2000, Loss: 127032078.25, CrossEntropy: 0.003171400399878621, Accuracy: 0.9990688938618926\n",
      "EVALUATION with last weights -> Loss: 14313892.0, CrossEntropy: 2.2666802406311035, Accuracy: 0.8104232594936709\n",
      "Elapsed time for the training: 15.616998195648193\n",
      "Iter 1014 / 2000, Loss: 128018719.21875, CrossEntropy: 0.0034394925460219383, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12645302.0, CrossEntropy: 1.9758577346801758, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 15.647135257720947\n",
      "Iter 1015 / 2000, Loss: 127529087.15625, CrossEntropy: 0.0033187351655215025, Accuracy: 0.9988690856777493\n",
      "EVALUATION with last weights -> Loss: 13903124.0, CrossEntropy: 2.17616868019104, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 15.24193263053894\n",
      "Iter 1016 / 2000, Loss: 129002291.90625, CrossEntropy: 0.0038361430633813143, Accuracy: 0.9988091432225065\n",
      "EVALUATION with last weights -> Loss: 12278719.0, CrossEntropy: 1.9769856929779053, Accuracy: 0.8195213607594937\n",
      "Elapsed time for the training: 12.991947412490845\n",
      "Iter 1017 / 2000, Loss: 129754294.96875, CrossEntropy: 0.004123235121369362, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13123024.0, CrossEntropy: 2.113098382949829, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 13.659632921218872\n",
      "Iter 1018 / 2000, Loss: 128846298.875, CrossEntropy: 0.0037573708686977625, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12604483.0, CrossEntropy: 1.9800643920898438, Accuracy: 0.8245648734177216\n",
      "Elapsed time for the training: 12.876007556915283\n",
      "Iter 1019 / 2000, Loss: 129119889.4375, CrossEntropy: 0.003864007769152522, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 13011892.0, CrossEntropy: 2.0546059608459473, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.897801637649536\n",
      "Iter 1020 / 2000, Loss: 129802138.3125, CrossEntropy: 0.004130987450480461, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13161146.0, CrossEntropy: 2.0966362953186035, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.925232410430908\n",
      "Iter 1021 / 2000, Loss: 128640247.0, CrossEntropy: 0.003664485877379775, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13060455.0, CrossEntropy: 2.067657470703125, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.878453493118286\n",
      "Iter 1022 / 2000, Loss: 128899386.625, CrossEntropy: 0.0037634209729731083, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12772166.0, CrossEntropy: 2.014409303665161, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.905895948410034\n",
      "Iter 1023 / 2000, Loss: 128238049.25, CrossEntropy: 0.0034969542175531387, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13403253.0, CrossEntropy: 2.132967233657837, Accuracy: 0.8145767405063291\n",
      "Elapsed time for the training: 12.928398132324219\n",
      "Iter 1024 / 2000, Loss: 130367523.71875, CrossEntropy: 0.0043432884849607944, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 13020798.0, CrossEntropy: 2.047323226928711, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.944490432739258\n",
      "Iter 1025 / 2000, Loss: 124283690.375, CrossEntropy: 0.0019152165623381734, Accuracy: 0.9993406329923273\n",
      "EVALUATION with last weights -> Loss: 13025373.0, CrossEntropy: 2.059431314468384, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.989211559295654\n",
      "Iter 1026 / 2000, Loss: 129379314.6875, CrossEntropy: 0.003942270763218403, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 12574220.0, CrossEntropy: 1.9947651624679565, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.962096452713013\n",
      "Iter 1027 / 2000, Loss: 129530924.375, CrossEntropy: 0.00400395505130291, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 12635286.0, CrossEntropy: 2.0133907794952393, Accuracy: 0.8181368670886076\n",
      "Elapsed time for the training: 12.888623714447021\n",
      "Iter 1028 / 2000, Loss: 126950012.3125, CrossEntropy: 0.002964872168377042, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 12747797.0, CrossEntropy: 2.0007846355438232, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.89930772781372\n",
      "Iter 1029 / 2000, Loss: 129109037.625, CrossEntropy: 0.0038244205061346292, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12545029.0, CrossEntropy: 1.9967955350875854, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.891620397567749\n",
      "Iter 1030 / 2000, Loss: 128121707.78125, CrossEntropy: 0.003426241921260953, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12742807.0, CrossEntropy: 2.001676559448242, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 12.886685132980347\n",
      "Iter 1031 / 2000, Loss: 128632846.5, CrossEntropy: 0.003627535654231906, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13268330.0, CrossEntropy: 2.1439874172210693, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.907382011413574\n",
      "Iter 1032 / 2000, Loss: 128045154.75, CrossEntropy: 0.003387913340702653, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12983490.0, CrossEntropy: 2.050119638442993, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.910942792892456\n",
      "Iter 1033 / 2000, Loss: 128060659.84375, CrossEntropy: 0.003391068894416094, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12518531.0, CrossEntropy: 1.984437108039856, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.93649411201477\n",
      "Iter 1034 / 2000, Loss: 129940877.625, CrossEntropy: 0.004138229880481958, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12838313.0, CrossEntropy: 2.0300939083099365, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.916620016098022\n",
      "Iter 1035 / 2000, Loss: 125694805.25, CrossEntropy: 0.0024391773622483015, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 13108271.0, CrossEntropy: 2.098926544189453, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.900028228759766\n",
      "Iter 1036 / 2000, Loss: 129265991.875, CrossEntropy: 0.0038747016806155443, Accuracy: 0.9987891624040921\n",
      "EVALUATION with last weights -> Loss: 13538304.0, CrossEntropy: 2.136716604232788, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.907889127731323\n",
      "Iter 1037 / 2000, Loss: 126884447.59375, CrossEntropy: 0.002906834241002798, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 12033520.0, CrossEntropy: 1.8949378728866577, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.901207447052002\n",
      "Iter 1038 / 2000, Loss: 128029572.53125, CrossEntropy: 0.0033622251357883215, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 13040968.0, CrossEntropy: 2.071160316467285, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 13.018820524215698\n",
      "Iter 1039 / 2000, Loss: 129522652.875, CrossEntropy: 0.004026402719318867, Accuracy: 0.9987492007672635\n",
      "EVALUATION with last weights -> Loss: 13042901.0, CrossEntropy: 2.0819191932678223, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.988835096359253\n",
      "Iter 1040 / 2000, Loss: 128087139.46875, CrossEntropy: 0.0033897997345775366, Accuracy: 0.9989090473145781\n",
      "EVALUATION with last weights -> Loss: 13341977.0, CrossEntropy: 2.1498026847839355, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 12.933554887771606\n",
      "Iter 1041 / 2000, Loss: 126882576.78125, CrossEntropy: 0.002897583646699786, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12802870.0, CrossEntropy: 2.006744623184204, Accuracy: 0.8262460443037974\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.920098304748535\n",
      "Iter 1042 / 2000, Loss: 127455482.8125, CrossEntropy: 0.003119152272120118, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12790089.0, CrossEntropy: 2.0124783515930176, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.937042713165283\n",
      "Iter 1043 / 2000, Loss: 128164130.5625, CrossEntropy: 0.003506699111312628, Accuracy: 0.9988890664961637\n",
      "EVALUATION with last weights -> Loss: 14184711.0, CrossEntropy: 2.261542797088623, Accuracy: 0.8154667721518988\n",
      "Elapsed time for the training: 12.890918016433716\n",
      "Iter 1044 / 2000, Loss: 126524794.9375, CrossEntropy: 0.002751001389697194, Accuracy: 0.9991288363171356\n",
      "EVALUATION with last weights -> Loss: 12843442.0, CrossEntropy: 2.030808448791504, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.85103702545166\n",
      "Iter 1045 / 2000, Loss: 127847792.3125, CrossEntropy: 0.0032669929787516594, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13054768.0, CrossEntropy: 2.0418426990509033, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.853240013122559\n",
      "Iter 1046 / 2000, Loss: 126941270.0625, CrossEntropy: 0.0029014169704169035, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 12918052.0, CrossEntropy: 2.034661054611206, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 12.879268169403076\n",
      "Iter 1047 / 2000, Loss: 128544709.5625, CrossEntropy: 0.003538487246260047, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12700335.0, CrossEntropy: 2.0002763271331787, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 13.64944839477539\n",
      "Iter 1048 / 2000, Loss: 124748150.59375, CrossEntropy: 0.002019214676693082, Accuracy: 0.9994205562659847\n",
      "EVALUATION with last weights -> Loss: 14015647.0, CrossEntropy: 2.1994097232818604, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 12.88314175605774\n",
      "Iter 1049 / 2000, Loss: 128725183.46875, CrossEntropy: 0.003616519970819354, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13001516.0, CrossEntropy: 2.0361437797546387, Accuracy: 0.8273338607594937\n",
      "Elapsed time for the training: 12.900622844696045\n",
      "Iter 1050 / 2000, Loss: 125940977.9375, CrossEntropy: 0.002489138161763549, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 12835033.0, CrossEntropy: 2.0389697551727295, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.878540992736816\n",
      "Iter 1051 / 2000, Loss: 128730266.375, CrossEntropy: 0.0036045724991708994, Accuracy: 0.9986612851662404\n",
      "EVALUATION with last weights -> Loss: 13544904.0, CrossEntropy: 2.141747236251831, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.949301481246948\n",
      "Iter 1052 / 2000, Loss: 125845184.0, CrossEntropy: 0.0024435464292764664, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13415274.0, CrossEntropy: 2.096736431121826, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.908756256103516\n",
      "Iter 1053 / 2000, Loss: 128037244.875, CrossEntropy: 0.0033160122111439705, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12806564.0, CrossEntropy: 2.0144336223602295, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.913851499557495\n",
      "Iter 1054 / 2000, Loss: 127459971.6875, CrossEntropy: 0.0030995956622064114, Accuracy: 0.9989889705882353\n",
      "EVALUATION with last weights -> Loss: 13131671.0, CrossEntropy: 2.121746301651001, Accuracy: 0.8173457278481012\n",
      "Elapsed time for the training: 12.848631381988525\n",
      "Iter 1055 / 2000, Loss: 127280244.78125, CrossEntropy: 0.0030071039218455553, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13421243.0, CrossEntropy: 2.134618043899536, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 14.562167406082153\n",
      "Iter 1056 / 2000, Loss: 126723178.53125, CrossEntropy: 0.0027814069762825966, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13015149.0, CrossEntropy: 2.0636889934539795, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 15.640045881271362\n",
      "Iter 1057 / 2000, Loss: 127696058.625, CrossEntropy: 0.0031669370364397764, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13001119.0, CrossEntropy: 2.0570688247680664, Accuracy: 0.8072587025316456\n",
      "Elapsed time for the training: 15.63120412826538\n",
      "Iter 1058 / 2000, Loss: 128139345.4375, CrossEntropy: 0.0033409399911761284, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13514988.0, CrossEntropy: 2.204023838043213, Accuracy: 0.811807753164557\n",
      "Elapsed time for the training: 14.130016326904297\n",
      "Iter 1059 / 2000, Loss: 127646079.9375, CrossEntropy: 0.0031427217181771994, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12708678.0, CrossEntropy: 2.010852098464966, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.895624160766602\n",
      "Iter 1060 / 2000, Loss: 129249262.1875, CrossEntropy: 0.0037784960586577654, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13327328.0, CrossEntropy: 2.122907876968384, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 12.915844678878784\n",
      "Iter 1061 / 2000, Loss: 126873971.46875, CrossEntropy: 0.002826894400641322, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 12758930.0, CrossEntropy: 2.0156326293945312, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 12.902023315429688\n",
      "Iter 1062 / 2000, Loss: 126943067.75, CrossEntropy: 0.0028516913298517466, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13725993.0, CrossEntropy: 2.1803948879241943, Accuracy: 0.815565664556962\n",
      "Elapsed time for the training: 12.950884103775024\n",
      "Iter 1063 / 2000, Loss: 129287031.375, CrossEntropy: 0.0038358946330845356, Accuracy: 0.9989290281329923\n",
      "EVALUATION with last weights -> Loss: 14028238.0, CrossEntropy: 2.21226167678833, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 12.898852825164795\n",
      "Iter 1064 / 2000, Loss: 127078529.6875, CrossEntropy: 0.0028989585116505623, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13041953.0, CrossEntropy: 2.051935911178589, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.896397590637207\n",
      "Iter 1065 / 2000, Loss: 128249257.53125, CrossEntropy: 0.003364473581314087, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13411872.0, CrossEntropy: 2.105548143386841, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.909459352493286\n",
      "Iter 1066 / 2000, Loss: 127745516.1875, CrossEntropy: 0.003159211715683341, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12533764.0, CrossEntropy: 1.9890843629837036, Accuracy: 0.8245648734177216\n",
      "Elapsed time for the training: 12.890243530273438\n",
      "Iter 1067 / 2000, Loss: 126966754.78125, CrossEntropy: 0.002845353214070201, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 12882540.0, CrossEntropy: 2.038341999053955, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 12.886582612991333\n",
      "Iter 1068 / 2000, Loss: 129192586.96875, CrossEntropy: 0.0037324614822864532, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12732419.0, CrossEntropy: 2.0331571102142334, Accuracy: 0.8299050632911392\n",
      "Elapsed time for the training: 12.956665754318237\n",
      "Iter 1069 / 2000, Loss: 130042000.5, CrossEntropy: 0.0041646589525043964, Accuracy: 0.9987971547314578\n",
      "EVALUATION with last weights -> Loss: 14441798.0, CrossEntropy: 2.271667242050171, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.979212284088135\n",
      "Iter 1070 / 2000, Loss: 129794010.0, CrossEntropy: 0.0039638797752559185, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 12835214.0, CrossEntropy: 2.0077075958251953, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.961987495422363\n",
      "Iter 1071 / 2000, Loss: 126032565.5, CrossEntropy: 0.0024588981177657843, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 12690094.0, CrossEntropy: 2.0050368309020996, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.864314556121826\n",
      "Iter 1072 / 2000, Loss: 127310456.96875, CrossEntropy: 0.0029658987186849117, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 12757937.0, CrossEntropy: 2.030787467956543, Accuracy: 0.819620253164557\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.865435361862183\n",
      "Iter 1073 / 2000, Loss: 126968238.09375, CrossEntropy: 0.002828796161338687, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13718002.0, CrossEntropy: 2.1872267723083496, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 12.870192527770996\n",
      "Iter 1074 / 2000, Loss: 127858423.5625, CrossEntropy: 0.0031789580825716257, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13000287.0, CrossEntropy: 2.035297393798828, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 12.90292763710022\n",
      "Iter 1075 / 2000, Loss: 128312775.6875, CrossEntropy: 0.003357539651915431, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13177427.0, CrossEntropy: 2.0668106079101562, Accuracy: 0.8246637658227848\n",
      "Elapsed time for the training: 12.91918134689331\n",
      "Iter 1076 / 2000, Loss: 124996345.125, CrossEntropy: 0.0020288843661546707, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 12749419.0, CrossEntropy: 2.0126993656158447, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.950150728225708\n",
      "Iter 1077 / 2000, Loss: 128904720.53125, CrossEntropy: 0.00358894863165915, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13228027.0, CrossEntropy: 2.0862090587615967, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.913206338882446\n",
      "Iter 1078 / 2000, Loss: 127656331.125, CrossEntropy: 0.003086602780967951, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13814357.0, CrossEntropy: 2.2040395736694336, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.898454904556274\n",
      "Iter 1079 / 2000, Loss: 131622191.65625, CrossEntropy: 0.004667794797569513, Accuracy: 0.9985813618925832\n",
      "EVALUATION with last weights -> Loss: 12714732.0, CrossEntropy: 1.9925837516784668, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 13.195114850997925\n",
      "Iter 1080 / 2000, Loss: 128411516.96875, CrossEntropy: 0.0033813409972935915, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12963948.0, CrossEntropy: 2.042422294616699, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 13.484599590301514\n",
      "Iter 1081 / 2000, Loss: 126959820.96875, CrossEntropy: 0.002798868343234062, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 12758722.0, CrossEntropy: 2.0161736011505127, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 13.017023086547852\n",
      "Iter 1082 / 2000, Loss: 128401799.78125, CrossEntropy: 0.00337179540656507, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12830918.0, CrossEntropy: 2.0263383388519287, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.916491985321045\n",
      "Iter 1083 / 2000, Loss: 127287986.25, CrossEntropy: 0.002923822496086359, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13216844.0, CrossEntropy: 2.12640380859375, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.83120846748352\n",
      "Iter 1084 / 2000, Loss: 127441396.125, CrossEntropy: 0.0029830310959368944, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12950030.0, CrossEntropy: 2.0573880672454834, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.862068176269531\n",
      "Iter 1085 / 2000, Loss: 127515534.34375, CrossEntropy: 0.00302147981710732, Accuracy: 0.9990489130434783\n",
      "EVALUATION with last weights -> Loss: 13140253.0, CrossEntropy: 2.064802646636963, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.880092859268188\n",
      "Iter 1086 / 2000, Loss: 128163824.1875, CrossEntropy: 0.003265484469011426, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12912007.0, CrossEntropy: 2.031113386154175, Accuracy: 0.828125\n",
      "Elapsed time for the training: 13.418674230575562\n",
      "Iter 1087 / 2000, Loss: 128582077.40625, CrossEntropy: 0.0034358131233602762, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13186171.0, CrossEntropy: 2.096982479095459, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 13.80916452407837\n",
      "Iter 1088 / 2000, Loss: 125810423.0, CrossEntropy: 0.0023189731873571873, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 12973734.0, CrossEntropy: 2.0326342582702637, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 13.281272649765015\n",
      "Iter 1089 / 2000, Loss: 131824069.25, CrossEntropy: 0.004718865733593702, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 12959889.0, CrossEntropy: 2.0301482677459717, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.872670650482178\n",
      "Iter 1090 / 2000, Loss: 128963747.03125, CrossEntropy: 0.0035720630548894405, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 13420080.0, CrossEntropy: 2.1335487365722656, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 13.046353816986084\n",
      "Iter 1091 / 2000, Loss: 127512825.28125, CrossEntropy: 0.0029885503463447094, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 12800119.0, CrossEntropy: 2.012580394744873, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.929444551467896\n",
      "Iter 1092 / 2000, Loss: 127569657.9375, CrossEntropy: 0.0030096976552158594, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 12859695.0, CrossEntropy: 2.0242838859558105, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.877165794372559\n",
      "Iter 1093 / 2000, Loss: 127625197.46875, CrossEntropy: 0.003028313862159848, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12884148.0, CrossEntropy: 2.02262544631958, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.926847457885742\n",
      "Iter 1094 / 2000, Loss: 127427277.625, CrossEntropy: 0.0029459840152412653, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13093679.0, CrossEntropy: 2.0590057373046875, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.876168012619019\n",
      "Iter 1095 / 2000, Loss: 128074228.40625, CrossEntropy: 0.003253936767578125, Accuracy: 0.9990888746803069\n",
      "EVALUATION with last weights -> Loss: 13432606.0, CrossEntropy: 2.1108455657958984, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 12.908644199371338\n",
      "Iter 1096 / 2000, Loss: 125585609.03125, CrossEntropy: 0.0022056561429053545, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 12854510.0, CrossEntropy: 2.0313148498535156, Accuracy: 0.827432753164557\n",
      "Elapsed time for the training: 12.910606861114502\n",
      "Iter 1097 / 2000, Loss: 128754963.3125, CrossEntropy: 0.003651910927146673, Accuracy: 0.999028932225064\n",
      "EVALUATION with last weights -> Loss: 13668464.0, CrossEntropy: 2.1890313625335693, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 12.894854545593262\n",
      "Iter 1098 / 2000, Loss: 128135048.84375, CrossEntropy: 0.0032350714318454266, Accuracy: 0.9988690856777493\n",
      "EVALUATION with last weights -> Loss: 13294999.0, CrossEntropy: 2.090961456298828, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 12.866943120956421\n",
      "Iter 1099 / 2000, Loss: 126853289.09375, CrossEntropy: 0.0027029220946133137, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 13073450.0, CrossEntropy: 2.0748090744018555, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.950009822845459\n",
      "Iter 1100 / 2000, Loss: 130548758.6875, CrossEntropy: 0.00417618453502655, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 12912503.0, CrossEntropy: 2.0587825775146484, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 12.926017045974731\n",
      "Iter 1101 / 2000, Loss: 125797024.96875, CrossEntropy: 0.002273558173328638, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 12693896.0, CrossEntropy: 2.007632255554199, Accuracy: 0.8296083860759493\n",
      "Elapsed time for the training: 14.45434021949768\n",
      "Iter 1102 / 2000, Loss: 128622354.375, CrossEntropy: 0.003406112315133214, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12753369.0, CrossEntropy: 2.005086660385132, Accuracy: 0.8299050632911392\n",
      "Elapsed time for the training: 15.337517976760864\n",
      "Iter 1103 / 2000, Loss: 130326494.09375, CrossEntropy: 0.0041086855344474316, Accuracy: 0.9988890664961637\n",
      "EVALUATION with last weights -> Loss: 12921391.0, CrossEntropy: 2.05033278465271, Accuracy: 0.8225870253164557\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 15.61819839477539\n",
      "Iter 1104 / 2000, Loss: 128840052.03125, CrossEntropy: 0.0035426667891442776, Accuracy: 0.9988890664961637\n",
      "EVALUATION with last weights -> Loss: 13302911.0, CrossEntropy: 2.1144914627075195, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 13.416760444641113\n",
      "Iter 1105 / 2000, Loss: 130283753.9375, CrossEntropy: 0.0040536439046263695, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13831552.0, CrossEntropy: 2.1889820098876953, Accuracy: 0.8199169303797469\n",
      "Elapsed time for the training: 12.91254186630249\n",
      "Iter 1106 / 2000, Loss: 128819651.25, CrossEntropy: 0.0034642170649021864, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13767334.0, CrossEntropy: 2.1568241119384766, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.868512630462646\n",
      "Iter 1107 / 2000, Loss: 127797633.46875, CrossEntropy: 0.0030529089272022247, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13370864.0, CrossEntropy: 2.1076011657714844, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.824146747589111\n",
      "Iter 1108 / 2000, Loss: 128883686.96875, CrossEntropy: 0.0035118593368679285, Accuracy: 0.9989570012787724\n",
      "EVALUATION with last weights -> Loss: 12838155.0, CrossEntropy: 2.0598692893981934, Accuracy: 0.8171479430379747\n",
      "Elapsed time for the training: 12.870623350143433\n",
      "Iter 1109 / 2000, Loss: 125939086.0, CrossEntropy: 0.0023064606357365847, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 12451888.0, CrossEntropy: 1.947980523109436, Accuracy: 0.8299050632911392\n",
      "Elapsed time for the training: 12.873628377914429\n",
      "Iter 1110 / 2000, Loss: 130529380.0625, CrossEntropy: 0.004145652521401644, Accuracy: 0.9986492966751919\n",
      "EVALUATION with last weights -> Loss: 12930269.0, CrossEntropy: 2.041816234588623, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.87649917602539\n",
      "Iter 1111 / 2000, Loss: 127885735.71875, CrossEntropy: 0.00307618360966444, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 12881583.0, CrossEntropy: 2.0290284156799316, Accuracy: 0.8308939873417721\n",
      "Elapsed time for the training: 12.236047506332397\n",
      "Iter 1112 / 2000, Loss: 129120760.5, CrossEntropy: 0.0035998248495161533, Accuracy: 0.9991088554987213\n",
      "EVALUATION with last weights -> Loss: 13785798.0, CrossEntropy: 2.158031463623047, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 12.279526948928833\n",
      "Iter 1113 / 2000, Loss: 129594216.0625, CrossEntropy: 0.003752281656488776, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13671844.0, CrossEntropy: 2.182318925857544, Accuracy: 0.8162579113924051\n",
      "Elapsed time for the training: 15.08512544631958\n",
      "Iter 1114 / 2000, Loss: 127140712.25, CrossEntropy: 0.002791397273540497, Accuracy: 0.9990688938618926\n",
      "EVALUATION with last weights -> Loss: 13484924.0, CrossEntropy: 2.1184957027435303, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 14.33632779121399\n",
      "Iter 1115 / 2000, Loss: 127598788.8125, CrossEntropy: 0.0029499593656510115, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13402277.0, CrossEntropy: 2.1079225540161133, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 12.897323608398438\n",
      "Iter 1116 / 2000, Loss: 128302388.21875, CrossEntropy: 0.0032273216638714075, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13149343.0, CrossEntropy: 2.092470169067383, Accuracy: 0.8184335443037974\n",
      "Elapsed time for the training: 12.962743043899536\n",
      "Iter 1117 / 2000, Loss: 127464026.03125, CrossEntropy: 0.002898493781685829, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13355943.0, CrossEntropy: 2.1256906986236572, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.905620813369751\n",
      "Iter 1118 / 2000, Loss: 129930368.71875, CrossEntropy: 0.0038730185478925705, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13106636.0, CrossEntropy: 2.0482873916625977, Accuracy: 0.8263449367088608\n",
      "Elapsed time for the training: 12.884644508361816\n",
      "Iter 1119 / 2000, Loss: 128167012.21875, CrossEntropy: 0.0032575970981270075, Accuracy: 0.9990688938618926\n",
      "EVALUATION with last weights -> Loss: 12622471.0, CrossEntropy: 1.9773714542388916, Accuracy: 0.8269382911392406\n",
      "Elapsed time for the training: 12.873591184616089\n",
      "Iter 1120 / 2000, Loss: 125600902.1875, CrossEntropy: 0.002137695671990514, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13229846.0, CrossEntropy: 2.0759146213531494, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 12.888378143310547\n",
      "Iter 1121 / 2000, Loss: 131722748.75, CrossEntropy: 0.004580856766551733, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 13121621.0, CrossEntropy: 2.0875587463378906, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.883731126785278\n",
      "Iter 1122 / 2000, Loss: 128486037.34375, CrossEntropy: 0.003284590085968375, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13184008.0, CrossEntropy: 2.0833988189697266, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.942192792892456\n",
      "Iter 1123 / 2000, Loss: 127111630.9375, CrossEntropy: 0.0027318561915308237, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12854818.0, CrossEntropy: 2.0663487911224365, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.90394115447998\n",
      "Iter 1124 / 2000, Loss: 130491531.875, CrossEntropy: 0.004078688099980354, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 13545237.0, CrossEntropy: 2.1182568073272705, Accuracy: 0.8163568037974683\n",
      "Elapsed time for the training: 12.902456283569336\n",
      "Iter 1125 / 2000, Loss: 128191100.625, CrossEntropy: 0.0031568938866257668, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 12696338.0, CrossEntropy: 2.009577751159668, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 12.880303621292114\n",
      "Iter 1126 / 2000, Loss: 127196201.75, CrossEntropy: 0.002758919959887862, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 12801122.0, CrossEntropy: 2.0040249824523926, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.883641242980957\n",
      "Iter 1127 / 2000, Loss: 128102861.9375, CrossEntropy: 0.003117069136351347, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13108061.0, CrossEntropy: 2.0655808448791504, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.886428833007812\n",
      "Iter 1128 / 2000, Loss: 129478722.0625, CrossEntropy: 0.0036911028437316418, Accuracy: 0.9987691815856777\n",
      "EVALUATION with last weights -> Loss: 13356135.0, CrossEntropy: 2.148160696029663, Accuracy: 0.8093354430379747\n",
      "Elapsed time for the training: 12.928054809570312\n",
      "Iter 1129 / 2000, Loss: 127922543.34375, CrossEntropy: 0.0030385188292711973, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13818929.0, CrossEntropy: 2.184556722640991, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.864638328552246\n",
      "Iter 1130 / 2000, Loss: 130481331.5, CrossEntropy: 0.0040583861991763115, Accuracy: 0.998641304347826\n",
      "EVALUATION with last weights -> Loss: 13685216.0, CrossEntropy: 2.162221670150757, Accuracy: 0.815565664556962\n",
      "Elapsed time for the training: 12.877695560455322\n",
      "Iter 1131 / 2000, Loss: 126788854.34375, CrossEntropy: 0.0025797197595238686, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13156209.0, CrossEntropy: 2.0805418491363525, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.926425218582153\n",
      "Iter 1132 / 2000, Loss: 127846303.21875, CrossEntropy: 0.0029989902395755053, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13355663.0, CrossEntropy: 2.1102375984191895, Accuracy: 0.8173457278481012\n",
      "Elapsed time for the training: 12.912362337112427\n",
      "Iter 1133 / 2000, Loss: 125583958.75, CrossEntropy: 0.0020933139603585005, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13783052.0, CrossEntropy: 2.1590349674224854, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.907963037490845\n",
      "Iter 1134 / 2000, Loss: 128311329.90625, CrossEntropy: 0.0031801213044673204, Accuracy: 0.998701246803069\n",
      "EVALUATION with last weights -> Loss: 13035268.0, CrossEntropy: 2.0492591857910156, Accuracy: 0.8283227848101266\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 13.06512999534607\n",
      "Iter 1135 / 2000, Loss: 131305080.0, CrossEntropy: 0.004372819792479277, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 13443674.0, CrossEntropy: 2.112483263015747, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.909257650375366\n",
      "Iter 1136 / 2000, Loss: 126716034.4375, CrossEntropy: 0.0025359014980494976, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 13854357.0, CrossEntropy: 2.180771589279175, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.921536922454834\n",
      "Iter 1137 / 2000, Loss: 127343121.21875, CrossEntropy: 0.0027834747452288866, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13398989.0, CrossEntropy: 2.100606679916382, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.831360101699829\n",
      "Iter 1138 / 2000, Loss: 127709813.375, CrossEntropy: 0.0029270395170897245, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13290818.0, CrossEntropy: 2.0819005966186523, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 12.827940225601196\n",
      "Iter 1139 / 2000, Loss: 127659692.03125, CrossEntropy: 0.002905437722802162, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13380197.0, CrossEntropy: 2.098801851272583, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.867570400238037\n",
      "Iter 1140 / 2000, Loss: 125110076.84375, CrossEntropy: 0.0018833837239071727, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 13673132.0, CrossEntropy: 2.145643949508667, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 12.886723756790161\n",
      "Iter 1141 / 2000, Loss: 131606848.65625, CrossEntropy: 0.004477588459849358, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 14019333.0, CrossEntropy: 2.2099616527557373, Accuracy: 0.8192246835443038\n",
      "Elapsed time for the training: 12.910201072692871\n",
      "Iter 1142 / 2000, Loss: 127828321.34375, CrossEntropy: 0.002963815815746784, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13440802.0, CrossEntropy: 2.102069854736328, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.965011835098267\n",
      "Iter 1143 / 2000, Loss: 131876551.8125, CrossEntropy: 0.004578756168484688, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 13346170.0, CrossEntropy: 2.098731279373169, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.899602174758911\n",
      "Iter 1144 / 2000, Loss: 127909885.25, CrossEntropy: 0.0029938595835119486, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13015542.0, CrossEntropy: 2.0678226947784424, Accuracy: 0.8137856012658228\n",
      "Elapsed time for the training: 12.918420314788818\n",
      "Iter 1145 / 2000, Loss: 129936207.71875, CrossEntropy: 0.003796457080170512, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14229237.0, CrossEntropy: 2.262322425842285, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.915440559387207\n",
      "Iter 1146 / 2000, Loss: 128274918.84375, CrossEntropy: 0.0031289393082261086, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12958109.0, CrossEntropy: 2.031445026397705, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 12.95112919807434\n",
      "Iter 1147 / 2000, Loss: 128380389.5, CrossEntropy: 0.003167961025610566, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13090257.0, CrossEntropy: 2.0871310234069824, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 12.938509941101074\n",
      "Iter 1148 / 2000, Loss: 127143161.21875, CrossEntropy: 0.0026706389617174864, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13054589.0, CrossEntropy: 2.054387092590332, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.901147365570068\n",
      "Iter 1149 / 2000, Loss: 128012327.90625, CrossEntropy: 0.003018139861524105, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13458936.0, CrossEntropy: 2.1198737621307373, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.902286767959595\n",
      "Iter 1150 / 2000, Loss: 129847150.8125, CrossEntropy: 0.003744734451174736, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 12860446.0, CrossEntropy: 2.017174243927002, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 15.3983473777771\n",
      "Iter 1151 / 2000, Loss: 128471079.5, CrossEntropy: 0.0031921621412038803, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 12977682.0, CrossEntropy: 2.040310859680176, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 15.579864263534546\n",
      "Iter 1152 / 2000, Loss: 128359229.03125, CrossEntropy: 0.003145262598991394, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 12960601.0, CrossEntropy: 2.0659899711608887, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 15.330066680908203\n",
      "Iter 1153 / 2000, Loss: 128197605.3125, CrossEntropy: 0.0030967537313699722, Accuracy: 0.9990688938618926\n",
      "EVALUATION with last weights -> Loss: 13177945.0, CrossEntropy: 2.1115243434906006, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.887462139129639\n",
      "Iter 1154 / 2000, Loss: 128615694.34375, CrossEntropy: 0.0032409068662673235, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13018687.0, CrossEntropy: 2.043888568878174, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.891507863998413\n",
      "Iter 1155 / 2000, Loss: 127298007.25, CrossEntropy: 0.002712229499593377, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13171526.0, CrossEntropy: 2.0581953525543213, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.889127016067505\n",
      "Iter 1156 / 2000, Loss: 129491311.9375, CrossEntropy: 0.003586361650377512, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13377598.0, CrossEntropy: 2.1261229515075684, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.890967845916748\n",
      "Iter 1157 / 2000, Loss: 128625670.75, CrossEntropy: 0.0032386030070483685, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 13013318.0, CrossEntropy: 2.07515287399292, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.893031120300293\n",
      "Iter 1158 / 2000, Loss: 127462364.3125, CrossEntropy: 0.002788123209029436, Accuracy: 0.998968989769821\n",
      "EVALUATION with last weights -> Loss: 13309994.0, CrossEntropy: 2.1532509326934814, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 12.94775652885437\n",
      "Iter 1159 / 2000, Loss: 128588865.78125, CrossEntropy: 0.0032167101744562387, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13768278.0, CrossEntropy: 2.156771421432495, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.069123268127441\n",
      "Iter 1160 / 2000, Loss: 128576265.75, CrossEntropy: 0.003208418609574437, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13621141.0, CrossEntropy: 2.15575909614563, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 11.80771803855896\n",
      "Iter 1161 / 2000, Loss: 128865770.3125, CrossEntropy: 0.003324476070702076, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 14145894.0, CrossEntropy: 2.262727737426758, Accuracy: 0.814873417721519\n",
      "Elapsed time for the training: 12.223883152008057\n",
      "Iter 1162 / 2000, Loss: 129920942.9375, CrossEntropy: 0.00373910553753376, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13135743.0, CrossEntropy: 2.09004807472229, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.921048641204834\n",
      "Iter 1163 / 2000, Loss: 128480907.40625, CrossEntropy: 0.003163526766002178, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13184587.0, CrossEntropy: 2.0801680088043213, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.898693799972534\n",
      "Iter 1164 / 2000, Loss: 128144912.3125, CrossEntropy: 0.00305239693261683, Accuracy: 0.9990089514066497\n",
      "EVALUATION with last weights -> Loss: 14607991.0, CrossEntropy: 2.339195728302002, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 13.629864692687988\n",
      "Iter 1165 / 2000, Loss: 128907133.71875, CrossEntropy: 0.0033307322300970554, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 12859081.0, CrossEntropy: 2.0156185626983643, Accuracy: 0.8216969936708861\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 13.251713514328003\n",
      "Iter 1166 / 2000, Loss: 126737012.25, CrossEntropy: 0.0024562955368310213, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 13393724.0, CrossEntropy: 2.126993179321289, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.867351055145264\n",
      "Iter 1167 / 2000, Loss: 127538364.65625, CrossEntropy: 0.0027747233398258686, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13580261.0, CrossEntropy: 2.1782000064849854, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.897669315338135\n",
      "Iter 1168 / 2000, Loss: 126927855.625, CrossEntropy: 0.0025325450114905834, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 12650940.0, CrossEntropy: 1.9769073724746704, Accuracy: 0.8300039556962026\n",
      "Elapsed time for the training: 12.902921915054321\n",
      "Iter 1169 / 2000, Loss: 128424869.84375, CrossEntropy: 0.0031260019168257713, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 12875589.0, CrossEntropy: 2.0392439365386963, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 13.593541145324707\n",
      "Iter 1170 / 2000, Loss: 126681692.59375, CrossEntropy: 0.002425471553578973, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13671133.0, CrossEntropy: 2.1691341400146484, Accuracy: 0.8136867088607594\n",
      "Elapsed time for the training: 15.643575191497803\n",
      "Iter 1171 / 2000, Loss: 128204044.90625, CrossEntropy: 0.0030317038763314486, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13271408.0, CrossEntropy: 2.0963666439056396, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 15.603441953659058\n",
      "Iter 1172 / 2000, Loss: 130665034.46875, CrossEntropy: 0.004012870602309704, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 13185794.0, CrossEntropy: 2.079226493835449, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 15.602882385253906\n",
      "Iter 1173 / 2000, Loss: 127622558.0625, CrossEntropy: 0.0027954913675785065, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13083658.0, CrossEntropy: 2.0693485736846924, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 15.608779907226562\n",
      "Iter 1174 / 2000, Loss: 128743330.15625, CrossEntropy: 0.003238796489313245, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 14407645.0, CrossEntropy: 2.262852907180786, Accuracy: 0.8162579113924051\n",
      "Elapsed time for the training: 15.631867408752441\n",
      "Iter 1175 / 2000, Loss: 128064039.625, CrossEntropy: 0.0029638679698109627, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13189523.0, CrossEntropy: 2.089197874069214, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 14.287172317504883\n",
      "Iter 1176 / 2000, Loss: 129052972.6875, CrossEntropy: 0.003357344539836049, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13568906.0, CrossEntropy: 2.1566920280456543, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 13.758997917175293\n",
      "Iter 1177 / 2000, Loss: 127180282.40625, CrossEntropy: 0.002606041496619582, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13507149.0, CrossEntropy: 2.1104958057403564, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 13.759654521942139\n",
      "Iter 1178 / 2000, Loss: 128978124.3125, CrossEntropy: 0.003339440794661641, Accuracy: 0.999028932225064\n",
      "EVALUATION with last weights -> Loss: 13141755.0, CrossEntropy: 2.0704188346862793, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 13.729042291641235\n",
      "Iter 1179 / 2000, Loss: 129416465.46875, CrossEntropy: 0.003493306925520301, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 13575922.0, CrossEntropy: 2.19242525100708, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 13.709908962249756\n",
      "Iter 1180 / 2000, Loss: 129206070.59375, CrossEntropy: 0.003406860865652561, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13730508.0, CrossEntropy: 2.1708478927612305, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 13.650278329849243\n",
      "Iter 1181 / 2000, Loss: 128667669.25, CrossEntropy: 0.0031889816746115685, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13212736.0, CrossEntropy: 2.068958282470703, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 13.175718545913696\n",
      "Iter 1182 / 2000, Loss: 129418536.25, CrossEntropy: 0.003484810469672084, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13634497.0, CrossEntropy: 2.136021614074707, Accuracy: 0.8277294303797469\n",
      "Elapsed time for the training: 12.924926519393921\n",
      "Iter 1183 / 2000, Loss: 128357890.3125, CrossEntropy: 0.0030722147785127163, Accuracy: 0.9989490089514067\n",
      "EVALUATION with last weights -> Loss: 14317455.0, CrossEntropy: 2.2537543773651123, Accuracy: 0.8168512658227848\n",
      "Elapsed time for the training: 12.845438003540039\n",
      "Iter 1184 / 2000, Loss: 128636881.25, CrossEntropy: 0.0031665449496358633, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13180985.0, CrossEntropy: 2.083329916000366, Accuracy: 0.818631329113924\n",
      "Elapsed time for the training: 12.80238652229309\n",
      "Iter 1185 / 2000, Loss: 130236389.28125, CrossEntropy: 0.0038022580556571484, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 13252555.0, CrossEntropy: 2.083889961242676, Accuracy: 0.8261471518987342\n",
      "Elapsed time for the training: 13.053187608718872\n",
      "Iter 1186 / 2000, Loss: 127820318.0, CrossEntropy: 0.0028525807429105043, Accuracy: 0.9989290281329923\n",
      "EVALUATION with last weights -> Loss: 13467925.0, CrossEntropy: 2.1843972206115723, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.92892861366272\n",
      "Iter 1187 / 2000, Loss: 127291998.46875, CrossEntropy: 0.0026186727918684483, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13318182.0, CrossEntropy: 2.08978009223938, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.924473762512207\n",
      "Iter 1188 / 2000, Loss: 128594182.9375, CrossEntropy: 0.0031376041006296873, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13088426.0, CrossEntropy: 2.064517021179199, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 13.491873264312744\n",
      "Iter 1189 / 2000, Loss: 130276522.6875, CrossEntropy: 0.0038056806661188602, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 13187955.0, CrossEntropy: 2.0929439067840576, Accuracy: 0.8266416139240507\n",
      "Elapsed time for the training: 13.756863832473755\n",
      "Iter 1190 / 2000, Loss: 128689151.5625, CrossEntropy: 0.003262212499976158, Accuracy: 0.9990089514066497\n",
      "EVALUATION with last weights -> Loss: 13785623.0, CrossEntropy: 2.182189702987671, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 13.698084592819214\n",
      "Iter 1191 / 2000, Loss: 129945849.6875, CrossEntropy: 0.0036673303693532944, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13796129.0, CrossEntropy: 2.1988863945007324, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 13.701454639434814\n",
      "Iter 1192 / 2000, Loss: 125523688.78125, CrossEntropy: 0.0018994533456861973, Accuracy: 0.9994005754475703\n",
      "EVALUATION with last weights -> Loss: 13753187.0, CrossEntropy: 2.170193910598755, Accuracy: 0.813192246835443\n",
      "Elapsed time for the training: 13.399469137191772\n",
      "Iter 1193 / 2000, Loss: 129470706.84375, CrossEntropy: 0.0034733230713754892, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 13129682.0, CrossEntropy: 2.080596923828125, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.916061639785767\n",
      "Iter 1194 / 2000, Loss: 128179444.375, CrossEntropy: 0.002954573603346944, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13124890.0, CrossEntropy: 2.0781688690185547, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.885573625564575\n",
      "Iter 1195 / 2000, Loss: 126242909.84375, CrossEntropy: 0.0021786426659673452, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 13837342.0, CrossEntropy: 2.204916000366211, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 12.858592748641968\n",
      "Iter 1196 / 2000, Loss: 129866430.625, CrossEntropy: 0.00362594542093575, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13145544.0, CrossEntropy: 2.1066951751708984, Accuracy: 0.8215981012658228\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time for the training: 12.90147352218628\n",
      "Iter 1197 / 2000, Loss: 125405016.4375, CrossEntropy: 0.0018379975808784366, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 13248509.0, CrossEntropy: 2.1039905548095703, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.882220268249512\n",
      "Iter 1198 / 2000, Loss: 127361599.15625, CrossEntropy: 0.0026175514794886112, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13070735.0, CrossEntropy: 2.0579495429992676, Accuracy: 0.8280261075949367\n",
      "Elapsed time for the training: 12.911823749542236\n",
      "Iter 1199 / 2000, Loss: 128556932.65625, CrossEntropy: 0.00309300166554749, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 12922086.0, CrossEntropy: 2.024414539337158, Accuracy: 0.8297072784810127\n",
      "Elapsed time for the training: 12.965926885604858\n",
      "Iter 1200 / 2000, Loss: 128288831.625, CrossEntropy: 0.0029843009542673826, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13596992.0, CrossEntropy: 2.159393072128296, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.871512174606323\n",
      "Iter 1201 / 2000, Loss: 127528371.75, CrossEntropy: 0.0026880414225161076, Accuracy: 0.9991488171355499\n",
      "EVALUATION with last weights -> Loss: 13506107.0, CrossEntropy: 2.113619804382324, Accuracy: 0.8245648734177216\n",
      "Elapsed time for the training: 12.938365697860718\n",
      "Iter 1202 / 2000, Loss: 127532257.84375, CrossEntropy: 0.0026775796432048082, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13053406.0, CrossEntropy: 2.056002378463745, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 13.629843711853027\n",
      "Iter 1203 / 2000, Loss: 126229102.84375, CrossEntropy: 0.002153602195903659, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13325540.0, CrossEntropy: 2.106525182723999, Accuracy: 0.8303995253164557\n",
      "Elapsed time for the training: 13.146855115890503\n",
      "Iter 1204 / 2000, Loss: 128168293.0, CrossEntropy: 0.002925892360508442, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13102552.0, CrossEntropy: 2.0804848670959473, Accuracy: 0.8307950949367089\n",
      "Elapsed time for the training: 12.847187995910645\n",
      "Iter 1205 / 2000, Loss: 127810662.28125, CrossEntropy: 0.0027810807805508375, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13031358.0, CrossEntropy: 2.0362210273742676, Accuracy: 0.8292128164556962\n",
      "Elapsed time for the training: 12.909735679626465\n",
      "Iter 1206 / 2000, Loss: 127284399.0625, CrossEntropy: 0.0025684607680886984, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13287195.0, CrossEntropy: 2.109595537185669, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.872414827346802\n",
      "Iter 1207 / 2000, Loss: 129058293.09375, CrossEntropy: 0.0032736281864345074, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 13113607.0, CrossEntropy: 2.055736780166626, Accuracy: 0.8285205696202531\n",
      "Elapsed time for the training: 12.900839567184448\n",
      "Iter 1208 / 2000, Loss: 127130034.03125, CrossEntropy: 0.0025004532653838396, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13073493.0, CrossEntropy: 2.0805294513702393, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.912936449050903\n",
      "Iter 1209 / 2000, Loss: 128810351.1875, CrossEntropy: 0.0031694502104073763, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 14157542.0, CrossEntropy: 2.2237389087677, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 12.870453834533691\n",
      "Iter 1210 / 2000, Loss: 130319529.96875, CrossEntropy: 0.0037714962381869555, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13468936.0, CrossEntropy: 2.1218392848968506, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.880693674087524\n",
      "Iter 1211 / 2000, Loss: 126295860.9375, CrossEntropy: 0.0021965757478028536, Accuracy: 0.9992487212276215\n",
      "EVALUATION with last weights -> Loss: 14466951.0, CrossEntropy: 2.2605063915252686, Accuracy: 0.8153678797468354\n",
      "Elapsed time for the training: 12.904281377792358\n",
      "Iter 1212 / 2000, Loss: 130211665.75, CrossEntropy: 0.0037205705884844065, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 13395619.0, CrossEntropy: 2.1229677200317383, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.845784425735474\n",
      "Iter 1213 / 2000, Loss: 128217472.28125, CrossEntropy: 0.0029211929067969322, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13164717.0, CrossEntropy: 2.0828452110290527, Accuracy: 0.8291139240506329\n",
      "Elapsed time for the training: 12.914093017578125\n",
      "Iter 1214 / 2000, Loss: 127909268.75, CrossEntropy: 0.002796903485432267, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13654219.0, CrossEntropy: 2.1975760459899902, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.888947486877441\n",
      "Iter 1215 / 2000, Loss: 129980784.875, CrossEntropy: 0.003621756797656417, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13208283.0, CrossEntropy: 2.0953590869903564, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 12.85300350189209\n",
      "Iter 1216 / 2000, Loss: 128702415.125, CrossEntropy: 0.0031074199359863997, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13295992.0, CrossEntropy: 2.100788116455078, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.875381469726562\n",
      "Iter 1217 / 2000, Loss: 128937885.1875, CrossEntropy: 0.0032039321959018707, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13625519.0, CrossEntropy: 2.171069622039795, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.886472463607788\n",
      "Iter 1218 / 2000, Loss: 131178329.25, CrossEntropy: 0.004091149196028709, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 13252153.0, CrossEntropy: 2.1359989643096924, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 15.256040334701538\n",
      "Iter 1219 / 2000, Loss: 128509052.53125, CrossEntropy: 0.0030205557122826576, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13251092.0, CrossEntropy: 2.093292713165283, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 13.189834594726562\n",
      "Iter 1220 / 2000, Loss: 129942712.28125, CrossEntropy: 0.0035901260562241077, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13140254.0, CrossEntropy: 2.065910816192627, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 12.890233278274536\n",
      "Iter 1221 / 2000, Loss: 129943183.28125, CrossEntropy: 0.00358757795765996, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13176597.0, CrossEntropy: 2.089167833328247, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.882155418395996\n",
      "Iter 1222 / 2000, Loss: 130743949.1875, CrossEntropy: 0.0039037391543388367, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13213935.0, CrossEntropy: 2.0840516090393066, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.908545732498169\n",
      "Iter 1223 / 2000, Loss: 127071428.59375, CrossEntropy: 0.002434890251606703, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13898910.0, CrossEntropy: 2.172384023666382, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.934694766998291\n",
      "Iter 1224 / 2000, Loss: 129008530.1875, CrossEntropy: 0.0032060865778476, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13247767.0, CrossEntropy: 2.0871100425720215, Accuracy: 0.8272349683544303\n",
      "Elapsed time for the training: 12.886759996414185\n",
      "Iter 1225 / 2000, Loss: 129495738.78125, CrossEntropy: 0.0033975797705352306, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 14067599.0, CrossEntropy: 2.2080347537994385, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.897042512893677\n",
      "Iter 1226 / 2000, Loss: 126800132.53125, CrossEntropy: 0.0023251494858413935, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 13768338.0, CrossEntropy: 2.2004199028015137, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 12.888514041900635\n",
      "Iter 1227 / 2000, Loss: 128219101.375, CrossEntropy: 0.002898895414546132, Accuracy: 0.9991887787723785\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13811366.0, CrossEntropy: 2.196024179458618, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 13.66434645652771\n",
      "Iter 1228 / 2000, Loss: 130011390.8125, CrossEntropy: 0.003597681177780032, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13195874.0, CrossEntropy: 2.0690579414367676, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 15.609110832214355\n",
      "Iter 1229 / 2000, Loss: 127305283.28125, CrossEntropy: 0.0025142300873994827, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13469516.0, CrossEntropy: 2.140155792236328, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 14.82060980796814\n",
      "Iter 1230 / 2000, Loss: 126682364.875, CrossEntropy: 0.0022651359904557467, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13459578.0, CrossEntropy: 2.1069743633270264, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.932419061660767\n",
      "Iter 1231 / 2000, Loss: 129636006.875, CrossEntropy: 0.0034399237483739853, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 13284211.0, CrossEntropy: 2.0872387886047363, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.867710590362549\n",
      "Iter 1232 / 2000, Loss: 129606192.625, CrossEntropy: 0.0034245941787958145, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13640928.0, CrossEntropy: 2.1641788482666016, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.855112791061401\n",
      "Iter 1233 / 2000, Loss: 128246324.78125, CrossEntropy: 0.002878650091588497, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13716600.0, CrossEntropy: 2.1555569171905518, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.91096806526184\n",
      "Iter 1234 / 2000, Loss: 125679115.90625, CrossEntropy: 0.0018507825443521142, Accuracy: 0.9993606138107417\n",
      "EVALUATION with last weights -> Loss: 13911948.0, CrossEntropy: 2.175278425216675, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 13.089296102523804\n",
      "Iter 1235 / 2000, Loss: 131065288.15625, CrossEntropy: 0.003999775741249323, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13335339.0, CrossEntropy: 2.121213912963867, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.896389484405518\n",
      "Iter 1236 / 2000, Loss: 130056589.78125, CrossEntropy: 0.003593006869778037, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13241102.0, CrossEntropy: 2.0836002826690674, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.922475099563599\n",
      "Iter 1237 / 2000, Loss: 129603302.625, CrossEntropy: 0.003409414552152157, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 12868663.0, CrossEntropy: 2.0208005905151367, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 14.178813219070435\n",
      "Iter 1238 / 2000, Loss: 132766963.9375, CrossEntropy: 0.004669915419071913, Accuracy: 0.9985613810741688\n",
      "EVALUATION with last weights -> Loss: 13249400.0, CrossEntropy: 2.070349931716919, Accuracy: 0.8270371835443038\n",
      "Elapsed time for the training: 15.561492204666138\n",
      "Iter 1239 / 2000, Loss: 126099782.25, CrossEntropy: 0.0020364851225167513, Accuracy: 0.9993486253196932\n",
      "EVALUATION with last weights -> Loss: 14853325.0, CrossEntropy: 2.3828163146972656, Accuracy: 0.8114121835443038\n",
      "Elapsed time for the training: 15.670888662338257\n",
      "Iter 1240 / 2000, Loss: 129304903.0, CrossEntropy: 0.0032816699240356684, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13241309.0, CrossEntropy: 2.071293354034424, Accuracy: 0.8282238924050633\n",
      "Elapsed time for the training: 13.916616678237915\n",
      "Iter 1241 / 2000, Loss: 129707143.15625, CrossEntropy: 0.0034387721680104733, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13490943.0, CrossEntropy: 2.1441986560821533, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.905827760696411\n",
      "Iter 1242 / 2000, Loss: 129867718.15625, CrossEntropy: 0.0036507253535091877, Accuracy: 0.9989090473145781\n",
      "EVALUATION with last weights -> Loss: 13632021.0, CrossEntropy: 2.145507335662842, Accuracy: 0.8112143987341772\n",
      "Elapsed time for the training: 12.908517837524414\n",
      "Iter 1243 / 2000, Loss: 129641422.59375, CrossEntropy: 0.003407771699130535, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13511976.0, CrossEntropy: 2.1624858379364014, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.878661155700684\n",
      "Iter 1244 / 2000, Loss: 127541567.84375, CrossEntropy: 0.002563853980973363, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13164960.0, CrossEntropy: 2.0707883834838867, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.810231685638428\n",
      "Iter 1245 / 2000, Loss: 129413835.9375, CrossEntropy: 0.003310117870569229, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13449093.0, CrossEntropy: 2.106283664703369, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 11.818429231643677\n",
      "Iter 1246 / 2000, Loss: 129045889.9375, CrossEntropy: 0.0031690329778939486, Accuracy: 0.998968989769821\n",
      "EVALUATION with last weights -> Loss: 12809819.0, CrossEntropy: 2.0241522789001465, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 13.490795373916626\n",
      "Iter 1247 / 2000, Loss: 128689259.96875, CrossEntropy: 0.003014344722032547, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13440687.0, CrossEntropy: 2.1041259765625, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.917493104934692\n",
      "Iter 1248 / 2000, Loss: 128635716.28125, CrossEntropy: 0.002992004621773958, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13298227.0, CrossEntropy: 2.0860462188720703, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.87458324432373\n",
      "Iter 1249 / 2000, Loss: 128700599.34375, CrossEntropy: 0.0030180825851857662, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13466052.0, CrossEntropy: 2.145319700241089, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 12.90734052658081\n",
      "Iter 1250 / 2000, Loss: 128235658.71875, CrossEntropy: 0.0028262296691536903, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13699323.0, CrossEntropy: 2.2625200748443604, Accuracy: 0.8169501582278481\n",
      "Elapsed time for the training: 12.881540536880493\n",
      "Iter 1251 / 2000, Loss: 128795490.21875, CrossEntropy: 0.0030503608286380768, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14165436.0, CrossEntropy: 2.2268009185791016, Accuracy: 0.8199169303797469\n",
      "Elapsed time for the training: 12.909085988998413\n",
      "Iter 1252 / 2000, Loss: 127941444.15625, CrossEntropy: 0.002703174250200391, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 14239712.0, CrossEntropy: 2.275644540786743, Accuracy: 0.8134889240506329\n",
      "Elapsed time for the training: 15.53620457649231\n",
      "Iter 1253 / 2000, Loss: 130573345.5625, CrossEntropy: 0.0037520211189985275, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13141917.0, CrossEntropy: 2.0925817489624023, Accuracy: 0.8248615506329114\n",
      "Elapsed time for the training: 15.619928359985352\n",
      "Iter 1254 / 2000, Loss: 129403546.9375, CrossEntropy: 0.00328167830593884, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13703616.0, CrossEntropy: 2.1725666522979736, Accuracy: 0.8113132911392406\n",
      "Elapsed time for the training: 12.90581226348877\n",
      "Iter 1255 / 2000, Loss: 128479160.53125, CrossEntropy: 0.0029092056211084127, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 13214255.0, CrossEntropy: 2.0707530975341797, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.897655010223389\n",
      "Iter 1256 / 2000, Loss: 130343947.5, CrossEntropy: 0.0036504061426967382, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13064051.0, CrossEntropy: 2.0752310752868652, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.867740631103516\n",
      "Iter 1257 / 2000, Loss: 128001670.125, CrossEntropy: 0.002711721695959568, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13264009.0, CrossEntropy: 2.101145029067993, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.875872373580933\n",
      "Iter 1258 / 2000, Loss: 128040570.9375, CrossEntropy: 0.0027242167852818966, Accuracy: 0.9991208439897699\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13183347.0, CrossEntropy: 2.0603644847869873, Accuracy: 0.8279272151898734\n",
      "Elapsed time for the training: 12.936802625656128\n",
      "Iter 1259 / 2000, Loss: 128101058.8125, CrossEntropy: 0.0027494088280946016, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 14620147.0, CrossEntropy: 2.301645517349243, Accuracy: 0.811807753164557\n",
      "Elapsed time for the training: 12.893916130065918\n",
      "Iter 1260 / 2000, Loss: 130230230.28125, CrossEntropy: 0.0035937130451202393, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 14024802.0, CrossEntropy: 2.2120566368103027, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 12.894344806671143\n",
      "Iter 1261 / 2000, Loss: 128574517.25, CrossEntropy: 0.002928969217464328, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13105844.0, CrossEntropy: 2.082663059234619, Accuracy: 0.826443829113924\n",
      "Elapsed time for the training: 12.919932842254639\n",
      "Iter 1262 / 2000, Loss: 131820868.40625, CrossEntropy: 0.004227899014949799, Accuracy: 0.9988011508951407\n",
      "EVALUATION with last weights -> Loss: 13470643.0, CrossEntropy: 2.1076014041900635, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.188148975372314\n",
      "Iter 1263 / 2000, Loss: 126571901.25, CrossEntropy: 0.002136573428288102, Accuracy: 0.9993286445012788\n",
      "EVALUATION with last weights -> Loss: 14134127.0, CrossEntropy: 2.2435805797576904, Accuracy: 0.8269382911392406\n",
      "Elapsed time for the training: 12.813620328903198\n",
      "Iter 1264 / 2000, Loss: 129013158.3125, CrossEntropy: 0.0030973234679549932, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14062678.0, CrossEntropy: 2.214454174041748, Accuracy: 0.8145767405063291\n",
      "Elapsed time for the training: 13.39012598991394\n",
      "Iter 1265 / 2000, Loss: 127026204.4375, CrossEntropy: 0.002321803942322731, Accuracy: 0.9992087595907929\n",
      "EVALUATION with last weights -> Loss: 14447432.0, CrossEntropy: 2.2935597896575928, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 13.974833965301514\n",
      "Iter 1266 / 2000, Loss: 128256486.75, CrossEntropy: 0.002790250815451145, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13503052.0, CrossEntropy: 2.126751184463501, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 13.738556385040283\n",
      "Iter 1267 / 2000, Loss: 129337267.375, CrossEntropy: 0.0032196033280342817, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13457767.0, CrossEntropy: 2.130446434020996, Accuracy: 0.8282238924050633\n",
      "Elapsed time for the training: 13.455090761184692\n",
      "Iter 1268 / 2000, Loss: 130444171.46875, CrossEntropy: 0.0036591163370758295, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13406242.0, CrossEntropy: 2.139566421508789, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.833390712738037\n",
      "Iter 1269 / 2000, Loss: 125916282.9375, CrossEntropy: 0.0018468390917405486, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 13044983.0, CrossEntropy: 2.0970144271850586, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.80180835723877\n",
      "Iter 1270 / 2000, Loss: 131128042.4375, CrossEntropy: 0.003926672972738743, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13428466.0, CrossEntropy: 2.1069436073303223, Accuracy: 0.8282238924050633\n",
      "Elapsed time for the training: 12.962628841400146\n",
      "Iter 1271 / 2000, Loss: 128590763.65625, CrossEntropy: 0.0029124238062649965, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14037796.0, CrossEntropy: 2.2099812030792236, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.92418646812439\n",
      "Iter 1272 / 2000, Loss: 129587755.0625, CrossEntropy: 0.003305696649476886, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13700563.0, CrossEntropy: 2.153564214706421, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 13.475039005279541\n",
      "Iter 1273 / 2000, Loss: 129712644.65625, CrossEntropy: 0.0033559210132807493, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13724330.0, CrossEntropy: 2.171168804168701, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 13.885494470596313\n",
      "Iter 1274 / 2000, Loss: 131744439.53125, CrossEntropy: 0.0041608018800616264, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 12997420.0, CrossEntropy: 2.04856538772583, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 13.740166664123535\n",
      "Iter 1275 / 2000, Loss: 128353638.59375, CrossEntropy: 0.002803720533847809, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13319131.0, CrossEntropy: 2.0841691493988037, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 13.722376823425293\n",
      "Iter 1276 / 2000, Loss: 129493174.78125, CrossEntropy: 0.003256772179156542, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13228457.0, CrossEntropy: 2.0952045917510986, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 13.15010666847229\n",
      "Iter 1277 / 2000, Loss: 127383074.34375, CrossEntropy: 0.002416875446215272, Accuracy: 0.9993606138107417\n",
      "EVALUATION with last weights -> Loss: 13857280.0, CrossEntropy: 2.204864740371704, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 12.862420320510864\n",
      "Iter 1278 / 2000, Loss: 131532005.375, CrossEntropy: 0.0040654283948242664, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 13182218.0, CrossEntropy: 2.0752906799316406, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 12.886629343032837\n",
      "Iter 1279 / 2000, Loss: 129628532.375, CrossEntropy: 0.0033017753157764673, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14240307.0, CrossEntropy: 2.2557356357574463, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.99703598022461\n",
      "Iter 1280 / 2000, Loss: 129604672.375, CrossEntropy: 0.0033194192219525576, Accuracy: 0.9989769820971868\n",
      "EVALUATION with last weights -> Loss: 14202705.0, CrossEntropy: 2.262490749359131, Accuracy: 0.8089398734177216\n",
      "Elapsed time for the training: 12.88344430923462\n",
      "Iter 1281 / 2000, Loss: 128399100.03125, CrossEntropy: 0.002804229035973549, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13771311.0, CrossEntropy: 2.1523144245147705, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.860824346542358\n",
      "Iter 1282 / 2000, Loss: 130948333.3125, CrossEntropy: 0.0038203622680157423, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 12882607.0, CrossEntropy: 2.0833048820495605, Accuracy: 0.8295094936708861\n",
      "Elapsed time for the training: 12.895953178405762\n",
      "Iter 1283 / 2000, Loss: 127958981.21875, CrossEntropy: 0.0026228574570268393, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13273588.0, CrossEntropy: 2.105767250061035, Accuracy: 0.8266416139240507\n",
      "Elapsed time for the training: 12.833982944488525\n",
      "Iter 1284 / 2000, Loss: 130021286.65625, CrossEntropy: 0.0034440411254763603, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 14186244.0, CrossEntropy: 2.255992889404297, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 12.91258716583252\n",
      "Iter 1285 / 2000, Loss: 130193421.21875, CrossEntropy: 0.0035256007686257362, Accuracy: 0.9991088554987213\n",
      "EVALUATION with last weights -> Loss: 13519180.0, CrossEntropy: 2.1363229751586914, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.872140169143677\n",
      "Iter 1286 / 2000, Loss: 128782003.25, CrossEntropy: 0.0029446471016854048, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13191923.0, CrossEntropy: 2.1118690967559814, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.876698732376099\n",
      "Iter 1287 / 2000, Loss: 128471314.25, CrossEntropy: 0.0028174493927508593, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13412633.0, CrossEntropy: 2.157153844833374, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.87636685371399\n",
      "Iter 1288 / 2000, Loss: 130576932.15625, CrossEntropy: 0.0036561882589012384, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13568669.0, CrossEntropy: 2.167023181915283, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 12.86742615699768\n",
      "Iter 1289 / 2000, Loss: 130855652.0625, CrossEntropy: 0.0037649290170520544, Accuracy: 0.9989010549872123\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13771031.0, CrossEntropy: 2.2076685428619385, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.921557426452637\n",
      "Iter 1290 / 2000, Loss: 126192178.75, CrossEntropy: 0.0019089992856606841, Accuracy: 0.9992687020460358\n",
      "EVALUATION with last weights -> Loss: 13679459.0, CrossEntropy: 2.147402048110962, Accuracy: 0.828125\n",
      "Elapsed time for the training: 12.873167276382446\n",
      "Iter 1291 / 2000, Loss: 129347434.40625, CrossEntropy: 0.0031572580337524414, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13382888.0, CrossEntropy: 2.1166865825653076, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.923012495040894\n",
      "Iter 1292 / 2000, Loss: 130365549.75, CrossEntropy: 0.0035609689075499773, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13796202.0, CrossEntropy: 2.18904709815979, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 15.402188062667847\n",
      "Iter 1293 / 2000, Loss: 128876995.96875, CrossEntropy: 0.0029636856634169817, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13626618.0, CrossEntropy: 2.1813454627990723, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 15.42928171157837\n",
      "Iter 1294 / 2000, Loss: 129683748.28125, CrossEntropy: 0.0032832880970090628, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13680034.0, CrossEntropy: 2.183292865753174, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 15.010920286178589\n",
      "Iter 1295 / 2000, Loss: 132977443.3125, CrossEntropy: 0.004596749786287546, Accuracy: 0.9987412084398977\n",
      "EVALUATION with last weights -> Loss: 13661845.0, CrossEntropy: 2.143720865249634, Accuracy: 0.827432753164557\n",
      "Elapsed time for the training: 12.897470235824585\n",
      "Iter 1296 / 2000, Loss: 127671858.78125, CrossEntropy: 0.0024744956754148006, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14303814.0, CrossEntropy: 2.247635841369629, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.898012638092041\n",
      "Iter 1297 / 2000, Loss: 129322715.21875, CrossEntropy: 0.003131107660010457, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13614228.0, CrossEntropy: 2.1551942825317383, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.919148206710815\n",
      "Iter 1298 / 2000, Loss: 127911622.34375, CrossEntropy: 0.0025646942667663097, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 14280646.0, CrossEntropy: 2.242525339126587, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 12.88812518119812\n",
      "Iter 1299 / 2000, Loss: 129441792.46875, CrossEntropy: 0.00317377015016973, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13149008.0, CrossEntropy: 2.07434344291687, Accuracy: 0.8289161392405063\n",
      "Elapsed time for the training: 12.852030754089355\n",
      "Iter 1300 / 2000, Loss: 128501236.71875, CrossEntropy: 0.0027960254810750484, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14208264.0, CrossEntropy: 2.227893114089966, Accuracy: 0.8170490506329114\n",
      "Elapsed time for the training: 13.0515718460083\n",
      "Iter 1301 / 2000, Loss: 127815386.8125, CrossEntropy: 0.0025185842532664537, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13322008.0, CrossEntropy: 2.100703001022339, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 12.887280702590942\n",
      "Iter 1302 / 2000, Loss: 128133476.875, CrossEntropy: 0.002645061817020178, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13579634.0, CrossEntropy: 2.1428117752075195, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.89035415649414\n",
      "Iter 1303 / 2000, Loss: 128430251.1875, CrossEntropy: 0.0027609877288341522, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13052610.0, CrossEntropy: 2.052711009979248, Accuracy: 0.8245648734177216\n",
      "Elapsed time for the training: 12.866031646728516\n",
      "Iter 1304 / 2000, Loss: 127824775.625, CrossEntropy: 0.0025170124135911465, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 12990988.0, CrossEntropy: 2.0453357696533203, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.869528532028198\n",
      "Iter 1305 / 2000, Loss: 130248592.75, CrossEntropy: 0.003482616739347577, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13551529.0, CrossEntropy: 2.1544349193573, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.888535022735596\n",
      "Iter 1306 / 2000, Loss: 129504549.6875, CrossEntropy: 0.0031821844168007374, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13372514.0, CrossEntropy: 2.100431203842163, Accuracy: 0.8278283227848101\n",
      "Elapsed time for the training: 12.9156653881073\n",
      "Iter 1307 / 2000, Loss: 129442332.4375, CrossEntropy: 0.0031558529008179903, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13460940.0, CrossEntropy: 2.1504406929016113, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.945301294326782\n",
      "Iter 1308 / 2000, Loss: 130659929.40625, CrossEntropy: 0.003641516901552677, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13308068.0, CrossEntropy: 2.126771926879883, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 13.068753004074097\n",
      "Iter 1309 / 2000, Loss: 130617515.78125, CrossEntropy: 0.003625784069299698, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13929633.0, CrossEntropy: 2.185394048690796, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.952203273773193\n",
      "Iter 1310 / 2000, Loss: 128060016.28125, CrossEntropy: 0.0025944914668798447, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13248260.0, CrossEntropy: 2.092616558074951, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 12.891879796981812\n",
      "Iter 1311 / 2000, Loss: 128411193.0625, CrossEntropy: 0.002733898349106312, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13138872.0, CrossEntropy: 2.0968856811523438, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.849245548248291\n",
      "Iter 1312 / 2000, Loss: 128343889.96875, CrossEntropy: 0.0027054413221776485, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13616256.0, CrossEntropy: 2.135852575302124, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.962164878845215\n",
      "Iter 1313 / 2000, Loss: 130738834.0, CrossEntropy: 0.003659301670268178, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13214061.0, CrossEntropy: 2.0733752250671387, Accuracy: 0.8286194620253164\n",
      "Elapsed time for the training: 12.871941566467285\n",
      "Iter 1314 / 2000, Loss: 128961902.90625, CrossEntropy: 0.00294682988896966, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 13544095.0, CrossEntropy: 2.1391351222991943, Accuracy: 0.8188291139240507\n",
      "Elapsed time for the training: 12.833207845687866\n",
      "Iter 1315 / 2000, Loss: 130514703.125, CrossEntropy: 0.0035654492676258087, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 13348215.0, CrossEntropy: 2.095906972885132, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.841764688491821\n",
      "Iter 1316 / 2000, Loss: 129144707.28125, CrossEntropy: 0.0030147014185786247, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13567820.0, CrossEntropy: 2.148693323135376, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 12.90630030632019\n",
      "Iter 1317 / 2000, Loss: 128078609.3125, CrossEntropy: 0.0025860117748379707, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13472474.0, CrossEntropy: 2.131317615509033, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.927050828933716\n",
      "Iter 1318 / 2000, Loss: 129674888.6875, CrossEntropy: 0.0032219169661402702, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 14386154.0, CrossEntropy: 2.2516984939575195, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.970805883407593\n",
      "Iter 1319 / 2000, Loss: 129238343.875, CrossEntropy: 0.0030433463398367167, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 14536753.0, CrossEntropy: 2.2974021434783936, Accuracy: 0.817246835443038\n",
      "Elapsed time for the training: 12.870941162109375\n",
      "Iter 1320 / 2000, Loss: 130849750.84375, CrossEntropy: 0.0036837845109403133, Accuracy: 0.998821131713555\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13733201.0, CrossEntropy: 2.185365676879883, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.892277717590332\n",
      "Iter 1321 / 2000, Loss: 129957080.34375, CrossEntropy: 0.0033242576755583286, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13862980.0, CrossEntropy: 2.194455146789551, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.906356573104858\n",
      "Iter 1322 / 2000, Loss: 130039757.125, CrossEntropy: 0.0033547093626111746, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13257698.0, CrossEntropy: 2.084784984588623, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.933529138565063\n",
      "Iter 1323 / 2000, Loss: 127580744.4375, CrossEntropy: 0.002369585679844022, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13349079.0, CrossEntropy: 2.11384654045105, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 14.460808038711548\n",
      "Iter 1324 / 2000, Loss: 127751807.96875, CrossEntropy: 0.002456431742757559, Accuracy: 0.9991887787723785\n",
      "EVALUATION with last weights -> Loss: 14059183.0, CrossEntropy: 2.199537515640259, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 15.510331869125366\n",
      "Iter 1325 / 2000, Loss: 127044978.5, CrossEntropy: 0.0021540250163525343, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 13538323.0, CrossEntropy: 2.1374711990356445, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 13.520649194717407\n",
      "Iter 1326 / 2000, Loss: 128011808.71875, CrossEntropy: 0.0025368318893015385, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13702336.0, CrossEntropy: 2.1694881916046143, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.863113641738892\n",
      "Iter 1327 / 2000, Loss: 131441220.125, CrossEntropy: 0.003939283546060324, Accuracy: 0.9989090473145781\n",
      "EVALUATION with last weights -> Loss: 13629476.0, CrossEntropy: 2.1585466861724854, Accuracy: 0.8176424050632911\n",
      "Elapsed time for the training: 12.970006704330444\n",
      "Iter 1328 / 2000, Loss: 128133565.59375, CrossEntropy: 0.002607994247227907, Accuracy: 0.9990688938618926\n",
      "EVALUATION with last weights -> Loss: 14788861.0, CrossEntropy: 2.3484044075012207, Accuracy: 0.8124011075949367\n",
      "Elapsed time for the training: 12.978737354278564\n",
      "Iter 1329 / 2000, Loss: 129821253.84375, CrossEntropy: 0.0032512256875634193, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13880980.0, CrossEntropy: 2.168958902359009, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 13.054553508758545\n",
      "Iter 1330 / 2000, Loss: 127765553.96875, CrossEntropy: 0.0024269551504403353, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13518686.0, CrossEntropy: 2.1148438453674316, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 13.410165309906006\n",
      "Iter 1331 / 2000, Loss: 129465677.21875, CrossEntropy: 0.003103535156697035, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13635863.0, CrossEntropy: 2.1341471672058105, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.884508848190308\n",
      "Iter 1332 / 2000, Loss: 128878969.75, CrossEntropy: 0.0028725196607410908, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13443008.0, CrossEntropy: 2.100482702255249, Accuracy: 0.8280261075949367\n",
      "Elapsed time for the training: 12.900964260101318\n",
      "Iter 1333 / 2000, Loss: 127901543.90625, CrossEntropy: 0.0024735622573643923, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13206482.0, CrossEntropy: 2.0635159015655518, Accuracy: 0.831190664556962\n",
      "Elapsed time for the training: 12.896501064300537\n",
      "Iter 1334 / 2000, Loss: 128597381.15625, CrossEntropy: 0.002750309184193611, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 14091046.0, CrossEntropy: 2.2133243083953857, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.875516176223755\n",
      "Iter 1335 / 2000, Loss: 127915894.1875, CrossEntropy: 0.0024762579705566168, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14344427.0, CrossEntropy: 2.2480878829956055, Accuracy: 0.8277294303797469\n",
      "Elapsed time for the training: 14.89411735534668\n",
      "Iter 1336 / 2000, Loss: 128871125.34375, CrossEntropy: 0.0028553835581988096, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 12900134.0, CrossEntropy: 2.03641939163208, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 14.529897928237915\n",
      "Iter 1337 / 2000, Loss: 128458538.46875, CrossEntropy: 0.002687690546736121, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13261286.0, CrossEntropy: 2.092503547668457, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.879886865615845\n",
      "Iter 1338 / 2000, Loss: 128158854.8125, CrossEntropy: 0.0025671503972262144, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13261538.0, CrossEntropy: 2.0788612365722656, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 12.863956451416016\n",
      "Iter 1339 / 2000, Loss: 128576167.84375, CrossEntropy: 0.0027304808609187603, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13314257.0, CrossEntropy: 2.110525608062744, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 12.823552370071411\n",
      "Iter 1340 / 2000, Loss: 133351017.53125, CrossEntropy: 0.004635412245988846, Accuracy: 0.9986213235294118\n",
      "EVALUATION with last weights -> Loss: 13003800.0, CrossEntropy: 2.053821325302124, Accuracy: 0.8278283227848101\n",
      "Elapsed time for the training: 12.100807666778564\n",
      "Iter 1341 / 2000, Loss: 125765219.3125, CrossEntropy: 0.0016015433939173818, Accuracy: 0.9994005754475703\n",
      "EVALUATION with last weights -> Loss: 13114199.0, CrossEntropy: 2.0764803886413574, Accuracy: 0.8313884493670886\n",
      "Elapsed time for the training: 11.787585020065308\n",
      "Iter 1342 / 2000, Loss: 128929676.6875, CrossEntropy: 0.00286542228423059, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 13386969.0, CrossEntropy: 2.0998001098632812, Accuracy: 0.8289161392405063\n",
      "Elapsed time for the training: 12.65427017211914\n",
      "Iter 1343 / 2000, Loss: 130143314.3125, CrossEntropy: 0.003396308980882168, Accuracy: 0.9991288363171356\n",
      "EVALUATION with last weights -> Loss: 13349371.0, CrossEntropy: 2.1001381874084473, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.88019347190857\n",
      "Iter 1344 / 2000, Loss: 128222496.1875, CrossEntropy: 0.0025783006567507982, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13684356.0, CrossEntropy: 2.1519064903259277, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.903418064117432\n",
      "Iter 1345 / 2000, Loss: 131314079.0, CrossEntropy: 0.0038108057342469692, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 14498565.0, CrossEntropy: 2.29219651222229, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 12.8688485622406\n",
      "Iter 1346 / 2000, Loss: 131273690.28125, CrossEntropy: 0.0037917080335319042, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13454127.0, CrossEntropy: 2.107663154602051, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.895468711853027\n",
      "Iter 1347 / 2000, Loss: 128520788.875, CrossEntropy: 0.002688043750822544, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13746201.0, CrossEntropy: 2.1862270832061768, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 12.946261882781982\n",
      "Iter 1348 / 2000, Loss: 128508722.78125, CrossEntropy: 0.002681487239897251, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14055168.0, CrossEntropy: 2.2192177772521973, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 12.883306741714478\n",
      "Iter 1349 / 2000, Loss: 127941936.1875, CrossEntropy: 0.0024532214738428593, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13435639.0, CrossEntropy: 2.0998921394348145, Accuracy: 0.828817246835443\n",
      "Elapsed time for the training: 12.895899295806885\n",
      "Iter 1350 / 2000, Loss: 131199183.0, CrossEntropy: 0.0037518779281526804, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13225166.0, CrossEntropy: 2.066760540008545, Accuracy: 0.8310917721518988\n",
      "Elapsed time for the training: 13.368294715881348\n",
      "Iter 1351 / 2000, Loss: 128850819.53125, CrossEntropy: 0.0028105846140533686, Accuracy: 0.9990009590792839\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 14324268.0, CrossEntropy: 2.2664902210235596, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 15.480865955352783\n",
      "Iter 1352 / 2000, Loss: 129769907.8125, CrossEntropy: 0.003175572259351611, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13830713.0, CrossEntropy: 2.1971681118011475, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 15.621987104415894\n",
      "Iter 1353 / 2000, Loss: 130703759.03125, CrossEntropy: 0.003571961307898164, Accuracy: 0.9988690856777493\n",
      "EVALUATION with last weights -> Loss: 13752414.0, CrossEntropy: 2.1490323543548584, Accuracy: 0.8279272151898734\n",
      "Elapsed time for the training: 14.543582439422607\n",
      "Iter 1354 / 2000, Loss: 128380557.65625, CrossEntropy: 0.0026138087268918753, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13183687.0, CrossEntropy: 2.0906503200531006, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.89042592048645\n",
      "Iter 1355 / 2000, Loss: 129873584.40625, CrossEntropy: 0.0032227286137640476, Accuracy: 0.998968989769821\n",
      "EVALUATION with last weights -> Loss: 13706224.0, CrossEntropy: 2.176443338394165, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.912457704544067\n",
      "Iter 1356 / 2000, Loss: 129756193.15625, CrossEntropy: 0.003249398898333311, Accuracy: 0.9990369245524298\n",
      "EVALUATION with last weights -> Loss: 13711205.0, CrossEntropy: 2.151370048522949, Accuracy: 0.8184335443037974\n",
      "Elapsed time for the training: 12.888489961624146\n",
      "Iter 1357 / 2000, Loss: 129500681.84375, CrossEntropy: 0.003058821428567171, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13345382.0, CrossEntropy: 2.1003997325897217, Accuracy: 0.8263449367088608\n",
      "Elapsed time for the training: 12.84911561012268\n",
      "Iter 1358 / 2000, Loss: 132031814.8125, CrossEntropy: 0.004064322914928198, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13195508.0, CrossEntropy: 2.0753679275512695, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 12.885918378829956\n",
      "Iter 1359 / 2000, Loss: 131033286.25, CrossEntropy: 0.0036723774392157793, Accuracy: 0.9989889705882353\n",
      "EVALUATION with last weights -> Loss: 13708794.0, CrossEntropy: 2.150728702545166, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.933794736862183\n",
      "Iter 1360 / 2000, Loss: 129142607.34375, CrossEntropy: 0.002904457738623023, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13271042.0, CrossEntropy: 2.0892038345336914, Accuracy: 0.8261471518987342\n",
      "Elapsed time for the training: 12.889997720718384\n",
      "Iter 1361 / 2000, Loss: 126285557.875, CrossEntropy: 0.0017606879118829966, Accuracy: 0.9993606138107417\n",
      "EVALUATION with last weights -> Loss: 13323794.0, CrossEntropy: 2.101386070251465, Accuracy: 0.8312895569620253\n",
      "Elapsed time for the training: 12.861029863357544\n",
      "Iter 1362 / 2000, Loss: 128595875.0625, CrossEntropy: 0.0026814339216798544, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14147286.0, CrossEntropy: 2.218900442123413, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.848772764205933\n",
      "Iter 1363 / 2000, Loss: 129240016.375, CrossEntropy: 0.0029369741678237915, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13643452.0, CrossEntropy: 2.1681153774261475, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 12.875832557678223\n",
      "Iter 1364 / 2000, Loss: 131443530.09375, CrossEntropy: 0.003847226733341813, Accuracy: 0.9988970588235294\n",
      "EVALUATION with last weights -> Loss: 14365871.0, CrossEntropy: 2.2608323097229004, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.879499912261963\n",
      "Iter 1365 / 2000, Loss: 130788742.125, CrossEntropy: 0.0035497313365340233, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13773865.0, CrossEntropy: 2.165989637374878, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.931299686431885\n",
      "Iter 1366 / 2000, Loss: 129686582.875, CrossEntropy: 0.0031061475165188313, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13065617.0, CrossEntropy: 2.0769777297973633, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.8849356174469\n",
      "Iter 1367 / 2000, Loss: 129378371.0625, CrossEntropy: 0.0029799495823681355, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13933017.0, CrossEntropy: 2.185991048812866, Accuracy: 0.8292128164556962\n",
      "Elapsed time for the training: 12.87023663520813\n",
      "Iter 1368 / 2000, Loss: 130144334.4375, CrossEntropy: 0.00328463944606483, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13901092.0, CrossEntropy: 2.219055414199829, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.889129877090454\n",
      "Iter 1369 / 2000, Loss: 130160485.375, CrossEntropy: 0.0032886494882404804, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13456326.0, CrossEntropy: 2.1201932430267334, Accuracy: 0.8292128164556962\n",
      "Elapsed time for the training: 13.511569023132324\n",
      "Iter 1370 / 2000, Loss: 127618411.25, CrossEntropy: 0.0022704750299453735, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 13980245.0, CrossEntropy: 2.2034218311309814, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 13.716163158416748\n",
      "Iter 1371 / 2000, Loss: 127280161.78125, CrossEntropy: 0.002134243492037058, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14184409.0, CrossEntropy: 2.2189688682556152, Accuracy: 0.8277294303797469\n",
      "Elapsed time for the training: 13.347809076309204\n",
      "Iter 1372 / 2000, Loss: 129990134.71875, CrossEntropy: 0.003214460564777255, Accuracy: 0.9988411125319693\n",
      "EVALUATION with last weights -> Loss: 13706777.0, CrossEntropy: 2.1801552772521973, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 13.605813980102539\n",
      "Iter 1373 / 2000, Loss: 127691299.125, CrossEntropy: 0.0022924914956092834, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 13718127.0, CrossEntropy: 2.1605591773986816, Accuracy: 0.8279272151898734\n",
      "Elapsed time for the training: 13.520642280578613\n",
      "Iter 1374 / 2000, Loss: 130682588.6875, CrossEntropy: 0.0034849238581955433, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13374528.0, CrossEntropy: 2.1142921447753906, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.886008024215698\n",
      "Iter 1375 / 2000, Loss: 129765940.5, CrossEntropy: 0.003116493346169591, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13394783.0, CrossEntropy: 2.1103177070617676, Accuracy: 0.8279272151898734\n",
      "Elapsed time for the training: 12.921167850494385\n",
      "Iter 1376 / 2000, Loss: 128578649.53125, CrossEntropy: 0.0026400911156088114, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13941882.0, CrossEntropy: 2.19185471534729, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.923309803009033\n",
      "Iter 1377 / 2000, Loss: 130458619.4375, CrossEntropy: 0.003387765260413289, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13701213.0, CrossEntropy: 2.155257225036621, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 13.001281976699829\n",
      "Iter 1378 / 2000, Loss: 130824915.78125, CrossEntropy: 0.003531724913045764, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13868415.0, CrossEntropy: 2.2227048873901367, Accuracy: 0.8173457278481012\n",
      "Elapsed time for the training: 12.933878660202026\n",
      "Iter 1379 / 2000, Loss: 128255790.9375, CrossEntropy: 0.0025025440845638514, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 13942425.0, CrossEntropy: 2.219006299972534, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 13.090561389923096\n",
      "Iter 1380 / 2000, Loss: 128773960.28125, CrossEntropy: 0.0027070820797234774, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13434119.0, CrossEntropy: 2.1145079135894775, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 13.741750717163086\n",
      "Iter 1381 / 2000, Loss: 129057489.1875, CrossEntropy: 0.002818566747009754, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13677547.0, CrossEntropy: 2.164355993270874, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.98838210105896\n",
      "Iter 1382 / 2000, Loss: 130165242.5, CrossEntropy: 0.0032587575260549784, Accuracy: 0.9990808823529411\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13292206.0, CrossEntropy: 2.087904930114746, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.882723093032837\n",
      "Iter 1391 / 2000, Loss: 131145536.09375, CrossEntropy: 0.003629017388448119, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 14118478.0, CrossEntropy: 2.2168495655059814, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.930974006652832\n",
      "Iter 1392 / 2000, Loss: 131077943.6875, CrossEntropy: 0.0036003144923597574, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13081308.0, CrossEntropy: 2.0476677417755127, Accuracy: 0.8271360759493671\n",
      "Elapsed time for the training: 12.94662857055664\n",
      "Iter 1393 / 2000, Loss: 128120910.5625, CrossEntropy: 0.002414367627352476, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 13838934.0, CrossEntropy: 2.210899591445923, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 12.836386680603027\n",
      "Iter 1394 / 2000, Loss: 128374553.40625, CrossEntropy: 0.0025140398647636175, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14132048.0, CrossEntropy: 2.239250659942627, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 12.955743789672852\n",
      "Iter 1395 / 2000, Loss: 129305603.1875, CrossEntropy: 0.002884063171222806, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 14915798.0, CrossEntropy: 2.335737943649292, Accuracy: 0.8120055379746836\n",
      "Elapsed time for the training: 12.851723432540894\n",
      "Iter 1396 / 2000, Loss: 128733015.875, CrossEntropy: 0.0027355141937732697, Accuracy: 0.9991687979539642\n",
      "EVALUATION with last weights -> Loss: 15887514.0, CrossEntropy: 2.5330119132995605, Accuracy: 0.7984572784810127\n",
      "Elapsed time for the training: 12.910243034362793\n",
      "Iter 1397 / 2000, Loss: 129448133.65625, CrossEntropy: 0.002954379888251424, Accuracy: 0.9992687020460358\n",
      "EVALUATION with last weights -> Loss: 13674747.0, CrossEntropy: 2.18092942237854, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.917220830917358\n",
      "Iter 1398 / 2000, Loss: 128088453.8125, CrossEntropy: 0.002391411690041423, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13066353.0, CrossEntropy: 2.0486702919006348, Accuracy: 0.8303006329113924\n",
      "Elapsed time for the training: 12.870516061782837\n",
      "Iter 1399 / 2000, Loss: 129477327.71875, CrossEntropy: 0.00296102580614388, Accuracy: 0.9990688938618926\n",
      "EVALUATION with last weights -> Loss: 13638159.0, CrossEntropy: 2.182823419570923, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 12.883110761642456\n",
      "Iter 1400 / 2000, Loss: 128519502.78125, CrossEntropy: 0.0025591780431568623, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13290971.0, CrossEntropy: 2.081474542617798, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 12.889850378036499\n",
      "Iter 1401 / 2000, Loss: 128233381.59375, CrossEntropy: 0.002443823264911771, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13721936.0, CrossEntropy: 2.2103583812713623, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.918158054351807\n",
      "Iter 1402 / 2000, Loss: 127155873.65625, CrossEntropy: 0.0020100565161556005, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14439721.0, CrossEntropy: 2.3033061027526855, Accuracy: 0.8160601265822784\n",
      "Elapsed time for the training: 12.917491912841797\n",
      "Iter 1403 / 2000, Loss: 128479546.5, CrossEntropy: 0.0025371212977916002, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 14131119.0, CrossEntropy: 2.247579336166382, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.907902479171753\n",
      "Iter 1404 / 2000, Loss: 129717583.09375, CrossEntropy: 0.0030298116616904736, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13217779.0, CrossEntropy: 2.0843045711517334, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.91019058227539\n",
      "Iter 1405 / 2000, Loss: 127822013.09375, CrossEntropy: 0.0022707628086209297, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13392621.0, CrossEntropy: 2.0926220417022705, Accuracy: 0.8323773734177216\n",
      "Elapsed time for the training: 12.970843076705933\n",
      "Iter 1406 / 2000, Loss: 128533280.84375, CrossEntropy: 0.0025526294484734535, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14847378.0, CrossEntropy: 2.336085557937622, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.91347622871399\n",
      "Iter 1407 / 2000, Loss: 132055117.0, CrossEntropy: 0.0039730374701321125, Accuracy: 0.9990489130434783\n",
      "EVALUATION with last weights -> Loss: 13920271.0, CrossEntropy: 2.2183775901794434, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 12.912368059158325\n",
      "Iter 1408 / 2000, Loss: 127375601.46875, CrossEntropy: 0.002085659420117736, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13547234.0, CrossEntropy: 2.125760078430176, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 12.949402332305908\n",
      "Iter 1409 / 2000, Loss: 130043231.5625, CrossEntropy: 0.003149323631078005, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13552954.0, CrossEntropy: 2.126899480819702, Accuracy: 0.8278283227848101\n",
      "Elapsed time for the training: 12.883998394012451\n",
      "Iter 1410 / 2000, Loss: 130996532.21875, CrossEntropy: 0.0035275735426694155, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13458011.0, CrossEntropy: 2.1123135089874268, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.9133141040802\n",
      "Iter 1411 / 2000, Loss: 129862880.5625, CrossEntropy: 0.0030722019728273153, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13792519.0, CrossEntropy: 2.2035040855407715, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.973065614700317\n",
      "Iter 1412 / 2000, Loss: 127063554.34375, CrossEntropy: 0.0019513838924467564, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13558639.0, CrossEntropy: 2.1443722248077393, Accuracy: 0.8286194620253164\n",
      "Elapsed time for the training: 13.848095655441284\n",
      "Iter 1413 / 2000, Loss: 129876902.15625, CrossEntropy: 0.003075930057093501, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13789615.0, CrossEntropy: 2.1731860637664795, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 14.826135635375977\n",
      "Iter 1414 / 2000, Loss: 130450599.09375, CrossEntropy: 0.0033264479134231806, Accuracy: 0.9990089514066497\n",
      "EVALUATION with last weights -> Loss: 14313971.0, CrossEntropy: 2.236562490463257, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.932978630065918\n",
      "Iter 1415 / 2000, Loss: 130572687.75, CrossEntropy: 0.0033480371348559856, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13622559.0, CrossEntropy: 2.1400856971740723, Accuracy: 0.8268393987341772\n",
      "Elapsed time for the training: 12.89579701423645\n",
      "Iter 1416 / 2000, Loss: 128468292.21875, CrossEntropy: 0.002504754113033414, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13628147.0, CrossEntropy: 2.1772680282592773, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 12.930330276489258\n",
      "Iter 1417 / 2000, Loss: 127897316.125, CrossEntropy: 0.0022743581794202328, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 14621904.0, CrossEntropy: 2.372009038925171, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 13.107669115066528\n",
      "Iter 1418 / 2000, Loss: 129581423.46875, CrossEntropy: 0.002956786658614874, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13664969.0, CrossEntropy: 2.1552727222442627, Accuracy: 0.8277294303797469\n",
      "Elapsed time for the training: 13.000385761260986\n",
      "Iter 1419 / 2000, Loss: 128588282.34375, CrossEntropy: 0.002547001000493765, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13710740.0, CrossEntropy: 2.143261432647705, Accuracy: 0.8245648734177216\n",
      "Elapsed time for the training: 13.115979194641113\n",
      "Iter 1420 / 2000, Loss: 129401687.84375, CrossEntropy: 0.0028691566549241543, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13659146.0, CrossEntropy: 2.1519877910614014, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 12.983989000320435\n",
      "Iter 1421 / 2000, Loss: 127047312.5625, CrossEntropy: 0.0019633725751191378, Accuracy: 0.9993486253196932\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 15157163.0, CrossEntropy: 2.3951520919799805, Accuracy: 0.8116099683544303\n",
      "Elapsed time for the training: 12.187703371047974\n",
      "Iter 1422 / 2000, Loss: 127634985.65625, CrossEntropy: 0.002162700053304434, Accuracy: 0.9993406329923273\n",
      "EVALUATION with last weights -> Loss: 13902521.0, CrossEntropy: 2.1828184127807617, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 11.816808462142944\n",
      "Iter 1423 / 2000, Loss: 129172881.125, CrossEntropy: 0.002774727065116167, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14151321.0, CrossEntropy: 2.211144208908081, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.70325779914856\n",
      "Iter 1424 / 2000, Loss: 128132726.5, CrossEntropy: 0.002356837037950754, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 13847761.0, CrossEntropy: 2.183619976043701, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 12.916258573532104\n",
      "Iter 1425 / 2000, Loss: 128171201.6875, CrossEntropy: 0.0023712499532848597, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 13861770.0, CrossEntropy: 2.189470052719116, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.942455768585205\n",
      "Iter 1426 / 2000, Loss: 129243320.59375, CrossEntropy: 0.002797165885567665, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 14508462.0, CrossEntropy: 2.292588710784912, Accuracy: 0.8146756329113924\n",
      "Elapsed time for the training: 12.980751991271973\n",
      "Iter 1427 / 2000, Loss: 129968808.4375, CrossEntropy: 0.003093759063631296, Accuracy: 0.9988690856777493\n",
      "EVALUATION with last weights -> Loss: 14385758.0, CrossEntropy: 2.2645347118377686, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.911158561706543\n",
      "Iter 1428 / 2000, Loss: 130821549.5, CrossEntropy: 0.0034221825189888477, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13774132.0, CrossEntropy: 2.187239646911621, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 12.88631296157837\n",
      "Iter 1429 / 2000, Loss: 128142572.5, CrossEntropy: 0.0023509811144322157, Accuracy: 0.9994205562659847\n",
      "EVALUATION with last weights -> Loss: 13761938.0, CrossEntropy: 2.1943061351776123, Accuracy: 0.8263449367088608\n",
      "Elapsed time for the training: 12.908354997634888\n",
      "Iter 1430 / 2000, Loss: 127555277.375, CrossEntropy: 0.0021138121373951435, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 13684035.0, CrossEntropy: 2.1680760383605957, Accuracy: 0.8280261075949367\n",
      "Elapsed time for the training: 12.93714165687561\n",
      "Iter 1431 / 2000, Loss: 128362936.40625, CrossEntropy: 0.002435270231217146, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13968674.0, CrossEntropy: 2.196617603302002, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.886327981948853\n",
      "Iter 1432 / 2000, Loss: 127887837.9375, CrossEntropy: 0.0022433556150645018, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 13641866.0, CrossEntropy: 2.135357618331909, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.868058204650879\n",
      "Iter 1433 / 2000, Loss: 129618074.5625, CrossEntropy: 0.0029324686620384455, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13821826.0, CrossEntropy: 2.165493965148926, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.84640908241272\n",
      "Iter 1434 / 2000, Loss: 129717655.28125, CrossEntropy: 0.002969340654090047, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13911428.0, CrossEntropy: 2.1928532123565674, Accuracy: 0.8269382911392406\n",
      "Elapsed time for the training: 12.825902462005615\n",
      "Iter 1435 / 2000, Loss: 128468633.125, CrossEntropy: 0.002467534737661481, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13505596.0, CrossEntropy: 2.1253161430358887, Accuracy: 0.8286194620253164\n",
      "Elapsed time for the training: 12.863409042358398\n",
      "Iter 1436 / 2000, Loss: 130412474.5625, CrossEntropy: 0.003243622137233615, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13707804.0, CrossEntropy: 2.1522703170776367, Accuracy: 0.8278283227848101\n",
      "Elapsed time for the training: 12.871882200241089\n",
      "Iter 1437 / 2000, Loss: 128716187.28125, CrossEntropy: 0.002561954315751791, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13308736.0, CrossEntropy: 2.0910284519195557, Accuracy: 0.8276305379746836\n",
      "Elapsed time for the training: 12.904869556427002\n",
      "Iter 1438 / 2000, Loss: 131544079.71875, CrossEntropy: 0.0036893023643642664, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 13543040.0, CrossEntropy: 2.143136978149414, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.877323865890503\n",
      "Iter 1439 / 2000, Loss: 129751162.78125, CrossEntropy: 0.002970079192891717, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13961826.0, CrossEntropy: 2.2002670764923096, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.89200472831726\n",
      "Iter 1440 / 2000, Loss: 128771060.8125, CrossEntropy: 0.0025765651371330023, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13688401.0, CrossEntropy: 2.171363353729248, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.846489667892456\n",
      "Iter 1441 / 2000, Loss: 129198093.9375, CrossEntropy: 0.002744851401075721, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13806378.0, CrossEntropy: 2.1660642623901367, Accuracy: 0.8263449367088608\n",
      "Elapsed time for the training: 12.424620628356934\n",
      "Iter 1442 / 2000, Loss: 131737826.65625, CrossEntropy: 0.0037574090529233217, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13527558.0, CrossEntropy: 2.1269116401672363, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 11.767348527908325\n",
      "Iter 1443 / 2000, Loss: 127533279.34375, CrossEntropy: 0.002075623255223036, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 13784461.0, CrossEntropy: 2.2113914489746094, Accuracy: 0.819620253164557\n",
      "Elapsed time for the training: 11.967018127441406\n",
      "Iter 1444 / 2000, Loss: 129638801.03125, CrossEntropy: 0.0029182936996221542, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13990130.0, CrossEntropy: 2.2143592834472656, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.86597728729248\n",
      "Iter 1445 / 2000, Loss: 131870204.65625, CrossEntropy: 0.0038040089420974255, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13810048.0, CrossEntropy: 2.1777803897857666, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.894916534423828\n",
      "Iter 1446 / 2000, Loss: 129960058.875, CrossEntropy: 0.0030388052109628916, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 14051644.0, CrossEntropy: 2.230537176132202, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.878103494644165\n",
      "Iter 1447 / 2000, Loss: 130146741.0, CrossEntropy: 0.0031244840938597918, Accuracy: 0.9992287404092072\n",
      "EVALUATION with last weights -> Loss: 14732175.0, CrossEntropy: 2.3495593070983887, Accuracy: 0.8174446202531646\n",
      "Elapsed time for the training: 12.907686233520508\n",
      "Iter 1448 / 2000, Loss: 128315583.75, CrossEntropy: 0.002376219490543008, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14401791.0, CrossEntropy: 2.2555508613586426, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.888458728790283\n",
      "Iter 1449 / 2000, Loss: 129293076.1875, CrossEntropy: 0.0027653491124510765, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13764616.0, CrossEntropy: 2.1586568355560303, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.644438028335571\n",
      "Iter 1450 / 2000, Loss: 132375798.9375, CrossEntropy: 0.00399414449930191, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 14114133.0, CrossEntropy: 2.2187416553497314, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 11.80833387374878\n",
      "Iter 1451 / 2000, Loss: 128989542.625, CrossEntropy: 0.002637235913425684, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13734386.0, CrossEntropy: 2.1582658290863037, Accuracy: 0.825751582278481\n",
      "Elapsed time for the training: 11.785260200500488\n",
      "Iter 1452 / 2000, Loss: 127012463.53125, CrossEntropy: 0.0018462433945387602, Accuracy: 0.9993206521739131\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13467080.0, CrossEntropy: 2.1158838272094727, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 13.023595333099365\n",
      "Iter 1453 / 2000, Loss: 129062395.25, CrossEntropy: 0.0026628165505826473, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 13689024.0, CrossEntropy: 2.1534745693206787, Accuracy: 0.8283227848101266\n",
      "Elapsed time for the training: 12.939908027648926\n",
      "Iter 1454 / 2000, Loss: 133410306.03125, CrossEntropy: 0.0043970756232738495, Accuracy: 0.9987811700767263\n",
      "EVALUATION with last weights -> Loss: 13624991.0, CrossEntropy: 2.148502826690674, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.834998369216919\n",
      "Iter 1455 / 2000, Loss: 127529899.875, CrossEntropy: 0.002044870750978589, Accuracy: 0.9993406329923273\n",
      "EVALUATION with last weights -> Loss: 13458072.0, CrossEntropy: 2.11696457862854, Accuracy: 0.8287183544303798\n",
      "Elapsed time for the training: 12.923258781433105\n",
      "Iter 1456 / 2000, Loss: 129054451.40625, CrossEntropy: 0.0026530157774686813, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13257603.0, CrossEntropy: 2.1044435501098633, Accuracy: 0.827432753164557\n",
      "Elapsed time for the training: 12.885743618011475\n",
      "Iter 1457 / 2000, Loss: 131604896.0625, CrossEntropy: 0.0036689776461571455, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13245318.0, CrossEntropy: 2.0779452323913574, Accuracy: 0.8251582278481012\n",
      "Elapsed time for the training: 12.87395715713501\n",
      "Iter 1458 / 2000, Loss: 130198573.125, CrossEntropy: 0.003104609902948141, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13694660.0, CrossEntropy: 2.1593899726867676, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.89193868637085\n",
      "Iter 1459 / 2000, Loss: 129185665.28125, CrossEntropy: 0.002715360838919878, Accuracy: 0.9991088554987213\n",
      "EVALUATION with last weights -> Loss: 13946995.0, CrossEntropy: 2.179511308670044, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.87829065322876\n",
      "Iter 1460 / 2000, Loss: 130214718.1875, CrossEntropy: 0.003105750074610114, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 14019113.0, CrossEntropy: 2.224041700363159, Accuracy: 0.8220925632911392\n",
      "Elapsed time for the training: 12.877633094787598\n",
      "Iter 1461 / 2000, Loss: 127122673.75, CrossEntropy: 0.0018678278429433703, Accuracy: 0.9994605179028133\n",
      "EVALUATION with last weights -> Loss: 13730894.0, CrossEntropy: 2.154315710067749, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.905438423156738\n",
      "Iter 1462 / 2000, Loss: 130532307.625, CrossEntropy: 0.0032289528753608465, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14088521.0, CrossEntropy: 2.2050981521606445, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 12.88968300819397\n",
      "Iter 1463 / 2000, Loss: 129714858.03125, CrossEntropy: 0.002901777857914567, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13422024.0, CrossEntropy: 2.1110470294952393, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.878867864608765\n",
      "Iter 1464 / 2000, Loss: 128949080.15625, CrossEntropy: 0.0025924581568688154, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14281712.0, CrossEntropy: 2.268228054046631, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.89717984199524\n",
      "Iter 1465 / 2000, Loss: 129714217.1875, CrossEntropy: 0.0028951140120625496, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13984331.0, CrossEntropy: 2.2190029621124268, Accuracy: 0.8111155063291139\n",
      "Elapsed time for the training: 12.870591402053833\n",
      "Iter 1466 / 2000, Loss: 127720190.875, CrossEntropy: 0.0020963535644114017, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13362907.0, CrossEntropy: 2.0907444953918457, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.87077522277832\n",
      "Iter 1467 / 2000, Loss: 130820042.0625, CrossEntropy: 0.0033325895201414824, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13988953.0, CrossEntropy: 2.2410106658935547, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.916807889938354\n",
      "Iter 1468 / 2000, Loss: 127870704.59375, CrossEntropy: 0.0021515723783522844, Accuracy: 0.9993606138107417\n",
      "EVALUATION with last weights -> Loss: 13949735.0, CrossEntropy: 2.1996912956237793, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.91645860671997\n",
      "Iter 1469 / 2000, Loss: 129952732.34375, CrossEntropy: 0.002982111880555749, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13770804.0, CrossEntropy: 2.1563377380371094, Accuracy: 0.8256526898734177\n",
      "Elapsed time for the training: 13.69101357460022\n",
      "Iter 1470 / 2000, Loss: 129620304.15625, CrossEntropy: 0.0028483497444540262, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 14064164.0, CrossEntropy: 2.204685926437378, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.883042573928833\n",
      "Iter 1471 / 2000, Loss: 128933893.5, CrossEntropy: 0.002589424839243293, Accuracy: 0.9991687979539642\n",
      "EVALUATION with last weights -> Loss: 13670627.0, CrossEntropy: 2.149660348892212, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.85062551498413\n",
      "Iter 1472 / 2000, Loss: 132641376.96875, CrossEntropy: 0.0040483600459992886, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 13934349.0, CrossEntropy: 2.1866817474365234, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.807414054870605\n",
      "Iter 1473 / 2000, Loss: 129144676.46875, CrossEntropy: 0.0026486883871257305, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13321555.0, CrossEntropy: 2.1237123012542725, Accuracy: 0.8277294303797469\n",
      "Elapsed time for the training: 12.845588684082031\n",
      "Iter 1474 / 2000, Loss: 128755550.65625, CrossEntropy: 0.0024917598348110914, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 13299249.0, CrossEntropy: 2.1088085174560547, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 13.070409774780273\n",
      "Iter 1475 / 2000, Loss: 130239055.90625, CrossEntropy: 0.0030825957655906677, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 14015187.0, CrossEntropy: 2.197849988937378, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 14.511728525161743\n",
      "Iter 1476 / 2000, Loss: 129024493.03125, CrossEntropy: 0.002595412079244852, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14971336.0, CrossEntropy: 2.359278678894043, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 15.607908248901367\n",
      "Iter 1477 / 2000, Loss: 128760561.25, CrossEntropy: 0.0024878268595784903, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13473298.0, CrossEntropy: 2.115463972091675, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 15.451002836227417\n",
      "Iter 1478 / 2000, Loss: 127884382.5, CrossEntropy: 0.002136597177013755, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14236862.0, CrossEntropy: 2.2324397563934326, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 15.562100887298584\n",
      "Iter 1479 / 2000, Loss: 129351417.65625, CrossEntropy: 0.0027206779923290014, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 13505678.0, CrossEntropy: 2.150052785873413, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 15.592972755432129\n",
      "Iter 1480 / 2000, Loss: 127794161.84375, CrossEntropy: 0.002099091885611415, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 13851942.0, CrossEntropy: 2.1858394145965576, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 15.611696481704712\n",
      "Iter 1481 / 2000, Loss: 130640013.40625, CrossEntropy: 0.003231772920116782, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13417564.0, CrossEntropy: 2.0981802940368652, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.889189958572388\n",
      "Iter 1482 / 2000, Loss: 130752746.84375, CrossEntropy: 0.003274646820500493, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13903211.0, CrossEntropy: 2.187044143676758, Accuracy: 0.8156645569620253\n",
      "Elapsed time for the training: 12.919975280761719\n",
      "Iter 1483 / 2000, Loss: 131402683.59375, CrossEntropy: 0.003531423630192876, Accuracy: 0.9990009590792839\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 14057280.0, CrossEntropy: 2.196448564529419, Accuracy: 0.825751582278481\n",
      "Elapsed time for the training: 12.89873218536377\n",
      "Iter 1484 / 2000, Loss: 132140885.03125, CrossEntropy: 0.0038235634565353394, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13746602.0, CrossEntropy: 2.1817848682403564, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.946929931640625\n",
      "Iter 1485 / 2000, Loss: 127478923.625, CrossEntropy: 0.0019838670268654823, Accuracy: 0.9993286445012788\n",
      "EVALUATION with last weights -> Loss: 14336047.0, CrossEntropy: 2.2590255737304688, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.931639671325684\n",
      "Iter 1486 / 2000, Loss: 129545047.125, CrossEntropy: 0.0027823878917843103, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13845213.0, CrossEntropy: 2.181316614151001, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.916078329086304\n",
      "Iter 1487 / 2000, Loss: 130501040.6875, CrossEntropy: 0.0031619486398994923, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13891161.0, CrossEntropy: 2.2082326412200928, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.908985137939453\n",
      "Iter 1488 / 2000, Loss: 130931806.375, CrossEntropy: 0.0033324265386909246, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13908305.0, CrossEntropy: 2.217425584793091, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.904760837554932\n",
      "Iter 1489 / 2000, Loss: 127303400.65625, CrossEntropy: 0.0018803334096446633, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 13915514.0, CrossEntropy: 2.194849967956543, Accuracy: 0.8276305379746836\n",
      "Elapsed time for the training: 12.812827825546265\n",
      "Iter 1490 / 2000, Loss: 129491837.4375, CrossEntropy: 0.00275254063308239, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13837408.0, CrossEntropy: 2.1997368335723877, Accuracy: 0.8193235759493671\n",
      "Elapsed time for the training: 12.909278154373169\n",
      "Iter 1491 / 2000, Loss: 127787654.46875, CrossEntropy: 0.0020697240252047777, Accuracy: 0.9993406329923273\n",
      "EVALUATION with last weights -> Loss: 14346086.0, CrossEntropy: 2.344451427459717, Accuracy: 0.8166534810126582\n",
      "Elapsed time for the training: 12.903236627578735\n",
      "Iter 1492 / 2000, Loss: 132795380.40625, CrossEntropy: 0.004068602807819843, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 13629785.0, CrossEntropy: 2.1509854793548584, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.916917324066162\n",
      "Iter 1493 / 2000, Loss: 129896725.8125, CrossEntropy: 0.0029141027480363846, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14297991.0, CrossEntropy: 2.236999034881592, Accuracy: 0.8270371835443038\n",
      "Elapsed time for the training: 12.885432004928589\n",
      "Iter 1494 / 2000, Loss: 126959677.09375, CrossEntropy: 0.0017514927312731743, Accuracy: 0.9994884910485934\n",
      "EVALUATION with last weights -> Loss: 14350902.0, CrossEntropy: 2.250218152999878, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.910501480102539\n",
      "Iter 1495 / 2000, Loss: 130755313.46875, CrossEntropy: 0.0032511597964912653, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 13712465.0, CrossEntropy: 2.142577886581421, Accuracy: 0.8261471518987342\n",
      "Elapsed time for the training: 12.88875675201416\n",
      "Iter 1496 / 2000, Loss: 129688802.34375, CrossEntropy: 0.0028202359098941088, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13585666.0, CrossEntropy: 2.1690244674682617, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 13.048943758010864\n",
      "Iter 1497 / 2000, Loss: 128748838.78125, CrossEntropy: 0.0024455292150378227, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 13987336.0, CrossEntropy: 2.223515272140503, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 12.934806823730469\n",
      "Iter 1498 / 2000, Loss: 129665557.6875, CrossEntropy: 0.0028062905184924603, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14180045.0, CrossEntropy: 2.2781078815460205, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 13.544338703155518\n",
      "Iter 1499 / 2000, Loss: 127679084.90625, CrossEntropy: 0.0020108141470700502, Accuracy: 0.9994205562659847\n",
      "EVALUATION with last weights -> Loss: 13797893.0, CrossEntropy: 2.202901601791382, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 13.273010015487671\n",
      "Iter 1500 / 2000, Loss: 130450666.1875, CrossEntropy: 0.0031172679737210274, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13974018.0, CrossEntropy: 2.2302823066711426, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 12.858240604400635\n",
      "Iter 1501 / 2000, Loss: 128819544.40625, CrossEntropy: 0.002463369397446513, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 13262121.0, CrossEntropy: 2.097291946411133, Accuracy: 0.8283227848101266\n",
      "Elapsed time for the training: 12.859217166900635\n",
      "Iter 1502 / 2000, Loss: 127667546.5625, CrossEntropy: 0.0020057400688529015, Accuracy: 0.9994005754475703\n",
      "EVALUATION with last weights -> Loss: 15284757.0, CrossEntropy: 2.4202237129211426, Accuracy: 0.8105221518987342\n",
      "Elapsed time for the training: 12.92803168296814\n",
      "Iter 1503 / 2000, Loss: 130897091.125, CrossEntropy: 0.00329301948659122, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14381573.0, CrossEntropy: 2.270153760910034, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.918749570846558\n",
      "Iter 1504 / 2000, Loss: 130302743.59375, CrossEntropy: 0.0030536979902535677, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14262107.0, CrossEntropy: 2.228548526763916, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.977541208267212\n",
      "Iter 1505 / 2000, Loss: 128322999.59375, CrossEntropy: 0.0022628086153417826, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 13398484.0, CrossEntropy: 2.11751389503479, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 12.991309881210327\n",
      "Iter 1506 / 2000, Loss: 130237868.53125, CrossEntropy: 0.003035975620150566, Accuracy: 0.999028932225064\n",
      "EVALUATION with last weights -> Loss: 14315992.0, CrossEntropy: 2.2435991764068604, Accuracy: 0.8246637658227848\n",
      "Elapsed time for the training: 12.977960109710693\n",
      "Iter 1507 / 2000, Loss: 132244496.875, CrossEntropy: 0.003822967177256942, Accuracy: 0.9989609974424553\n",
      "EVALUATION with last weights -> Loss: 13729603.0, CrossEntropy: 2.1923747062683105, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 13.508360147476196\n",
      "Iter 1508 / 2000, Loss: 129459054.6875, CrossEntropy: 0.0027075072284787893, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13933690.0, CrossEntropy: 2.2189133167266846, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 15.637523651123047\n",
      "Iter 1509 / 2000, Loss: 126241380.15625, CrossEntropy: 0.0014238245785236359, Accuracy: 0.9996603260869565\n",
      "EVALUATION with last weights -> Loss: 13910755.0, CrossEntropy: 2.192063093185425, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 15.400465726852417\n",
      "Iter 1510 / 2000, Loss: 129139336.5625, CrossEntropy: 0.0025793814565986395, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 14243309.0, CrossEntropy: 2.2711617946624756, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 15.61184811592102\n",
      "Iter 1511 / 2000, Loss: 130607761.40625, CrossEntropy: 0.0031631251331418753, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 14106798.0, CrossEntropy: 2.2129411697387695, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 13.20832085609436\n",
      "Iter 1512 / 2000, Loss: 130334705.15625, CrossEntropy: 0.003051354782655835, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 13831267.0, CrossEntropy: 2.1791608333587646, Accuracy: 0.8175435126582279\n",
      "Elapsed time for the training: 12.913544654846191\n",
      "Iter 1513 / 2000, Loss: 131122495.9375, CrossEntropy: 0.0033814869821071625, Accuracy: 0.9989290281329923\n",
      "EVALUATION with last weights -> Loss: 14218498.0, CrossEntropy: 2.2270069122314453, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 12.888324499130249\n",
      "Iter 1514 / 2000, Loss: 131479787.9375, CrossEntropy: 0.003504330525174737, Accuracy: 0.9989609974424553\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13918613.0, CrossEntropy: 2.193979501724243, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.96157431602478\n",
      "Iter 1515 / 2000, Loss: 127578906.46875, CrossEntropy: 0.0019450285471975803, Accuracy: 0.9993406329923273\n",
      "EVALUATION with last weights -> Loss: 13836672.0, CrossEntropy: 2.1917386054992676, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.89678955078125\n",
      "Iter 1516 / 2000, Loss: 131467442.6875, CrossEntropy: 0.0034955639857798815, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 13862107.0, CrossEntropy: 2.2079057693481445, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 12.887868642807007\n",
      "Iter 1517 / 2000, Loss: 129353422.46875, CrossEntropy: 0.002648336114361882, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14199245.0, CrossEntropy: 2.245682954788208, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.884302854537964\n",
      "Iter 1518 / 2000, Loss: 129760918.25, CrossEntropy: 0.002809016266837716, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 14618647.0, CrossEntropy: 2.340672016143799, Accuracy: 0.8185324367088608\n",
      "Elapsed time for the training: 12.898844242095947\n",
      "Iter 1519 / 2000, Loss: 130490690.25, CrossEntropy: 0.0030986187048256397, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 14190552.0, CrossEntropy: 2.223098039627075, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.86824369430542\n",
      "Iter 1520 / 2000, Loss: 128962359.8125, CrossEntropy: 0.002547426614910364, Accuracy: 0.9991887787723785\n",
      "EVALUATION with last weights -> Loss: 14225360.0, CrossEntropy: 2.2373924255371094, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.923675537109375\n",
      "Iter 1521 / 2000, Loss: 128583914.46875, CrossEntropy: 0.002333414275199175, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14514353.0, CrossEntropy: 2.2846121788024902, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.869938373565674\n",
      "Iter 1522 / 2000, Loss: 129824513.6875, CrossEntropy: 0.002827454125508666, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14085300.0, CrossEntropy: 2.209886312484741, Accuracy: 0.8242681962025317\n",
      "Elapsed time for the training: 12.882102251052856\n",
      "Iter 1523 / 2000, Loss: 129841205.25, CrossEntropy: 0.00283235777169466, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13940942.0, CrossEntropy: 2.1820030212402344, Accuracy: 0.8260482594936709\n",
      "Elapsed time for the training: 12.917683362960815\n",
      "Iter 1524 / 2000, Loss: 131732329.03125, CrossEntropy: 0.0035852333530783653, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 13961082.0, CrossEntropy: 2.208031177520752, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.90355110168457\n",
      "Iter 1525 / 2000, Loss: 129064774.625, CrossEntropy: 0.00251672207377851, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14093173.0, CrossEntropy: 2.25759220123291, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.841344118118286\n",
      "Iter 1526 / 2000, Loss: 129722224.8125, CrossEntropy: 0.0028261111583560705, Accuracy: 0.999028932225064\n",
      "EVALUATION with last weights -> Loss: 15384405.0, CrossEntropy: 2.4240734577178955, Accuracy: 0.8152689873417721\n",
      "Elapsed time for the training: 12.929518938064575\n",
      "Iter 1527 / 2000, Loss: 130971292.03125, CrossEntropy: 0.0032749169040471315, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14512812.0, CrossEntropy: 2.2863364219665527, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.872850179672241\n",
      "Iter 1528 / 2000, Loss: 128819623.75, CrossEntropy: 0.002412155969068408, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 13593122.0, CrossEntropy: 2.1392576694488525, Accuracy: 0.8283227848101266\n",
      "Elapsed time for the training: 12.902773380279541\n",
      "Iter 1529 / 2000, Loss: 131234767.1875, CrossEntropy: 0.0033768964931368828, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 14060392.0, CrossEntropy: 2.2288429737091064, Accuracy: 0.8194224683544303\n",
      "Elapsed time for the training: 12.902015209197998\n",
      "Iter 1530 / 2000, Loss: 131053185.75, CrossEntropy: 0.0033009457401931286, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 13864688.0, CrossEntropy: 2.2196381092071533, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.910687685012817\n",
      "Iter 1531 / 2000, Loss: 129908410.875, CrossEntropy: 0.002841303125023842, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14382876.0, CrossEntropy: 2.290327548980713, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 12.919577360153198\n",
      "Iter 1532 / 2000, Loss: 129833303.21875, CrossEntropy: 0.00280962698161602, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14272781.0, CrossEntropy: 2.2643918991088867, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.896783590316772\n",
      "Iter 1533 / 2000, Loss: 127964886.1875, CrossEntropy: 0.002061085309833288, Accuracy: 0.9993606138107417\n",
      "EVALUATION with last weights -> Loss: 14134583.0, CrossEntropy: 2.2292518615722656, Accuracy: 0.8228837025316456\n",
      "Elapsed time for the training: 12.843682289123535\n",
      "Iter 1534 / 2000, Loss: 127892475.0, CrossEntropy: 0.0020301896147429943, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13783930.0, CrossEntropy: 2.1731526851654053, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 12.875293970108032\n",
      "Iter 1535 / 2000, Loss: 130443861.1875, CrossEntropy: 0.0030486981850117445, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 14531072.0, CrossEntropy: 2.28104567527771, Accuracy: 0.8201147151898734\n",
      "Elapsed time for the training: 12.858514070510864\n",
      "Iter 1536 / 2000, Loss: 127733894.375, CrossEntropy: 0.0019641246180981398, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14252060.0, CrossEntropy: 2.2582292556762695, Accuracy: 0.817246835443038\n",
      "Elapsed time for the training: 12.868809461593628\n",
      "Iter 1537 / 2000, Loss: 130743871.65625, CrossEntropy: 0.003178979968652129, Accuracy: 0.9990089514066497\n",
      "EVALUATION with last weights -> Loss: 15651445.0, CrossEntropy: 2.446744203567505, Accuracy: 0.805379746835443\n",
      "Elapsed time for the training: 12.861786842346191\n",
      "Iter 1538 / 2000, Loss: 130355475.65625, CrossEntropy: 0.0030079458374530077, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 13947342.0, CrossEntropy: 2.2202703952789307, Accuracy: 0.8229825949367089\n",
      "Elapsed time for the training: 12.909101486206055\n",
      "Iter 1539 / 2000, Loss: 132622355.0625, CrossEntropy: 0.003938233479857445, Accuracy: 0.9990089514066497\n",
      "EVALUATION with last weights -> Loss: 13882641.0, CrossEntropy: 2.1817214488983154, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.872144937515259\n",
      "Iter 1540 / 2000, Loss: 130680662.1875, CrossEntropy: 0.0031330932397395372, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 13676635.0, CrossEntropy: 2.1422879695892334, Accuracy: 0.8267405063291139\n",
      "Elapsed time for the training: 12.884939670562744\n",
      "Iter 1541 / 2000, Loss: 129107834.0, CrossEntropy: 0.002501311246305704, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14399412.0, CrossEntropy: 2.2789363861083984, Accuracy: 0.8197191455696202\n",
      "Elapsed time for the training: 12.89778447151184\n",
      "Iter 1542 / 2000, Loss: 131448924.59375, CrossEntropy: 0.0034334147348999977, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13704933.0, CrossEntropy: 2.1606645584106445, Accuracy: 0.8276305379746836\n",
      "Elapsed time for the training: 12.897127389907837\n",
      "Iter 1543 / 2000, Loss: 128106880.0625, CrossEntropy: 0.0020953721832484007, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14337773.0, CrossEntropy: 2.2403087615966797, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.894792318344116\n",
      "Iter 1544 / 2000, Loss: 128905424.0625, CrossEntropy: 0.0024149096570909023, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 13861790.0, CrossEntropy: 2.1660268306732178, Accuracy: 0.8215981012658228\n",
      "Elapsed time for the training: 12.910104990005493\n",
      "Iter 1545 / 2000, Loss: 128467925.71875, CrossEntropy: 0.0022363027092069387, Accuracy: 0.9992007672634271\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 13910139.0, CrossEntropy: 2.1738767623901367, Accuracy: 0.8289161392405063\n",
      "Elapsed time for the training: 12.879568099975586\n",
      "Iter 1546 / 2000, Loss: 129703299.46875, CrossEntropy: 0.0027300298679620028, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 14320111.0, CrossEntropy: 2.2387077808380127, Accuracy: 0.8267405063291139\n",
      "Elapsed time for the training: 12.707720041275024\n",
      "Iter 1547 / 2000, Loss: 133171362.3125, CrossEntropy: 0.004110556561499834, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 13833437.0, CrossEntropy: 2.161486864089966, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 11.780978679656982\n",
      "Iter 1548 / 2000, Loss: 129693118.09375, CrossEntropy: 0.002718118019402027, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14432896.0, CrossEntropy: 2.3008317947387695, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 13.257866859436035\n",
      "Iter 1549 / 2000, Loss: 131716217.4375, CrossEntropy: 0.003525255946442485, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 14299261.0, CrossEntropy: 2.253575086593628, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 13.67487120628357\n",
      "Iter 1550 / 2000, Loss: 129743436.6875, CrossEntropy: 0.0027342699468135834, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14380165.0, CrossEntropy: 2.265470027923584, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 13.329630136489868\n",
      "Iter 1551 / 2000, Loss: 128539007.875, CrossEntropy: 0.00225063250400126, Accuracy: 0.9993406329923273\n",
      "EVALUATION with last weights -> Loss: 14355149.0, CrossEntropy: 2.288184881210327, Accuracy: 0.8182357594936709\n",
      "Elapsed time for the training: 12.903814792633057\n",
      "Iter 1552 / 2000, Loss: 133257828.8125, CrossEntropy: 0.004133341833949089, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 13625890.0, CrossEntropy: 2.147979497909546, Accuracy: 0.8267405063291139\n",
      "Elapsed time for the training: 12.906887292861938\n",
      "Iter 1553 / 2000, Loss: 128530606.125, CrossEntropy: 0.002242351183667779, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 14116420.0, CrossEntropy: 2.2258553504943848, Accuracy: 0.8235759493670886\n",
      "Elapsed time for the training: 12.91584324836731\n",
      "Iter 1554 / 2000, Loss: 129975926.3125, CrossEntropy: 0.002817910397425294, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14629738.0, CrossEntropy: 2.3239376544952393, Accuracy: 0.8224881329113924\n",
      "Elapsed time for the training: 12.895163536071777\n",
      "Iter 1555 / 2000, Loss: 128775598.21875, CrossEntropy: 0.0023372292052954435, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14333640.0, CrossEntropy: 2.2409350872039795, Accuracy: 0.8190268987341772\n",
      "Elapsed time for the training: 12.87932825088501\n",
      "Iter 1556 / 2000, Loss: 127583864.71875, CrossEntropy: 0.001860845717601478, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 14741457.0, CrossEntropy: 2.328383207321167, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 13.596592664718628\n",
      "Iter 1557 / 2000, Loss: 128394483.84375, CrossEntropy: 0.002183360978960991, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 13378989.0, CrossEntropy: 2.1234242916107178, Accuracy: 0.8240704113924051\n",
      "Elapsed time for the training: 13.118303537368774\n",
      "Iter 1558 / 2000, Loss: 129151402.03125, CrossEntropy: 0.002484241733327508, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 14410396.0, CrossEntropy: 2.2680068016052246, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 14.673301935195923\n",
      "Iter 1559 / 2000, Loss: 130381680.0, CrossEntropy: 0.0029740179888904095, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13698170.0, CrossEntropy: 2.1701996326446533, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 13.575857162475586\n",
      "Iter 1560 / 2000, Loss: 127198818.5625, CrossEntropy: 0.001702398294582963, Accuracy: 0.999440537084399\n",
      "EVALUATION with last weights -> Loss: 14042833.0, CrossEntropy: 2.22243070602417, Accuracy: 0.8287183544303798\n",
      "Elapsed time for the training: 12.869268655776978\n",
      "Iter 1561 / 2000, Loss: 132184367.0625, CrossEntropy: 0.0037021818570792675, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 15218649.0, CrossEntropy: 2.3949975967407227, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.907076597213745\n",
      "Iter 1562 / 2000, Loss: 130708992.90625, CrossEntropy: 0.003099467372521758, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 14131210.0, CrossEntropy: 2.2428667545318604, Accuracy: 0.8222903481012658\n",
      "Elapsed time for the training: 12.945595264434814\n",
      "Iter 1563 / 2000, Loss: 129377174.28125, CrossEntropy: 0.0025657033547759056, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13802067.0, CrossEntropy: 2.1798477172851562, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.906490802764893\n",
      "Iter 1564 / 2000, Loss: 129301030.9375, CrossEntropy: 0.0025344956666231155, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 14259012.0, CrossEntropy: 2.2413625717163086, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.886725187301636\n",
      "Iter 1565 / 2000, Loss: 129372278.71875, CrossEntropy: 0.0025691494811326265, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 14109813.0, CrossEntropy: 2.235121011734009, Accuracy: 0.8243670886075949\n",
      "Elapsed time for the training: 12.836130380630493\n",
      "Iter 1566 / 2000, Loss: 129346646.5625, CrossEntropy: 0.0025490964762866497, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13941840.0, CrossEntropy: 2.201206684112549, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.939090728759766\n",
      "Iter 1567 / 2000, Loss: 128537158.5, CrossEntropy: 0.0022425255738198757, Accuracy: 0.9991687979539642\n",
      "EVALUATION with last weights -> Loss: 14518848.0, CrossEntropy: 2.288113832473755, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 13.134839534759521\n",
      "Iter 1568 / 2000, Loss: 131628534.71875, CrossEntropy: 0.0034576740581542253, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 14214110.0, CrossEntropy: 2.2441611289978027, Accuracy: 0.8211036392405063\n",
      "Elapsed time for the training: 13.005600452423096\n",
      "Iter 1569 / 2000, Loss: 127810657.65625, CrossEntropy: 0.001929442398250103, Accuracy: 0.9994005754475703\n",
      "EVALUATION with last weights -> Loss: 15366525.0, CrossEntropy: 2.433436870574951, Accuracy: 0.8086431962025317\n",
      "Elapsed time for the training: 12.921728610992432\n",
      "Iter 1570 / 2000, Loss: 132279939.5, CrossEntropy: 0.003714173100888729, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13771861.0, CrossEntropy: 2.1777806282043457, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.880272626876831\n",
      "Iter 1571 / 2000, Loss: 129568227.9375, CrossEntropy: 0.0026317271403968334, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 14079691.0, CrossEntropy: 2.2228050231933594, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.849766492843628\n",
      "Iter 1572 / 2000, Loss: 129051852.875, CrossEntropy: 0.002419991884380579, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14059294.0, CrossEntropy: 2.2120423316955566, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.883028745651245\n",
      "Iter 1573 / 2000, Loss: 130491030.375, CrossEntropy: 0.0030055639799684286, Accuracy: 0.9992687020460358\n",
      "EVALUATION with last weights -> Loss: 15310481.0, CrossEntropy: 2.453636646270752, Accuracy: 0.8126977848101266\n",
      "Elapsed time for the training: 12.881120681762695\n",
      "Iter 1574 / 2000, Loss: 130370571.375, CrossEntropy: 0.0029477656353265047, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 13956241.0, CrossEntropy: 2.207618474960327, Accuracy: 0.8204113924050633\n",
      "Elapsed time for the training: 12.879429817199707\n",
      "Iter 1575 / 2000, Loss: 129054463.875, CrossEntropy: 0.002415621420368552, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14333105.0, CrossEntropy: 2.2836434841156006, Accuracy: 0.8110166139240507\n",
      "Elapsed time for the training: 13.32332158088684\n",
      "Iter 1576 / 2000, Loss: 128595991.1875, CrossEntropy: 0.002231164136901498, Accuracy: 0.9992407289002557\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 14224804.0, CrossEntropy: 2.26560640335083, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 13.733510971069336\n",
      "Iter 1577 / 2000, Loss: 131585021.8125, CrossEntropy: 0.00342315505258739, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 14179382.0, CrossEntropy: 2.2432944774627686, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 13.446181058883667\n",
      "Iter 1578 / 2000, Loss: 131251858.46875, CrossEntropy: 0.0032875591423362494, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 14948963.0, CrossEntropy: 2.346926212310791, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.885347366333008\n",
      "Iter 1579 / 2000, Loss: 128238622.78125, CrossEntropy: 0.0020826486870646477, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 13999172.0, CrossEntropy: 2.2117245197296143, Accuracy: 0.8227848101265823\n",
      "Elapsed time for the training: 12.908156633377075\n",
      "Iter 1580 / 2000, Loss: 128361542.0, CrossEntropy: 0.0021288683637976646, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 14107399.0, CrossEntropy: 2.2057762145996094, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.866634607315063\n",
      "Iter 1581 / 2000, Loss: 131605543.5625, CrossEntropy: 0.0034228237345814705, Accuracy: 0.9989210358056266\n",
      "EVALUATION with last weights -> Loss: 13874674.0, CrossEntropy: 2.1931092739105225, Accuracy: 0.821993670886076\n",
      "Elapsed time for the training: 12.953498601913452\n",
      "Iter 1582 / 2000, Loss: 128916114.125, CrossEntropy: 0.0023470597807317972, Accuracy: 0.9993406329923273\n",
      "EVALUATION with last weights -> Loss: 13941524.0, CrossEntropy: 2.1869802474975586, Accuracy: 0.8299050632911392\n",
      "Elapsed time for the training: 12.889799118041992\n",
      "Iter 1583 / 2000, Loss: 128897120.90625, CrossEntropy: 0.0023386836983263493, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 13713399.0, CrossEntropy: 2.1579484939575195, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.699482202529907\n",
      "Iter 1584 / 2000, Loss: 129062614.3125, CrossEntropy: 0.0024040022399276495, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 14215974.0, CrossEntropy: 2.2270712852478027, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.933129072189331\n",
      "Iter 1585 / 2000, Loss: 131158252.15625, CrossEntropy: 0.0032381301280111074, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 14058929.0, CrossEntropy: 2.2183403968811035, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.89663028717041\n",
      "Iter 1586 / 2000, Loss: 130033115.75, CrossEntropy: 0.002787039615213871, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14727110.0, CrossEntropy: 2.33864688873291, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 12.879847764968872\n",
      "Iter 1587 / 2000, Loss: 127783944.15625, CrossEntropy: 0.001885759411379695, Accuracy: 0.9993606138107417\n",
      "EVALUATION with last weights -> Loss: 14927341.0, CrossEntropy: 2.3468780517578125, Accuracy: 0.8160601265822784\n",
      "Elapsed time for the training: 13.002249717712402\n",
      "Iter 1588 / 2000, Loss: 132154458.03125, CrossEntropy: 0.0036295356694608927, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 14041866.0, CrossEntropy: 2.2010717391967773, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 13.08200216293335\n",
      "Iter 1589 / 2000, Loss: 126517785.03125, CrossEntropy: 0.0013769810320809484, Accuracy: 0.9995804028132992\n",
      "EVALUATION with last weights -> Loss: 13832698.0, CrossEntropy: 2.194469451904297, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.892924070358276\n",
      "Iter 1590 / 2000, Loss: 129089352.09375, CrossEntropy: 0.002404175465926528, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14694916.0, CrossEntropy: 2.3265597820281982, Accuracy: 0.821004746835443\n",
      "Elapsed time for the training: 12.899009943008423\n",
      "Iter 1591 / 2000, Loss: 129809920.875, CrossEntropy: 0.002689813729375601, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 13963259.0, CrossEntropy: 2.1909022331237793, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.896102905273438\n",
      "Iter 1592 / 2000, Loss: 128278438.03125, CrossEntropy: 0.002076231176033616, Accuracy: 0.9994205562659847\n",
      "EVALUATION with last weights -> Loss: 14679630.0, CrossEntropy: 2.310413360595703, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.916677236557007\n",
      "Iter 1593 / 2000, Loss: 129299373.625, CrossEntropy: 0.0024825057480484247, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 15452771.0, CrossEntropy: 2.4314253330230713, Accuracy: 0.8167523734177216\n",
      "Elapsed time for the training: 12.947828769683838\n",
      "Iter 1594 / 2000, Loss: 131719604.875, CrossEntropy: 0.0034476551227271557, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14354771.0, CrossEntropy: 2.270604133605957, Accuracy: 0.8249604430379747\n",
      "Elapsed time for the training: 12.902744770050049\n",
      "Iter 1595 / 2000, Loss: 127212907.03125, CrossEntropy: 0.0016450796974822879, Accuracy: 0.9994804987212276\n",
      "EVALUATION with last weights -> Loss: 14568964.0, CrossEntropy: 2.34328293800354, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.921195983886719\n",
      "Iter 1596 / 2000, Loss: 131000729.25, CrossEntropy: 0.0031563336960971355, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14405114.0, CrossEntropy: 2.255671739578247, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 12.891247272491455\n",
      "Iter 1597 / 2000, Loss: 128654959.90625, CrossEntropy: 0.0022184911649674177, Accuracy: 0.9993606138107417\n",
      "EVALUATION with last weights -> Loss: 14768030.0, CrossEntropy: 2.4004571437835693, Accuracy: 0.8187302215189873\n",
      "Elapsed time for the training: 12.87899661064148\n",
      "Iter 1598 / 2000, Loss: 129989983.03125, CrossEntropy: 0.002753208391368389, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 14742733.0, CrossEntropy: 2.324692964553833, Accuracy: 0.825751582278481\n",
      "Elapsed time for the training: 12.90903615951538\n",
      "Iter 1599 / 2000, Loss: 128424245.5, CrossEntropy: 0.0021567484363913536, Accuracy: 0.9993885869565218\n",
      "EVALUATION with last weights -> Loss: 14813129.0, CrossEntropy: 2.3237695693969727, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.886433362960815\n",
      "Iter 1600 / 2000, Loss: 130471504.875, CrossEntropy: 0.0029431895818561316, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14116213.0, CrossEntropy: 2.216892957687378, Accuracy: 0.8302017405063291\n",
      "Elapsed time for the training: 12.888973712921143\n",
      "Iter 1601 / 2000, Loss: 130233061.8125, CrossEntropy: 0.002843025140464306, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 14357757.0, CrossEntropy: 2.295562267303467, Accuracy: 0.8253560126582279\n",
      "Elapsed time for the training: 12.913701057434082\n",
      "Iter 1602 / 2000, Loss: 130550847.875, CrossEntropy: 0.0030046014580875635, Accuracy: 0.999028932225064\n",
      "EVALUATION with last weights -> Loss: 15329979.0, CrossEntropy: 2.4142117500305176, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.90027379989624\n",
      "Iter 1603 / 2000, Loss: 131095522.6875, CrossEntropy: 0.0031829550862312317, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14154025.0, CrossEntropy: 2.2782018184661865, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.873270750045776\n",
      "Iter 1604 / 2000, Loss: 130834181.5625, CrossEntropy: 0.003075098153203726, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14832814.0, CrossEntropy: 2.3647773265838623, Accuracy: 0.8218947784810127\n",
      "Elapsed time for the training: 12.90596055984497\n",
      "Iter 1605 / 2000, Loss: 133013087.75, CrossEntropy: 0.0039425985887646675, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14109448.0, CrossEntropy: 2.2357776165008545, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 13.623514413833618\n",
      "Iter 1606 / 2000, Loss: 131403546.34375, CrossEntropy: 0.003296260256320238, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 14373234.0, CrossEntropy: 2.27477765083313, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 12.915201902389526\n",
      "Iter 1607 / 2000, Loss: 128501739.53125, CrossEntropy: 0.002134779468178749, Accuracy: 0.9993606138107417\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 14640762.0, CrossEntropy: 2.308701276779175, Accuracy: 0.8247626582278481\n",
      "Elapsed time for the training: 12.911082744598389\n",
      "Iter 1608 / 2000, Loss: 128941545.6875, CrossEntropy: 0.00230865809135139, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 14055935.0, CrossEntropy: 2.23616623878479, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.959197044372559\n",
      "Iter 1609 / 2000, Loss: 130295281.84375, CrossEntropy: 0.0028484645299613476, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14479385.0, CrossEntropy: 2.2769782543182373, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.896148204803467\n",
      "Iter 1610 / 2000, Loss: 130199991.3125, CrossEntropy: 0.002806890057399869, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 14430291.0, CrossEntropy: 2.2665328979492188, Accuracy: 0.8245648734177216\n",
      "Elapsed time for the training: 12.872199296951294\n",
      "Iter 1611 / 2000, Loss: 131150744.0, CrossEntropy: 0.003190641989931464, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14655529.0, CrossEntropy: 2.316333532333374, Accuracy: 0.8255537974683544\n",
      "Elapsed time for the training: 12.24229884147644\n",
      "Iter 1612 / 2000, Loss: 130688934.375, CrossEntropy: 0.0029984123539179564, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 14185425.0, CrossEntropy: 2.254232406616211, Accuracy: 0.822685917721519\n",
      "Elapsed time for the training: 11.797350645065308\n",
      "Iter 1613 / 2000, Loss: 129234718.46875, CrossEntropy: 0.0024171974509954453, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14822943.0, CrossEntropy: 2.343688726425171, Accuracy: 0.8262460443037974\n",
      "Elapsed time for the training: 12.300286769866943\n",
      "Iter 1614 / 2000, Loss: 129021994.0, CrossEntropy: 0.0023297208826988935, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14551692.0, CrossEntropy: 2.339116334915161, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.868637084960938\n",
      "Iter 1615 / 2000, Loss: 131913757.90625, CrossEntropy: 0.0034824751783162355, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 14698547.0, CrossEntropy: 2.320235013961792, Accuracy: 0.8238726265822784\n",
      "Elapsed time for the training: 12.888137578964233\n",
      "Iter 1616 / 2000, Loss: 130672310.53125, CrossEntropy: 0.002984832040965557, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 14393754.0, CrossEntropy: 2.274188280105591, Accuracy: 0.823378164556962\n",
      "Elapsed time for the training: 12.937283515930176\n",
      "Iter 1617 / 2000, Loss: 130159036.84375, CrossEntropy: 0.002780501265078783, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 14966689.0, CrossEntropy: 2.3497226238250732, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.888759136199951\n",
      "Iter 1618 / 2000, Loss: 129416878.875, CrossEntropy: 0.0024901193100959063, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 15109814.0, CrossEntropy: 2.4191694259643555, Accuracy: 0.8254549050632911\n",
      "Elapsed time for the training: 12.982288122177124\n",
      "Iter 1619 / 2000, Loss: 129532279.46875, CrossEntropy: 0.002525015501305461, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14624992.0, CrossEntropy: 2.314978837966919, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.944799661636353\n",
      "Iter 1620 / 2000, Loss: 132064847.25, CrossEntropy: 0.0035360197070986032, Accuracy: 0.9988810741687979\n",
      "EVALUATION with last weights -> Loss: 14933158.0, CrossEntropy: 2.360926389694214, Accuracy: 0.8199169303797469\n",
      "Elapsed time for the training: 12.879637002944946\n",
      "Iter 1621 / 2000, Loss: 129294737.875, CrossEntropy: 0.0024260766804218292, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 14226335.0, CrossEntropy: 2.2371604442596436, Accuracy: 0.8276305379746836\n",
      "Elapsed time for the training: 12.887807607650757\n",
      "Iter 1622 / 2000, Loss: 129511703.03125, CrossEntropy: 0.0025145208928734064, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 14517221.0, CrossEntropy: 2.3202595710754395, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.909005880355835\n",
      "Iter 1623 / 2000, Loss: 130715116.5, CrossEntropy: 0.002990107284858823, Accuracy: 0.9989410166240409\n",
      "EVALUATION with last weights -> Loss: 15045921.0, CrossEntropy: 2.398347854614258, Accuracy: 0.8273338607594937\n",
      "Elapsed time for the training: 12.906505346298218\n",
      "Iter 1624 / 2000, Loss: 128919396.03125, CrossEntropy: 0.00227070483379066, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14862821.0, CrossEntropy: 2.34635853767395, Accuracy: 0.8203125\n",
      "Elapsed time for the training: 12.93398666381836\n",
      "Iter 1625 / 2000, Loss: 128880980.125, CrossEntropy: 0.002253727288916707, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14679420.0, CrossEntropy: 2.3147263526916504, Accuracy: 0.8223892405063291\n",
      "Elapsed time for the training: 12.886725187301636\n",
      "Iter 1626 / 2000, Loss: 128600461.96875, CrossEntropy: 0.002140514785423875, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14753026.0, CrossEntropy: 2.306574821472168, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 12.908953189849854\n",
      "Iter 1627 / 2000, Loss: 132131573.78125, CrossEntropy: 0.003549360204488039, Accuracy: 0.9989010549872123\n",
      "EVALUATION with last weights -> Loss: 15098542.0, CrossEntropy: 2.3592514991760254, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 13.019306421279907\n",
      "Iter 1628 / 2000, Loss: 129088940.25, CrossEntropy: 0.0023318862076848745, Accuracy: 0.9993206521739131\n",
      "EVALUATION with last weights -> Loss: 14646007.0, CrossEntropy: 2.339637279510498, Accuracy: 0.8195213607594937\n",
      "Elapsed time for the training: 12.935667753219604\n",
      "Iter 1629 / 2000, Loss: 132674946.9375, CrossEntropy: 0.0037628700956702232, Accuracy: 0.998761189258312\n",
      "EVALUATION with last weights -> Loss: 15016977.0, CrossEntropy: 2.399310350418091, Accuracy: 0.8205102848101266\n",
      "Elapsed time for the training: 12.897339344024658\n",
      "Iter 1630 / 2000, Loss: 129535119.5, CrossEntropy: 0.002507501747459173, Accuracy: 0.9991208439897699\n",
      "EVALUATION with last weights -> Loss: 14410584.0, CrossEntropy: 2.2614011764526367, Accuracy: 0.8236748417721519\n",
      "Elapsed time for the training: 12.977234125137329\n",
      "Iter 1631 / 2000, Loss: 128381533.5, CrossEntropy: 0.002042098669335246, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14387069.0, CrossEntropy: 2.2659358978271484, Accuracy: 0.8234770569620253\n",
      "Elapsed time for the training: 13.274876594543457\n",
      "Iter 1632 / 2000, Loss: 129073245.25, CrossEntropy: 0.0023174495436251163, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 14975999.0, CrossEntropy: 2.3412063121795654, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.897384405136108\n",
      "Iter 1633 / 2000, Loss: 130565528.34375, CrossEntropy: 0.0029117236845195293, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14699681.0, CrossEntropy: 2.3348987102508545, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.882673263549805\n",
      "Iter 1634 / 2000, Loss: 129638298.3125, CrossEntropy: 0.0025406447239220142, Accuracy: 0.9991608056265985\n",
      "EVALUATION with last weights -> Loss: 14223270.0, CrossEntropy: 2.274718761444092, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.884208679199219\n",
      "Iter 1635 / 2000, Loss: 129415985.28125, CrossEntropy: 0.0024659479968249798, Accuracy: 0.9992087595907929\n",
      "EVALUATION with last weights -> Loss: 14996317.0, CrossEntropy: 2.407130718231201, Accuracy: 0.8150712025316456\n",
      "Elapsed time for the training: 12.947010278701782\n",
      "Iter 1636 / 2000, Loss: 130921549.4375, CrossEntropy: 0.0030477263499051332, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14619161.0, CrossEntropy: 2.307708978652954, Accuracy: 0.817939082278481\n",
      "Elapsed time for the training: 12.890741109848022\n",
      "Iter 1637 / 2000, Loss: 128221395.625, CrossEntropy: 0.001966977957636118, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14190821.0, CrossEntropy: 2.238039255142212, Accuracy: 0.8256526898734177\n",
      "Elapsed time for the training: 13.011611700057983\n",
      "Iter 1638 / 2000, Loss: 128586662.59375, CrossEntropy: 0.002112079644575715, Accuracy: 0.9993006713554987\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 14115769.0, CrossEntropy: 2.2247812747955322, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.913733720779419\n",
      "Iter 1639 / 2000, Loss: 130044710.28125, CrossEntropy: 0.0026935264468193054, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14091796.0, CrossEntropy: 2.2300026416778564, Accuracy: 0.8241693037974683\n",
      "Elapsed time for the training: 12.934064149856567\n",
      "Iter 1640 / 2000, Loss: 129633089.75, CrossEntropy: 0.002527024131268263, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 14009894.0, CrossEntropy: 2.200033187866211, Accuracy: 0.8252571202531646\n",
      "Elapsed time for the training: 12.904029130935669\n",
      "Iter 1641 / 2000, Loss: 130461707.5625, CrossEntropy: 0.0028563046362251043, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 15067161.0, CrossEntropy: 2.414396286010742, Accuracy: 0.8178401898734177\n",
      "Elapsed time for the training: 12.938419103622437\n",
      "Iter 1642 / 2000, Loss: 129312474.625, CrossEntropy: 0.002395085757598281, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14487932.0, CrossEntropy: 2.2725512981414795, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.933311462402344\n",
      "Iter 1643 / 2000, Loss: 127768693.8125, CrossEntropy: 0.0017785216914489865, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 14651772.0, CrossEntropy: 2.3118772506713867, Accuracy: 0.8213014240506329\n",
      "Elapsed time for the training: 12.76262879371643\n",
      "Iter 1644 / 2000, Loss: 131089324.6875, CrossEntropy: 0.0031032711267471313, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 14439486.0, CrossEntropy: 2.2932560443878174, Accuracy: 0.8214992088607594\n",
      "Elapsed time for the training: 12.8631751537323\n",
      "Iter 1645 / 2000, Loss: 131895626.6875, CrossEntropy: 0.0034267024602741003, Accuracy: 0.9990209398976982\n",
      "EVALUATION with last weights -> Loss: 14548861.0, CrossEntropy: 2.2892682552337646, Accuracy: 0.8209058544303798\n",
      "Elapsed time for the training: 12.891988754272461\n",
      "Iter 1646 / 2000, Loss: 129066497.25, CrossEntropy: 0.00229212106205523, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14668021.0, CrossEntropy: 2.3040473461151123, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.905709743499756\n",
      "Iter 1647 / 2000, Loss: 130241755.875, CrossEntropy: 0.0027588598895817995, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 14662438.0, CrossEntropy: 2.294738292694092, Accuracy: 0.8198180379746836\n",
      "Elapsed time for the training: 12.971885681152344\n",
      "Iter 1648 / 2000, Loss: 129366488.96875, CrossEntropy: 0.0024068537168204784, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 14220292.0, CrossEntropy: 2.241661310195923, Accuracy: 0.8265427215189873\n",
      "Elapsed time for the training: 12.941162347793579\n",
      "Iter 1649 / 2000, Loss: 128567856.0625, CrossEntropy: 0.0020922671537846327, Accuracy: 0.9993006713554987\n",
      "EVALUATION with last weights -> Loss: 14547079.0, CrossEntropy: 2.2775375843048096, Accuracy: 0.8258504746835443\n",
      "Elapsed time for the training: 12.878623008728027\n",
      "Iter 1650 / 2000, Loss: 129156204.90625, CrossEntropy: 0.002320921281352639, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 14352263.0, CrossEntropy: 2.2561910152435303, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.8991539478302\n",
      "Iter 1651 / 2000, Loss: 129199681.6875, CrossEntropy: 0.0023377910256385803, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14661010.0, CrossEntropy: 2.3260140419006348, Accuracy: 0.8244659810126582\n",
      "Elapsed time for the training: 12.844552040100098\n",
      "Iter 1652 / 2000, Loss: 130811812.1875, CrossEntropy: 0.002978817792609334, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 14317262.0, CrossEntropy: 2.2375190258026123, Accuracy: 0.8275316455696202\n",
      "Elapsed time for the training: 12.893869161605835\n",
      "Iter 1653 / 2000, Loss: 129345809.1875, CrossEntropy: 0.0024443098809570074, Accuracy: 0.9992287404092072\n",
      "EVALUATION with last weights -> Loss: 14359149.0, CrossEntropy: 2.261770725250244, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 12.87146782875061\n",
      "Iter 1654 / 2000, Loss: 130653604.34375, CrossEntropy: 0.0029122999403625727, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 16039885.0, CrossEntropy: 2.543783664703369, Accuracy: 0.8212025316455697\n",
      "Elapsed time for the training: 15.162278413772583\n",
      "Iter 1655 / 2000, Loss: 129183216.5625, CrossEntropy: 0.0023414972238242626, Accuracy: 0.9992087595907929\n",
      "EVALUATION with last weights -> Loss: 15012531.0, CrossEntropy: 2.379983425140381, Accuracy: 0.8068631329113924\n",
      "Elapsed time for the training: 12.837849855422974\n",
      "Iter 1656 / 2000, Loss: 132023657.34375, CrossEntropy: 0.003454463789239526, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14011948.0, CrossEntropy: 2.2226040363311768, Accuracy: 0.8216969936708861\n",
      "Elapsed time for the training: 12.879651069641113\n",
      "Iter 1657 / 2000, Loss: 129606364.0, CrossEntropy: 0.0024860233534127474, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 14274325.0, CrossEntropy: 2.243593454360962, Accuracy: 0.8217958860759493\n",
      "Elapsed time for the training: 12.881601810455322\n",
      "Iter 1658 / 2000, Loss: 131597891.59375, CrossEntropy: 0.003279336029663682, Accuracy: 0.9990808823529411\n",
      "EVALUATION with last weights -> Loss: 14504272.0, CrossEntropy: 2.2801475524902344, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 14.74629020690918\n",
      "Iter 1659 / 2000, Loss: 129727734.5, CrossEntropy: 0.0025304295122623444, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14565909.0, CrossEntropy: 2.3069746494293213, Accuracy: 0.8176424050632911\n",
      "Elapsed time for the training: 15.187480449676514\n",
      "Iter 1660 / 2000, Loss: 130156532.0625, CrossEntropy: 0.002698985394090414, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 14014879.0, CrossEntropy: 2.2587521076202393, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 12.899307012557983\n",
      "Iter 1661 / 2000, Loss: 130569825.5625, CrossEntropy: 0.0028640853706747293, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 14269769.0, CrossEntropy: 2.2481584548950195, Accuracy: 0.8171479430379747\n",
      "Elapsed time for the training: 12.878568649291992\n",
      "Iter 1662 / 2000, Loss: 129885257.53125, CrossEntropy: 0.002589428797364235, Accuracy: 0.9992806905370843\n",
      "EVALUATION with last weights -> Loss: 14473152.0, CrossEntropy: 2.2714526653289795, Accuracy: 0.8191257911392406\n",
      "Elapsed time for the training: 12.850045919418335\n",
      "Iter 1663 / 2000, Loss: 129887125.6875, CrossEntropy: 0.0025885531213134527, Accuracy: 0.9992207480818415\n",
      "EVALUATION with last weights -> Loss: 14403855.0, CrossEntropy: 2.297860622406006, Accuracy: 0.8206091772151899\n",
      "Elapsed time for the training: 12.871965169906616\n",
      "Iter 1664 / 2000, Loss: 128181003.875, CrossEntropy: 0.001905572833493352, Accuracy: 0.999380594629156\n",
      "EVALUATION with last weights -> Loss: 14708736.0, CrossEntropy: 2.338862419128418, Accuracy: 0.8208069620253164\n",
      "Elapsed time for the training: 13.006762027740479\n",
      "Iter 1665 / 2000, Loss: 134265711.84375, CrossEntropy: 0.0043878937140107155, Accuracy: 0.9987092391304349\n",
      "EVALUATION with last weights -> Loss: 14990084.0, CrossEntropy: 2.3562638759613037, Accuracy: 0.8189280063291139\n",
      "Elapsed time for the training: 12.945571422576904\n",
      "Iter 1666 / 2000, Loss: 128930027.8125, CrossEntropy: 0.0021987799555063248, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14561747.0, CrossEntropy: 2.28682541847229, Accuracy: 0.8232792721518988\n",
      "Elapsed time for the training: 12.910564184188843\n",
      "Iter 1667 / 2000, Loss: 131884326.40625, CrossEntropy: 0.0033784983679652214, Accuracy: 0.9990409207161125\n",
      "EVALUATION with last weights -> Loss: 14266088.0, CrossEntropy: 2.234388828277588, Accuracy: 0.8276305379746836\n",
      "Elapsed time for the training: 12.885968685150146\n",
      "Iter 1668 / 2000, Loss: 130449437.59375, CrossEntropy: 0.002803029492497444, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 14686970.0, CrossEntropy: 2.33396577835083, Accuracy: 0.8230814873417721\n",
      "Elapsed time for the training: 12.936872243881226\n",
      "Iter 1669 / 2000, Loss: 130077301.9375, CrossEntropy: 0.0026954887434840202, Accuracy: 0.9992087595907929\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with last weights -> Loss: 14862806.0, CrossEntropy: 2.3345894813537598, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 13.231553316116333\n",
      "Iter 1670 / 2000, Loss: 130170364.875, CrossEntropy: 0.0026870027650147676, Accuracy: 0.9992607097186701\n",
      "EVALUATION with last weights -> Loss: 13999218.0, CrossEntropy: 2.2001309394836426, Accuracy: 0.8259493670886076\n",
      "Elapsed time for the training: 12.92217469215393\n",
      "Iter 1671 / 2000, Loss: 132967195.03125, CrossEntropy: 0.0038309749215841293, Accuracy: 0.9987891624040921\n",
      "EVALUATION with last weights -> Loss: 14327347.0, CrossEntropy: 2.244337558746338, Accuracy: 0.8221914556962026\n",
      "Elapsed time for the training: 12.883737802505493\n",
      "Iter 1672 / 2000, Loss: 128750816.15625, CrossEntropy: 0.002148075494915247, Accuracy: 0.9992886828644502\n",
      "EVALUATION with last weights -> Loss: 15104851.0, CrossEntropy: 2.415748357772827, Accuracy: 0.8166534810126582\n",
      "Elapsed time for the training: 12.900819778442383\n",
      "Iter 1673 / 2000, Loss: 131562976.71875, CrossEntropy: 0.0032371224369853735, Accuracy: 0.9989809782608695\n",
      "EVALUATION with last weights -> Loss: 14052851.0, CrossEntropy: 2.2157061100006104, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.90951418876648\n",
      "Iter 1674 / 2000, Loss: 128818543.5625, CrossEntropy: 0.0021995140705257654, Accuracy: 0.9992886828644502\n",
      "EVALUATION with last weights -> Loss: 16357678.0, CrossEntropy: 2.5650012493133545, Accuracy: 0.8159612341772152\n",
      "Elapsed time for the training: 12.90572714805603\n",
      "Iter 1675 / 2000, Loss: 130701343.15625, CrossEntropy: 0.002890006871894002, Accuracy: 0.9990609015345269\n",
      "EVALUATION with last weights -> Loss: 14625052.0, CrossEntropy: 2.3755970001220703, Accuracy: 0.8200158227848101\n",
      "Elapsed time for the training: 12.913956880569458\n",
      "Iter 1676 / 2000, Loss: 128288799.0625, CrossEntropy: 0.0019245647126808763, Accuracy: 0.9995004795396419\n",
      "EVALUATION with last weights -> Loss: 14463080.0, CrossEntropy: 2.2918734550476074, Accuracy: 0.8239715189873418\n",
      "Elapsed time for the training: 12.942235708236694\n",
      "Iter 1677 / 2000, Loss: 131203702.125, CrossEntropy: 0.0031230300664901733, Accuracy: 0.9990489130434783\n",
      "EVALUATION with last weights -> Loss: 15435425.0, CrossEntropy: 2.4489974975585938, Accuracy: 0.8142800632911392\n",
      "Elapsed time for the training: 12.91313362121582\n",
      "Iter 1678 / 2000, Loss: 133222400.21875, CrossEntropy: 0.0038918363861739635, Accuracy: 0.9991008631713555\n",
      "EVALUATION with last weights -> Loss: 14889273.0, CrossEntropy: 2.3376624584198, Accuracy: 0.8177412974683544\n",
      "Elapsed time for the training: 12.894266366958618\n",
      "Iter 1679 / 2000, Loss: 128902425.5625, CrossEntropy: 0.0021647834219038486, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14259754.0, CrossEntropy: 2.272118091583252, Accuracy: 0.8202136075949367\n",
      "Elapsed time for the training: 15.490966796875\n",
      "Iter 1680 / 2000, Loss: 129854013.0625, CrossEntropy: 0.0025414577685296535, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14554864.0, CrossEntropy: 2.30025315284729, Accuracy: 0.8207080696202531\n",
      "Elapsed time for the training: 15.502100229263306\n",
      "Iter 1681 / 2000, Loss: 130711036.1875, CrossEntropy: 0.0028828110080212355, Accuracy: 0.9991408248081841\n",
      "EVALUATION with last weights -> Loss: 13965479.0, CrossEntropy: 2.1963295936584473, Accuracy: 0.8245648734177216\n",
      "Elapsed time for the training: 15.646567106246948\n",
      "Iter 1682 / 2000, Loss: 133647563.625, CrossEntropy: 0.004053538665175438, Accuracy: 0.9988610933503836\n",
      "EVALUATION with last weights -> Loss: 15715374.0, CrossEntropy: 2.467480182647705, Accuracy: 0.8165545886075949\n",
      "Elapsed time for the training: 15.192466259002686\n",
      "Iter 1683 / 2000, Loss: 131333373.6875, CrossEntropy: 0.0031392716336995363, Accuracy: 0.9990888746803069\n",
      "EVALUATION with last weights -> Loss: 14487970.0, CrossEntropy: 2.2857000827789307, Accuracy: 0.8214003164556962\n",
      "Elapsed time for the training: 12.819427251815796\n",
      "Iter 1684 / 2000, Loss: 128859366.3125, CrossEntropy: 0.002136450493708253, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14263779.0, CrossEntropy: 2.2686569690704346, Accuracy: 0.828817246835443\n",
      "Elapsed time for the training: 12.90897011756897\n",
      "Iter 1685 / 2000, Loss: 129457023.21875, CrossEntropy: 0.002373403636738658, Accuracy: 0.9992007672634271\n",
      "EVALUATION with last weights -> Loss: 14341556.0, CrossEntropy: 2.3095452785491943, Accuracy: 0.8225870253164557\n",
      "Elapsed time for the training: 12.897440195083618\n",
      "Iter 1686 / 2000, Loss: 128064786.46875, CrossEntropy: 0.001815908937714994, Accuracy: 0.9994605179028133\n",
      "EVALUATION with last weights -> Loss: 14760926.0, CrossEntropy: 2.336444139480591, Accuracy: 0.825059335443038\n",
      "Elapsed time for the training: 12.915126323699951\n",
      "Iter 1687 / 2000, Loss: 132715352.4375, CrossEntropy: 0.0036732128355652094, Accuracy: 0.998821131713555\n",
      "EVALUATION with last weights -> Loss: 14209329.0, CrossEntropy: 2.238835334777832, Accuracy: 0.8231803797468354\n",
      "Elapsed time for the training: 12.95050048828125\n",
      "Iter 1688 / 2000, Loss: 130456376.15625, CrossEntropy: 0.0027685172390192747, Accuracy: 0.9990009590792839\n",
      "EVALUATION with last weights -> Loss: 14738813.0, CrossEntropy: 2.346205234527588, Accuracy: 0.8180379746835443\n",
      "Elapsed time for the training: 12.930097103118896\n",
      "Iter 1689 / 2000, Loss: 130745508.28125, CrossEntropy: 0.0028813586104661226, Accuracy: 0.9991807864450127\n",
      "EVALUATION with last weights -> Loss: 14412753.0, CrossEntropy: 2.268548011779785, Accuracy: 0.8237737341772152\n",
      "Elapsed time for the training: 12.918432474136353\n",
      "Iter 1690 / 2000, Loss: 130531404.75, CrossEntropy: 0.0027938541024923325, Accuracy: 0.9992407289002557\n",
      "EVALUATION with last weights -> Loss: 14658844.0, CrossEntropy: 2.294022798538208, Accuracy: 0.8207080696202531\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-10-2a4f601573ca>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     15\u001b[0m \u001b[0;31m#         images = images.view(images.shape[0], -1)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m         loss, y_pred,_ = sgd_model.training_step(\n\u001b[0m\u001b[1;32m     18\u001b[0m             \u001b[0mbatch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimages\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0mN\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Langevin_Variational_Inference/Deep_Nets/CNNs/ResNet/src/components.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, batch, N, vi_batch_size, deterministic_weights)\u001b[0m\n\u001b[1;32m    411\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    412\u001b[0m         \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlog_prior\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mlog_likelihood\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 413\u001b[0;31m         \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    414\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    415\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.8/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m    183\u001b[0m                 \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    184\u001b[0m         \"\"\"\n\u001b[0;32m--> 185\u001b[0;31m         \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    187\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m    123\u001b[0m         \u001b[0mretain_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    124\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 125\u001b[0;31m     Variable._execution_engine.run_backward(\n\u001b[0m\u001b[1;32m    126\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    127\u001b[0m         allow_unreachable=True)  # allow_unreachable flag\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "num_epochs = 2000\n",
    "criterion = torch.nn.CrossEntropyLoss()  # loss function\n",
    "total_acc = []\n",
    "\n",
    "for i in range(num_epochs):\n",
    "    losses = []\n",
    "    cross_losses = []\n",
    "    accuracy = []\n",
    "    \n",
    "    start = time.time()\n",
    "    \n",
    "    for images, labels in trainloader:\n",
    "\n",
    "        # Flatten MNIST images into a 784 long vector\n",
    "#         images = images.view(images.shape[0], -1)\n",
    "\n",
    "        loss, y_pred,_ = sgd_model.training_step(\n",
    "            batch=(images, labels),\n",
    "            N=N,\n",
    "            deterministic_weights=True,\n",
    "            vi_batch_size=None,\n",
    "        )\n",
    "        losses.append(loss)\n",
    "        \n",
    "        cross_loss = criterion(y_pred.squeeze(0), labels)\n",
    "        cross_losses.append(cross_loss)\n",
    "        accuracy.append((torch.max(y_pred.squeeze(0),-1).indices == labels).sum().item() / labels.size(0))\n",
    "        \n",
    "    end = time.time()\n",
    "    print('Elapsed time for the training:', end - start)\n",
    "\n",
    "#     if (i+1) % 10**math.floor(math.log10(i+1)) == 0:  # True when i+1 \\in {1, 2, ..., 10, 20, ..., 100, 200, ..., 1000, 2000, ...}\n",
    "    print(\"Iter {} / {}, Loss: {}, CrossEntropy: {}, Accuracy: {}\".format(i+1, num_epochs, sum(losses), sum(cross_losses)/len(cross_losses), sum(accuracy)/len(accuracy)))\n",
    "    tmp_acc = evaluation(sgd_model, testloader)\n",
    "    total_acc.append(tmp_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# np.save('sgd_acc_cifar10.npy',total_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# torch.save(sgd_model.state_dict(), \"./sgd_resnet20_svhn_map.pt\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LVI or non-LVI models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "291376\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "\n",
    "sgd_model = BayesianResNet20(**pickle.load(open(\"./resnet20_sgd_model_params.pickle\", \"rb\")))\n",
    "sgd_model.load_state_dict(torch.load(\"./sgd_resnet20_svhn_map.pt\", map_location=dev))\n",
    "# sgd_model.load_state_dict(torch.load(\"./cnn_svhn_non_lvi.pt\", map_location=dev))\n",
    "\n",
    "\n",
    "num_stoch_params = 0\n",
    "for param in sgd_model.get_stochastic_params():\n",
    "    param_size = 1\n",
    "    for dim in param.shape:\n",
    "        param_size *= dim\n",
    "    num_stoch_params += param_size\n",
    "print(num_stoch_params)\n",
    "\n",
    "lvi_model_params = pickle.load(open(\"./resnet20_sgd_model_params.pickle\", \"rb\"))\n",
    "lvi_model_params[\"group_by_layers\"] = False\n",
    "lvi_model_params[\"use_random_groups\"] = False\n",
    "lvi_model_params[\"use_permuted_groups\"] = True\n",
    "lvi_model_params[\"max_groups\"] = num_stoch_params\n",
    "lvi_model_params[\"dropout_prob\"] = 0.1\n",
    "lvi_model_params[\"chain_length\"] = 5000\n",
    "lvi_model_params[\"prior_std\"] = 0.3\n",
    "# lvi_model_params[\"output_distribution\"] = \"categorical\"\n",
    "# lvi_model_params[\"output_dist_const_params\"] = dict(scale=1.0)\n",
    "\n",
    "lvi_model_params[\"init_values\"] = {k:v.theta_actual.data for k,v in sgd_model.tensor_dict.items()}\n",
    "del sgd_model\n",
    "\n",
    "lvi_model = BayesianResNet20(**lvi_model_params)\n",
    "\n",
    "lvi_model.initialize_optimizer(\n",
    "    update_determ=False, \n",
    "    update_stoch=True, \n",
    "#     lr=1e-4,\n",
    "    lr=1e-2, \n",
    "    rmsprop=False,\n",
    "    sgd=False, \n",
    "    sgld=False, \n",
    "    psgld=False,\n",
    "    sghmc = True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dev = torch.device('cpu')\n",
    "# dev = torch.device('cuda:1')\n",
    "\n",
    "lvi_model = lvi_model.to(dev)\n",
    "for n, t in lvi_model.tensor_dict.items():\n",
    "    if isinstance(t, StochasticTensor):\n",
    "        t.prior_dist.loc = t.prior_dist.loc.to(dev)\n",
    "        t.prior_dist.scale = t.prior_dist.scale.to(dev)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Before initialization: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:7')\n",
      "After initialization: tensor([1, 1, 1,  ..., 1, 1, 1], device='cuda:7')\n"
     ]
    }
   ],
   "source": [
    "# print(\"Before initialization: {}\".format(sgld_model.num_samples_per_group))\n",
    "# sgld_model.init_chains()\n",
    "# print(\"After initialization: {}\".format(sgld_model.num_samples_per_group))\n",
    "print(\"Before initialization: {}\".format(lvi_model.num_samples_per_group))\n",
    "lvi_model.init_chains()\n",
    "print(\"After initialization: {}\".format(lvi_model.num_samples_per_group))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for lvi\n",
    "\n",
    "def evaluation(lvi_model, testloader):\n",
    "    losses = []\n",
    "    cross_losses = []\n",
    "    accuracy = []\n",
    "\n",
    "    for images, labels in testloader:\n",
    "        inner_cross_losses = []\n",
    "        inner_accuracy = []\n",
    "\n",
    "        loss, y_pred = lvi_model.evaluate(batch=(images, labels),\n",
    "                    N=N,\n",
    "                    num_samples=100,\n",
    "                    deterministic_weights=False)\n",
    "\n",
    "        losses.append(loss)\n",
    "        for j in range(y_pred.shape[0]):\n",
    "            cross_loss = criterion(y_pred.squeeze(0)[j], labels)\n",
    "            inner_cross_losses.append(cross_loss)\n",
    "            inner_accuracy.append((torch.max(y_pred.squeeze(0)[j],-1).indices == labels).sum().item() / labels.size(0))\n",
    "\n",
    "            accuracy.append(sum(inner_accuracy)/len(inner_accuracy))\n",
    "            cross_losses.append(sum(inner_cross_losses)/len(inner_cross_losses))\n",
    "\n",
    "    print(\"EVALUATION with 100 samples -> Loss: {}, CrossEntropy: {}, Accuracy: {}\".format(sum(losses)/len(losses), sum(cross_losses)/len(cross_losses), sum(accuracy)/len(accuracy)))\n",
    "    return sum(accuracy)/len(accuracy)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter 1 / 2000, Loss: 29254699.299232736, CrossEntropy: 4.528735637664795, Accuracy: 0.542395300511509\n",
      "Elapsed time for the training: 17.629674673080444\n",
      "EVALUATION with 100 samples -> Loss: 53069948.0, CrossEntropy: 8.378366470336914, Accuracy: 0.4154737936166249\n",
      "Iter 2 / 2000, Loss: 135191591.0792839, CrossEntropy: 2.1117284297943115, Accuracy: 0.599144820971867\n",
      "Elapsed time for the training: 56.00531888008118\n",
      "EVALUATION with 100 samples -> Loss: 15497970.0, CrossEntropy: 2.451812505722046, Accuracy: 0.5650431116893918\n",
      "Iter 3 / 2000, Loss: 100716900.09207161, CrossEntropy: 1.572136640548706, Accuracy: 0.6368486253196931\n",
      "Elapsed time for the training: 56.297507524490356\n",
      "EVALUATION with 100 samples -> Loss: 12218481.0, CrossEntropy: 1.9494715929031372, Accuracy: 0.5930655403837327\n",
      "Iter 4 / 2000, Loss: 81936927.09974425, CrossEntropy: 1.278669834136963, Accuracy: 0.6673518422314578\n",
      "Elapsed time for the training: 56.56178164482117\n",
      "EVALUATION with 100 samples -> Loss: 10733824.0, CrossEntropy: 1.707229495048523, Accuracy: 0.6167045798368084\n",
      "Iter 5 / 2000, Loss: 71760956.89002557, CrossEntropy: 1.1191946268081665, Accuracy: 0.6897068813938618\n",
      "Elapsed time for the training: 56.56319522857666\n",
      "EVALUATION with 100 samples -> Loss: 9449591.0, CrossEntropy: 1.4938539266586304, Accuracy: 0.6352495024129123\n",
      "Iter 6 / 2000, Loss: 64590898.30179028, CrossEntropy: 1.0071561336517334, Accuracy: 0.7090123481457801\n",
      "Elapsed time for the training: 56.502665281295776\n",
      "EVALUATION with 100 samples -> Loss: 8936983.0, CrossEntropy: 1.4160372018814087, Accuracy: 0.6477905094694072\n",
      "Iter 7 / 2000, Loss: 58599198.13810742, CrossEntropy: 0.9135737419128418, Accuracy: 0.7247677229859336\n",
      "Elapsed time for the training: 56.418320655822754\n",
      "EVALUATION with 100 samples -> Loss: 8069719.5, CrossEntropy: 1.273787498474121, Accuracy: 0.6637085672490818\n",
      "Iter 8 / 2000, Loss: 55228775.140664965, CrossEntropy: 0.8607532978057861, Accuracy: 0.7361243206521739\n",
      "Elapsed time for the training: 56.415640354156494\n",
      "EVALUATION with 100 samples -> Loss: 8027543.5, CrossEntropy: 1.2795170545578003, Accuracy: 0.6647999855871944\n",
      "Iter 9 / 2000, Loss: 53369680.59335038, CrossEntropy: 0.8312643766403198, Accuracy: 0.7430391823849105\n",
      "Elapsed time for the training: 56.26438760757446\n",
      "EVALUATION with 100 samples -> Loss: 7834391.5, CrossEntropy: 1.2353862524032593, Accuracy: 0.673254436187118\n",
      "Iter 10 / 2000, Loss: 50013816.26086956, CrossEntropy: 0.778885006904602, Accuracy: 0.7536290161445013\n",
      "Elapsed time for the training: 56.35957193374634\n",
      "EVALUATION with 100 samples -> Loss: 7298333.5, CrossEntropy: 1.1617332696914673, Accuracy: 0.6837925747154496\n",
      "Iter 11 / 2000, Loss: 45664320.327365726, CrossEntropy: 0.7109092473983765, Accuracy: 0.7690242367327366\n",
      "Elapsed time for the training: 56.26219964027405\n",
      "EVALUATION with 100 samples -> Loss: 6884849.5, CrossEntropy: 1.0863757133483887, Accuracy: 0.6936158309496975\n",
      "Iter 12 / 2000, Loss: 44504069.16112532, CrossEntropy: 0.6929510831832886, Accuracy: 0.7737117367327365\n",
      "Elapsed time for the training: 56.19403576850891\n",
      "EVALUATION with 100 samples -> Loss: 6885322.0, CrossEntropy: 1.0907987356185913, Accuracy: 0.6916431434764578\n",
      "Iter 13 / 2000, Loss: 44278945.9028133, CrossEntropy: 0.6892820596694946, Accuracy: 0.7738840712915601\n",
      "Elapsed time for the training: 56.3407506942749\n",
      "EVALUATION with 100 samples -> Loss: 6864767.5, CrossEntropy: 1.0823962688446045, Accuracy: 0.6944901613662348\n",
      "Iter 14 / 2000, Loss: 43559689.58056266, CrossEntropy: 0.6779880523681641, Accuracy: 0.7774101862212276\n",
      "Elapsed time for the training: 56.27777123451233\n",
      "EVALUATION with 100 samples -> Loss: 6849081.0, CrossEntropy: 1.0770206451416016, Accuracy: 0.6976770745904503\n",
      "Iter 15 / 2000, Loss: 43179925.62148338, CrossEntropy: 0.6721600890159607, Accuracy: 0.7787978540601024\n",
      "Elapsed time for the training: 56.293254137039185\n",
      "EVALUATION with 100 samples -> Loss: 6869828.0, CrossEntropy: 1.082912564277649, Accuracy: 0.6966567324255676\n",
      "Iter 16 / 2000, Loss: 42460578.98721228, CrossEntropy: 0.6607233285903931, Accuracy: 0.7816516144501279\n",
      "Elapsed time for the training: 57.865434885025024\n",
      "EVALUATION with 100 samples -> Loss: 6793842.5, CrossEntropy: 1.0699244737625122, Accuracy: 0.6996198369915322\n",
      "Iter 17 / 2000, Loss: 41462054.85421995, CrossEntropy: 0.6451613306999207, Accuracy: 0.7854459718670077\n",
      "Elapsed time for the training: 57.677324056625366\n",
      "EVALUATION with 100 samples -> Loss: 6769385.5, CrossEntropy: 1.068205714225769, Accuracy: 0.7018418910840407\n",
      "Iter 18 / 2000, Loss: 40196199.00767264, CrossEntropy: 0.6252002120018005, Accuracy: 0.7909157209079284\n",
      "Elapsed time for the training: 58.148011207580566\n",
      "EVALUATION with 100 samples -> Loss: 6659337.0, CrossEntropy: 1.0533168315887451, Accuracy: 0.706055488125313\n",
      "Iter 19 / 2000, Loss: 38927923.88235294, CrossEntropy: 0.6056410670280457, Accuracy: 0.7954293877877238\n",
      "Elapsed time for the training: 58.31248235702515\n",
      "EVALUATION with 100 samples -> Loss: 6543663.5, CrossEntropy: 1.0305042266845703, Accuracy: 0.7103235288995042\n",
      "Iter 20 / 2000, Loss: 37035884.15856777, CrossEntropy: 0.5757761001586914, Accuracy: 0.8035126278772378\n",
      "Elapsed time for the training: 58.360599994659424\n",
      "EVALUATION with 100 samples -> Loss: 6302150.5, CrossEntropy: 1.0031675100326538, Accuracy: 0.7149397877974756\n",
      "Iter 21 / 2000, Loss: 35309889.953964196, CrossEntropy: 0.5488461256027222, Accuracy: 0.8112102381713554\n",
      "Elapsed time for the training: 58.30700922012329\n",
      "EVALUATION with 100 samples -> Loss: 6018677.0, CrossEntropy: 0.9556949734687805, Accuracy: 0.7227079467631436\n",
      "Iter 22 / 2000, Loss: 32711432.75191816, CrossEntropy: 0.5080360770225525, Accuracy: 0.8236183264066496\n",
      "Elapsed time for the training: 58.285521507263184\n",
      "EVALUATION with 100 samples -> Loss: 5650229.0, CrossEntropy: 0.9042408466339111, Accuracy: 0.7290176366105958\n",
      "Iter 23 / 2000, Loss: 30255388.72634271, CrossEntropy: 0.46978840231895447, Accuracy: 0.8353820332480818\n",
      "Elapsed time for the training: 58.36642098426819\n",
      "EVALUATION with 100 samples -> Loss: 5420382.5, CrossEntropy: 0.8551359176635742, Accuracy: 0.7354889728330385\n",
      "Iter 24 / 2000, Loss: 28517041.994884912, CrossEntropy: 0.4426707327365875, Accuracy: 0.8437210278132993\n",
      "Elapsed time for the training: 58.34221696853638\n",
      "EVALUATION with 100 samples -> Loss: 5223773.5, CrossEntropy: 0.8294448256492615, Accuracy: 0.742281236677142\n",
      "Iter 25 / 2000, Loss: 26968406.227621485, CrossEntropy: 0.41822803020477295, Accuracy: 0.8522883032289001\n",
      "Elapsed time for the training: 58.33821511268616\n",
      "EVALUATION with 100 samples -> Loss: 5046317.5, CrossEntropy: 0.7974934577941895, Accuracy: 0.7513231939926441\n",
      "Iter 26 / 2000, Loss: 25624185.480818413, CrossEntropy: 0.39714035391807556, Accuracy: 0.8597261628836317\n",
      "Elapsed time for the training: 58.254064083099365\n",
      "EVALUATION with 100 samples -> Loss: 4937965.0, CrossEntropy: 0.7794872522354126, Accuracy: 0.7581484407793007\n",
      "Iter 27 / 2000, Loss: 24285377.938618924, CrossEntropy: 0.3762706518173218, Accuracy: 0.8666385270140665\n",
      "Elapsed time for the training: 58.295132637023926\n",
      "EVALUATION with 100 samples -> Loss: 4892921.5, CrossEntropy: 0.776235818862915, Accuracy: 0.7606929619487444\n",
      "Iter 28 / 2000, Loss: 23098409.994884912, CrossEntropy: 0.35771846771240234, Accuracy: 0.8733306026214833\n",
      "Elapsed time for the training: 58.27649736404419\n",
      "EVALUATION with 100 samples -> Loss: 4801495.0, CrossEntropy: 0.7591904997825623, Accuracy: 0.7656422054032415\n",
      "Iter 29 / 2000, Loss: 22106488.260869566, CrossEntropy: 0.3422319293022156, Accuracy: 0.8787783727621483\n",
      "Elapsed time for the training: 58.25923728942871\n",
      "EVALUATION with 100 samples -> Loss: 4733124.0, CrossEntropy: 0.7528565526008606, Accuracy: 0.7672662312959535\n",
      "Iter 30 / 2000, Loss: 21092427.700767264, CrossEntropy: 0.3263866901397705, Accuracy: 0.8847756154092071\n",
      "Elapsed time for the training: 58.150771379470825\n",
      "EVALUATION with 100 samples -> Loss: 4653074.5, CrossEntropy: 0.7361955642700195, Accuracy: 0.7725160635608862\n",
      "Iter 31 / 2000, Loss: 20293030.391304348, CrossEntropy: 0.3138006031513214, Accuracy: 0.8889870724104859\n",
      "Elapsed time for the training: 58.195034980773926\n",
      "EVALUATION with 100 samples -> Loss: 4645428.5, CrossEntropy: 0.7289393544197083, Accuracy: 0.773734892902647\n",
      "Iter 32 / 2000, Loss: 19528735.007672634, CrossEntropy: 0.3017875552177429, Accuracy: 0.8933988371163682\n",
      "Elapsed time for the training: 58.20441150665283\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 4586705.0, CrossEntropy: 0.7234978079795837, Accuracy: 0.7763254348179531\n",
      "Iter 33 / 2000, Loss: 18684360.36061381, CrossEntropy: 0.28869155049324036, Accuracy: 0.8984784606777494\n",
      "Elapsed time for the training: 58.15559244155884\n",
      "EVALUATION with 100 samples -> Loss: 4543236.0, CrossEntropy: 0.7183951139450073, Accuracy: 0.7779279186400904\n",
      "Iter 34 / 2000, Loss: 17917086.455242965, CrossEntropy: 0.2766212821006775, Accuracy: 0.9031604659526854\n",
      "Elapsed time for the training: 58.19235587120056\n",
      "EVALUATION with 100 samples -> Loss: 4500271.5, CrossEntropy: 0.7144201993942261, Accuracy: 0.778957654609659\n",
      "Iter 35 / 2000, Loss: 17186981.969309464, CrossEntropy: 0.26522940397262573, Accuracy: 0.9079863331202046\n",
      "Elapsed time for the training: 58.32004141807556\n",
      "EVALUATION with 100 samples -> Loss: 4452577.0, CrossEntropy: 0.7037757039070129, Accuracy: 0.7813665908410301\n",
      "Iter 36 / 2000, Loss: 16623011.017902814, CrossEntropy: 0.2565436363220215, Accuracy: 0.9109829563618926\n",
      "Elapsed time for the training: 58.291409492492676\n",
      "EVALUATION with 100 samples -> Loss: 4439332.0, CrossEntropy: 0.7002753019332886, Accuracy: 0.7817274665774104\n",
      "Iter 37 / 2000, Loss: 15872429.25319693, CrossEntropy: 0.2447149008512497, Accuracy: 0.9156629635549872\n",
      "Elapsed time for the training: 58.37329316139221\n",
      "EVALUATION with 100 samples -> Loss: 4396306.0, CrossEntropy: 0.6915019154548645, Accuracy: 0.7843463416896211\n",
      "Iter 38 / 2000, Loss: 15191257.551150896, CrossEntropy: 0.23401258885860443, Accuracy: 0.9201071970907929\n",
      "Elapsed time for the training: 58.30111742019653\n",
      "EVALUATION with 100 samples -> Loss: 4383402.0, CrossEntropy: 0.6933786273002625, Accuracy: 0.7863236011775592\n",
      "Iter 39 / 2000, Loss: 14576605.539641943, CrossEntropy: 0.22459657490253448, Accuracy: 0.9243206521739131\n",
      "Elapsed time for the training: 58.36996245384216\n",
      "EVALUATION with 100 samples -> Loss: 4384133.0, CrossEntropy: 0.6959636211395264, Accuracy: 0.7850944009478583\n",
      "Iter 40 / 2000, Loss: 14027883.309462916, CrossEntropy: 0.21586140990257263, Accuracy: 0.9276169876918159\n",
      "Elapsed time for the training: 58.39778470993042\n",
      "EVALUATION with 100 samples -> Loss: 4344987.5, CrossEntropy: 0.6854298710823059, Accuracy: 0.7873498318721731\n",
      "Iter 41 / 2000, Loss: 13421306.50511509, CrossEntropy: 0.20638982951641083, Accuracy: 0.931997782129156\n",
      "Elapsed time for the training: 58.48555564880371\n",
      "EVALUATION with 100 samples -> Loss: 4354087.5, CrossEntropy: 0.6916768550872803, Accuracy: 0.786679361110933\n",
      "Iter 42 / 2000, Loss: 12816256.202046037, CrossEntropy: 0.19684121012687683, Accuracy: 0.9354589593989769\n",
      "Elapsed time for the training: 58.30675554275513\n",
      "EVALUATION with 100 samples -> Loss: 4346522.5, CrossEntropy: 0.6857475638389587, Accuracy: 0.7897750580645739\n",
      "Iter 43 / 2000, Loss: 12275257.482097186, CrossEntropy: 0.18845605850219727, Accuracy: 0.938990069533248\n",
      "Elapsed time for the training: 58.31831932067871\n",
      "EVALUATION with 100 samples -> Loss: 4342336.5, CrossEntropy: 0.6814007759094238, Accuracy: 0.7908849111191881\n",
      "Iter 44 / 2000, Loss: 11869496.576726343, CrossEntropy: 0.1821480095386505, Accuracy: 0.9416919757033249\n",
      "Elapsed time for the training: 58.311843395233154\n",
      "EVALUATION with 100 samples -> Loss: 4343261.0, CrossEntropy: 0.6868505477905273, Accuracy: 0.7892479160236524\n",
      "Iter 45 / 2000, Loss: 11323644.212276215, CrossEntropy: 0.17350253462791443, Accuracy: 0.9451931146099745\n",
      "Elapsed time for the training: 58.35012125968933\n",
      "EVALUATION with 100 samples -> Loss: 4366658.5, CrossEntropy: 0.6918630003929138, Accuracy: 0.7897863734242647\n",
      "Iter 46 / 2000, Loss: 10816579.208439898, CrossEntropy: 0.16562221944332123, Accuracy: 0.9490149456521739\n",
      "Elapsed time for the training: 58.26695227622986\n",
      "EVALUATION with 100 samples -> Loss: 4367789.0, CrossEntropy: 0.6936597228050232, Accuracy: 0.7866318709870139\n",
      "Iter 47 / 2000, Loss: 10375837.493606139, CrossEntropy: 0.15867282450199127, Accuracy: 0.9522148737212276\n",
      "Elapsed time for the training: 58.252959966659546\n",
      "EVALUATION with 100 samples -> Loss: 4363056.5, CrossEntropy: 0.6890120506286621, Accuracy: 0.7886878294242373\n",
      "Iter 48 / 2000, Loss: 10008092.332480818, CrossEntropy: 0.15308117866516113, Accuracy: 0.95443624120844\n",
      "Elapsed time for the training: 58.27672219276428\n",
      "EVALUATION with 100 samples -> Loss: 4388829.5, CrossEntropy: 0.6925182342529297, Accuracy: 0.7866270152544318\n",
      "Iter 49 / 2000, Loss: 9626559.528132992, CrossEntropy: 0.1470392346382141, Accuracy: 0.9567939777813299\n",
      "Elapsed time for the training: 58.14645028114319\n",
      "EVALUATION with 100 samples -> Loss: 4370896.5, CrossEntropy: 0.6867005825042725, Accuracy: 0.7888408426125388\n",
      "Iter 50 / 2000, Loss: 9161168.740409207, CrossEntropy: 0.13971485197544098, Accuracy: 0.9602466632033249\n",
      "Elapsed time for the training: 58.40204882621765\n",
      "EVALUATION with 100 samples -> Loss: 4404625.0, CrossEntropy: 0.6927490830421448, Accuracy: 0.7887277127480506\n",
      "Iter 51 / 2000, Loss: 8796993.837595908, CrossEntropy: 0.13399961590766907, Accuracy: 0.961868106617647\n",
      "Elapsed time for the training: 58.252262115478516\n",
      "EVALUATION with 100 samples -> Loss: 4409385.0, CrossEntropy: 0.7028001546859741, Accuracy: 0.7865312441663815\n",
      "Iter 52 / 2000, Loss: 8481305.47826087, CrossEntropy: 0.12908519804477692, Accuracy: 0.9638606937340154\n",
      "Elapsed time for the training: 58.32963800430298\n",
      "EVALUATION with 100 samples -> Loss: 4405734.5, CrossEntropy: 0.6986204981803894, Accuracy: 0.7888967679440543\n",
      "Iter 53 / 2000, Loss: 8141346.734654731, CrossEntropy: 0.12376142293214798, Accuracy: 0.9664097466432225\n",
      "Elapsed time for the training: 58.33257818222046\n",
      "EVALUATION with 100 samples -> Loss: 4430718.0, CrossEntropy: 0.7063541412353516, Accuracy: 0.7879753472827536\n",
      "Iter 54 / 2000, Loss: 7852237.24488491, CrossEntropy: 0.11927998065948486, Accuracy: 0.9678253876278772\n",
      "Elapsed time for the training: 58.297398805618286\n",
      "EVALUATION with 100 samples -> Loss: 4434636.0, CrossEntropy: 0.7059844136238098, Accuracy: 0.7893932193415384\n",
      "Iter 55 / 2000, Loss: 7509063.900255755, CrossEntropy: 0.11393401771783829, Accuracy: 0.9700977062020459\n",
      "Elapsed time for the training: 58.415748596191406\n",
      "EVALUATION with 100 samples -> Loss: 4462094.5, CrossEntropy: 0.7007554173469543, Accuracy: 0.7914719399356511\n",
      "Iter 56 / 2000, Loss: 7160417.840153453, CrossEntropy: 0.10844708234071732, Accuracy: 0.9722121763107417\n",
      "Elapsed time for the training: 58.37713599205017\n",
      "EVALUATION with 100 samples -> Loss: 4475925.5, CrossEntropy: 0.715918242931366, Accuracy: 0.7900735004466346\n",
      "Iter 57 / 2000, Loss: 6948694.3657289, CrossEntropy: 0.10515435039997101, Accuracy: 0.9736378077046036\n",
      "Elapsed time for the training: 58.33386826515198\n",
      "EVALUATION with 100 samples -> Loss: 4502387.0, CrossEntropy: 0.7123865485191345, Accuracy: 0.7904788906402448\n",
      "Iter 58 / 2000, Loss: 6660658.6745524295, CrossEntropy: 0.10065069049596786, Accuracy: 0.974729259910486\n",
      "Elapsed time for the training: 58.76239275932312\n",
      "EVALUATION with 100 samples -> Loss: 4535551.0, CrossEntropy: 0.7186523675918579, Accuracy: 0.7914547904493451\n",
      "Iter 59 / 2000, Loss: 6372042.966751918, CrossEntropy: 0.09615887701511383, Accuracy: 0.9768921835038363\n",
      "Elapsed time for the training: 58.430418968200684\n",
      "EVALUATION with 100 samples -> Loss: 4549999.0, CrossEntropy: 0.721290647983551, Accuracy: 0.7905431024385341\n",
      "Iter 60 / 2000, Loss: 6146963.107416879, CrossEntropy: 0.09262780845165253, Accuracy: 0.9781120124680307\n",
      "Elapsed time for the training: 58.66457200050354\n",
      "EVALUATION with 100 samples -> Loss: 4585887.5, CrossEntropy: 0.7224069237709045, Accuracy: 0.7932344772146324\n",
      "Iter 61 / 2000, Loss: 5901280.381074169, CrossEntropy: 0.08881772309541702, Accuracy: 0.9794107656649617\n",
      "Elapsed time for the training: 58.438565731048584\n",
      "EVALUATION with 100 samples -> Loss: 4581345.5, CrossEntropy: 0.7248006463050842, Accuracy: 0.7932457223104398\n",
      "Iter 62 / 2000, Loss: 5620325.4130434785, CrossEntropy: 0.08439328521490097, Accuracy: 0.9812864649936062\n",
      "Elapsed time for the training: 58.53956651687622\n",
      "EVALUATION with 100 samples -> Loss: 4607355.5, CrossEntropy: 0.7292595505714417, Accuracy: 0.7935290336660489\n",
      "Iter 63 / 2000, Loss: 5390489.409846547, CrossEntropy: 0.08083236962556839, Accuracy: 0.9827115968670077\n",
      "Elapsed time for the training: 58.41489362716675\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 4634388.5, CrossEntropy: 0.7297563552856445, Accuracy: 0.7938679360759217\n",
      "Iter 64 / 2000, Loss: 5159196.126598465, CrossEntropy: 0.0772145465016365, Accuracy: 0.984174192774936\n",
      "Elapsed time for the training: 58.35119843482971\n",
      "EVALUATION with 100 samples -> Loss: 4684767.0, CrossEntropy: 0.7411863803863525, Accuracy: 0.7929228774989414\n",
      "Iter 65 / 2000, Loss: 4950908.265984654, CrossEntropy: 0.07395952939987183, Accuracy: 0.9856422834079285\n",
      "Elapsed time for the training: 58.39100170135498\n",
      "EVALUATION with 100 samples -> Loss: 4657211.0, CrossEntropy: 0.7359995245933533, Accuracy: 0.7948437145382862\n",
      "Iter 66 / 2000, Loss: 4753750.907928389, CrossEntropy: 0.07086629420518875, Accuracy: 0.9870084718670077\n",
      "Elapsed time for the training: 58.30784296989441\n",
      "EVALUATION with 100 samples -> Loss: 4713655.0, CrossEntropy: 0.7491257190704346, Accuracy: 0.792926247476264\n",
      "Iter 67 / 2000, Loss: 4594775.014066496, CrossEntropy: 0.06835366040468216, Accuracy: 0.9877412683823529\n",
      "Elapsed time for the training: 58.41589426994324\n",
      "EVALUATION with 100 samples -> Loss: 4740303.5, CrossEntropy: 0.7458863258361816, Accuracy: 0.7949679485249866\n",
      "Iter 68 / 2000, Loss: 4405048.253836317, CrossEntropy: 0.065389484167099, Accuracy: 0.9885180226982097\n",
      "Elapsed time for the training: 58.3747763633728\n",
      "EVALUATION with 100 samples -> Loss: 4781454.5, CrossEntropy: 0.7571889162063599, Accuracy: 0.7936684970784293\n",
      "Iter 69 / 2000, Loss: 4250968.563299233, CrossEntropy: 0.06298056989908218, Accuracy: 0.9895630195012788\n",
      "Elapsed time for the training: 58.337791204452515\n",
      "EVALUATION with 100 samples -> Loss: 4763170.5, CrossEntropy: 0.7513082027435303, Accuracy: 0.7952539788413368\n",
      "Iter 70 / 2000, Loss: 4093707.492966752, CrossEntropy: 0.06056203693151474, Accuracy: 0.9904456721547314\n",
      "Elapsed time for the training: 58.44292211532593\n",
      "EVALUATION with 100 samples -> Loss: 4778512.5, CrossEntropy: 0.7560287117958069, Accuracy: 0.7934525609838982\n",
      "Iter 71 / 2000, Loss: 3943407.7352941176, CrossEntropy: 0.05822216346859932, Accuracy: 0.9910485933503836\n",
      "Elapsed time for the training: 58.34154462814331\n",
      "EVALUATION with 100 samples -> Loss: 4803038.0, CrossEntropy: 0.759239912033081, Accuracy: 0.7948586658356164\n",
      "Iter 72 / 2000, Loss: 3773254.188938619, CrossEntropy: 0.05552937835454941, Accuracy: 0.991865309303069\n",
      "Elapsed time for the training: 58.36842632293701\n",
      "EVALUATION with 100 samples -> Loss: 4838238.0, CrossEntropy: 0.7704333066940308, Accuracy: 0.7916759646605787\n",
      "Iter 73 / 2000, Loss: 3603272.03101023, CrossEntropy: 0.05288570001721382, Accuracy: 0.992794417359335\n",
      "Elapsed time for the training: 58.33774161338806\n",
      "EVALUATION with 100 samples -> Loss: 4856646.0, CrossEntropy: 0.7695716023445129, Accuracy: 0.7907676385109998\n",
      "Iter 74 / 2000, Loss: 3509167.7647058824, CrossEntropy: 0.051431119441986084, Accuracy: 0.9931460797634271\n",
      "Elapsed time for the training: 58.44942498207092\n",
      "EVALUATION with 100 samples -> Loss: 4844777.5, CrossEntropy: 0.7654022574424744, Accuracy: 0.7928330448833292\n",
      "Iter 75 / 2000, Loss: 3342474.786445013, CrossEntropy: 0.04885884374380112, Accuracy: 0.9940322290601022\n",
      "Elapsed time for the training: 58.52015161514282\n",
      "EVALUATION with 100 samples -> Loss: 4897189.0, CrossEntropy: 0.7747498750686646, Accuracy: 0.7934612522068959\n",
      "Iter 76 / 2000, Loss: 3245632.2455242965, CrossEntropy: 0.04731697216629982, Accuracy: 0.9942030650575447\n",
      "Elapsed time for the training: 58.45494771003723\n",
      "EVALUATION with 100 samples -> Loss: 4906868.5, CrossEntropy: 0.775877058506012, Accuracy: 0.7923078492159935\n",
      "Iter 77 / 2000, Loss: 3115862.939258312, CrossEntropy: 0.04527588188648224, Accuracy: 0.994874420556266\n",
      "Elapsed time for the training: 58.37665629386902\n",
      "EVALUATION with 100 samples -> Loss: 4939893.0, CrossEntropy: 0.7943792343139648, Accuracy: 0.7906415723575513\n",
      "Iter 78 / 2000, Loss: 3004624.1166879795, CrossEntropy: 0.0435861200094223, Accuracy: 0.9952900215792839\n",
      "Elapsed time for the training: 58.37322187423706\n",
      "EVALUATION with 100 samples -> Loss: 4955213.0, CrossEntropy: 0.7847646474838257, Accuracy: 0.7919197416741107\n",
      "Iter 79 / 2000, Loss: 2912512.1432225066, CrossEntropy: 0.04213659092783928, Accuracy: 0.9956321930946291\n",
      "Elapsed time for the training: 58.4644832611084\n",
      "EVALUATION with 100 samples -> Loss: 4991703.0, CrossEntropy: 0.7943585515022278, Accuracy: 0.793192506800915\n",
      "Iter 80 / 2000, Loss: 2766017.8660485935, CrossEntropy: 0.039817679673433304, Accuracy: 0.9962760749680307\n",
      "Elapsed time for the training: 58.449671506881714\n",
      "EVALUATION with 100 samples -> Loss: 5043706.0, CrossEntropy: 0.7945924401283264, Accuracy: 0.7942657262542013\n",
      "Iter 81 / 2000, Loss: 2695197.601982097, CrossEntropy: 0.038729146122932434, Accuracy: 0.996397957960358\n",
      "Elapsed time for the training: 58.296401023864746\n",
      "EVALUATION with 100 samples -> Loss: 5037202.5, CrossEntropy: 0.7931569814682007, Accuracy: 0.795280231398216\n",
      "Iter 82 / 2000, Loss: 2617553.782608696, CrossEntropy: 0.037591490894556046, Accuracy: 0.9964763826726343\n",
      "Elapsed time for the training: 58.31570553779602\n",
      "EVALUATION with 100 samples -> Loss: 5076370.0, CrossEntropy: 0.7938066124916077, Accuracy: 0.7958515615522787\n",
      "Iter 83 / 2000, Loss: 2539032.6371483374, CrossEntropy: 0.036285027861595154, Accuracy: 0.9969079683503836\n",
      "Elapsed time for the training: 58.25592517852783\n",
      "EVALUATION with 100 samples -> Loss: 5060725.5, CrossEntropy: 0.8006318211555481, Accuracy: 0.794180441026822\n",
      "Iter 84 / 2000, Loss: 2457303.7445652173, CrossEntropy: 0.035015176981687546, Accuracy: 0.9973280650575448\n",
      "Elapsed time for the training: 58.30146503448486\n",
      "EVALUATION with 100 samples -> Loss: 5101089.5, CrossEntropy: 0.8054701685905457, Accuracy: 0.7944532672723232\n",
      "Iter 85 / 2000, Loss: 2377011.389386189, CrossEntropy: 0.033762410283088684, Accuracy: 0.9974774216751918\n",
      "Elapsed time for the training: 58.235326528549194\n",
      "EVALUATION with 100 samples -> Loss: 5119761.5, CrossEntropy: 0.8160533905029297, Accuracy: 0.7923880846466154\n",
      "Iter 86 / 2000, Loss: 2310297.108056266, CrossEntropy: 0.032718803733587265, Accuracy: 0.9975973065856778\n",
      "Elapsed time for the training: 58.24533724784851\n",
      "EVALUATION with 100 samples -> Loss: 5155293.5, CrossEntropy: 0.8107547760009766, Accuracy: 0.7957548026316582\n",
      "Iter 87 / 2000, Loss: 2208939.108056266, CrossEntropy: 0.03116128034889698, Accuracy: 0.9980044157608695\n",
      "Elapsed time for the training: 58.277756690979004\n",
      "EVALUATION with 100 samples -> Loss: 5177013.0, CrossEntropy: 0.8104062080383301, Accuracy: 0.7970429039068485\n",
      "Iter 88 / 2000, Loss: 2194679.9485294116, CrossEntropy: 0.03091958723962307, Accuracy: 0.9978205922314578\n",
      "Elapsed time for the training: 58.21756076812744\n",
      "EVALUATION with 100 samples -> Loss: 5203405.5, CrossEntropy: 0.8168061375617981, Accuracy: 0.795703582989391\n",
      "Iter 89 / 2000, Loss: 2075503.257033248, CrossEntropy: 0.029064126312732697, Accuracy: 0.9982841472186701\n",
      "Elapsed time for the training: 58.312002420425415\n",
      "EVALUATION with 100 samples -> Loss: 5242521.0, CrossEntropy: 0.8216747045516968, Accuracy: 0.7966428521245968\n",
      "Iter 90 / 2000, Loss: 2015734.2250639386, CrossEntropy: 0.028137709945440292, Accuracy: 0.9983266064578005\n",
      "Elapsed time for the training: 58.671037435531616\n",
      "EVALUATION with 100 samples -> Loss: 5245684.0, CrossEntropy: 0.827825665473938, Accuracy: 0.7969894041411477\n",
      "Iter 91 / 2000, Loss: 1962816.390505115, CrossEntropy: 0.027321627363562584, Accuracy: 0.998509930466752\n",
      "Elapsed time for the training: 58.336689472198486\n",
      "EVALUATION with 100 samples -> Loss: 5265794.0, CrossEntropy: 0.8276093006134033, Accuracy: 0.7956604571695072\n",
      "Iter 92 / 2000, Loss: 1925930.5580242968, CrossEntropy: 0.026744669303297997, Accuracy: 0.9985174232736573\n",
      "Elapsed time for the training: 58.359952449798584\n",
      "EVALUATION with 100 samples -> Loss: 5294440.0, CrossEntropy: 0.8347789645195007, Accuracy: 0.7963911136515796\n",
      "Iter 93 / 2000, Loss: 1874754.6478580562, CrossEntropy: 0.02596995420753956, Accuracy: 0.998766683983376\n",
      "Elapsed time for the training: 58.53268003463745\n",
      "EVALUATION with 100 samples -> Loss: 5320917.0, CrossEntropy: 0.8340876698493958, Accuracy: 0.7967310828638801\n",
      "Iter 94 / 2000, Loss: 1804460.4998401534, CrossEntropy: 0.024862954393029213, Accuracy: 0.9988321211636829\n",
      "Elapsed time for the training: 58.41148924827576\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 5319242.0, CrossEntropy: 0.8378104567527771, Accuracy: 0.7966272479426589\n",
      "Iter 95 / 2000, Loss: 1796198.465792839, CrossEntropy: 0.02473299764096737, Accuracy: 0.9986363091432225\n",
      "Elapsed time for the training: 58.40380358695984\n",
      "EVALUATION with 100 samples -> Loss: 5321016.5, CrossEntropy: 0.8460816740989685, Accuracy: 0.7956386430503797\n",
      "Iter 96 / 2000, Loss: 1740522.2632672635, CrossEntropy: 0.023853225633502007, Accuracy: 0.9987986532928389\n",
      "Elapsed time for the training: 58.45355749130249\n",
      "EVALUATION with 100 samples -> Loss: 5365506.5, CrossEntropy: 0.8470046520233154, Accuracy: 0.7961913371591613\n",
      "Iter 97 / 2000, Loss: 1689898.3617327365, CrossEntropy: 0.023077266290783882, Accuracy: 0.9989659926470589\n",
      "Elapsed time for the training: 58.38820505142212\n",
      "EVALUATION with 100 samples -> Loss: 5348729.0, CrossEntropy: 0.8476061820983887, Accuracy: 0.7939491772280923\n",
      "Iter 98 / 2000, Loss: 1649245.557225064, CrossEntropy: 0.02244376204907894, Accuracy: 0.9989859734654731\n",
      "Elapsed time for the training: 58.34309983253479\n",
      "EVALUATION with 100 samples -> Loss: 5403322.5, CrossEntropy: 0.8575201630592346, Accuracy: 0.7927688386421179\n",
      "Iter 99 / 2000, Loss: 1606820.25879156, CrossEntropy: 0.021808631718158722, Accuracy: 0.9990688938618926\n",
      "Elapsed time for the training: 58.395488262176514\n",
      "EVALUATION with 100 samples -> Loss: 5423038.0, CrossEntropy: 0.8578134179115295, Accuracy: 0.7922750619408213\n",
      "At iteration 100 we change the dropout rate from 0.1 to 0.2. \n",
      "Iter 100 / 2000, Loss: 1642632.5952685422, CrossEntropy: 0.02082229033112526, Accuracy: 0.9990334279092071\n",
      "Elapsed time for the training: 58.38463592529297\n",
      "EVALUATION with 100 samples -> Loss: 5485797.0, CrossEntropy: 0.858081042766571, Accuracy: 0.7952410175513793\n",
      "Iter 101 / 2000, Loss: 1549207.787404092, CrossEntropy: 0.01937316358089447, Accuracy: 0.999440537084399\n",
      "Elapsed time for the training: 58.39018177986145\n",
      "EVALUATION with 100 samples -> Loss: 5495655.5, CrossEntropy: 0.8611574172973633, Accuracy: 0.7941846671811261\n",
      "Iter 102 / 2000, Loss: 1486377.4171994885, CrossEntropy: 0.018390588462352753, Accuracy: 0.9994904891304348\n",
      "Elapsed time for the training: 58.29302477836609\n",
      "EVALUATION with 100 samples -> Loss: 5558743.5, CrossEntropy: 0.8823264241218567, Accuracy: 0.7917941359478238\n",
      "Iter 103 / 2000, Loss: 1389134.1008631713, CrossEntropy: 0.01690005697309971, Accuracy: 0.9996453404731458\n",
      "Elapsed time for the training: 58.261573791503906\n",
      "EVALUATION with 100 samples -> Loss: 5554580.0, CrossEntropy: 0.879409909248352, Accuracy: 0.792622291735938\n",
      "Iter 104 / 2000, Loss: 1364859.0938299233, CrossEntropy: 0.01652483083307743, Accuracy: 0.9995779052109974\n",
      "Elapsed time for the training: 58.18349575996399\n",
      "EVALUATION with 100 samples -> Loss: 5581653.0, CrossEntropy: 0.8817073702812195, Accuracy: 0.7937395330401944\n",
      "Iter 105 / 2000, Loss: 1305457.2265025575, CrossEntropy: 0.015620581805706024, Accuracy: 0.9996788083439898\n",
      "Elapsed time for the training: 58.14450216293335\n",
      "EVALUATION with 100 samples -> Loss: 5603069.0, CrossEntropy: 0.8810069561004639, Accuracy: 0.7961831918509262\n",
      "Iter 106 / 2000, Loss: 1278866.0, CrossEntropy: 0.015201477333903313, Accuracy: 0.9996553308823529\n",
      "Elapsed time for the training: 58.30237054824829\n",
      "EVALUATION with 100 samples -> Loss: 5664313.5, CrossEntropy: 0.8981010913848877, Accuracy: 0.79626190583999\n",
      "Iter 107 / 2000, Loss: 1212736.88379156, CrossEntropy: 0.014221813529729843, Accuracy: 0.9997007872442456\n",
      "Elapsed time for the training: 58.206650733947754\n",
      "EVALUATION with 100 samples -> Loss: 5691027.0, CrossEntropy: 0.8965275287628174, Accuracy: 0.7953506987842612\n",
      "Iter 108 / 2000, Loss: 1191422.7945971866, CrossEntropy: 0.013858441263437271, Accuracy: 0.9997427469629157\n",
      "Elapsed time for the training: 58.31435227394104\n",
      "EVALUATION with 100 samples -> Loss: 5702054.0, CrossEntropy: 0.9061340093612671, Accuracy: 0.7946139211151535\n",
      "Iter 109 / 2000, Loss: 1150222.9055306904, CrossEntropy: 0.013229894451797009, Accuracy: 0.9997926990089514\n",
      "Elapsed time for the training: 58.29388403892517\n",
      "EVALUATION with 100 samples -> Loss: 5756465.5, CrossEntropy: 0.9130088686943054, Accuracy: 0.7961236285395892\n",
      "Iter 110 / 2000, Loss: 1111526.8214514067, CrossEntropy: 0.012632542289793491, Accuracy: 0.9998601342710998\n",
      "Elapsed time for the training: 58.186692237854004\n",
      "EVALUATION with 100 samples -> Loss: 5736049.5, CrossEntropy: 0.9050828814506531, Accuracy: 0.7976575960422214\n",
      "Iter 111 / 2000, Loss: 1060571.358695652, CrossEntropy: 0.01185025367885828, Accuracy: 0.9999200767263428\n",
      "Elapsed time for the training: 58.31332468986511\n",
      "EVALUATION with 100 samples -> Loss: 5754164.5, CrossEntropy: 0.9112311005592346, Accuracy: 0.7970238183244461\n",
      "Iter 112 / 2000, Loss: 1046325.1416240409, CrossEntropy: 0.01163814589381218, Accuracy: 0.9998701246803069\n",
      "Elapsed time for the training: 58.381346464157104\n",
      "EVALUATION with 100 samples -> Loss: 5817884.0, CrossEntropy: 0.9139915108680725, Accuracy: 0.7969287962497196\n",
      "Iter 113 / 2000, Loss: 1018643.9047314578, CrossEntropy: 0.011231307871639729, Accuracy: 0.9998161764705883\n",
      "Elapsed time for the training: 58.3752384185791\n",
      "EVALUATION with 100 samples -> Loss: 5847204.0, CrossEntropy: 0.9176733493804932, Accuracy: 0.7978475999099152\n",
      "Iter 114 / 2000, Loss: 998886.7236253197, CrossEntropy: 0.010935124941170216, Accuracy: 0.9997857057225065\n",
      "Elapsed time for the training: 58.3840606212616\n",
      "EVALUATION with 100 samples -> Loss: 5880250.5, CrossEntropy: 0.9207468628883362, Accuracy: 0.797476240140533\n",
      "Iter 115 / 2000, Loss: 968174.0345268542, CrossEntropy: 0.010453117080032825, Accuracy: 0.999845148657289\n",
      "Elapsed time for the training: 58.24856233596802\n",
      "EVALUATION with 100 samples -> Loss: 5868963.0, CrossEntropy: 0.930324137210846, Accuracy: 0.7940328578421234\n",
      "Iter 116 / 2000, Loss: 938258.013746803, CrossEntropy: 0.009997564367949963, Accuracy: 0.9999200767263428\n",
      "Elapsed time for the training: 58.25482702255249\n",
      "EVALUATION with 100 samples -> Loss: 5889032.0, CrossEntropy: 0.9364322423934937, Accuracy: 0.7951044498969478\n",
      "Iter 117 / 2000, Loss: 909587.1919757033, CrossEntropy: 0.00956047885119915, Accuracy: 0.9999350623401535\n",
      "Elapsed time for the training: 58.40506911277771\n",
      "EVALUATION with 100 samples -> Loss: 5914919.0, CrossEntropy: 0.9545665979385376, Accuracy: 0.7930103554808628\n",
      "Iter 118 / 2000, Loss: 885204.5278132992, CrossEntropy: 0.009192079305648804, Accuracy: 0.999940057544757\n",
      "Elapsed time for the training: 58.50924205780029\n",
      "EVALUATION with 100 samples -> Loss: 5944708.0, CrossEntropy: 0.94272780418396, Accuracy: 0.7962376702337652\n",
      "Iter 119 / 2000, Loss: 862392.717870844, CrossEntropy: 0.008851362392306328, Accuracy: 0.9999100863171355\n",
      "Elapsed time for the training: 58.219632148742676\n",
      "EVALUATION with 100 samples -> Loss: 5930572.5, CrossEntropy: 0.9405867457389832, Accuracy: 0.7963226407955752\n",
      "Iter 120 / 2000, Loss: 838285.185741688, CrossEntropy: 0.008482306264340878, Accuracy: 0.9999375599424553\n",
      "Elapsed time for the training: 58.25321102142334\n",
      "EVALUATION with 100 samples -> Loss: 5953629.0, CrossEntropy: 0.9342040419578552, Accuracy: 0.7981820681218542\n",
      "Iter 121 / 2000, Loss: 813321.4449328644, CrossEntropy: 0.008106177672743797, Accuracy: 0.9999425551470589\n",
      "Elapsed time for the training: 58.1372435092926\n",
      "EVALUATION with 100 samples -> Loss: 5990837.5, CrossEntropy: 0.9427458643913269, Accuracy: 0.7970041537287873\n",
      "Iter 122 / 2000, Loss: 780582.7258631714, CrossEntropy: 0.0076059456914663315, Accuracy: 0.9999775215792839\n",
      "Elapsed time for the training: 58.23570227622986\n",
      "EVALUATION with 100 samples -> Loss: 6015101.5, CrossEntropy: 0.9612264633178711, Accuracy: 0.7954141344762776\n",
      "Iter 123 / 2000, Loss: 775693.4467710997, CrossEntropy: 0.007542879786342382, Accuracy: 0.9999625359654731\n",
      "Elapsed time for the training: 58.17351531982422\n",
      "EVALUATION with 100 samples -> Loss: 6057486.5, CrossEntropy: 0.9576554894447327, Accuracy: 0.7971659073854884\n",
      "Iter 124 / 2000, Loss: 764971.2512787724, CrossEntropy: 0.007390561979264021, Accuracy: 0.9999750239769821\n",
      "Elapsed time for the training: 58.317880153656006\n",
      "EVALUATION with 100 samples -> Loss: 6098648.0, CrossEntropy: 0.9701642990112305, Accuracy: 0.7946189361079896\n",
      "Iter 125 / 2000, Loss: 752810.7051630435, CrossEntropy: 0.007219355553388596, Accuracy: 0.9999600383631714\n",
      "Elapsed time for the training: 58.26722836494446\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 6081779.0, CrossEntropy: 0.9535661339759827, Accuracy: 0.7979047978155259\n",
      "Iter 126 / 2000, Loss: 746362.8848305626, CrossEntropy: 0.007127596531063318, Accuracy: 0.9999075887148338\n",
      "Elapsed time for the training: 58.30416226387024\n",
      "EVALUATION with 100 samples -> Loss: 6107290.5, CrossEntropy: 0.9615979194641113, Accuracy: 0.7971061679397158\n",
      "Iter 127 / 2000, Loss: 714201.37060422, CrossEntropy: 0.006640428677201271, Accuracy: 0.9999775215792839\n",
      "Elapsed time for the training: 58.24697494506836\n",
      "EVALUATION with 100 samples -> Loss: 6148356.5, CrossEntropy: 0.9621917009353638, Accuracy: 0.7981774125800691\n",
      "Iter 128 / 2000, Loss: 691476.8131393862, CrossEntropy: 0.006290115416049957, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.237462520599365\n",
      "EVALUATION with 100 samples -> Loss: 6135963.0, CrossEntropy: 0.9628667831420898, Accuracy: 0.7980616202094928\n",
      "Iter 129 / 2000, Loss: 690780.4864929668, CrossEntropy: 0.006290392484515905, Accuracy: 0.9999725263746803\n",
      "Elapsed time for the training: 58.24986386299133\n",
      "EVALUATION with 100 samples -> Loss: 6169609.5, CrossEntropy: 0.9728116989135742, Accuracy: 0.7974735148506615\n",
      "Iter 130 / 2000, Loss: 663222.6010230179, CrossEntropy: 0.005876573268324137, Accuracy: 0.9999900095907929\n",
      "Elapsed time for the training: 58.2640700340271\n",
      "EVALUATION with 100 samples -> Loss: 6146335.5, CrossEntropy: 0.9623844623565674, Accuracy: 0.7975407769778463\n",
      "Iter 131 / 2000, Loss: 670419.0932704604, CrossEntropy: 0.006000702269375324, Accuracy: 0.9999825167838875\n",
      "Elapsed time for the training: 58.48033666610718\n",
      "EVALUATION with 100 samples -> Loss: 6134284.0, CrossEntropy: 0.9790692329406738, Accuracy: 0.7950587337899621\n",
      "Iter 132 / 2000, Loss: 656209.1628037085, CrossEntropy: 0.005789014510810375, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.379178047180176\n",
      "EVALUATION with 100 samples -> Loss: 6156813.5, CrossEntropy: 0.9724671244621277, Accuracy: 0.7976031237797866\n",
      "Iter 133 / 2000, Loss: 636889.7420875959, CrossEntropy: 0.005512951407581568, Accuracy: 0.9999900095907929\n",
      "Elapsed time for the training: 58.33077907562256\n",
      "EVALUATION with 100 samples -> Loss: 6192711.5, CrossEntropy: 0.9828593134880066, Accuracy: 0.7968367279218321\n",
      "Iter 134 / 2000, Loss: 628263.4792199489, CrossEntropy: 0.005381322931498289, Accuracy: 0.9999825167838875\n",
      "Elapsed time for the training: 58.2723343372345\n",
      "EVALUATION with 100 samples -> Loss: 6188496.0, CrossEntropy: 0.98795086145401, Accuracy: 0.7957971270204542\n",
      "Iter 135 / 2000, Loss: 626396.7886828644, CrossEntropy: 0.005361014045774937, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.307695388793945\n",
      "EVALUATION with 100 samples -> Loss: 6216508.0, CrossEntropy: 0.9853523969650269, Accuracy: 0.7962817893253271\n",
      "Iter 136 / 2000, Loss: 613254.3299232736, CrossEntropy: 0.005173846147954464, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 58.51306748390198\n",
      "EVALUATION with 100 samples -> Loss: 6227807.0, CrossEntropy: 0.9821202158927917, Accuracy: 0.7971611658071358\n",
      "Iter 137 / 2000, Loss: 606732.8232097187, CrossEntropy: 0.005091861821711063, Accuracy: 0.9999845148657289\n",
      "Elapsed time for the training: 58.29019832611084\n",
      "EVALUATION with 100 samples -> Loss: 6232768.0, CrossEntropy: 0.9794268608093262, Accuracy: 0.798133757227091\n",
      "Iter 138 / 2000, Loss: 596304.8091432225, CrossEntropy: 0.004931861534714699, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.26690316200256\n",
      "EVALUATION with 100 samples -> Loss: 6247954.0, CrossEntropy: 0.9961230754852295, Accuracy: 0.7967450429288712\n",
      "Iter 139 / 2000, Loss: 590398.9332640665, CrossEntropy: 0.0048543368466198444, Accuracy: 0.9999775215792839\n",
      "Elapsed time for the training: 58.2425901889801\n",
      "EVALUATION with 100 samples -> Loss: 6276770.5, CrossEntropy: 0.9886810183525085, Accuracy: 0.7973896269736849\n",
      "Iter 140 / 2000, Loss: 578986.7354539642, CrossEntropy: 0.004685875028371811, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.225549936294556\n",
      "EVALUATION with 100 samples -> Loss: 6264133.0, CrossEntropy: 0.9887435436248779, Accuracy: 0.7965746296720103\n",
      "Iter 141 / 2000, Loss: 568899.9255914323, CrossEntropy: 0.004543633200228214, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.205602169036865\n",
      "EVALUATION with 100 samples -> Loss: 6304598.0, CrossEntropy: 0.995102047920227, Accuracy: 0.7958258050436459\n",
      "Iter 142 / 2000, Loss: 554915.2129955243, CrossEntropy: 0.004334672354161739, Accuracy: 0.9999900095907929\n",
      "Elapsed time for the training: 58.30956697463989\n",
      "EVALUATION with 100 samples -> Loss: 6324788.0, CrossEntropy: 0.9985989928245544, Accuracy: 0.7966637193749193\n",
      "Iter 143 / 2000, Loss: 549853.0708120205, CrossEntropy: 0.004268191754817963, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.248024225234985\n",
      "EVALUATION with 100 samples -> Loss: 6357227.0, CrossEntropy: 1.002498984336853, Accuracy: 0.796570557174278\n",
      "Iter 144 / 2000, Loss: 542184.3543797954, CrossEntropy: 0.004168138839304447, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.22800159454346\n",
      "EVALUATION with 100 samples -> Loss: 6402039.0, CrossEntropy: 1.0085296630859375, Accuracy: 0.7976884579389136\n",
      "Iter 145 / 2000, Loss: 529279.7070012788, CrossEntropy: 0.003973522689193487, Accuracy: 0.999987511988491\n",
      "Elapsed time for the training: 58.289334297180176\n",
      "EVALUATION with 100 samples -> Loss: 6425615.0, CrossEntropy: 1.010388731956482, Accuracy: 0.7975443438173341\n",
      "Iter 146 / 2000, Loss: 529154.3600543478, CrossEntropy: 0.003983573988080025, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.462372064590454\n",
      "EVALUATION with 100 samples -> Loss: 6455125.0, CrossEntropy: 1.0145834684371948, Accuracy: 0.7984485188759743\n",
      "Iter 147 / 2000, Loss: 527422.6429827366, CrossEntropy: 0.003969930112361908, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.357449531555176\n",
      "EVALUATION with 100 samples -> Loss: 6455091.5, CrossEntropy: 1.0329588651657104, Accuracy: 0.7955761299754601\n",
      "Iter 148 / 2000, Loss: 518235.4918478261, CrossEntropy: 0.0038383896462619305, Accuracy: 0.9999850143861893\n",
      "Elapsed time for the training: 58.34081792831421\n",
      "EVALUATION with 100 samples -> Loss: 6463991.5, CrossEntropy: 1.0169395208358765, Accuracy: 0.7978644986430974\n",
      "Iter 149 / 2000, Loss: 508000.23913043475, CrossEntropy: 0.0036912120413035154, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.33689904212952\n",
      "EVALUATION with 100 samples -> Loss: 6470196.5, CrossEntropy: 1.0199612379074097, Accuracy: 0.7974720378696818\n",
      "Iter 150 / 2000, Loss: 511785.4171994885, CrossEntropy: 0.0037628780119121075, Accuracy: 0.999987511988491\n",
      "Elapsed time for the training: 58.40244770050049\n",
      "EVALUATION with 100 samples -> Loss: 6469835.0, CrossEntropy: 1.0269020795822144, Accuracy: 0.7971184302369502\n",
      "Iter 151 / 2000, Loss: 507244.3351182864, CrossEntropy: 0.0037038633599877357, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.376583099365234\n",
      "EVALUATION with 100 samples -> Loss: 6471223.0, CrossEntropy: 1.0209894180297852, Accuracy: 0.7981799566243463\n",
      "Iter 152 / 2000, Loss: 495298.6886988491, CrossEntropy: 0.0035289956722408533, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.31290888786316\n",
      "EVALUATION with 100 samples -> Loss: 6455114.0, CrossEntropy: 1.0253926515579224, Accuracy: 0.7973362530368744\n",
      "Iter 153 / 2000, Loss: 490745.4584398977, CrossEntropy: 0.0034702399279922247, Accuracy: 0.9999700287723785\n",
      "Elapsed time for the training: 58.37226700782776\n",
      "EVALUATION with 100 samples -> Loss: 6452961.0, CrossEntropy: 1.0103073120117188, Accuracy: 0.7999378936220038\n",
      "Iter 154 / 2000, Loss: 485253.8448689258, CrossEntropy: 0.0034012570977211, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.72061800956726\n",
      "EVALUATION with 100 samples -> Loss: 6479526.0, CrossEntropy: 1.0220595598220825, Accuracy: 0.7980026392547006\n",
      "Iter 155 / 2000, Loss: 476040.77725383634, CrossEntropy: 0.003267507767304778, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.42761731147766\n",
      "EVALUATION with 100 samples -> Loss: 6463682.0, CrossEntropy: 1.017402172088623, Accuracy: 0.7987573769198447\n",
      "Iter 156 / 2000, Loss: 478857.5680147059, CrossEntropy: 0.0033304535318166018, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.499500036239624\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 6488626.0, CrossEntropy: 1.0176640748977661, Accuracy: 0.7989965640063954\n",
      "Iter 157 / 2000, Loss: 464902.3610933504, CrossEntropy: 0.003117215819656849, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.444722175598145\n",
      "EVALUATION with 100 samples -> Loss: 6481398.5, CrossEntropy: 1.0154995918273926, Accuracy: 0.7993767736234565\n",
      "Iter 158 / 2000, Loss: 464238.58288043475, CrossEntropy: 0.0031184619292616844, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.55346393585205\n",
      "EVALUATION with 100 samples -> Loss: 6523387.5, CrossEntropy: 1.032336950302124, Accuracy: 0.7985931968441576\n",
      "Iter 159 / 2000, Loss: 459422.2555147059, CrossEntropy: 0.0030574544798582792, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.302059173583984\n",
      "EVALUATION with 100 samples -> Loss: 6493920.5, CrossEntropy: 1.0205528736114502, Accuracy: 0.7993598693587606\n",
      "Iter 160 / 2000, Loss: 462135.01294757036, CrossEntropy: 0.0031121380161494017, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.30063247680664\n",
      "EVALUATION with 100 samples -> Loss: 6521479.5, CrossEntropy: 1.026204228401184, Accuracy: 0.7982829391968724\n",
      "Iter 161 / 2000, Loss: 450850.2027653453, CrossEntropy: 0.002948728622868657, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.36465287208557\n",
      "EVALUATION with 100 samples -> Loss: 6536857.5, CrossEntropy: 1.0282057523727417, Accuracy: 0.798225930168929\n",
      "Iter 162 / 2000, Loss: 452102.1537723785, CrossEntropy: 0.002978932810947299, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.30616211891174\n",
      "EVALUATION with 100 samples -> Loss: 6575650.0, CrossEntropy: 1.0533920526504517, Accuracy: 0.7966355653101654\n",
      "Iter 163 / 2000, Loss: 444360.82592711, CrossEntropy: 0.002869382733479142, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.41109800338745\n",
      "EVALUATION with 100 samples -> Loss: 6593952.0, CrossEntropy: 1.0380258560180664, Accuracy: 0.7961707266573987\n",
      "Iter 164 / 2000, Loss: 443934.30810421996, CrossEntropy: 0.0028751918580383062, Accuracy: 0.9999825167838875\n",
      "Elapsed time for the training: 58.392205476760864\n",
      "EVALUATION with 100 samples -> Loss: 6586421.0, CrossEntropy: 1.0396454334259033, Accuracy: 0.7983538449280773\n",
      "Iter 165 / 2000, Loss: 432534.8240888747, CrossEntropy: 0.0027090904768556356, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 58.31860947608948\n",
      "EVALUATION with 100 samples -> Loss: 6587414.0, CrossEntropy: 1.044753909111023, Accuracy: 0.7980889134513016\n",
      "Iter 166 / 2000, Loss: 433239.7611892583, CrossEntropy: 0.002733539789915085, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.51620936393738\n",
      "EVALUATION with 100 samples -> Loss: 6614823.0, CrossEntropy: 1.0360078811645508, Accuracy: 0.7993718478789733\n",
      "Iter 167 / 2000, Loss: 432295.2595907928, CrossEntropy: 0.0027280179783701897, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.29440116882324\n",
      "EVALUATION with 100 samples -> Loss: 6607583.0, CrossEntropy: 1.0502532720565796, Accuracy: 0.7980591738668433\n",
      "Iter 168 / 2000, Loss: 433626.1815856777, CrossEntropy: 0.0027629355899989605, Accuracy: 0.9999700287723785\n",
      "Elapsed time for the training: 58.33206915855408\n",
      "EVALUATION with 100 samples -> Loss: 6654173.0, CrossEntropy: 1.0421502590179443, Accuracy: 0.7982606414886204\n",
      "Iter 169 / 2000, Loss: 427011.2520780051, CrossEntropy: 0.002680719830095768, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 58.29844260215759\n",
      "EVALUATION with 100 samples -> Loss: 6654866.5, CrossEntropy: 1.0503042936325073, Accuracy: 0.7953571511638166\n",
      "Iter 170 / 2000, Loss: 419217.30786445015, CrossEntropy: 0.002560253022238612, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.10730266571045\n",
      "EVALUATION with 100 samples -> Loss: 6642926.5, CrossEntropy: 1.0435881614685059, Accuracy: 0.7984615710146814\n",
      "Iter 171 / 2000, Loss: 424150.7017263427, CrossEntropy: 0.002652798779308796, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 58.12399506568909\n",
      "EVALUATION with 100 samples -> Loss: 6664207.5, CrossEntropy: 1.051203727722168, Accuracy: 0.7969239958803419\n",
      "Iter 172 / 2000, Loss: 417311.95124680304, CrossEntropy: 0.002554877195507288, Accuracy: 0.9999850143861893\n",
      "Elapsed time for the training: 58.113893270492554\n",
      "EVALUATION with 100 samples -> Loss: 6710355.5, CrossEntropy: 1.0641084909439087, Accuracy: 0.796647622746861\n",
      "Iter 173 / 2000, Loss: 412993.20132672635, CrossEntropy: 0.0024983487091958523, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 58.20714282989502\n",
      "EVALUATION with 100 samples -> Loss: 6635024.0, CrossEntropy: 1.0438392162322998, Accuracy: 0.7992671525324944\n",
      "Iter 174 / 2000, Loss: 407547.3828324808, CrossEntropy: 0.002426696941256523, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.270668745040894\n",
      "EVALUATION with 100 samples -> Loss: 6661426.0, CrossEntropy: 1.0483293533325195, Accuracy: 0.798719104108899\n",
      "Iter 175 / 2000, Loss: 407662.4929667519, CrossEntropy: 0.00244144513271749, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.18084239959717\n",
      "EVALUATION with 100 samples -> Loss: 6662088.5, CrossEntropy: 1.0529251098632812, Accuracy: 0.7977666206069012\n",
      "Iter 176 / 2000, Loss: 401386.09702685423, CrossEntropy: 0.0023553583305329084, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.12771487236023\n",
      "EVALUATION with 100 samples -> Loss: 6705094.0, CrossEntropy: 1.0614196062088013, Accuracy: 0.796213080541416\n",
      "Iter 177 / 2000, Loss: 400577.4099264706, CrossEntropy: 0.0023566274903714657, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.12447929382324\n",
      "EVALUATION with 100 samples -> Loss: 6689425.0, CrossEntropy: 1.0467575788497925, Accuracy: 0.7997412711136934\n",
      "Iter 178 / 2000, Loss: 396770.7332960358, CrossEntropy: 0.0023082292173057795, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.03656554222107\n",
      "EVALUATION with 100 samples -> Loss: 6693241.0, CrossEntropy: 1.0531409978866577, Accuracy: 0.7989525028828552\n",
      "Iter 179 / 2000, Loss: 388885.78956202045, CrossEntropy: 0.0021957289427518845, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.09739351272583\n",
      "EVALUATION with 100 samples -> Loss: 6712085.5, CrossEntropy: 1.0577999353408813, Accuracy: 0.7982220389493574\n",
      "Iter 180 / 2000, Loss: 394688.4276694373, CrossEntropy: 0.0023003669921308756, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.995055198669434\n",
      "EVALUATION with 100 samples -> Loss: 6694391.0, CrossEntropy: 1.0475963354110718, Accuracy: 0.7987320183637339\n",
      "Iter 181 / 2000, Loss: 390175.1235613811, CrossEntropy: 0.0022364577744156122, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.07101845741272\n",
      "EVALUATION with 100 samples -> Loss: 6739273.5, CrossEntropy: 1.0681129693984985, Accuracy: 0.7981943691008716\n",
      "Iter 182 / 2000, Loss: 384801.0666560102, CrossEntropy: 0.0021674393210560083, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.98792839050293\n",
      "EVALUATION with 100 samples -> Loss: 6749076.5, CrossEntropy: 1.0646426677703857, Accuracy: 0.7983787279352724\n",
      "Iter 183 / 2000, Loss: 380407.02221867006, CrossEntropy: 0.0021048826165497303, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.01996898651123\n",
      "EVALUATION with 100 samples -> Loss: 6739372.5, CrossEntropy: 1.0598628520965576, Accuracy: 0.8003038451642345\n",
      "Iter 184 / 2000, Loss: 377965.941576087, CrossEntropy: 0.0020841953810304403, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.03399038314819\n",
      "EVALUATION with 100 samples -> Loss: 6721600.5, CrossEntropy: 1.0643984079360962, Accuracy: 0.7974163361597348\n",
      "Iter 185 / 2000, Loss: 377969.64817774936, CrossEntropy: 0.002090563764795661, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.990052938461304\n",
      "EVALUATION with 100 samples -> Loss: 6775207.0, CrossEntropy: 1.081099271774292, Accuracy: 0.7962323294887966\n",
      "Iter 186 / 2000, Loss: 384095.1254795396, CrossEntropy: 0.0021989436354488134, Accuracy: 0.9999850143861893\n",
      "Elapsed time for the training: 58.16552734375\n",
      "EVALUATION with 100 samples -> Loss: 6758169.0, CrossEntropy: 1.0570557117462158, Accuracy: 0.7999434301662127\n",
      "Iter 187 / 2000, Loss: 373504.80626598466, CrossEntropy: 0.0020424951799213886, Accuracy: 0.999987511988491\n",
      "Elapsed time for the training: 58.07539200782776\n",
      "EVALUATION with 100 samples -> Loss: 6777461.5, CrossEntropy: 1.0648308992385864, Accuracy: 0.7988751670008771\n",
      "Iter 188 / 2000, Loss: 369719.6628836317, CrossEntropy: 0.001995019381865859, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.22712516784668\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 6769542.5, CrossEntropy: 1.0784145593643188, Accuracy: 0.7972575232289383\n",
      "Iter 189 / 2000, Loss: 377847.44789002556, CrossEntropy: 0.0021337249781936407, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 58.14092469215393\n",
      "EVALUATION with 100 samples -> Loss: 6829797.5, CrossEntropy: 1.069292426109314, Accuracy: 0.7990790672312219\n",
      "Iter 190 / 2000, Loss: 366860.2978740409, CrossEntropy: 0.0019732241053134203, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.18437457084656\n",
      "EVALUATION with 100 samples -> Loss: 6783916.5, CrossEntropy: 1.077628493309021, Accuracy: 0.7980027300716186\n",
      "Iter 191 / 2000, Loss: 370613.63786764705, CrossEntropy: 0.0020473129115998745, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.19317317008972\n",
      "EVALUATION with 100 samples -> Loss: 6818357.0, CrossEntropy: 1.0677694082260132, Accuracy: 0.8002534016698942\n",
      "Iter 192 / 2000, Loss: 363578.89002557547, CrossEntropy: 0.0019473765278235078, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.18488669395447\n",
      "EVALUATION with 100 samples -> Loss: 6829059.5, CrossEntropy: 1.0698537826538086, Accuracy: 0.799820460690619\n",
      "Iter 193 / 2000, Loss: 361250.03436700767, CrossEntropy: 0.001920533599331975, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.216850996017456\n",
      "EVALUATION with 100 samples -> Loss: 6885540.0, CrossEntropy: 1.0909236669540405, Accuracy: 0.7995423647136813\n",
      "Iter 194 / 2000, Loss: 360649.57832480816, CrossEntropy: 0.0019200996030122042, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.204901695251465\n",
      "EVALUATION with 100 samples -> Loss: 6819412.5, CrossEntropy: 1.0729948282241821, Accuracy: 0.7997436258641901\n",
      "Iter 195 / 2000, Loss: 354025.0921515345, CrossEntropy: 0.0018278051866218448, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.190505266189575\n",
      "EVALUATION with 100 samples -> Loss: 6843159.5, CrossEntropy: 1.0936816930770874, Accuracy: 0.7976513696500852\n",
      "Iter 196 / 2000, Loss: 353184.69381393865, CrossEntropy: 0.0018259366042912006, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.27386808395386\n",
      "EVALUATION with 100 samples -> Loss: 6873533.0, CrossEntropy: 1.0925971269607544, Accuracy: 0.7965314992500822\n",
      "Iter 197 / 2000, Loss: 350074.2770140665, CrossEntropy: 0.0017893860349431634, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.154311418533325\n",
      "EVALUATION with 100 samples -> Loss: 6862811.0, CrossEntropy: 1.089916467666626, Accuracy: 0.7966095647454516\n",
      "Iter 198 / 2000, Loss: 352446.0583439898, CrossEntropy: 0.0018355101346969604, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.050217628479004\n",
      "EVALUATION with 100 samples -> Loss: 6860051.5, CrossEntropy: 1.0844687223434448, Accuracy: 0.7981769860109296\n",
      "Iter 199 / 2000, Loss: 347650.5685741688, CrossEntropy: 0.0017750130500644445, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.06059789657593\n",
      "EVALUATION with 100 samples -> Loss: 6919256.5, CrossEntropy: 1.0981584787368774, Accuracy: 0.7970481754339473\n",
      "At iteration 200 we change the dropout rate from 0.1 to 0.2. \n",
      "Iter 200 / 2000, Loss: 347147.6708759591, CrossEntropy: 0.0017749923281371593, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.150752782821655\n",
      "EVALUATION with 100 samples -> Loss: 6867909.0, CrossEntropy: 1.079126238822937, Accuracy: 0.7991538031764266\n",
      "Iter 201 / 2000, Loss: 345868.65369245526, CrossEntropy: 0.0017663947073742747, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.08246207237244\n",
      "EVALUATION with 100 samples -> Loss: 6906912.0, CrossEntropy: 1.0999999046325684, Accuracy: 0.7977549703191317\n",
      "Iter 202 / 2000, Loss: 342646.62492007675, CrossEntropy: 0.001725687412545085, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.97288107872009\n",
      "EVALUATION with 100 samples -> Loss: 6915479.0, CrossEntropy: 1.0907453298568726, Accuracy: 0.7984369149907411\n",
      "Iter 203 / 2000, Loss: 336859.2089194373, CrossEntropy: 0.0016457432648167014, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.02626132965088\n",
      "EVALUATION with 100 samples -> Loss: 6945093.0, CrossEntropy: 1.0896929502487183, Accuracy: 0.7987575422105635\n",
      "Iter 204 / 2000, Loss: 341928.1011029412, CrossEntropy: 0.0017352743307128549, Accuracy: 0.9999850143861893\n",
      "Elapsed time for the training: 57.944620847702026\n",
      "EVALUATION with 100 samples -> Loss: 6886904.0, CrossEntropy: 1.0880213975906372, Accuracy: 0.799006022445401\n",
      "Iter 205 / 2000, Loss: 336873.3966592072, CrossEntropy: 0.001666823634877801, Accuracy: 0.9999900095907929\n",
      "Elapsed time for the training: 57.93493056297302\n",
      "EVALUATION with 100 samples -> Loss: 6918463.5, CrossEntropy: 1.0958291292190552, Accuracy: 0.7971887215067379\n",
      "Iter 206 / 2000, Loss: 333930.4581202046, CrossEntropy: 0.0016318786656484008, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.949655532836914\n",
      "EVALUATION with 100 samples -> Loss: 6927608.0, CrossEntropy: 1.084857702255249, Accuracy: 0.8004659785321899\n",
      "Iter 207 / 2000, Loss: 329839.5250959079, CrossEntropy: 0.0015788645250722766, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.208216190338135\n",
      "EVALUATION with 100 samples -> Loss: 6945120.0, CrossEntropy: 1.0901366472244263, Accuracy: 0.7988948585518101\n",
      "Iter 208 / 2000, Loss: 333122.7827685422, CrossEntropy: 0.0016398571897298098, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.07470083236694\n",
      "EVALUATION with 100 samples -> Loss: 6940371.5, CrossEntropy: 1.0905390977859497, Accuracy: 0.798603519806034\n",
      "Iter 209 / 2000, Loss: 331517.54499680304, CrossEntropy: 0.0016271386994048953, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.117438316345215\n",
      "EVALUATION with 100 samples -> Loss: 6924299.5, CrossEntropy: 1.0952311754226685, Accuracy: 0.7976574800488189\n",
      "Iter 210 / 2000, Loss: 331485.5949888107, CrossEntropy: 0.001639822730794549, Accuracy: 0.9999925071930946\n",
      "Elapsed time for the training: 58.20432639122009\n",
      "EVALUATION with 100 samples -> Loss: 6939174.5, CrossEntropy: 1.105819582939148, Accuracy: 0.7967875596881211\n",
      "Iter 211 / 2000, Loss: 326083.6528132992, CrossEntropy: 0.001563116442412138, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.02621293067932\n",
      "EVALUATION with 100 samples -> Loss: 6957203.5, CrossEntropy: 1.0901545286178589, Accuracy: 0.7992335944921869\n",
      "Iter 212 / 2000, Loss: 320440.7403292839, CrossEntropy: 0.0014850248116999865, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.127511501312256\n",
      "EVALUATION with 100 samples -> Loss: 6931683.0, CrossEntropy: 1.0991723537445068, Accuracy: 0.7985714362661179\n",
      "Iter 213 / 2000, Loss: 319508.45740089513, CrossEntropy: 0.0014810103457421064, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.09368443489075\n",
      "EVALUATION with 100 samples -> Loss: 6946887.5, CrossEntropy: 1.0989192724227905, Accuracy: 0.7981236289889074\n",
      "Iter 214 / 2000, Loss: 322297.29411764705, CrossEntropy: 0.0015343900304287672, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.10164713859558\n",
      "EVALUATION with 100 samples -> Loss: 6962117.0, CrossEntropy: 1.0931953191757202, Accuracy: 0.7985918394783438\n",
      "Iter 215 / 2000, Loss: 322192.98593350383, CrossEntropy: 0.001542990910820663, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.07854390144348\n",
      "EVALUATION with 100 samples -> Loss: 6965158.5, CrossEntropy: 1.1047203540802002, Accuracy: 0.7982071064910435\n",
      "Iter 216 / 2000, Loss: 316417.16456202045, CrossEntropy: 0.0014624259201809764, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.24260640144348\n",
      "EVALUATION with 100 samples -> Loss: 7000341.0, CrossEntropy: 1.1233042478561401, Accuracy: 0.7984714072110459\n",
      "Iter 217 / 2000, Loss: 315697.9640744885, CrossEntropy: 0.0014615922700613737, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.33882427215576\n",
      "EVALUATION with 100 samples -> Loss: 6986963.0, CrossEntropy: 1.096060037612915, Accuracy: 0.799676868807672\n",
      "Iter 218 / 2000, Loss: 311806.95748081844, CrossEntropy: 0.0014118656981736422, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.165873765945435\n",
      "EVALUATION with 100 samples -> Loss: 6958938.0, CrossEntropy: 1.0920350551605225, Accuracy: 0.7999039960719564\n",
      "Iter 219 / 2000, Loss: 315577.46007832483, CrossEntropy: 0.0014809148851782084, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.123162269592285\n",
      "EVALUATION with 100 samples -> Loss: 6979528.5, CrossEntropy: 1.0954525470733643, Accuracy: 0.8007121735977639\n",
      "Iter 220 / 2000, Loss: 309849.0105498721, CrossEntropy: 0.001401329762302339, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 58.082767724990845\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 7025371.0, CrossEntropy: 1.1245992183685303, Accuracy: 0.7982351305307851\n",
      "Iter 221 / 2000, Loss: 311304.18534207164, CrossEntropy: 0.0014333655126392841, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.19491958618164\n",
      "EVALUATION with 100 samples -> Loss: 7039395.0, CrossEntropy: 1.1174269914627075, Accuracy: 0.7977760074550014\n",
      "Iter 222 / 2000, Loss: 310946.28204923274, CrossEntropy: 0.0014372797450050712, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.061601400375366\n",
      "EVALUATION with 100 samples -> Loss: 7009864.5, CrossEntropy: 1.0966970920562744, Accuracy: 0.8000001888345474\n",
      "Iter 223 / 2000, Loss: 310088.2872442455, CrossEntropy: 0.0014339372282847762, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.17270827293396\n",
      "EVALUATION with 100 samples -> Loss: 7025301.0, CrossEntropy: 1.104619026184082, Accuracy: 0.7994443513277164\n",
      "Iter 224 / 2000, Loss: 310639.8497442455, CrossEntropy: 0.0014517903327941895, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.097630739212036\n",
      "EVALUATION with 100 samples -> Loss: 7030586.5, CrossEntropy: 1.1200546026229858, Accuracy: 0.7956784690882336\n",
      "Iter 225 / 2000, Loss: 304552.4344629156, CrossEntropy: 0.0013681757263839245, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.13638353347778\n",
      "EVALUATION with 100 samples -> Loss: 7040404.5, CrossEntropy: 1.107113242149353, Accuracy: 0.7994530796009982\n",
      "Iter 226 / 2000, Loss: 304137.44361413043, CrossEntropy: 0.0013707998441532254, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.081226110458374\n",
      "EVALUATION with 100 samples -> Loss: 7080850.0, CrossEntropy: 1.1072736978530884, Accuracy: 0.8006521807662881\n",
      "Iter 227 / 2000, Loss: 301965.0111892583, CrossEntropy: 0.0013474149163812399, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.20411968231201\n",
      "EVALUATION with 100 samples -> Loss: 7083997.5, CrossEntropy: 1.1225849390029907, Accuracy: 0.7981233490283741\n",
      "Iter 228 / 2000, Loss: 300952.77945172635, CrossEntropy: 0.001343761570751667, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.13388729095459\n",
      "EVALUATION with 100 samples -> Loss: 7054254.5, CrossEntropy: 1.1300033330917358, Accuracy: 0.7961023488919088\n",
      "Iter 229 / 2000, Loss: 300181.26542519184, CrossEntropy: 0.0013395866844803095, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.20062041282654\n",
      "EVALUATION with 100 samples -> Loss: 7077398.0, CrossEntropy: 1.1210782527923584, Accuracy: 0.7989623355920652\n",
      "Iter 230 / 2000, Loss: 296564.94605179026, CrossEntropy: 0.0012912411475554109, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.10702967643738\n",
      "EVALUATION with 100 samples -> Loss: 7112979.5, CrossEntropy: 1.1188595294952393, Accuracy: 0.7992879298113825\n",
      "Iter 231 / 2000, Loss: 295759.13351182867, CrossEntropy: 0.0012881805887445807, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.15968060493469\n",
      "EVALUATION with 100 samples -> Loss: 7037630.5, CrossEntropy: 1.1187419891357422, Accuracy: 0.7976332488946647\n",
      "Iter 232 / 2000, Loss: 296273.09902493603, CrossEntropy: 0.0013068125117570162, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.12852168083191\n",
      "EVALUATION with 100 samples -> Loss: 7078419.0, CrossEntropy: 1.1194549798965454, Accuracy: 0.7976923644976804\n",
      "Iter 233 / 2000, Loss: 292235.66803868284, CrossEntropy: 0.0012520226882770658, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.20637106895447\n",
      "EVALUATION with 100 samples -> Loss: 7067206.0, CrossEntropy: 1.1131818294525146, Accuracy: 0.7987084299462981\n",
      "Iter 234 / 2000, Loss: 294443.25267742964, CrossEntropy: 0.0012957797152921557, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.260401487350464\n",
      "EVALUATION with 100 samples -> Loss: 7071464.5, CrossEntropy: 1.107914686203003, Accuracy: 0.7990400799149381\n",
      "Iter 235 / 2000, Loss: 292547.13786764705, CrossEntropy: 0.0012785219587385654, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.237504959106445\n",
      "EVALUATION with 100 samples -> Loss: 7068118.0, CrossEntropy: 1.1083935499191284, Accuracy: 0.7996565142640716\n",
      "Iter 236 / 2000, Loss: 290866.2632672634, CrossEntropy: 0.0012594821164384484, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.23014998435974\n",
      "EVALUATION with 100 samples -> Loss: 7080892.0, CrossEntropy: 1.1132022142410278, Accuracy: 0.7999589400787958\n",
      "Iter 237 / 2000, Loss: 289559.05578644504, CrossEntropy: 0.0012494386173784733, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.0866277217865\n",
      "EVALUATION with 100 samples -> Loss: 7092738.0, CrossEntropy: 1.1276544332504272, Accuracy: 0.7988362622646991\n",
      "Iter 238 / 2000, Loss: 287696.2778132992, CrossEntropy: 0.001229443703778088, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.21709871292114\n",
      "EVALUATION with 100 samples -> Loss: 7123388.0, CrossEntropy: 1.113592505455017, Accuracy: 0.8013680767106507\n",
      "Iter 239 / 2000, Loss: 289714.84295076726, CrossEntropy: 0.0012707961723208427, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.343950271606445\n",
      "EVALUATION with 100 samples -> Loss: 7090813.5, CrossEntropy: 1.1094695329666138, Accuracy: 0.799546302793366\n",
      "Iter 240 / 2000, Loss: 285255.3314418159, CrossEntropy: 0.0012104649795219302, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.18868160247803\n",
      "EVALUATION with 100 samples -> Loss: 7126159.5, CrossEntropy: 1.1171894073486328, Accuracy: 0.7981309328764957\n",
      "Iter 241 / 2000, Loss: 281423.1484175192, CrossEntropy: 0.0011652308749035, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.18458318710327\n",
      "EVALUATION with 100 samples -> Loss: 7132652.5, CrossEntropy: 1.138157606124878, Accuracy: 0.7971477332256514\n",
      "Iter 242 / 2000, Loss: 283422.1168877877, CrossEntropy: 0.0012005844619125128, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.1558051109314\n",
      "EVALUATION with 100 samples -> Loss: 7104723.5, CrossEntropy: 1.1105083227157593, Accuracy: 0.801097596699607\n",
      "Iter 243 / 2000, Loss: 282383.2377317775, CrossEntropy: 0.0011934430804103613, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.26913928985596\n",
      "EVALUATION with 100 samples -> Loss: 7148868.0, CrossEntropy: 1.1404308080673218, Accuracy: 0.7957628666454192\n",
      "Iter 244 / 2000, Loss: 283092.18530211, CrossEntropy: 0.0012128432281315327, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.18831706047058\n",
      "EVALUATION with 100 samples -> Loss: 7192494.5, CrossEntropy: 1.1260263919830322, Accuracy: 0.7997938289093428\n",
      "Iter 245 / 2000, Loss: 279863.72130754474, CrossEntropy: 0.0011729864636436105, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.37684607505798\n",
      "EVALUATION with 100 samples -> Loss: 7144207.5, CrossEntropy: 1.1336150169372559, Accuracy: 0.7979832441184317\n",
      "Iter 246 / 2000, Loss: 279535.75635390024, CrossEntropy: 0.0011764756636694074, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.409082651138306\n",
      "EVALUATION with 100 samples -> Loss: 7172047.5, CrossEntropy: 1.1357382535934448, Accuracy: 0.797337381041221\n",
      "Iter 247 / 2000, Loss: 278373.76890185423, CrossEntropy: 0.001168654765933752, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.28931427001953\n",
      "EVALUATION with 100 samples -> Loss: 7165062.0, CrossEntropy: 1.1230967044830322, Accuracy: 0.7986516898436706\n",
      "Iter 248 / 2000, Loss: 275119.4967631074, CrossEntropy: 0.0011257100850343704, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.214781761169434\n",
      "EVALUATION with 100 samples -> Loss: 7176623.0, CrossEntropy: 1.1347733736038208, Accuracy: 0.7968029816141825\n",
      "Iter 249 / 2000, Loss: 273958.52046035806, CrossEntropy: 0.0011155412066727877, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.177050828933716\n",
      "EVALUATION with 100 samples -> Loss: 7162848.5, CrossEntropy: 1.120680809020996, Accuracy: 0.7988235189656225\n",
      "Iter 250 / 2000, Loss: 272701.8369964834, CrossEntropy: 0.0011059594107791781, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.35072898864746\n",
      "EVALUATION with 100 samples -> Loss: 7161736.5, CrossEntropy: 1.121052622795105, Accuracy: 0.7996517079762412\n",
      "Iter 251 / 2000, Loss: 276001.68562180304, CrossEntropy: 0.0011846759589388967, Accuracy: 0.9999680306905371\n",
      "Elapsed time for the training: 58.15713405609131\n",
      "EVALUATION with 100 samples -> Loss: 7232241.5, CrossEntropy: 1.1349642276763916, Accuracy: 0.7980406575780354\n",
      "Iter 252 / 2000, Loss: 272823.89522058825, CrossEntropy: 0.0011257112491875887, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.184149742126465\n",
      "EVALUATION with 100 samples -> Loss: 7205953.0, CrossEntropy: 1.128746747970581, Accuracy: 0.7994540512491041\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter 253 / 2000, Loss: 269894.2599904092, CrossEntropy: 0.0010901211062446237, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.21337652206421\n",
      "EVALUATION with 100 samples -> Loss: 7169361.0, CrossEntropy: 1.1290453672409058, Accuracy: 0.7969571305509933\n",
      "Iter 254 / 2000, Loss: 269702.33467870846, CrossEntropy: 0.001094592153094709, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.303600788116455\n",
      "EVALUATION with 100 samples -> Loss: 7214204.0, CrossEntropy: 1.1370654106140137, Accuracy: 0.7984708478947161\n",
      "Iter 255 / 2000, Loss: 266856.387547954, CrossEntropy: 0.0010591300670057535, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.26611948013306\n",
      "EVALUATION with 100 samples -> Loss: 7210840.5, CrossEntropy: 1.1429322957992554, Accuracy: 0.7962541249710384\n",
      "Iter 256 / 2000, Loss: 265971.2320172634, CrossEntropy: 0.001056901877745986, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.16826033592224\n",
      "EVALUATION with 100 samples -> Loss: 7204186.0, CrossEntropy: 1.1426388025283813, Accuracy: 0.7970875970937019\n",
      "Iter 257 / 2000, Loss: 266504.0099904092, CrossEntropy: 0.0010711492504924536, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.33284378051758\n",
      "EVALUATION with 100 samples -> Loss: 7206533.0, CrossEntropy: 1.1491806507110596, Accuracy: 0.7947764634586063\n",
      "Iter 258 / 2000, Loss: 266870.3841112532, CrossEntropy: 0.0010866947704926133, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.19569230079651\n",
      "EVALUATION with 100 samples -> Loss: 7235131.5, CrossEntropy: 1.1567466259002686, Accuracy: 0.7959894458249014\n",
      "Iter 259 / 2000, Loss: 264130.084398977, CrossEntropy: 0.0010515509638935328, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.12486505508423\n",
      "EVALUATION with 100 samples -> Loss: 7244088.0, CrossEntropy: 1.1334232091903687, Accuracy: 0.7992326670336396\n",
      "Iter 260 / 2000, Loss: 266344.24144820974, CrossEntropy: 0.0010947687551379204, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.13833713531494\n",
      "EVALUATION with 100 samples -> Loss: 7229922.0, CrossEntropy: 1.1462002992630005, Accuracy: 0.7977540610850054\n",
      "Iter 261 / 2000, Loss: 262861.7782928389, CrossEntropy: 0.0010516024194657803, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.249542474746704\n",
      "EVALUATION with 100 samples -> Loss: 7227763.0, CrossEntropy: 1.1423547267913818, Accuracy: 0.7993356207437806\n",
      "Iter 262 / 2000, Loss: 262351.30518702045, CrossEntropy: 0.0010513014858588576, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.14286518096924\n",
      "EVALUATION with 100 samples -> Loss: 7260853.5, CrossEntropy: 1.142043948173523, Accuracy: 0.7973476116672691\n",
      "Iter 263 / 2000, Loss: 260234.05203005116, CrossEntropy: 0.0010258712572976947, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.07449913024902\n",
      "EVALUATION with 100 samples -> Loss: 7250771.5, CrossEntropy: 1.1483315229415894, Accuracy: 0.7979750961873218\n",
      "Iter 264 / 2000, Loss: 261237.12256234014, CrossEntropy: 0.0010507560800760984, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.18406796455383\n",
      "EVALUATION with 100 samples -> Loss: 7268372.5, CrossEntropy: 1.1687424182891846, Accuracy: 0.797493942315574\n",
      "Iter 265 / 2000, Loss: 256926.54064098466, CrossEntropy: 0.0009959954768419266, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.134159564971924\n",
      "EVALUATION with 100 samples -> Loss: 7243446.5, CrossEntropy: 1.1430995464324951, Accuracy: 0.7967077660712907\n",
      "Iter 266 / 2000, Loss: 258721.66420236573, CrossEntropy: 0.001028083497658372, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.253376483917236\n",
      "EVALUATION with 100 samples -> Loss: 7273418.5, CrossEntropy: 1.168466329574585, Accuracy: 0.7940470177392995\n",
      "Iter 267 / 2000, Loss: 258384.9330642583, CrossEntropy: 0.0010315513936802745, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.11904716491699\n",
      "EVALUATION with 100 samples -> Loss: 7259305.5, CrossEntropy: 1.1458653211593628, Accuracy: 0.7977934160006449\n",
      "Iter 268 / 2000, Loss: 254908.17818893862, CrossEntropy: 0.0009872214868664742, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.454227685928345\n",
      "EVALUATION with 100 samples -> Loss: 7250253.0, CrossEntropy: 1.1371839046478271, Accuracy: 0.7975029657697621\n",
      "Iter 269 / 2000, Loss: 251751.6854219949, CrossEntropy: 0.0009455772815272212, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.4513943195343\n",
      "EVALUATION with 100 samples -> Loss: 7277906.0, CrossEntropy: 1.1627966165542603, Accuracy: 0.7961381600197207\n",
      "Iter 270 / 2000, Loss: 253694.60845588235, CrossEntropy: 0.0009832310024648905, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.51138472557068\n",
      "EVALUATION with 100 samples -> Loss: 7289278.0, CrossEntropy: 1.1445015668869019, Accuracy: 0.7984839834514542\n",
      "Iter 271 / 2000, Loss: 256278.39893702045, CrossEntropy: 0.0010337663115933537, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.54967927932739\n",
      "EVALUATION with 100 samples -> Loss: 7250340.0, CrossEntropy: 1.139642596244812, Accuracy: 0.7992056199599573\n",
      "Iter 272 / 2000, Loss: 254588.75995044757, CrossEntropy: 0.0010139872319996357, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.58450508117676\n",
      "EVALUATION with 100 samples -> Loss: 7291035.5, CrossEntropy: 1.1704059839248657, Accuracy: 0.7946136407943092\n",
      "Iter 273 / 2000, Loss: 249810.8770380435, CrossEntropy: 0.0009485150803811848, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.52392387390137\n",
      "EVALUATION with 100 samples -> Loss: 7281739.5, CrossEntropy: 1.1592124700546265, Accuracy: 0.7968922927416543\n",
      "Iter 274 / 2000, Loss: 250033.78968190536, CrossEntropy: 0.0009616544703021646, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.62011504173279\n",
      "EVALUATION with 100 samples -> Loss: 7284515.0, CrossEntropy: 1.1655842065811157, Accuracy: 0.7964631158557469\n",
      "Iter 275 / 2000, Loss: 250028.41268382352, CrossEntropy: 0.0009708313154987991, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.62886357307434\n",
      "EVALUATION with 100 samples -> Loss: 7278739.0, CrossEntropy: 1.153698444366455, Accuracy: 0.7949189098049538\n",
      "Iter 276 / 2000, Loss: 250280.50315696932, CrossEntropy: 0.0009810118936002254, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.518118143081665\n",
      "EVALUATION with 100 samples -> Loss: 7290759.5, CrossEntropy: 1.1393704414367676, Accuracy: 0.8001612688767692\n",
      "Iter 277 / 2000, Loss: 245080.07436860615, CrossEntropy: 0.0009069416555576026, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.61031222343445\n",
      "EVALUATION with 100 samples -> Loss: 7299455.5, CrossEntropy: 1.1680207252502441, Accuracy: 0.7981441447710026\n",
      "Iter 278 / 2000, Loss: 245837.23901054988, CrossEntropy: 0.0009277379722334445, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.5850727558136\n",
      "EVALUATION with 100 samples -> Loss: 7326958.5, CrossEntropy: 1.1512925624847412, Accuracy: 0.7985670118600916\n",
      "Iter 279 / 2000, Loss: 244504.13934622763, CrossEntropy: 0.0009148919489234686, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.107226610183716\n",
      "EVALUATION with 100 samples -> Loss: 7305982.5, CrossEntropy: 1.1582733392715454, Accuracy: 0.7978379813688921\n",
      "Iter 280 / 2000, Loss: 243095.57452845268, CrossEntropy: 0.000901001098100096, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.86984634399414\n",
      "EVALUATION with 100 samples -> Loss: 7352495.0, CrossEntropy: 1.1530194282531738, Accuracy: 0.7982021246302013\n",
      "Iter 281 / 2000, Loss: 244910.61237212276, CrossEntropy: 0.0009371606865897775, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.00990962982178\n",
      "EVALUATION with 100 samples -> Loss: 7353560.5, CrossEntropy: 1.1564223766326904, Accuracy: 0.7977108024357034\n",
      "Iter 282 / 2000, Loss: 240716.0053148977, CrossEntropy: 0.0008798675844445825, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.78745794296265\n",
      "EVALUATION with 100 samples -> Loss: 7356907.0, CrossEntropy: 1.1738677024841309, Accuracy: 0.7969179673146999\n",
      "Iter 283 / 2000, Loss: 239772.9705882353, CrossEntropy: 0.0008740593330003321, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.798248291015625\n",
      "EVALUATION with 100 samples -> Loss: 7346033.0, CrossEntropy: 1.1571295261383057, Accuracy: 0.7975570446740202\n",
      "Iter 284 / 2000, Loss: 239540.06349904093, CrossEntropy: 0.0008779177442193031, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.71247482299805\n",
      "EVALUATION with 100 samples -> Loss: 7338949.0, CrossEntropy: 1.1504417657852173, Accuracy: 0.7977097806392242\n",
      "Iter 285 / 2000, Loss: 235567.04919277492, CrossEntropy: 0.0008242221083492041, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.80630874633789\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 7336614.5, CrossEntropy: 1.1534181833267212, Accuracy: 0.8002717082030216\n",
      "Iter 286 / 2000, Loss: 237597.85034367008, CrossEntropy: 0.0008633395773358643, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.7264461517334\n",
      "EVALUATION with 100 samples -> Loss: 7334880.5, CrossEntropy: 1.1588020324707031, Accuracy: 0.7996307976343632\n",
      "Iter 287 / 2000, Loss: 238590.85078324808, CrossEntropy: 0.0008883933769538999, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.84145784378052\n",
      "EVALUATION with 100 samples -> Loss: 7383645.5, CrossEntropy: 1.1918010711669922, Accuracy: 0.795697348240611\n",
      "Iter 288 / 2000, Loss: 238124.47474424553, CrossEntropy: 0.0008882528636604548, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.00797724723816\n",
      "EVALUATION with 100 samples -> Loss: 7363978.0, CrossEntropy: 1.1561782360076904, Accuracy: 0.7996024508997325\n",
      "Iter 289 / 2000, Loss: 239363.381713555, CrossEntropy: 0.0009161423658952117, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.73643517494202\n",
      "EVALUATION with 100 samples -> Loss: 7382563.0, CrossEntropy: 1.1664842367172241, Accuracy: 0.7972024765038402\n",
      "Iter 290 / 2000, Loss: 236286.59111253198, CrossEntropy: 0.0008763353107497096, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.868852853775024\n",
      "EVALUATION with 100 samples -> Loss: 7348049.0, CrossEntropy: 1.161120057106018, Accuracy: 0.7962697182180769\n",
      "Iter 291 / 2000, Loss: 231929.13083439897, CrossEntropy: 0.00081560731632635, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.90157222747803\n",
      "EVALUATION with 100 samples -> Loss: 7325539.0, CrossEntropy: 1.154201865196228, Accuracy: 0.7994558440784485\n",
      "Iter 292 / 2000, Loss: 233253.3501438619, CrossEntropy: 0.0008442596881650388, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.1430926322937\n",
      "EVALUATION with 100 samples -> Loss: 7388658.5, CrossEntropy: 1.1684353351593018, Accuracy: 0.7970077324053196\n",
      "Iter 293 / 2000, Loss: 232144.21771099744, CrossEntropy: 0.000834164151456207, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.019044399261475\n",
      "EVALUATION with 100 samples -> Loss: 7350208.0, CrossEntropy: 1.158815860748291, Accuracy: 0.7990353006248303\n",
      "Iter 294 / 2000, Loss: 234071.0445971867, CrossEntropy: 0.0008726821979507804, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 57.99877953529358\n",
      "EVALUATION with 100 samples -> Loss: 7365712.5, CrossEntropy: 1.1702790260314941, Accuracy: 0.798243223742343\n",
      "Iter 295 / 2000, Loss: 233591.03172953965, CrossEntropy: 0.0008720593177713454, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.21305823326111\n",
      "EVALUATION with 100 samples -> Loss: 7375951.5, CrossEntropy: 1.2147868871688843, Accuracy: 0.7975510241817035\n",
      "Iter 296 / 2000, Loss: 229962.62212276214, CrossEntropy: 0.0008249391685239971, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.127445697784424\n",
      "EVALUATION with 100 samples -> Loss: 7341804.5, CrossEntropy: 1.148379921913147, Accuracy: 0.8005388322226095\n",
      "Iter 297 / 2000, Loss: 229377.5802829284, CrossEntropy: 0.0008221333846449852, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.08299136161804\n",
      "EVALUATION with 100 samples -> Loss: 7368584.5, CrossEntropy: 1.1675950288772583, Accuracy: 0.7980476261814613\n",
      "Iter 298 / 2000, Loss: 230106.17707001278, CrossEntropy: 0.0008408831781707704, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.19168305397034\n",
      "EVALUATION with 100 samples -> Loss: 7355877.5, CrossEntropy: 1.182390809059143, Accuracy: 0.7955535510710483\n",
      "Iter 299 / 2000, Loss: 226746.82364929668, CrossEntropy: 0.0007956107147037983, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.95831489562988\n",
      "EVALUATION with 100 samples -> Loss: 7413466.0, CrossEntropy: 1.167606234550476, Accuracy: 0.7981443468383675\n",
      "At iteration 300 we change the dropout rate from 0.1 to 0.2. \n",
      "Iter 300 / 2000, Loss: 225720.24808184142, CrossEntropy: 0.000787281256634742, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.1534104347229\n",
      "EVALUATION with 100 samples -> Loss: 7381759.5, CrossEntropy: 1.1725538969039917, Accuracy: 0.7967620616024201\n",
      "Iter 301 / 2000, Loss: 231195.6985693734, CrossEntropy: 0.0008819968206807971, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.318074464797974\n",
      "EVALUATION with 100 samples -> Loss: 7385358.0, CrossEntropy: 1.175958514213562, Accuracy: 0.7956332106593499\n",
      "Iter 302 / 2000, Loss: 224479.20911924553, CrossEntropy: 0.0007832772680558264, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.28775930404663\n",
      "EVALUATION with 100 samples -> Loss: 7385185.5, CrossEntropy: 1.1720795631408691, Accuracy: 0.7985460456689693\n",
      "Iter 303 / 2000, Loss: 225828.76015025575, CrossEntropy: 0.0008129292400553823, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.361658811569214\n",
      "EVALUATION with 100 samples -> Loss: 7421796.0, CrossEntropy: 1.166526436805725, Accuracy: 0.7989566362593069\n",
      "Iter 304 / 2000, Loss: 222600.64130434784, CrossEntropy: 0.0007690086495131254, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.13055229187012\n",
      "EVALUATION with 100 samples -> Loss: 7418650.0, CrossEntropy: 1.1691460609436035, Accuracy: 0.7997744364747085\n",
      "Iter 305 / 2000, Loss: 225264.8481857417, CrossEntropy: 0.0008179252617992461, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.19743371009827\n",
      "EVALUATION with 100 samples -> Loss: 7451525.0, CrossEntropy: 1.181631088256836, Accuracy: 0.7981598553786357\n",
      "Iter 306 / 2000, Loss: 223242.95851982097, CrossEntropy: 0.0007938655908219516, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.29604768753052\n",
      "EVALUATION with 100 samples -> Loss: 7430377.5, CrossEntropy: 1.1642574071884155, Accuracy: 0.7986406380777099\n",
      "Iter 307 / 2000, Loss: 222145.1420236573, CrossEntropy: 0.0007852093549445271, Accuracy: 0.9999850143861893\n",
      "Elapsed time for the training: 58.24758696556091\n",
      "EVALUATION with 100 samples -> Loss: 7440943.0, CrossEntropy: 1.173093557357788, Accuracy: 0.7981219384956041\n",
      "Iter 308 / 2000, Loss: 222364.96867007672, CrossEntropy: 0.0007948487182147801, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.11207842826843\n",
      "EVALUATION with 100 samples -> Loss: 7435618.0, CrossEntropy: 1.1787134408950806, Accuracy: 0.798379120774794\n",
      "Iter 309 / 2000, Loss: 219210.33523817136, CrossEntropy: 0.0007532616145908833, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.19279932975769\n",
      "EVALUATION with 100 samples -> Loss: 7454508.0, CrossEntropy: 1.1785542964935303, Accuracy: 0.7970280356257081\n",
      "Iter 310 / 2000, Loss: 220369.6805466752, CrossEntropy: 0.0007783120963722467, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.1099271774292\n",
      "EVALUATION with 100 samples -> Loss: 7449682.0, CrossEntropy: 1.175026297569275, Accuracy: 0.7963173880311233\n",
      "Iter 311 / 2000, Loss: 218344.8879076087, CrossEntropy: 0.0007548092980869114, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.136263370513916\n",
      "EVALUATION with 100 samples -> Loss: 7460222.0, CrossEntropy: 1.1694973707199097, Accuracy: 0.7987758753425498\n",
      "Iter 312 / 2000, Loss: 214930.6336716752, CrossEntropy: 0.0007092341547831893, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.17117619514465\n",
      "EVALUATION with 100 samples -> Loss: 7420908.0, CrossEntropy: 1.1768648624420166, Accuracy: 0.7963577451596288\n",
      "Iter 313 / 2000, Loss: 217782.889745844, CrossEntropy: 0.0007601399556733668, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.10423827171326\n",
      "EVALUATION with 100 samples -> Loss: 7469421.5, CrossEntropy: 1.1753418445587158, Accuracy: 0.7973430043537417\n",
      "Iter 314 / 2000, Loss: 217221.899976023, CrossEntropy: 0.0007595689385198057, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.08236050605774\n",
      "EVALUATION with 100 samples -> Loss: 7460066.0, CrossEntropy: 1.1917402744293213, Accuracy: 0.7978751862535597\n",
      "Iter 315 / 2000, Loss: 215446.03272858055, CrossEntropy: 0.0007386686047539115, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.22307276725769\n",
      "EVALUATION with 100 samples -> Loss: 7461671.0, CrossEntropy: 1.1672155857086182, Accuracy: 0.7990973389355063\n",
      "Iter 316 / 2000, Loss: 214313.64578005116, CrossEntropy: 0.0007274564704857767, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.26942420005798\n",
      "EVALUATION with 100 samples -> Loss: 7483488.0, CrossEntropy: 1.183245062828064, Accuracy: 0.7988275000188693\n",
      "Iter 317 / 2000, Loss: 214057.53952205883, CrossEntropy: 0.0007308992207981646, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.13977861404419\n",
      "EVALUATION with 100 samples -> Loss: 7441730.0, CrossEntropy: 1.171978235244751, Accuracy: 0.7995245062625087\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter 318 / 2000, Loss: 214358.1204443734, CrossEntropy: 0.0007424322539009154, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.123645305633545\n",
      "EVALUATION with 100 samples -> Loss: 7478004.0, CrossEntropy: 1.1819796562194824, Accuracy: 0.7975246945322955\n",
      "Iter 319 / 2000, Loss: 215182.41564098466, CrossEntropy: 0.0007634739158675075, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.15389132499695\n",
      "EVALUATION with 100 samples -> Loss: 7448111.0, CrossEntropy: 1.1663265228271484, Accuracy: 0.7988525506450341\n",
      "Iter 320 / 2000, Loss: 215289.94653132992, CrossEntropy: 0.0007721134461462498, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.20931673049927\n",
      "EVALUATION with 100 samples -> Loss: 7475535.5, CrossEntropy: 1.1737321615219116, Accuracy: 0.7981567048487427\n",
      "Iter 321 / 2000, Loss: 212801.7940377238, CrossEntropy: 0.0007398377056233585, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.26417899131775\n",
      "EVALUATION with 100 samples -> Loss: 7496275.0, CrossEntropy: 1.1868207454681396, Accuracy: 0.7976563769938997\n",
      "Iter 322 / 2000, Loss: 210163.67563139385, CrossEntropy: 0.0007052939618006349, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.317551136016846\n",
      "EVALUATION with 100 samples -> Loss: 7493734.5, CrossEntropy: 1.1776196956634521, Accuracy: 0.7995801203909823\n",
      "Iter 323 / 2000, Loss: 212733.40664961637, CrossEntropy: 0.0007539729704149067, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.14554762840271\n",
      "EVALUATION with 100 samples -> Loss: 7515752.0, CrossEntropy: 1.1956011056900024, Accuracy: 0.7963272585635314\n",
      "Iter 324 / 2000, Loss: 210157.17966751917, CrossEntropy: 0.0007213009521365166, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.001187562942505\n",
      "EVALUATION with 100 samples -> Loss: 7553633.5, CrossEntropy: 1.191640019416809, Accuracy: 0.798356351148003\n",
      "Iter 325 / 2000, Loss: 211383.03768382352, CrossEntropy: 0.0007516740006394684, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.10138416290283\n",
      "EVALUATION with 100 samples -> Loss: 7508797.0, CrossEntropy: 1.186898112297058, Accuracy: 0.7983866723164179\n",
      "Iter 326 / 2000, Loss: 208855.48833120204, CrossEntropy: 0.0007129416917450726, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 58.117605447769165\n",
      "EVALUATION with 100 samples -> Loss: 7503827.5, CrossEntropy: 1.1840436458587646, Accuracy: 0.8000178721401985\n",
      "Iter 327 / 2000, Loss: 208276.639745844, CrossEntropy: 0.000712319859303534, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.050254821777344\n",
      "EVALUATION with 100 samples -> Loss: 7490876.0, CrossEntropy: 1.1752581596374512, Accuracy: 0.7979736505162693\n",
      "Iter 328 / 2000, Loss: 207668.29567615088, CrossEntropy: 0.0007096167537383735, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.126795053482056\n",
      "EVALUATION with 100 samples -> Loss: 7508031.5, CrossEntropy: 1.1859270334243774, Accuracy: 0.7972909579258135\n",
      "Iter 329 / 2000, Loss: 205968.97458439897, CrossEntropy: 0.0006892414530739188, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.95385432243347\n",
      "EVALUATION with 100 samples -> Loss: 7531070.5, CrossEntropy: 1.1808003187179565, Accuracy: 0.797318891588556\n",
      "Iter 330 / 2000, Loss: 208520.91711956522, CrossEntropy: 0.0007355354027822614, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.19778871536255\n",
      "EVALUATION with 100 samples -> Loss: 7520554.5, CrossEntropy: 1.1977909803390503, Accuracy: 0.7982533919741265\n",
      "Iter 331 / 2000, Loss: 206671.08000319693, CrossEntropy: 0.0007136217318475246, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.278642892837524\n",
      "EVALUATION with 100 samples -> Loss: 7519010.0, CrossEntropy: 1.175912618637085, Accuracy: 0.80004043735712\n",
      "Iter 332 / 2000, Loss: 204035.90349264705, CrossEntropy: 0.0006807614117860794, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.318105697631836\n",
      "EVALUATION with 100 samples -> Loss: 7538922.5, CrossEntropy: 1.2106988430023193, Accuracy: 0.7962036314399887\n",
      "Iter 333 / 2000, Loss: 203800.70620204604, CrossEntropy: 0.0006828638724982738, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.315929651260376\n",
      "EVALUATION with 100 samples -> Loss: 7533962.5, CrossEntropy: 1.1865777969360352, Accuracy: 0.7990577996711868\n",
      "Iter 334 / 2000, Loss: 204220.97746163682, CrossEntropy: 0.000696595583576709, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.16521430015564\n",
      "EVALUATION with 100 samples -> Loss: 7522216.0, CrossEntropy: 1.1989086866378784, Accuracy: 0.7988292574920048\n",
      "Iter 335 / 2000, Loss: 203232.13542998722, CrossEntropy: 0.0006872305530123413, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 58.32922911643982\n",
      "EVALUATION with 100 samples -> Loss: 7548812.5, CrossEntropy: 1.1917940378189087, Accuracy: 0.7972573445029055\n",
      "Iter 336 / 2000, Loss: 202367.1405051151, CrossEntropy: 0.0006803198484703898, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.173134565353394\n",
      "EVALUATION with 100 samples -> Loss: 7472687.0, CrossEntropy: 1.179094672203064, Accuracy: 0.7968250209769081\n",
      "Iter 337 / 2000, Loss: 200033.42942774936, CrossEntropy: 0.0006505741621367633, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.30399036407471\n",
      "EVALUATION with 100 samples -> Loss: 7540746.5, CrossEntropy: 1.1852582693099976, Accuracy: 0.7996404062771167\n",
      "Iter 338 / 2000, Loss: 203319.35350063938, CrossEntropy: 0.0007086668047122657, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.24384021759033\n",
      "EVALUATION with 100 samples -> Loss: 7578616.0, CrossEntropy: 1.1958107948303223, Accuracy: 0.7978832906534514\n",
      "Iter 339 / 2000, Loss: 200658.21043797955, CrossEntropy: 0.0006744649726897478, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.108015060424805\n",
      "EVALUATION with 100 samples -> Loss: 7549808.0, CrossEntropy: 1.197746753692627, Accuracy: 0.8000382470997142\n",
      "Iter 340 / 2000, Loss: 199780.664282289, CrossEntropy: 0.000668487511575222, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.176639556884766\n",
      "EVALUATION with 100 samples -> Loss: 7544534.0, CrossEntropy: 1.1860476732254028, Accuracy: 0.8002619726754011\n",
      "Iter 341 / 2000, Loss: 197525.5775655371, CrossEntropy: 0.0006379039841704071, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.137080669403076\n",
      "EVALUATION with 100 samples -> Loss: 7512697.5, CrossEntropy: 1.180473804473877, Accuracy: 0.7998543288447114\n",
      "Iter 342 / 2000, Loss: 197496.3477461637, CrossEntropy: 0.0006449812208302319, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.318257570266724\n",
      "EVALUATION with 100 samples -> Loss: 7541832.5, CrossEntropy: 1.1895463466644287, Accuracy: 0.7973313910325891\n",
      "Iter 343 / 2000, Loss: 194088.03768382352, CrossEntropy: 0.0005984042072668672, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.208598136901855\n",
      "EVALUATION with 100 samples -> Loss: 7546840.5, CrossEntropy: 1.180215835571289, Accuracy: 0.7996185175047323\n",
      "Iter 344 / 2000, Loss: 197395.86660805627, CrossEntropy: 0.0006562311900779605, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.07242679595947\n",
      "EVALUATION with 100 samples -> Loss: 7559088.0, CrossEntropy: 1.1854932308197021, Accuracy: 0.7994059973405686\n",
      "Iter 345 / 2000, Loss: 194607.77445652173, CrossEntropy: 0.00061960075981915, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.156429052352905\n",
      "EVALUATION with 100 samples -> Loss: 7511788.5, CrossEntropy: 1.1837338209152222, Accuracy: 0.7973140352415877\n",
      "Iter 346 / 2000, Loss: 195129.51990089513, CrossEntropy: 0.0006333880010060966, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.156062602996826\n",
      "EVALUATION with 100 samples -> Loss: 7569975.5, CrossEntropy: 1.1983044147491455, Accuracy: 0.7984522347523859\n",
      "Iter 347 / 2000, Loss: 195169.60465952684, CrossEntropy: 0.0006404381128959358, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.18549346923828\n",
      "EVALUATION with 100 samples -> Loss: 7570502.0, CrossEntropy: 1.191935658454895, Accuracy: 0.7979539674204733\n",
      "Iter 348 / 2000, Loss: 196224.672754156, CrossEntropy: 0.000663680664729327, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.05703830718994\n",
      "EVALUATION with 100 samples -> Loss: 7549979.0, CrossEntropy: 1.1972752809524536, Accuracy: 0.7963439289839267\n",
      "Iter 349 / 2000, Loss: 192451.65173433503, CrossEntropy: 0.0006123476778157055, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.126625299453735\n",
      "EVALUATION with 100 samples -> Loss: 7547441.5, CrossEntropy: 1.1975898742675781, Accuracy: 0.7979499168167544\n",
      "Iter 350 / 2000, Loss: 193835.9758232097, CrossEntropy: 0.0006399275735020638, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.115620613098145\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 7548865.0, CrossEntropy: 1.191843867301941, Accuracy: 0.7964525714088978\n",
      "Iter 351 / 2000, Loss: 193052.4448929028, CrossEntropy: 0.0006342227570712566, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.279136419296265\n",
      "EVALUATION with 100 samples -> Loss: 7637479.0, CrossEntropy: 1.2059650421142578, Accuracy: 0.7970451926885712\n",
      "Iter 352 / 2000, Loss: 191119.93757992328, CrossEntropy: 0.0006100808968767524, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.21473240852356\n",
      "EVALUATION with 100 samples -> Loss: 7592088.5, CrossEntropy: 1.2003753185272217, Accuracy: 0.7987856542068723\n",
      "Iter 353 / 2000, Loss: 193542.04311860615, CrossEntropy: 0.0006542772753164172, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.99283504486084\n",
      "EVALUATION with 100 samples -> Loss: 7626558.0, CrossEntropy: 1.1946972608566284, Accuracy: 0.7982129366175936\n",
      "Iter 354 / 2000, Loss: 191699.20340473147, CrossEntropy: 0.0006307719158940017, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.16055703163147\n",
      "EVALUATION with 100 samples -> Loss: 7635735.0, CrossEntropy: 1.2072471380233765, Accuracy: 0.7978452161920967\n",
      "Iter 355 / 2000, Loss: 189364.70556265986, CrossEntropy: 0.0006016406114213169, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.2147433757782\n",
      "EVALUATION with 100 samples -> Loss: 7601636.5, CrossEntropy: 1.212266206741333, Accuracy: 0.7969044644865991\n",
      "Iter 356 / 2000, Loss: 190554.4039721867, CrossEntropy: 0.0006262508686631918, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.16263484954834\n",
      "EVALUATION with 100 samples -> Loss: 7594322.0, CrossEntropy: 1.2009520530700684, Accuracy: 0.7989435415896383\n",
      "Iter 357 / 2000, Loss: 190794.00447570332, CrossEntropy: 0.0006361441919580102, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.21990942955017\n",
      "EVALUATION with 100 samples -> Loss: 7588878.0, CrossEntropy: 1.1880258321762085, Accuracy: 0.8000552654505524\n",
      "Iter 358 / 2000, Loss: 188036.68046675192, CrossEntropy: 0.0005994117236696184, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.07945418357849\n",
      "EVALUATION with 100 samples -> Loss: 7604763.5, CrossEntropy: 1.2057621479034424, Accuracy: 0.7979383468628424\n",
      "Iter 359 / 2000, Loss: 190089.77265824808, CrossEntropy: 0.0006377081735990942, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.185243368148804\n",
      "EVALUATION with 100 samples -> Loss: 7585775.0, CrossEntropy: 1.204647183418274, Accuracy: 0.7990291696515713\n",
      "Iter 360 / 2000, Loss: 187548.33927429668, CrossEntropy: 0.0006039230502210557, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.39147639274597\n",
      "EVALUATION with 100 samples -> Loss: 7616913.0, CrossEntropy: 1.197281002998352, Accuracy: 0.7979686610192812\n",
      "Iter 361 / 2000, Loss: 186774.26446611254, CrossEntropy: 0.0005986256874166429, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.238081216812134\n",
      "EVALUATION with 100 samples -> Loss: 7619232.0, CrossEntropy: 1.2051278352737427, Accuracy: 0.7994536351164172\n",
      "Iter 362 / 2000, Loss: 185650.4412963555, CrossEntropy: 0.0005869152955710888, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.509299993515015\n",
      "EVALUATION with 100 samples -> Loss: 7611532.5, CrossEntropy: 1.1895570755004883, Accuracy: 0.8011017056634832\n",
      "Iter 363 / 2000, Loss: 185457.98021898977, CrossEntropy: 0.0005912458873353899, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.98202061653137\n",
      "EVALUATION with 100 samples -> Loss: 7622341.0, CrossEntropy: 1.2222633361816406, Accuracy: 0.7976308982451593\n",
      "Iter 364 / 2000, Loss: 184149.46063778774, CrossEntropy: 0.0005772310541942716, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.029494524002075\n",
      "EVALUATION with 100 samples -> Loss: 7619833.0, CrossEntropy: 1.2179707288742065, Accuracy: 0.7993659781416609\n",
      "Iter 365 / 2000, Loss: 185025.29359814577, CrossEntropy: 0.0005959373083896935, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.181804180145264\n",
      "EVALUATION with 100 samples -> Loss: 7628008.5, CrossEntropy: 1.2021112442016602, Accuracy: 0.7978200699152371\n",
      "Iter 366 / 2000, Loss: 184701.9480099105, CrossEntropy: 0.0005975685198791325, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.136699199676514\n",
      "EVALUATION with 100 samples -> Loss: 7642874.5, CrossEntropy: 1.2106736898422241, Accuracy: 0.8003262995722892\n",
      "Iter 367 / 2000, Loss: 184288.95208599744, CrossEntropy: 0.0005968523910269141, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.22894263267517\n",
      "EVALUATION with 100 samples -> Loss: 7660153.5, CrossEntropy: 1.2196698188781738, Accuracy: 0.797894731998626\n",
      "Iter 368 / 2000, Loss: 182330.0920316496, CrossEntropy: 0.0005716752493754029, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.15236139297485\n",
      "EVALUATION with 100 samples -> Loss: 7658583.0, CrossEntropy: 1.2079670429229736, Accuracy: 0.7993891496487572\n",
      "Iter 369 / 2000, Loss: 181022.18342391305, CrossEntropy: 0.0005584352766163647, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.246357679367065\n",
      "EVALUATION with 100 samples -> Loss: 7640686.5, CrossEntropy: 1.1959542036056519, Accuracy: 0.7993698376285308\n",
      "Iter 370 / 2000, Loss: 180707.98889066497, CrossEntropy: 0.0005597632261924446, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.295578479766846\n",
      "EVALUATION with 100 samples -> Loss: 7679540.0, CrossEntropy: 1.2124946117401123, Accuracy: 0.7976743762811118\n",
      "Iter 371 / 2000, Loss: 180278.22214673914, CrossEntropy: 0.0005588791100308299, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.517788887023926\n",
      "EVALUATION with 100 samples -> Loss: 7643448.0, CrossEntropy: 1.208404302597046, Accuracy: 0.7987933082068546\n",
      "Iter 372 / 2000, Loss: 179142.07480818414, CrossEntropy: 0.0005470719188451767, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.351131200790405\n",
      "EVALUATION with 100 samples -> Loss: 7606757.5, CrossEntropy: 1.192168951034546, Accuracy: 0.7990017289703939\n",
      "Iter 373 / 2000, Loss: 183640.0466751918, CrossEntropy: 0.0006235803593881428, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.24475979804993\n",
      "EVALUATION with 100 samples -> Loss: 7631736.0, CrossEntropy: 1.20852530002594, Accuracy: 0.7982324984997156\n",
      "Iter 374 / 2000, Loss: 178829.5471147698, CrossEntropy: 0.0005530325579456985, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.21238708496094\n",
      "EVALUATION with 100 samples -> Loss: 7641926.5, CrossEntropy: 1.2034633159637451, Accuracy: 0.7987916677268151\n",
      "Iter 375 / 2000, Loss: 179414.15800831202, CrossEntropy: 0.0005684160278178751, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.30136322975159\n",
      "EVALUATION with 100 samples -> Loss: 7656576.0, CrossEntropy: 1.2061618566513062, Accuracy: 0.7992249892308457\n",
      "Iter 376 / 2000, Loss: 177254.96283567775, CrossEntropy: 0.0005416605272330344, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.15891623497009\n",
      "EVALUATION with 100 samples -> Loss: 7644226.0, CrossEntropy: 1.2147363424301147, Accuracy: 0.7958034432888369\n",
      "Iter 377 / 2000, Loss: 177887.65900735295, CrossEntropy: 0.0005579687422141433, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.38239288330078\n",
      "EVALUATION with 100 samples -> Loss: 7653303.5, CrossEntropy: 1.204054594039917, Accuracy: 0.7982158082441114\n",
      "Iter 378 / 2000, Loss: 178705.46803069054, CrossEntropy: 0.000576457183342427, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.05769610404968\n",
      "EVALUATION with 100 samples -> Loss: 7584862.0, CrossEntropy: 1.1901121139526367, Accuracy: 0.7978286237544137\n",
      "Iter 379 / 2000, Loss: 177546.7434862532, CrossEntropy: 0.0005648390506394207, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.046030044555664\n",
      "EVALUATION with 100 samples -> Loss: 7693021.5, CrossEntropy: 1.2112209796905518, Accuracy: 0.7990429305444541\n",
      "Iter 380 / 2000, Loss: 175930.85845588235, CrossEntropy: 0.0005436723004095256, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.23619318008423\n",
      "EVALUATION with 100 samples -> Loss: 7669651.0, CrossEntropy: 1.2194668054580688, Accuracy: 0.7978330583676588\n",
      "Iter 381 / 2000, Loss: 174652.9790601023, CrossEntropy: 0.000530109042301774, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.15314245223999\n",
      "EVALUATION with 100 samples -> Loss: 7662762.5, CrossEntropy: 1.2116179466247559, Accuracy: 0.7980816544508048\n",
      "Iter 382 / 2000, Loss: 174086.05478740408, CrossEntropy: 0.0005269359098747373, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.130770444869995\n",
      "EVALUATION with 100 samples -> Loss: 7689526.5, CrossEntropy: 1.213624358177185, Accuracy: 0.7979255085864827\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter 383 / 2000, Loss: 177020.68650095907, CrossEntropy: 0.000599674298427999, Accuracy: 0.9999640345268541\n",
      "Elapsed time for the training: 58.17192101478577\n",
      "EVALUATION with 100 samples -> Loss: 7694580.0, CrossEntropy: 1.2311947345733643, Accuracy: 0.7971613464183241\n",
      "Iter 384 / 2000, Loss: 174921.67063618926, CrossEntropy: 0.0005514701479114592, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.10175085067749\n",
      "EVALUATION with 100 samples -> Loss: 7682616.0, CrossEntropy: 1.2110334634780884, Accuracy: 0.7986387649485398\n",
      "Iter 385 / 2000, Loss: 173967.20436381074, CrossEntropy: 0.0005425168783403933, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.254260540008545\n",
      "EVALUATION with 100 samples -> Loss: 7711910.5, CrossEntropy: 1.2234604358673096, Accuracy: 0.7987628974023804\n",
      "Iter 386 / 2000, Loss: 172259.86149296674, CrossEntropy: 0.0005212571122683585, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.29725384712219\n",
      "EVALUATION with 100 samples -> Loss: 7667585.0, CrossEntropy: 1.2338191270828247, Accuracy: 0.7963770386981465\n",
      "Iter 387 / 2000, Loss: 173949.36097346546, CrossEntropy: 0.0005529345944523811, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.2172646522522\n",
      "EVALUATION with 100 samples -> Loss: 7670005.5, CrossEntropy: 1.203792929649353, Accuracy: 0.7976388660214284\n",
      "Iter 388 / 2000, Loss: 173340.1299952046, CrossEntropy: 0.0005499105900526047, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.610780000686646\n",
      "EVALUATION with 100 samples -> Loss: 7708499.5, CrossEntropy: 1.206925392150879, Accuracy: 0.8014164886168799\n",
      "Iter 389 / 2000, Loss: 173086.55594629157, CrossEntropy: 0.0005514220101758838, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.575992822647095\n",
      "EVALUATION with 100 samples -> Loss: 7711535.0, CrossEntropy: 1.2063885927200317, Accuracy: 0.799596220593294\n",
      "Iter 390 / 2000, Loss: 169934.47170716114, CrossEntropy: 0.0005081223789602518, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.54125428199768\n",
      "EVALUATION with 100 samples -> Loss: 7696548.5, CrossEntropy: 1.233299732208252, Accuracy: 0.7979944941565568\n",
      "Iter 391 / 2000, Loss: 169988.2792918798, CrossEntropy: 0.0005148515338078141, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.53362536430359\n",
      "EVALUATION with 100 samples -> Loss: 7715299.0, CrossEntropy: 1.2133830785751343, Accuracy: 0.7977140504163339\n",
      "Iter 392 / 2000, Loss: 169916.52125959078, CrossEntropy: 0.0005186772905290127, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.48322582244873\n",
      "EVALUATION with 100 samples -> Loss: 7720539.5, CrossEntropy: 1.218717098236084, Accuracy: 0.7962675700276038\n",
      "Iter 393 / 2000, Loss: 169557.17135549872, CrossEntropy: 0.0005185818299651146, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.4674174785614\n",
      "EVALUATION with 100 samples -> Loss: 7713862.5, CrossEntropy: 1.227407693862915, Accuracy: 0.7977131815962712\n",
      "Iter 394 / 2000, Loss: 170124.44820971866, CrossEntropy: 0.0005330704152584076, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.419803857803345\n",
      "EVALUATION with 100 samples -> Loss: 7727750.5, CrossEntropy: 1.215408205986023, Accuracy: 0.7980311388097285\n",
      "Iter 395 / 2000, Loss: 168398.25947090794, CrossEntropy: 0.0005120926653034985, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.316582679748535\n",
      "EVALUATION with 100 samples -> Loss: 7695200.5, CrossEntropy: 1.2322417497634888, Accuracy: 0.7977184676295906\n",
      "Iter 396 / 2000, Loss: 167875.19213554988, CrossEntropy: 0.0005094780353829265, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.432528257369995\n",
      "EVALUATION with 100 samples -> Loss: 7742788.0, CrossEntropy: 1.224155306816101, Accuracy: 0.7978212290779201\n",
      "Iter 397 / 2000, Loss: 168101.30582640666, CrossEntropy: 0.0005181831656955183, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.754711627960205\n",
      "EVALUATION with 100 samples -> Loss: 7684006.5, CrossEntropy: 1.2209548950195312, Accuracy: 0.7966922926906542\n",
      "Iter 398 / 2000, Loss: 166952.0282129156, CrossEntropy: 0.0005055932560935616, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.58202075958252\n",
      "EVALUATION with 100 samples -> Loss: 7692755.0, CrossEntropy: 1.2165331840515137, Accuracy: 0.7959719811649023\n",
      "Iter 399 / 2000, Loss: 167740.11712755755, CrossEntropy: 0.0005235581775195897, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.5227153301239\n",
      "EVALUATION with 100 samples -> Loss: 7730607.0, CrossEntropy: 1.214045524597168, Accuracy: 0.7989530392684565\n",
      "At iteration 400 we change the dropout rate from 0.1 to 0.2. \n",
      "Iter 400 / 2000, Loss: 166620.55470748083, CrossEntropy: 0.0005117349210195243, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.931644916534424\n",
      "EVALUATION with 100 samples -> Loss: 7748627.0, CrossEntropy: 1.2371450662612915, Accuracy: 0.7957551420992038\n",
      "Iter 401 / 2000, Loss: 165681.3998561381, CrossEntropy: 0.0005031528999097645, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.69741225242615\n",
      "EVALUATION with 100 samples -> Loss: 7717720.5, CrossEntropy: 1.2213096618652344, Accuracy: 0.796988057525318\n",
      "Iter 402 / 2000, Loss: 164778.86097346546, CrossEntropy: 0.0004940525977872312, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.53171181678772\n",
      "EVALUATION with 100 samples -> Loss: 7769798.5, CrossEntropy: 1.2308837175369263, Accuracy: 0.7962909529713605\n",
      "Iter 403 / 2000, Loss: 165115.75295716114, CrossEntropy: 0.0005062323180027306, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.40069103240967\n",
      "EVALUATION with 100 samples -> Loss: 7730847.0, CrossEntropy: 1.2255820035934448, Accuracy: 0.7985782787208915\n",
      "Iter 404 / 2000, Loss: 163462.8161764706, CrossEntropy: 0.00048439748934470117, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.40671157836914\n",
      "EVALUATION with 100 samples -> Loss: 7733446.5, CrossEntropy: 1.21903395652771, Accuracy: 0.8003676655720945\n",
      "Iter 405 / 2000, Loss: 163042.2636668798, CrossEntropy: 0.0004840378533117473, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.483882665634155\n",
      "EVALUATION with 100 samples -> Loss: 7737767.0, CrossEntropy: 1.2210780382156372, Accuracy: 0.797988079021005\n",
      "Iter 406 / 2000, Loss: 164208.50295716114, CrossEntropy: 0.0005065450677648187, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.536948919296265\n",
      "EVALUATION with 100 samples -> Loss: 7740739.5, CrossEntropy: 1.2166450023651123, Accuracy: 0.7990167458589682\n",
      "Iter 407 / 2000, Loss: 163457.42778932225, CrossEntropy: 0.0005004472914151847, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.514328956604004\n",
      "EVALUATION with 100 samples -> Loss: 7764264.0, CrossEntropy: 1.2161664962768555, Accuracy: 0.7987642664603417\n",
      "Iter 408 / 2000, Loss: 162258.16867806905, CrossEntropy: 0.0004868209653068334, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.46419954299927\n",
      "EVALUATION with 100 samples -> Loss: 7745762.0, CrossEntropy: 1.2174351215362549, Accuracy: 0.7971278400881522\n",
      "Iter 409 / 2000, Loss: 164373.17954763427, CrossEntropy: 0.0005305130616761744, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.34699511528015\n",
      "EVALUATION with 100 samples -> Loss: 7757908.5, CrossEntropy: 1.2293871641159058, Accuracy: 0.7979967712905848\n",
      "Iter 410 / 2000, Loss: 162623.2074408568, CrossEntropy: 0.0005048433085903525, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.468677043914795\n",
      "EVALUATION with 100 samples -> Loss: 7726631.0, CrossEntropy: 1.2248867750167847, Accuracy: 0.7984371544125547\n",
      "Iter 411 / 2000, Loss: 160393.08839514066, CrossEntropy: 0.0004740675212815404, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.380797147750854\n",
      "EVALUATION with 100 samples -> Loss: 7735779.0, CrossEntropy: 1.2151833772659302, Accuracy: 0.7988213086370721\n",
      "Iter 412 / 2000, Loss: 160154.7525975064, CrossEntropy: 0.0004754258261527866, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.58321785926819\n",
      "EVALUATION with 100 samples -> Loss: 7763286.0, CrossEntropy: 1.2331522703170776, Accuracy: 0.7956563906923675\n",
      "Iter 413 / 2000, Loss: 160952.85845588235, CrossEntropy: 0.0004928275011479855, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.55646729469299\n",
      "EVALUATION with 100 samples -> Loss: 7765241.0, CrossEntropy: 1.2300209999084473, Accuracy: 0.7982883891617318\n",
      "Iter 414 / 2000, Loss: 159836.04815377237, CrossEntropy: 0.0004806096258107573, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.57582330703735\n",
      "EVALUATION with 100 samples -> Loss: 7781961.0, CrossEntropy: 1.2329481840133667, Accuracy: 0.7988883318648602\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter 415 / 2000, Loss: 161183.56118126598, CrossEntropy: 0.0005066305748187006, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.59094500541687\n",
      "EVALUATION with 100 samples -> Loss: 7788822.0, CrossEntropy: 1.2190511226654053, Accuracy: 0.7995380910808989\n",
      "Iter 416 / 2000, Loss: 159608.21107736573, CrossEntropy: 0.0004879114276263863, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.499916553497314\n",
      "EVALUATION with 100 samples -> Loss: 7790361.5, CrossEntropy: 1.2266759872436523, Accuracy: 0.7989803884780499\n",
      "Iter 417 / 2000, Loss: 157828.52101982097, CrossEntropy: 0.0004651766794268042, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.3598198890686\n",
      "EVALUATION with 100 samples -> Loss: 7801354.5, CrossEntropy: 1.229993462562561, Accuracy: 0.7993501720124749\n",
      "Iter 418 / 2000, Loss: 158234.9174392583, CrossEntropy: 0.000476836139569059, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.3084876537323\n",
      "EVALUATION with 100 samples -> Loss: 7841577.5, CrossEntropy: 1.237719178199768, Accuracy: 0.7992308472087557\n",
      "Iter 419 / 2000, Loss: 156471.58891464194, CrossEntropy: 0.0004541142552625388, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.50312399864197\n",
      "EVALUATION with 100 samples -> Loss: 7819828.0, CrossEntropy: 1.2517858743667603, Accuracy: 0.7980910243211843\n",
      "Iter 420 / 2000, Loss: 155340.50127877237, CrossEntropy: 0.00044161107507534325, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.56193685531616\n",
      "EVALUATION with 100 samples -> Loss: 7789256.5, CrossEntropy: 1.2230463027954102, Accuracy: 0.7981793157866807\n",
      "Iter 421 / 2000, Loss: 157234.6454203964, CrossEntropy: 0.0004767274367623031, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.479652404785156\n",
      "EVALUATION with 100 samples -> Loss: 7826816.0, CrossEntropy: 1.2524631023406982, Accuracy: 0.7966843257647804\n",
      "Iter 422 / 2000, Loss: 155538.61992487212, CrossEntropy: 0.0004554886545520276, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.44097018241882\n",
      "EVALUATION with 100 samples -> Loss: 7804929.0, CrossEntropy: 1.2519567012786865, Accuracy: 0.796714707822966\n",
      "Iter 423 / 2000, Loss: 154846.90289322252, CrossEntropy: 0.00044924431131221354, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.477492809295654\n",
      "EVALUATION with 100 samples -> Loss: 7787605.5, CrossEntropy: 1.2363088130950928, Accuracy: 0.7976344227758895\n",
      "Iter 424 / 2000, Loss: 159886.35637787724, CrossEntropy: 0.0005341108771972358, Accuracy: 0.9999800191815856\n",
      "Elapsed time for the training: 59.46885299682617\n",
      "EVALUATION with 100 samples -> Loss: 7836268.0, CrossEntropy: 1.2545372247695923, Accuracy: 0.7967956350789536\n",
      "Iter 425 / 2000, Loss: 156999.01838235295, CrossEntropy: 0.00049291190225631, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.38352680206299\n",
      "EVALUATION with 100 samples -> Loss: 7819472.5, CrossEntropy: 1.2230972051620483, Accuracy: 0.797945900030466\n",
      "Iter 426 / 2000, Loss: 155381.975463555, CrossEntropy: 0.00047368137165904045, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.52450919151306\n",
      "EVALUATION with 100 samples -> Loss: 7843048.0, CrossEntropy: 1.234267234802246, Accuracy: 0.796824555541099\n",
      "Iter 427 / 2000, Loss: 153879.74356617648, CrossEntropy: 0.00045543850865215063, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.65042209625244\n",
      "EVALUATION with 100 samples -> Loss: 7803675.5, CrossEntropy: 1.223952054977417, Accuracy: 0.7966777474218955\n",
      "Iter 428 / 2000, Loss: 154519.8573369565, CrossEntropy: 0.0004699191194958985, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.544015407562256\n",
      "EVALUATION with 100 samples -> Loss: 7829653.5, CrossEntropy: 1.2447476387023926, Accuracy: 0.7968504086815362\n",
      "Iter 429 / 2000, Loss: 153650.2698609335, CrossEntropy: 0.000461652030935511, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.716628551483154\n",
      "EVALUATION with 100 samples -> Loss: 7801256.5, CrossEntropy: 1.2393605709075928, Accuracy: 0.7956276857702035\n",
      "Iter 430 / 2000, Loss: 153923.74484494884, CrossEntropy: 0.00047086644917726517, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.65262317657471\n",
      "EVALUATION with 100 samples -> Loss: 7830538.0, CrossEntropy: 1.2311700582504272, Accuracy: 0.7959484523115118\n",
      "Iter 431 / 2000, Loss: 152262.41312340152, CrossEntropy: 0.00044949701987206936, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.47882890701294\n",
      "EVALUATION with 100 samples -> Loss: 7796573.5, CrossEntropy: 1.2268190383911133, Accuracy: 0.797163741097502\n",
      "Iter 432 / 2000, Loss: 150661.40996643223, CrossEntropy: 0.00043040807940997183, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.31542158126831\n",
      "EVALUATION with 100 samples -> Loss: 7845728.5, CrossEntropy: 1.2486203908920288, Accuracy: 0.7972184087222933\n",
      "Iter 433 / 2000, Loss: 151881.00427589513, CrossEntropy: 0.00045351512380875647, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.27039575576782\n",
      "EVALUATION with 100 samples -> Loss: 7826153.5, CrossEntropy: 1.228469967842102, Accuracy: 0.7966314721182324\n",
      "Iter 434 / 2000, Loss: 151924.88067455243, CrossEntropy: 0.0004594993661157787, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.383079051971436\n",
      "EVALUATION with 100 samples -> Loss: 7852495.5, CrossEntropy: 1.2360788583755493, Accuracy: 0.7969658372609563\n",
      "Iter 435 / 2000, Loss: 150176.24212755755, CrossEntropy: 0.00043702556286007166, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.207667112350464\n",
      "EVALUATION with 100 samples -> Loss: 7818930.5, CrossEntropy: 1.2625503540039062, Accuracy: 0.7945979503828731\n",
      "Iter 436 / 2000, Loss: 150235.616528133, CrossEntropy: 0.0004420413461048156, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.437305212020874\n",
      "EVALUATION with 100 samples -> Loss: 7849971.0, CrossEntropy: 1.2439619302749634, Accuracy: 0.7984016104913437\n",
      "Iter 437 / 2000, Loss: 149644.15744884912, CrossEntropy: 0.0004445278027560562, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.279581785202026\n",
      "EVALUATION with 100 samples -> Loss: 7876806.5, CrossEntropy: 1.2337929010391235, Accuracy: 0.7989306339044975\n",
      "Iter 438 / 2000, Loss: 148356.28344789002, CrossEntropy: 0.0004231740313116461, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.37047624588013\n",
      "EVALUATION with 100 samples -> Loss: 7854121.5, CrossEntropy: 1.2345151901245117, Accuracy: 0.7976755279403156\n",
      "Iter 439 / 2000, Loss: 147754.22054827365, CrossEntropy: 0.0004177517257630825, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.361472845077515\n",
      "EVALUATION with 100 samples -> Loss: 7891610.0, CrossEntropy: 1.2506422996520996, Accuracy: 0.7967698467194269\n",
      "Iter 440 / 2000, Loss: 148103.68712036446, CrossEntropy: 0.0004281233123037964, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.388850688934326\n",
      "EVALUATION with 100 samples -> Loss: 7896851.5, CrossEntropy: 1.2596186399459839, Accuracy: 0.7961587544602957\n",
      "Iter 441 / 2000, Loss: 148449.5743286445, CrossEntropy: 0.000439107563579455, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.081979751586914\n",
      "EVALUATION with 100 samples -> Loss: 7841106.0, CrossEntropy: 1.2411894798278809, Accuracy: 0.7988297242648811\n",
      "Iter 442 / 2000, Loss: 146702.62268222508, CrossEntropy: 0.00041677342960610986, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.1587028503418\n",
      "EVALUATION with 100 samples -> Loss: 7864105.5, CrossEntropy: 1.261122465133667, Accuracy: 0.7966227759657504\n",
      "Iter 443 / 2000, Loss: 147140.69189578004, CrossEntropy: 0.0004286454932298511, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.21269202232361\n",
      "EVALUATION with 100 samples -> Loss: 7844956.5, CrossEntropy: 1.228940725326538, Accuracy: 0.8001813957744838\n",
      "Iter 444 / 2000, Loss: 147498.00919117648, CrossEntropy: 0.0004392639675643295, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.26029968261719\n",
      "EVALUATION with 100 samples -> Loss: 7881968.0, CrossEntropy: 1.2595466375350952, Accuracy: 0.7963047123149131\n",
      "Iter 445 / 2000, Loss: 145996.578784367, CrossEntropy: 0.00041997520020231605, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.28880786895752\n",
      "EVALUATION with 100 samples -> Loss: 7875077.0, CrossEntropy: 1.2324862480163574, Accuracy: 0.7990086417397612\n",
      "Iter 446 / 2000, Loss: 145805.1422434463, CrossEntropy: 0.00042129092616960406, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.22967576980591\n",
      "EVALUATION with 100 samples -> Loss: 7858197.5, CrossEntropy: 1.2379615306854248, Accuracy: 0.7983204901404293\n",
      "Iter 447 / 2000, Loss: 145933.40301310743, CrossEntropy: 0.00042796737398020923, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.20144772529602\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 7862343.5, CrossEntropy: 1.2461813688278198, Accuracy: 0.7965353072700729\n",
      "Iter 448 / 2000, Loss: 145378.9824168798, CrossEntropy: 0.0004243141447659582, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.21233367919922\n",
      "EVALUATION with 100 samples -> Loss: 7876883.0, CrossEntropy: 1.2366504669189453, Accuracy: 0.7977533811946567\n",
      "Iter 449 / 2000, Loss: 143579.87775735295, CrossEntropy: 0.0004013486031908542, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.1858549118042\n",
      "EVALUATION with 100 samples -> Loss: 7845569.0, CrossEntropy: 1.2261747121810913, Accuracy: 0.7994789120259638\n",
      "Iter 450 / 2000, Loss: 144368.63145380435, CrossEntropy: 0.0004180855758022517, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.162848234176636\n",
      "EVALUATION with 100 samples -> Loss: 7887362.0, CrossEntropy: 1.2407164573669434, Accuracy: 0.7979760339450528\n",
      "Iter 451 / 2000, Loss: 145010.023217711, CrossEntropy: 0.00043228562572039664, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.092278242111206\n",
      "EVALUATION with 100 samples -> Loss: 7880493.0, CrossEntropy: 1.250271201133728, Accuracy: 0.7966008127350753\n",
      "Iter 452 / 2000, Loss: 143184.85847586318, CrossEntropy: 0.00040945829823613167, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.07300066947937\n",
      "EVALUATION with 100 samples -> Loss: 7882950.5, CrossEntropy: 1.2438217401504517, Accuracy: 0.7969876116616412\n",
      "Iter 453 / 2000, Loss: 144297.03740409206, CrossEntropy: 0.0004310816584620625, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.38226127624512\n",
      "EVALUATION with 100 samples -> Loss: 7897771.0, CrossEntropy: 1.2394928932189941, Accuracy: 0.7976354637208126\n",
      "Iter 454 / 2000, Loss: 142812.64849744245, CrossEntropy: 0.0004132415633648634, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.3362250328064\n",
      "EVALUATION with 100 samples -> Loss: 7883407.5, CrossEntropy: 1.2481060028076172, Accuracy: 0.7961937699158215\n",
      "Iter 455 / 2000, Loss: 142217.98789162404, CrossEntropy: 0.0004094452306162566, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.39548325538635\n",
      "EVALUATION with 100 samples -> Loss: 7905088.0, CrossEntropy: 1.2432812452316284, Accuracy: 0.7976608577027952\n",
      "Iter 456 / 2000, Loss: 141083.13598945012, CrossEntropy: 0.00039449406904168427, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.374640703201294\n",
      "EVALUATION with 100 samples -> Loss: 7911865.5, CrossEntropy: 1.244098424911499, Accuracy: 0.7974279108989998\n",
      "Iter 457 / 2000, Loss: 141273.40579044117, CrossEntropy: 0.0004020855121780187, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.31438899040222\n",
      "EVALUATION with 100 samples -> Loss: 7914402.0, CrossEntropy: 1.2548760175704956, Accuracy: 0.7968165277379428\n",
      "Iter 458 / 2000, Loss: 141818.42701007033, CrossEntropy: 0.0004154935304541141, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.249157667160034\n",
      "EVALUATION with 100 samples -> Loss: 7939806.0, CrossEntropy: 1.2418862581253052, Accuracy: 0.8001385401714398\n",
      "Iter 459 / 2000, Loss: 140329.38185342072, CrossEntropy: 0.000397720024921, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.10357356071472\n",
      "EVALUATION with 100 samples -> Loss: 7876484.0, CrossEntropy: 1.23358154296875, Accuracy: 0.8006022185792178\n",
      "Iter 460 / 2000, Loss: 140766.73465473147, CrossEntropy: 0.00040850459481589496, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.97063422203064\n",
      "EVALUATION with 100 samples -> Loss: 7913614.0, CrossEntropy: 1.2600901126861572, Accuracy: 0.798688536095796\n",
      "Iter 461 / 2000, Loss: 140207.13674872124, CrossEntropy: 0.00040538242319598794, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.134485483169556\n",
      "EVALUATION with 100 samples -> Loss: 7905623.5, CrossEntropy: 1.2371726036071777, Accuracy: 0.798502498343472\n",
      "Iter 462 / 2000, Loss: 137654.2355538683, CrossEntropy: 0.00036928930785506964, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.23810815811157\n",
      "EVALUATION with 100 samples -> Loss: 7915431.0, CrossEntropy: 1.2588050365447998, Accuracy: 0.7982474016679938\n",
      "Iter 463 / 2000, Loss: 139806.79917279413, CrossEntropy: 0.00040804489981383085, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.40446758270264\n",
      "EVALUATION with 100 samples -> Loss: 7884691.0, CrossEntropy: 1.2439082860946655, Accuracy: 0.798590209494674\n",
      "Iter 464 / 2000, Loss: 139197.2445851982, CrossEntropy: 0.00040345548768527806, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.318073987960815\n",
      "EVALUATION with 100 samples -> Loss: 7952107.0, CrossEntropy: 1.246572494506836, Accuracy: 0.8004541689867642\n",
      "Iter 465 / 2000, Loss: 138878.04677509592, CrossEntropy: 0.0004013956931885332, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.27879595756531\n",
      "EVALUATION with 100 samples -> Loss: 7949337.5, CrossEntropy: 1.2450741529464722, Accuracy: 0.798701256211863\n",
      "Iter 466 / 2000, Loss: 138569.2203684463, CrossEntropy: 0.0004028616240248084, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.29503631591797\n",
      "EVALUATION with 100 samples -> Loss: 7900674.5, CrossEntropy: 1.2491284608840942, Accuracy: 0.7976008930850553\n",
      "Iter 467 / 2000, Loss: 139029.6371683184, CrossEntropy: 0.0004126064304728061, Accuracy: 0.999987511988491\n",
      "Elapsed time for the training: 59.22023367881775\n",
      "EVALUATION with 100 samples -> Loss: 7878304.0, CrossEntropy: 1.2391501665115356, Accuracy: 0.7986631318892511\n",
      "Iter 468 / 2000, Loss: 135859.5198609335, CrossEntropy: 0.00036921052378602326, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.102635622024536\n",
      "EVALUATION with 100 samples -> Loss: 7908115.0, CrossEntropy: 1.241877555847168, Accuracy: 0.7985520943118581\n",
      "Iter 469 / 2000, Loss: 138389.69311460998, CrossEntropy: 0.00041256158147007227, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.15185284614563\n",
      "EVALUATION with 100 samples -> Loss: 7936304.0, CrossEntropy: 1.2730460166931152, Accuracy: 0.7965665729640371\n",
      "Iter 470 / 2000, Loss: 137141.44968829924, CrossEntropy: 0.0003969369863625616, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.22589993476868\n",
      "EVALUATION with 100 samples -> Loss: 7906438.5, CrossEntropy: 1.246019721031189, Accuracy: 0.7982981194802913\n",
      "Iter 471 / 2000, Loss: 135927.08523817136, CrossEntropy: 0.0003830165369436145, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.384721755981445\n",
      "EVALUATION with 100 samples -> Loss: 7917463.5, CrossEntropy: 1.2453340291976929, Accuracy: 0.7972404246262357\n",
      "Iter 472 / 2000, Loss: 135214.2302589514, CrossEntropy: 0.00037586106918752193, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.28165936470032\n",
      "EVALUATION with 100 samples -> Loss: 7926527.5, CrossEntropy: 1.2549757957458496, Accuracy: 0.7968439578411193\n",
      "Iter 473 / 2000, Loss: 135246.96389466114, CrossEntropy: 0.00038077501812949777, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.22111129760742\n",
      "EVALUATION with 100 samples -> Loss: 7912946.5, CrossEntropy: 1.2522145509719849, Accuracy: 0.7981823229358944\n",
      "Iter 474 / 2000, Loss: 133995.6080762468, CrossEntropy: 0.00036545871989801526, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.17842221260071\n",
      "EVALUATION with 100 samples -> Loss: 7929418.5, CrossEntropy: 1.255483627319336, Accuracy: 0.7982288445938911\n",
      "Iter 475 / 2000, Loss: 134359.43757992328, CrossEntropy: 0.00037621668889187276, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.293086767196655\n",
      "EVALUATION with 100 samples -> Loss: 7955914.5, CrossEntropy: 1.2489237785339355, Accuracy: 0.79831097977021\n",
      "Iter 476 / 2000, Loss: 134426.62883631713, CrossEntropy: 0.00038161384873092175, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.09431552886963\n",
      "EVALUATION with 100 samples -> Loss: 7944576.0, CrossEntropy: 1.2460808753967285, Accuracy: 0.797828236754787\n",
      "Iter 477 / 2000, Loss: 134508.70716112532, CrossEntropy: 0.00038707340718246996, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.001254081726074\n",
      "EVALUATION with 100 samples -> Loss: 7927127.5, CrossEntropy: 1.2487390041351318, Accuracy: 0.7973403196639581\n",
      "Iter 478 / 2000, Loss: 133111.35042359334, CrossEntropy: 0.00036934204399585724, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.03190851211548\n",
      "EVALUATION with 100 samples -> Loss: 7961575.0, CrossEntropy: 1.260349988937378, Accuracy: 0.7971400716102579\n",
      "Iter 479 / 2000, Loss: 133775.95220588235, CrossEntropy: 0.00038426832179538906, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.27533292770386\n",
      "EVALUATION with 100 samples -> Loss: 7965242.5, CrossEntropy: 1.2555395364761353, Accuracy: 0.799066381032474\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter 480 / 2000, Loss: 131376.8119405371, CrossEntropy: 0.0003507567453198135, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.21493649482727\n",
      "EVALUATION with 100 samples -> Loss: 7963326.5, CrossEntropy: 1.249625325202942, Accuracy: 0.7993017398958918\n",
      "Iter 481 / 2000, Loss: 131997.6008032289, CrossEntropy: 0.000365202606189996, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.36594867706299\n",
      "EVALUATION with 100 samples -> Loss: 8010010.0, CrossEntropy: 1.2618564367294312, Accuracy: 0.7972316092446114\n",
      "Iter 482 / 2000, Loss: 131477.15758871482, CrossEntropy: 0.0003608937840908766, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.26608061790466\n",
      "EVALUATION with 100 samples -> Loss: 7974235.5, CrossEntropy: 1.254837155342102, Accuracy: 0.7976632374056306\n",
      "Iter 483 / 2000, Loss: 130115.61457001278, CrossEntropy: 0.0003439064894337207, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.277138471603394\n",
      "EVALUATION with 100 samples -> Loss: 7981676.0, CrossEntropy: 1.2528438568115234, Accuracy: 0.7988465176012406\n",
      "Iter 484 / 2000, Loss: 131927.3266064578, CrossEntropy: 0.0003766861336771399, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.306347370147705\n",
      "EVALUATION with 100 samples -> Loss: 7978582.0, CrossEntropy: 1.2561601400375366, Accuracy: 0.7986435755337299\n",
      "Iter 485 / 2000, Loss: 130655.99556425831, CrossEntropy: 0.000361411803169176, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.165693521499634\n",
      "EVALUATION with 100 samples -> Loss: 7972451.0, CrossEntropy: 1.2756245136260986, Accuracy: 0.7962943587814324\n",
      "Iter 486 / 2000, Loss: 130087.42103580563, CrossEntropy: 0.0003562142373993993, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.99586749076843\n",
      "EVALUATION with 100 samples -> Loss: 7951278.5, CrossEntropy: 1.2528656721115112, Accuracy: 0.7965663431167403\n",
      "Iter 487 / 2000, Loss: 130797.44395380435, CrossEntropy: 0.00037252274341881275, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.042078256607056\n",
      "EVALUATION with 100 samples -> Loss: 8025699.0, CrossEntropy: 1.260027527809143, Accuracy: 0.7998753753347313\n",
      "Iter 488 / 2000, Loss: 129732.86472985933, CrossEntropy: 0.0003590963315218687, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.17947959899902\n",
      "EVALUATION with 100 samples -> Loss: 7982841.0, CrossEntropy: 1.2608685493469238, Accuracy: 0.7994174071822691\n",
      "Iter 489 / 2000, Loss: 129209.03204923273, CrossEntropy: 0.0003556168812792748, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.095404624938965\n",
      "EVALUATION with 100 samples -> Loss: 8021472.0, CrossEntropy: 1.2626718282699585, Accuracy: 0.7995355376760128\n",
      "Iter 490 / 2000, Loss: 128876.33068254475, CrossEntropy: 0.0003542219055816531, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.08402228355408\n",
      "EVALUATION with 100 samples -> Loss: 7966765.5, CrossEntropy: 1.2575627565383911, Accuracy: 0.7976229743423147\n",
      "Iter 491 / 2000, Loss: 127774.210717711, CrossEntropy: 0.00034189235884696245, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.15900945663452\n",
      "EVALUATION with 100 samples -> Loss: 7973589.5, CrossEntropy: 1.2512048482894897, Accuracy: 0.7976109564412099\n",
      "Iter 492 / 2000, Loss: 129592.38241288363, CrossEntropy: 0.0003743508132174611, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.227543354034424\n",
      "EVALUATION with 100 samples -> Loss: 7975328.0, CrossEntropy: 1.259442925453186, Accuracy: 0.796929975713637\n",
      "Iter 493 / 2000, Loss: 128654.609594789, CrossEntropy: 0.00036414930946193635, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.027591943740845\n",
      "EVALUATION with 100 samples -> Loss: 7981314.0, CrossEntropy: 1.2700610160827637, Accuracy: 0.7959644047798994\n",
      "Iter 494 / 2000, Loss: 127671.6429028133, CrossEntropy: 0.0003524110943544656, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.02911305427551\n",
      "EVALUATION with 100 samples -> Loss: 7971671.5, CrossEntropy: 1.2593685388565063, Accuracy: 0.7973298202029394\n",
      "Iter 495 / 2000, Loss: 127859.34598785166, CrossEntropy: 0.0003600548079703003, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.11350607872009\n",
      "EVALUATION with 100 samples -> Loss: 8009471.5, CrossEntropy: 1.2524484395980835, Accuracy: 0.798460834735348\n",
      "Iter 496 / 2000, Loss: 126957.52709398977, CrossEntropy: 0.0003497807483654469, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.35684537887573\n",
      "EVALUATION with 100 samples -> Loss: 8017939.0, CrossEntropy: 1.2633713483810425, Accuracy: 0.7988609346945239\n",
      "Iter 497 / 2000, Loss: 128282.80021179667, CrossEntropy: 0.00037521644844673574, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.37344288825989\n",
      "EVALUATION with 100 samples -> Loss: 8046108.5, CrossEntropy: 1.2710583209991455, Accuracy: 0.7969951013565343\n",
      "Iter 498 / 2000, Loss: 127189.5454563619, CrossEntropy: 0.00036151192034594715, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.24207639694214\n",
      "EVALUATION with 100 samples -> Loss: 7988719.0, CrossEntropy: 1.2674025297164917, Accuracy: 0.7983507710218435\n",
      "Iter 499 / 2000, Loss: 126388.23599344629, CrossEntropy: 0.000353015901055187, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.3261022567749\n",
      "EVALUATION with 100 samples -> Loss: 8023141.5, CrossEntropy: 1.2784404754638672, Accuracy: 0.7949199671544182\n",
      "At iteration 500 we change the dropout rate from 0.1 to 0.2. \n",
      "Iter 500 / 2000, Loss: 126563.12214274297, CrossEntropy: 0.00036043001455254853, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.223997354507446\n",
      "EVALUATION with 100 samples -> Loss: 8018750.5, CrossEntropy: 1.2723047733306885, Accuracy: 0.7958839464101586\n",
      "Iter 501 / 2000, Loss: 126527.80656569693, CrossEntropy: 0.00036297785118222237, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.22900128364563\n",
      "EVALUATION with 100 samples -> Loss: 8027751.0, CrossEntropy: 1.2615563869476318, Accuracy: 0.7979183010421379\n",
      "Iter 502 / 2000, Loss: 125349.12899616368, CrossEntropy: 0.0003513614647090435, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.998058795928955\n",
      "EVALUATION with 100 samples -> Loss: 8007858.5, CrossEntropy: 1.2811455726623535, Accuracy: 0.7959665309676094\n",
      "Iter 503 / 2000, Loss: 124720.90589034527, CrossEntropy: 0.00034279018291272223, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.922971963882446\n",
      "EVALUATION with 100 samples -> Loss: 8007640.5, CrossEntropy: 1.2666957378387451, Accuracy: 0.7983145961108088\n",
      "Iter 504 / 2000, Loss: 124409.64242327366, CrossEntropy: 0.0003426690527703613, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.95290422439575\n",
      "EVALUATION with 100 samples -> Loss: 8036752.5, CrossEntropy: 1.2627466917037964, Accuracy: 0.7966046404226094\n",
      "Iter 505 / 2000, Loss: 123827.51052989131, CrossEntropy: 0.00033745463588275015, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.24248552322388\n",
      "EVALUATION with 100 samples -> Loss: 8042122.5, CrossEntropy: 1.2837921380996704, Accuracy: 0.7955932073820673\n",
      "Iter 506 / 2000, Loss: 124219.38363171356, CrossEntropy: 0.0003477941791061312, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.124744176864624\n",
      "EVALUATION with 100 samples -> Loss: 8010751.5, CrossEntropy: 1.2559996843338013, Accuracy: 0.7989381685094304\n",
      "Iter 507 / 2000, Loss: 124869.52699408568, CrossEntropy: 0.00036188994999974966, Accuracy: 1.0\n",
      "Elapsed time for the training: 59.19202208518982\n",
      "EVALUATION with 100 samples -> Loss: 8043373.5, CrossEntropy: 1.2619671821594238, Accuracy: 0.7979856688259918\n",
      "Iter 508 / 2000, Loss: 122559.53268861893, CrossEntropy: 0.0003295386559329927, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.91727066040039\n",
      "EVALUATION with 100 samples -> Loss: 7985870.0, CrossEntropy: 1.2575143575668335, Accuracy: 0.7981525474133784\n",
      "Iter 509 / 2000, Loss: 124247.5203005115, CrossEntropy: 0.0003605677338782698, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.79459095001221\n",
      "EVALUATION with 100 samples -> Loss: 8019150.0, CrossEntropy: 1.2647706270217896, Accuracy: 0.7963893694390478\n",
      "Iter 510 / 2000, Loss: 122958.74698289642, CrossEntropy: 0.00034435055567882955, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.56472587585449\n",
      "EVALUATION with 100 samples -> Loss: 7989648.5, CrossEntropy: 1.2666658163070679, Accuracy: 0.7973046531111899\n",
      "Iter 511 / 2000, Loss: 122096.7882033248, CrossEntropy: 0.00033427184098400176, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.58813953399658\n",
      "EVALUATION with 100 samples -> Loss: 8024157.5, CrossEntropy: 1.2543753385543823, Accuracy: 0.7998877473226527\n",
      "Iter 512 / 2000, Loss: 122164.48703244884, CrossEntropy: 0.00033891567727550864, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.518797159194946\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 8038290.0, CrossEntropy: 1.2732055187225342, Accuracy: 0.7956023546985053\n",
      "Iter 513 / 2000, Loss: 122123.63391144501, CrossEntropy: 0.00034310430055484176, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.487892150878906\n",
      "EVALUATION with 100 samples -> Loss: 8025660.0, CrossEntropy: 1.2568135261535645, Accuracy: 0.7985064543887654\n",
      "Iter 514 / 2000, Loss: 121035.59215153453, CrossEntropy: 0.00033000443363562226, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.613956928253174\n",
      "EVALUATION with 100 samples -> Loss: 8065686.0, CrossEntropy: 1.2825452089309692, Accuracy: 0.7977755033141667\n",
      "Iter 515 / 2000, Loss: 120987.09143222506, CrossEntropy: 0.00033220447949133813, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.61510968208313\n",
      "EVALUATION with 100 samples -> Loss: 8053219.0, CrossEntropy: 1.2721410989761353, Accuracy: 0.7975737819849374\n",
      "Iter 516 / 2000, Loss: 121083.52843270461, CrossEntropy: 0.0003377848188392818, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.46140742301941\n",
      "EVALUATION with 100 samples -> Loss: 8053245.0, CrossEntropy: 1.264546513557434, Accuracy: 0.7978213622104423\n",
      "Iter 517 / 2000, Loss: 120864.63808743606, CrossEntropy: 0.0003388836921658367, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.42098689079285\n",
      "EVALUATION with 100 samples -> Loss: 8061770.5, CrossEntropy: 1.2645787000656128, Accuracy: 0.7987035525611311\n",
      "Iter 518 / 2000, Loss: 121335.37837675831, CrossEntropy: 0.00035094207851216197, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.426366090774536\n",
      "EVALUATION with 100 samples -> Loss: 8051617.5, CrossEntropy: 1.261301040649414, Accuracy: 0.7977994259015949\n",
      "Iter 519 / 2000, Loss: 121043.14583999361, CrossEntropy: 0.00034947870881296694, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.42525362968445\n",
      "EVALUATION with 100 samples -> Loss: 8072384.0, CrossEntropy: 1.266387939453125, Accuracy: 0.7962627192201082\n",
      "Iter 520 / 2000, Loss: 119170.85855578644, CrossEntropy: 0.0003236753982491791, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.495816707611084\n",
      "EVALUATION with 100 samples -> Loss: 8071921.5, CrossEntropy: 1.2767014503479004, Accuracy: 0.7960370156451745\n",
      "Iter 521 / 2000, Loss: 119062.30528692456, CrossEntropy: 0.0003267071151640266, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.403287172317505\n",
      "EVALUATION with 100 samples -> Loss: 8043286.0, CrossEntropy: 1.2684913873672485, Accuracy: 0.7973163060937737\n",
      "Iter 522 / 2000, Loss: 119291.20847985933, CrossEntropy: 0.000333791715092957, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.459370136260986\n",
      "EVALUATION with 100 samples -> Loss: 8084354.5, CrossEntropy: 1.269352912902832, Accuracy: 0.7972006719833934\n",
      "Iter 523 / 2000, Loss: 117890.61141304347, CrossEntropy: 0.00031588267302140594, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.51568150520325\n",
      "EVALUATION with 100 samples -> Loss: 8063331.0, CrossEntropy: 1.2856439352035522, Accuracy: 0.7962691638843783\n",
      "Iter 524 / 2000, Loss: 117518.90716911765, CrossEntropy: 0.0003142823406960815, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.449939489364624\n",
      "EVALUATION with 100 samples -> Loss: 8096532.5, CrossEntropy: 1.2710886001586914, Accuracy: 0.7988298112728288\n",
      "Iter 525 / 2000, Loss: 117920.0951686381, CrossEntropy: 0.000323424901580438, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.44137978553772\n",
      "EVALUATION with 100 samples -> Loss: 8070921.0, CrossEntropy: 1.3053860664367676, Accuracy: 0.7956806890282502\n",
      "Iter 526 / 2000, Loss: 118471.79565617007, CrossEntropy: 0.0003360779955983162, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.409323930740356\n",
      "EVALUATION with 100 samples -> Loss: 8081516.0, CrossEntropy: 1.2782737016677856, Accuracy: 0.7960699816387027\n",
      "Iter 527 / 2000, Loss: 117645.07866448209, CrossEntropy: 0.000327275920426473, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.50858402252197\n",
      "EVALUATION with 100 samples -> Loss: 8090600.5, CrossEntropy: 1.2805147171020508, Accuracy: 0.7967955485089767\n",
      "Iter 528 / 2000, Loss: 116886.88265265345, CrossEntropy: 0.00031852806569077075, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.526647090911865\n",
      "EVALUATION with 100 samples -> Loss: 8074950.5, CrossEntropy: 1.2635432481765747, Accuracy: 0.7969331294323252\n",
      "Iter 529 / 2000, Loss: 116484.3026294757, CrossEntropy: 0.00031591110746376216, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.52719044685364\n",
      "EVALUATION with 100 samples -> Loss: 8076617.0, CrossEntropy: 1.2778918743133545, Accuracy: 0.7983275547465778\n",
      "Iter 530 / 2000, Loss: 114504.49418558185, CrossEntropy: 0.00028907530941069126, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.51448369026184\n",
      "EVALUATION with 100 samples -> Loss: 8063627.5, CrossEntropy: 1.2749600410461426, Accuracy: 0.796374400818209\n",
      "Iter 531 / 2000, Loss: 114976.03886269181, CrossEntropy: 0.0003006019105669111, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.66475868225098\n",
      "EVALUATION with 100 samples -> Loss: 8088302.5, CrossEntropy: 1.272389531135559, Accuracy: 0.7973885553651713\n",
      "Iter 532 / 2000, Loss: 115581.06158088235, CrossEntropy: 0.00031356647377833724, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.508657693862915\n",
      "EVALUATION with 100 samples -> Loss: 8087344.0, CrossEntropy: 1.2810947895050049, Accuracy: 0.7964357208326511\n",
      "Iter 533 / 2000, Loss: 116085.70270540281, CrossEntropy: 0.0003251435700803995, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.49463415145874\n",
      "EVALUATION with 100 samples -> Loss: 8097883.5, CrossEntropy: 1.2928465604782104, Accuracy: 0.7948571479399049\n",
      "Iter 534 / 2000, Loss: 115323.06905370844, CrossEntropy: 0.00031712252530269325, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.4918315410614\n",
      "EVALUATION with 100 samples -> Loss: 8097134.5, CrossEntropy: 1.2886711359024048, Accuracy: 0.7940960113984668\n",
      "Iter 535 / 2000, Loss: 114275.04297874041, CrossEntropy: 0.00030454641091637313, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.55250024795532\n",
      "EVALUATION with 100 samples -> Loss: 8095551.5, CrossEntropy: 1.2819126844406128, Accuracy: 0.796513396867582\n",
      "Iter 536 / 2000, Loss: 115040.53858296036, CrossEntropy: 0.00032050738809630275, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.5696496963501\n",
      "EVALUATION with 100 samples -> Loss: 8141789.5, CrossEntropy: 1.2846590280532837, Accuracy: 0.7961421814553943\n",
      "Iter 537 / 2000, Loss: 114589.99346627238, CrossEntropy: 0.00031663323170505464, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.75987482070923\n",
      "EVALUATION with 100 samples -> Loss: 8090784.0, CrossEntropy: 1.2725528478622437, Accuracy: 0.7950418691379714\n",
      "Iter 538 / 2000, Loss: 112933.74730258952, CrossEntropy: 0.0002950143243651837, Accuracy: 1.0\n",
      "Elapsed time for the training: 55.99039649963379\n",
      "EVALUATION with 100 samples -> Loss: 8126246.5, CrossEntropy: 1.2874729633331299, Accuracy: 0.7937759388522313\n",
      "Iter 539 / 2000, Loss: 114647.3169557225, CrossEntropy: 0.0003253282338846475, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.005249977111816\n",
      "EVALUATION with 100 samples -> Loss: 8120986.0, CrossEntropy: 1.3043153285980225, Accuracy: 0.7963079466163363\n",
      "Iter 540 / 2000, Loss: 113400.33296035805, CrossEntropy: 0.0003091873659286648, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.17170166969299\n",
      "EVALUATION with 100 samples -> Loss: 8119737.5, CrossEntropy: 1.268730878829956, Accuracy: 0.7984315074920569\n",
      "Iter 541 / 2000, Loss: 112112.88702845268, CrossEntropy: 0.0002926039742305875, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.261736154556274\n",
      "EVALUATION with 100 samples -> Loss: 8113143.5, CrossEntropy: 1.2744779586791992, Accuracy: 0.7973987501578574\n",
      "Iter 542 / 2000, Loss: 113196.78328804347, CrossEntropy: 0.0003139865875709802, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.236154556274414\n",
      "EVALUATION with 100 samples -> Loss: 8087841.5, CrossEntropy: 1.2784202098846436, Accuracy: 0.7976898868665976\n",
      "Iter 543 / 2000, Loss: 113729.95156649617, CrossEntropy: 0.00032537602237425745, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.198328256607056\n",
      "EVALUATION with 100 samples -> Loss: 8103451.0, CrossEntropy: 1.277816891670227, Accuracy: 0.799029139679417\n",
      "Iter 544 / 2000, Loss: 111313.97360533888, CrossEntropy: 0.0002915644727181643, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.17784380912781\n",
      "EVALUATION with 100 samples -> Loss: 8094905.0, CrossEntropy: 1.2853705883026123, Accuracy: 0.7976210718731036\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter 545 / 2000, Loss: 111937.50907129156, CrossEntropy: 0.00030498410342261195, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.18852376937866\n",
      "EVALUATION with 100 samples -> Loss: 8142090.5, CrossEntropy: 1.2961968183517456, Accuracy: 0.7936140983952117\n",
      "Iter 546 / 2000, Loss: 111157.805127078, CrossEntropy: 0.00029642609297297895, Accuracy: 1.0\n",
      "Elapsed time for the training: 56.2428617477417\n",
      "EVALUATION with 100 samples -> Loss: 8091014.0, CrossEntropy: 1.2894670963287354, Accuracy: 0.796370526522305\n",
      "Iter 547 / 2000, Loss: 111826.31719549233, CrossEntropy: 0.0003106690710410476, Accuracy: 0.9999950047953964\n",
      "Elapsed time for the training: 56.213802099227905\n",
      "EVALUATION with 100 samples -> Loss: 8100388.5, CrossEntropy: 1.276473879814148, Accuracy: 0.7975995760538032\n",
      "Iter 548 / 2000, Loss: 111865.60681745525, CrossEntropy: 0.00031461816979572177, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.33604431152344\n",
      "EVALUATION with 100 samples -> Loss: 8113571.0, CrossEntropy: 1.2859653234481812, Accuracy: 0.7960248287781816\n",
      "Iter 549 / 2000, Loss: 110537.328564578, CrossEntropy: 0.000297547405352816, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.53710961341858\n",
      "EVALUATION with 100 samples -> Loss: 8112407.5, CrossEntropy: 1.2897077798843384, Accuracy: 0.7962234075367447\n",
      "Iter 550 / 2000, Loss: 110525.06745524297, CrossEntropy: 0.0003005205071531236, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.519404888153076\n",
      "EVALUATION with 100 samples -> Loss: 8109660.5, CrossEntropy: 1.2752454280853271, Accuracy: 0.7972122051213651\n",
      "Iter 551 / 2000, Loss: 109501.40964673914, CrossEntropy: 0.00028817044221796095, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.420602321624756\n",
      "EVALUATION with 100 samples -> Loss: 8094866.0, CrossEntropy: 1.2771121263504028, Accuracy: 0.7978106751388463\n",
      "Iter 552 / 2000, Loss: 110016.56425831202, CrossEntropy: 0.00029997280216775835, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.5700364112854\n",
      "EVALUATION with 100 samples -> Loss: 8114515.5, CrossEntropy: 1.27596914768219, Accuracy: 0.7963616334842245\n",
      "Iter 553 / 2000, Loss: 111132.03360773658, CrossEntropy: 0.0003209465357940644, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.57417130470276\n",
      "EVALUATION with 100 samples -> Loss: 8126415.0, CrossEntropy: 1.2788763046264648, Accuracy: 0.7970128353318364\n",
      "Iter 554 / 2000, Loss: 109476.9074488491, CrossEntropy: 0.00029854694730602205, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.74585247039795\n",
      "EVALUATION with 100 samples -> Loss: 8145996.5, CrossEntropy: 1.286922574043274, Accuracy: 0.7966265163008198\n",
      "Iter 555 / 2000, Loss: 108588.08787563938, CrossEntropy: 0.00028886168729513884, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.72805452346802\n",
      "EVALUATION with 100 samples -> Loss: 8132000.5, CrossEntropy: 1.2860732078552246, Accuracy: 0.7955755614935293\n",
      "Iter 556 / 2000, Loss: 110143.7124560422, CrossEntropy: 0.000317022146191448, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.45812368392944\n",
      "EVALUATION with 100 samples -> Loss: 8107113.5, CrossEntropy: 1.2950388193130493, Accuracy: 0.7949299717126965\n",
      "Iter 557 / 2000, Loss: 109980.5427589514, CrossEntropy: 0.000317202415317297, Accuracy: 0.9999975023976982\n",
      "Elapsed time for the training: 57.673784255981445\n",
      "EVALUATION with 100 samples -> Loss: 8157875.0, CrossEntropy: 1.2861216068267822, Accuracy: 0.7972051113422821\n",
      "Iter 558 / 2000, Loss: 108080.02947170717, CrossEntropy: 0.0002903918211814016, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.65155339241028\n",
      "EVALUATION with 100 samples -> Loss: 8132798.0, CrossEntropy: 1.286724328994751, Accuracy: 0.7963319241194893\n",
      "Iter 559 / 2000, Loss: 107163.85244165601, CrossEntropy: 0.0002792564919218421, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.75119090080261\n",
      "EVALUATION with 100 samples -> Loss: 8132434.0, CrossEntropy: 1.2823715209960938, Accuracy: 0.7976500360428064\n",
      "Iter 560 / 2000, Loss: 107153.40784846547, CrossEntropy: 0.0002830434241332114, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.82800483703613\n",
      "EVALUATION with 100 samples -> Loss: 8142270.0, CrossEntropy: 1.2907403707504272, Accuracy: 0.7965976926517448\n",
      "Iter 561 / 2000, Loss: 107959.6830442775, CrossEntropy: 0.00029895122861489654, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.56930589675903\n",
      "EVALUATION with 100 samples -> Loss: 8171326.5, CrossEntropy: 1.2788715362548828, Accuracy: 0.7986746195290992\n",
      "Iter 562 / 2000, Loss: 107450.2853660486, CrossEntropy: 0.0002947241300716996, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.76731729507446\n",
      "EVALUATION with 100 samples -> Loss: 8106330.0, CrossEntropy: 1.278952717781067, Accuracy: 0.7958480234004812\n",
      "Iter 563 / 2000, Loss: 108010.3113011509, CrossEntropy: 0.0003074447449762374, Accuracy: 1.0\n",
      "Elapsed time for the training: 58.01344966888428\n",
      "EVALUATION with 100 samples -> Loss: 8140379.5, CrossEntropy: 1.302718162536621, Accuracy: 0.7955140692988075\n",
      "Iter 564 / 2000, Loss: 105410.7693214514, CrossEntropy: 0.00027058718842454255, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.81201720237732\n",
      "EVALUATION with 100 samples -> Loss: 8181898.5, CrossEntropy: 1.290676236152649, Accuracy: 0.7962106052979362\n",
      "Iter 565 / 2000, Loss: 106449.57506793478, CrossEntropy: 0.0002897105587180704, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.74657344818115\n",
      "EVALUATION with 100 samples -> Loss: 8147743.0, CrossEntropy: 1.2927284240722656, Accuracy: 0.7960033648907783\n",
      "Iter 566 / 2000, Loss: 106344.58252078005, CrossEntropy: 0.00029148918110877275, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.6330783367157\n",
      "EVALUATION with 100 samples -> Loss: 8164500.5, CrossEntropy: 1.2860196828842163, Accuracy: 0.7980112088001061\n",
      "Iter 567 / 2000, Loss: 106287.4508471867, CrossEntropy: 0.0002937189128715545, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.638572216033936\n",
      "EVALUATION with 100 samples -> Loss: 8169196.0, CrossEntropy: 1.2884143590927124, Accuracy: 0.7960156435424928\n",
      "Iter 568 / 2000, Loss: 105066.93564178389, CrossEntropy: 0.0002781558723654598, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.788204193115234\n",
      "EVALUATION with 100 samples -> Loss: 8191742.5, CrossEntropy: 1.2961565256118774, Accuracy: 0.7985618499426864\n",
      "Iter 569 / 2000, Loss: 105016.44261508952, CrossEntropy: 0.00028046013903804123, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.65820002555847\n",
      "EVALUATION with 100 samples -> Loss: 8159052.5, CrossEntropy: 1.283715844154358, Accuracy: 0.7976286732808797\n",
      "Iter 570 / 2000, Loss: 104942.6876198849, CrossEntropy: 0.00028366578044369817, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.61875796318054\n",
      "EVALUATION with 100 samples -> Loss: 8091848.5, CrossEntropy: 1.2797479629516602, Accuracy: 0.7981030884087135\n",
      "Iter 571 / 2000, Loss: 104236.40902733376, CrossEntropy: 0.00027575765852816403, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.66231083869934\n",
      "EVALUATION with 100 samples -> Loss: 8149288.5, CrossEntropy: 1.2879211902618408, Accuracy: 0.7968668887114424\n",
      "Iter 572 / 2000, Loss: 105235.13373161765, CrossEntropy: 0.0002946837921626866, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.45117473602295\n",
      "EVALUATION with 100 samples -> Loss: 8176517.0, CrossEntropy: 1.278862714767456, Accuracy: 0.7985202570073799\n",
      "Iter 573 / 2000, Loss: 104053.84037324169, CrossEntropy: 0.0002793583844322711, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.825878381729126\n",
      "EVALUATION with 100 samples -> Loss: 8191502.0, CrossEntropy: 1.3006304502487183, Accuracy: 0.7956527055717988\n",
      "Iter 574 / 2000, Loss: 104331.97790121484, CrossEntropy: 0.0002875769278034568, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.751357078552246\n",
      "EVALUATION with 100 samples -> Loss: 8170011.0, CrossEntropy: 1.3064494132995605, Accuracy: 0.7960189664144719\n",
      "Iter 575 / 2000, Loss: 103564.47164721867, CrossEntropy: 0.00027903608861379325, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.73584723472595\n",
      "EVALUATION with 100 samples -> Loss: 8153670.5, CrossEntropy: 1.2863500118255615, Accuracy: 0.7953605573960666\n",
      "Iter 576 / 2000, Loss: 104589.85847586318, CrossEntropy: 0.00029778099269606173, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.72370219230652\n",
      "EVALUATION with 100 samples -> Loss: 8183089.0, CrossEntropy: 1.2847881317138672, Accuracy: 0.7969691753848657\n",
      "Iter 577 / 2000, Loss: 104174.15728900256, CrossEntropy: 0.0002957558899652213, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.631298780441284\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 8172531.0, CrossEntropy: 1.2771730422973633, Accuracy: 0.7979985583275488\n",
      "Iter 578 / 2000, Loss: 103892.31297953964, CrossEntropy: 0.0002937951358035207, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.65084147453308\n",
      "EVALUATION with 100 samples -> Loss: 8180606.0, CrossEntropy: 1.2966228723526, Accuracy: 0.7942522782299755\n",
      "Iter 579 / 2000, Loss: 102306.73998960998, CrossEntropy: 0.0002719803305808455, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.85409450531006\n",
      "EVALUATION with 100 samples -> Loss: 8220351.5, CrossEntropy: 1.2904384136199951, Accuracy: 0.7990916553377575\n",
      "Iter 580 / 2000, Loss: 102365.3459279092, CrossEntropy: 0.00027630088152363896, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.7393524646759\n",
      "EVALUATION with 100 samples -> Loss: 8185568.5, CrossEntropy: 1.307913064956665, Accuracy: 0.7962715002906556\n",
      "Iter 581 / 2000, Loss: 102242.11281170076, CrossEntropy: 0.00027758453506976366, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.72859597206116\n",
      "EVALUATION with 100 samples -> Loss: 8211245.0, CrossEntropy: 1.2850314378738403, Accuracy: 0.7982329908001445\n",
      "Iter 582 / 2000, Loss: 103516.58024296675, CrossEntropy: 0.000300998508464545, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.66475486755371\n",
      "EVALUATION with 100 samples -> Loss: 8198118.5, CrossEntropy: 1.287148118019104, Accuracy: 0.7970183778307067\n",
      "Iter 583 / 2000, Loss: 101086.96954923273, CrossEntropy: 0.0002666535147000104, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.717480421066284\n",
      "EVALUATION with 100 samples -> Loss: 8161620.5, CrossEntropy: 1.284072995185852, Accuracy: 0.7975915695789072\n",
      "Iter 584 / 2000, Loss: 102563.4542039642, CrossEntropy: 0.00029270476079545915, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.913984060287476\n",
      "EVALUATION with 100 samples -> Loss: 8182278.5, CrossEntropy: 1.2856487035751343, Accuracy: 0.7986738051029714\n",
      "Iter 585 / 2000, Loss: 101432.75627397698, CrossEntropy: 0.0002782515366561711, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.89463996887207\n",
      "EVALUATION with 100 samples -> Loss: 8187516.0, CrossEntropy: 1.2941430807113647, Accuracy: 0.7975872571644926\n",
      "Iter 586 / 2000, Loss: 101628.0532688619, CrossEntropy: 0.00028472021222114563, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.83042788505554\n",
      "EVALUATION with 100 samples -> Loss: 8196328.0, CrossEntropy: 1.2978904247283936, Accuracy: 0.7985693139241165\n",
      "Iter 587 / 2000, Loss: 100478.22902014067, CrossEntropy: 0.00026954090571962297, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.765456676483154\n",
      "EVALUATION with 100 samples -> Loss: 8233718.5, CrossEntropy: 1.311519742012024, Accuracy: 0.7971556747310726\n",
      "Iter 588 / 2000, Loss: 100933.9757033248, CrossEntropy: 0.0002809040597639978, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.68643283843994\n",
      "EVALUATION with 100 samples -> Loss: 8217753.5, CrossEntropy: 1.2902663946151733, Accuracy: 0.7987943439598065\n",
      "Iter 589 / 2000, Loss: 100925.070752078, CrossEntropy: 0.0002833577455021441, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.726945638656616\n",
      "EVALUATION with 100 samples -> Loss: 8250329.5, CrossEntropy: 1.3030054569244385, Accuracy: 0.7975912931680832\n",
      "Iter 590 / 2000, Loss: 99512.79225943095, CrossEntropy: 0.0002647722722031176, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.8588125705719\n",
      "EVALUATION with 100 samples -> Loss: 8227009.0, CrossEntropy: 1.3045225143432617, Accuracy: 0.7980648518642508\n",
      "Iter 591 / 2000, Loss: 99678.87140345268, CrossEntropy: 0.0002702107303775847, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.6130108833313\n",
      "EVALUATION with 100 samples -> Loss: 8224495.0, CrossEntropy: 1.2891614437103271, Accuracy: 0.7991589269532605\n",
      "Iter 592 / 2000, Loss: 98890.30041160485, CrossEntropy: 0.0002610981755424291, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.57831859588623\n",
      "EVALUATION with 100 samples -> Loss: 8218246.5, CrossEntropy: 1.2993922233581543, Accuracy: 0.7968396805001385\n",
      "Iter 593 / 2000, Loss: 98754.74900095908, CrossEntropy: 0.0002624996122904122, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.54340744018555\n",
      "EVALUATION with 100 samples -> Loss: 8273312.5, CrossEntropy: 1.3110592365264893, Accuracy: 0.7946784024520954\n",
      "Iter 594 / 2000, Loss: 98703.10827605499, CrossEntropy: 0.00026472308672964573, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.83195400238037\n",
      "EVALUATION with 100 samples -> Loss: 8217399.5, CrossEntropy: 1.2974679470062256, Accuracy: 0.7967250453307368\n",
      "Iter 595 / 2000, Loss: 98863.98533407929, CrossEntropy: 0.0002703106147237122, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.72441387176514\n",
      "EVALUATION with 100 samples -> Loss: 8229907.0, CrossEntropy: 1.3092308044433594, Accuracy: 0.7954584675365568\n",
      "Iter 596 / 2000, Loss: 98357.29623561382, CrossEntropy: 0.0002658890443854034, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.743831157684326\n",
      "EVALUATION with 100 samples -> Loss: 8214430.0, CrossEntropy: 1.3009123802185059, Accuracy: 0.7951706128397248\n",
      "Iter 597 / 2000, Loss: 98265.23467471228, CrossEntropy: 0.00026719991001300514, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.67992377281189\n",
      "EVALUATION with 100 samples -> Loss: 8242464.5, CrossEntropy: 1.2969290018081665, Accuracy: 0.7963684764709554\n",
      "Iter 598 / 2000, Loss: 97219.32542758952, CrossEntropy: 0.00025515741435810924, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.48582315444946\n",
      "EVALUATION with 100 samples -> Loss: 8213275.0, CrossEntropy: 1.3110542297363281, Accuracy: 0.7949146440024024\n",
      "Iter 599 / 2000, Loss: 97646.89356218031, CrossEntropy: 0.00026651049847714603, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.610779762268066\n",
      "EVALUATION with 100 samples -> Loss: 8228880.5, CrossEntropy: 1.2881828546524048, Accuracy: 0.7993293639121123\n",
      "At iteration 600 we change the dropout rate from 0.1 to 0.2. \n",
      "Iter 600 / 2000, Loss: 97018.3941016624, CrossEntropy: 0.00025715853553265333, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.70844650268555\n",
      "EVALUATION with 100 samples -> Loss: 8246148.5, CrossEntropy: 1.3277769088745117, Accuracy: 0.7932644929967609\n",
      "Iter 601 / 2000, Loss: 96451.85809622762, CrossEntropy: 0.0002517580287531018, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.73688459396362\n",
      "EVALUATION with 100 samples -> Loss: 8230478.0, CrossEntropy: 1.3010971546173096, Accuracy: 0.7938006481209618\n",
      "Iter 602 / 2000, Loss: 97173.37961556905, CrossEntropy: 0.0002663654740899801, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.80921649932861\n",
      "EVALUATION with 100 samples -> Loss: 8193512.0, CrossEntropy: 1.2943459749221802, Accuracy: 0.7947281556368017\n",
      "Iter 603 / 2000, Loss: 96728.81443813938, CrossEntropy: 0.0002625567140057683, Accuracy: 1.0\n",
      "Elapsed time for the training: 57.58340668678284\n",
      "EVALUATION with 100 samples -> Loss: 8217056.5, CrossEntropy: 1.2877001762390137, Accuracy: 0.7978565948738199\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-9-8805e173e4ab>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     41\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 42\u001b[0;31m             loss, y_pred,_ = lvi_model.training_step(\n\u001b[0m\u001b[1;32m     43\u001b[0m                 \u001b[0mbatch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimages\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     44\u001b[0m                 \u001b[0mN\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Langevin_Variational_Inference/Deep_Nets/CNNs/CIFAR10/src/components.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, batch, N, vi_batch_size, deterministic_weights)\u001b[0m\n\u001b[1;32m    417\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend_chains\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparameter_groups_updated\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"parameter_groups_updated\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    418\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 419\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_hat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    420\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    421\u001b[0m     def evaluate(\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#for lvi\n",
    "\n",
    "from datetime import datetime\n",
    "import time\n",
    "\n",
    "num_epochs = 2000\n",
    "criterion = torch.nn.CrossEntropyLoss()  # loss function\n",
    "total_acc = []\n",
    "# start = time.time()\n",
    "for i in range(num_epochs):\n",
    "    losses = []\n",
    "    cross_losses = []\n",
    "    accuracy = []\n",
    "    \n",
    "    if (i+1) % 100 == 0:\n",
    "        print(\"At iteration %d we change the dropout rate from %.1f to %.1f. \" %(i+1, lvi_model.dropout_prob, lvi_model.dropout_prob+0.1))\n",
    "        lvi_model.change_dropout_rate(lvi_model.dropout_prob+0.1)\n",
    "    \n",
    "    start = time.time()\n",
    "    \n",
    "    for images, labels in trainloader:\n",
    "        inner_cross_losses = []\n",
    "        inner_accuracy = []\n",
    "        \n",
    "        # Flatten MNIST images into a 784 long vector\n",
    "#         images = images.view(images.shape[0], -1)\n",
    "        \n",
    "        if i < 1:\n",
    "            loss, y_pred,_ = lvi_model.training_step(\n",
    "                batch=(images, labels),\n",
    "                N=N,\n",
    "                deterministic_weights=True,\n",
    "                vi_batch_size=1,\n",
    "            )\n",
    "            losses.append(loss)\n",
    "        \n",
    "            cross_loss = criterion(y_pred.squeeze(0), labels)\n",
    "            cross_losses.append(cross_loss)\n",
    "            accuracy.append((torch.max(y_pred.squeeze(0),-1).indices == labels).sum().item() / labels.size(0))\n",
    "            \n",
    "        else:\n",
    "            loss, y_pred,_ = lvi_model.training_step(\n",
    "                batch=(images, labels),\n",
    "                N=N,\n",
    "                deterministic_weights=False,\n",
    "                vi_batch_size=8,\n",
    "            ) \n",
    "\n",
    "            losses.append(loss)\n",
    "\n",
    "            for j in range(y_pred.shape[0]):\n",
    "                cross_loss = criterion(y_pred.squeeze(0)[j], labels)\n",
    "                inner_cross_losses.append(cross_loss)\n",
    "                inner_accuracy.append((torch.max(y_pred.squeeze(0)[j],-1).indices == labels).sum().item() / labels.size(0))\n",
    "\n",
    "            accuracy.append(sum(inner_accuracy)/len(inner_accuracy))\n",
    "            cross_losses.append(sum(inner_cross_losses)/len(inner_cross_losses))\n",
    "\n",
    "#     if (i+1) % 10**math.floor(math.log10(i+1)) == 0:  # True when i+1 \\in {1, 2, ..., 10, 20, ..., 100, 200, ..., 1000, 2000, ...}\n",
    "    print(\"Iter {} / {}, Loss: {}, CrossEntropy: {}, Accuracy: {}\".format(i+1, num_epochs, sum(losses)/len(losses), sum(cross_losses)/len(cross_losses), sum(accuracy)/len(accuracy)))\n",
    "\n",
    "    end = time.time()\n",
    "    print('Elapsed time for the training:', end - start)\n",
    "    \n",
    "    tmp_acc = evaluation(lvi_model, testloader)\n",
    "    total_acc.append(tmp_acc)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save('sghmc_d1_lr_2_acc.npy',total_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "W Shape is: (5000, 291376)\n"
     ]
    }
   ],
   "source": [
    "chains = []\n",
    "for name, chain in lvi_model.get_chains().items():\n",
    "    chains.append(chain.view(chain.shape[0], -1).detach().cpu())\n",
    "w = torch.cat(chains, dim=-1).numpy()\n",
    "print(\"W Shape is:\",w.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from src.convergence_criteria import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean IAC for chain: 825.7818480935349\n"
     ]
    }
   ],
   "source": [
    "iac_time = calculate_IAC(w)\n",
    "ess_time = calculate_ESS(w)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# torch.save(lvi_model.state_dict(), \"./cnn_svhn_non_lvi.pt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EVALUATION with 100 samples -> Loss: 11035387.0, CrossEntropy: 3.316812038421631, Accuracy: 0.8987949346405288\n"
     ]
    }
   ],
   "source": [
    "# Evaluate lvi_model on testloader and report the mean loss, cross-entropy\n",
    "# and accuracy over all batches.\n",
    "NUM_EVAL_SAMPLES = 100  # passed to evaluate() and echoed in the summary line\n",
    "\n",
    "N = len(valset)\n",
    "losses = []\n",
    "cross_losses = []\n",
    "accuracy = []\n",
    "\n",
    "for images, labels in testloader:\n",
    "    inner_cross_losses = []\n",
    "    inner_accuracy = []\n",
    "\n",
    "    # Images are handed to the model as they come from the loader (no flattening).\n",
    "    loss, y_pred = lvi_model.evaluate(batch=(images, labels),\n",
    "                N=N,\n",
    "                num_samples=NUM_EVAL_SAMPLES,\n",
    "                deterministic_weights=False)\n",
    "\n",
    "    losses.append(loss)\n",
    "    # One cross-entropy / accuracy value per entry along y_pred's first axis.\n",
    "    for j in range(y_pred.shape[0]):\n",
    "        sample_pred = y_pred.squeeze(0)[j]\n",
    "        cross_loss = criterion(sample_pred, labels)\n",
    "        inner_cross_losses.append(cross_loss)\n",
    "        inner_accuracy.append((torch.max(sample_pred,-1).indices == labels).sum().item() / labels.size(0))\n",
    "\n",
    "    # Record the batch means exactly once, after the sample loop has finished.\n",
    "    # Appending inside the loop would store a running mean per sample and\n",
    "    # over-weight the earliest samples in the final averages.\n",
    "    accuracy.append(sum(inner_accuracy)/len(inner_accuracy))\n",
    "    cross_losses.append(sum(inner_cross_losses)/len(inner_cross_losses))\n",
    "\n",
    "print(\"EVALUATION with {} samples -> Loss: {}, CrossEntropy: {}, Accuracy: {}\".format(NUM_EVAL_SAMPLES, sum(losses)/len(losses), sum(cross_losses)/len(cross_losses), sum(accuracy)/len(accuracy)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Line plot of the values in `mses` against their position in the sequence.\n",
    "fig, ax = plt.subplots(figsize=(10, 10))\n",
    "ax.plot(range(len(mses)), mses)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the model's `use_dropout` and `num_samples_per_group` attributes; as the\n",
    "# cell's last expression, the pair is displayed as a tuple.\n",
    "lvi_model.use_dropout, lvi_model.num_samples_per_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "def pairgrid_heatmap(x, y, **kws):\n",
    "    \"\"\"Draw a 2-D histogram of x against y on the current axes.\n",
    "\n",
    "    The \"color\" keyword is removed from kws and turned into a light\n",
    "    sequential colormap; all remaining keywords are forwarded to plt.hist2d.\n",
    "    \"\"\"\n",
    "    base_color = kws.pop(\"color\")\n",
    "    palette = sns.light_palette(base_color, as_cmap=True)\n",
    "    # cmin=1 leaves bins without any observation undrawn.\n",
    "    plt.hist2d(x, y, cmap=palette, cmin=1, **kws)\n",
    "\n",
    "\n",
    "sns.set_style(\"white\")\n",
    "\n",
    "# Alternative chain sources kept for reference:\n",
    "#to_plot = sgld_model.tensor_dict.W_2.theta_chains.view(sgld_model.chain_length, -1).cpu().detach().numpy()\n",
    "#to_plot = lvi_model.tensor_dict.W_2.theta_chains.view(lvi_model.chain_length, -1).cpu().detach().numpy()\n",
    "to_plot = lvi_model.get_chains()[\"W_0\"].squeeze().cpu().detach().numpy()\n",
    "\n",
    "# Histograms on the diagonal, pairwise 2-D histograms everywhere else.\n",
    "g = sns.PairGrid(pd.DataFrame(to_plot))\n",
    "g.map_diag(plt.hist, bins=100)\n",
    "g.map_offdiag(pairgrid_heatmap, bins=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the W_0 chain once, then flatten everything after its first axis so\n",
    "# each row of the DataFrame corresponds to one entry along that axis.\n",
    "w0_chain = lvi_model.get_chains()[\"W_0\"].squeeze().cpu().detach()\n",
    "shaping = w0_chain.shape[0]\n",
    "w = pd.DataFrame(w0_chain.view(shaping, -1).numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the DataFrame `w` to a CSV file in the working directory.\n",
    "# NOTE(review): header=None is presumably meant to omit the column-name row;\n",
    "# pandas documents header=False for that -- confirm the file looks as expected.\n",
    "w.to_csv('non_lvi_drop_10_batch_500.csv',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
