{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "import numpy as np\n",
    "import sys\n",
    "import operator\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from functools import reduce\n",
    "from functools import partial\n",
    "from neuralop.datasets.tensor_dataset import TensorDataset \n",
    "from neuralop.models import FNO\n",
    "\n",
    "from neuralop import Trainer\n",
    "from neuralop.training import OutputEncoderCallback, SimpleTensorBoardLoggerCallback\n",
    "\n",
    "from neuralop.utils import count_params\n",
    "from neuralop import LpLoss, H1Loss\n",
    "\n",
    "device = 'cuda'\n",
    "\n",
    "activate_cuda = True\n",
    "device = torch.device(\n",
    "    \"cuda\" if activate_cuda and torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "data = np.load('./data/burgers_ut.npy', allow_pickle=True)\n",
    "if isinstance(data, np.ndarray):\n",
    "    data = data.item()\n",
    "# data = torch.load('./data/burgers_ut.pt')\n",
    "# U, U_x, U_xx, U_t --stack-> [U, U_x, U_xx, U_t]: (C, Nx, Nt) --permute-> (Nx * Nt, C)\n",
    "# data_prep: U :[Nx, Nt] -> [Nt-1, 1, Nx]; target: [Nt-1, 1, Nx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "inputs = torch.tensor(data['x']).unsqueeze(1)\n",
    "target = torch.tensor(data['y']).unsqueeze(1)\n",
    "\n",
    "dataset = TensorDataset(inputs, target)\n",
    "train_loader = DataLoader(dataset, batch_size=32, shuffle=True)\n",
    "test_loaders = {'test': DataLoader(dataset, batch_size=32, shuffle=False)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Our FNO1D model has 1248257 parameters.\n"
     ]
    }
   ],
   "source": [
    "model_name = \"FNO1D\"\n",
    "# We create a tensorized FNO model.\n",
    "\n",
    "n_modes=101\n",
    "\n",
    "model = FNO(in_channels=1, n_modes=(n_modes, ), hidden_channels=64, \n",
    "             projection_channels=32, factorization=None, \n",
    "             channel_mixing='', \n",
    "             n_layers=3,\n",
    "             fno_skip='soft-gating'\n",
    "            #  stabilizer='tanh', rank=0.42\n",
    "             )\n",
    "\n",
    "# model = Net1d(100, 3)\n",
    "\n",
    "model = model.to(device)\n",
    "# encoder = encoder.to(device)\n",
    "\n",
    "n_params = count_params(model)\n",
    "print(f'\\nOur '+model_name+f' model has {n_params} parameters.')\n",
    "\n",
    "sys.stdout.flush()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "### MODEL ###\n",
      " FNO(\n",
      "  (fno_blocks): FNOBlocks1(\n",
      "    (convs): SpectralConv(\n",
      "      (weight): ModuleList(\n",
      "        (0-2): 3 x ComplexDenseTensor(shape=torch.Size([64, 64, 50]), rank=None)\n",
      "      )\n",
      "    )\n",
      "    (fno_skips): ModuleList(\n",
      "      (0-2): 3 x SoftGating()\n",
      "    )\n",
      "  )\n",
      "  (lifting): MLP(\n",
      "    (fcs): ModuleList(\n",
      "      (0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))\n",
      "      (1): Conv1d(256, 64, kernel_size=(1,), stride=(1,))\n",
      "    )\n",
      "  )\n",
      "  (projection): MLP(\n",
      "    (fcs): ModuleList(\n",
      "      (0): Conv1d(64, 32, kernel_size=(1,), stride=(1,))\n",
      "      (1): Conv1d(32, 1, kernel_size=(1,), stride=(1,))\n",
      "    )\n",
      "  )\n",
      ")\n",
      "\n",
      "### OPTIMIZER ###\n",
      " Adam (\n",
      "Parameter Group 0\n",
      "    amsgrad: False\n",
      "    betas: (0.9, 0.999)\n",
      "    capturable: False\n",
      "    differentiable: False\n",
      "    eps: 1e-08\n",
      "    foreach: None\n",
      "    fused: None\n",
      "    initial_lr: 0.1\n",
      "    lr: 0.1\n",
      "    maximize: False\n",
      "    weight_decay: 0.0\n",
      ")\n",
      "\n",
      "### SCHEDULER ###\n",
      " <torch.optim.lr_scheduler.StepLR object at 0x74394007e980>\n",
      "\n",
      "### LOSSES ###\n",
      "\n",
      " * Train: <neuralop.training.losses.H1Loss object at 0x74394007d1e0>\n",
      "\n",
      " * Test: {'h1': <neuralop.training.losses.H1Loss object at 0x74394007d1e0>, 'l2': <neuralop.training.losses.LpLoss object at 0x74394007ca90>}\n"
     ]
    }
   ],
   "source": [
    "#Create the optimizer\n",
    "optimizer = torch.optim.Adam(model.parameters(), \n",
    "                                lr=0.1, \n",
    "                                weight_decay=0.)\n",
    "scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)\n",
    "\n",
    "# Creating the losses\n",
    "l2loss = LpLoss(d=2, p=2)\n",
    "h1loss = H1Loss(d=2)\n",
    "\n",
    "train_loss = h1loss\n",
    "eval_losses={'h1': h1loss, 'l2': l2loss}\n",
    "\n",
    "print('\\n### MODEL ###\\n', model)\n",
    "print('\\n### OPTIMIZER ###\\n', optimizer)\n",
    "print('\\n### SCHEDULER ###\\n', scheduler)\n",
    "print('\\n### LOSSES ###')\n",
    "print(f'\\n * Train: {train_loss}')\n",
    "print(f'\\n * Test: {eval_losses}')\n",
    "sys.stdout.flush()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "localtime = time.localtime(time.time())\n",
    "time_now = f\"{localtime.tm_mon}-{localtime.tm_mday}-{localtime.tm_hour}-{localtime.tm_min}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-05-22 00:26:38.294569: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2024-05-22 00:26:38.312720: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-05-22 00:26:38.312744: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-05-22 00:26:38.313425: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-05-22 00:26:38.317252: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2024-05-22 00:26:38.703784: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "using standard method to load data to device.\n",
      "using standard method to compute loss.\n",
      "self.override_load_to_device=False\n",
      "self.overrides_loss=False\n"
     ]
    }
   ],
   "source": [
    "trainer = Trainer(model=model, n_epochs=500,\n",
    "                  device=device,\n",
    "                  callbacks=[\n",
    "                            #  OutputEncoderCallback(encoder), \n",
    "                             SimpleTensorBoardLoggerCallback(log_dir='runs/Burgers_prod_simple_'+model_name+time_now,)\n",
    "                            ],\n",
    "                  wandb_log=False,\n",
    "                  log_test_interval=1,\n",
    "                  use_distributed=False,\n",
    "                  verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training on 2048 samples\n",
      "Testing on [2048] samples         on resolutions ['test'].\n",
      "Raw outputs of size out.shape=torch.Size([32, 1, 1024])\n",
      "[0] time=0.52, avg_loss=0.0552, train_err=0.0135, test_h1=0.8416, test_l2=0.6258\n",
      "[1] time=0.15, avg_loss=0.0396, train_err=0.0097, test_h1=0.6238, test_l2=0.1042\n",
      "[2] time=0.18, avg_loss=0.0397, train_err=0.0097, test_h1=0.6107, test_l2=0.0753\n",
      "[3] time=0.15, avg_loss=0.0374, train_err=0.0091, test_h1=0.6056, test_l2=0.0585\n",
      "[4] time=0.15, avg_loss=0.0385, train_err=0.0094, test_h1=0.6035, test_l2=0.0520\n",
      "[5] time=0.14, avg_loss=0.0390, train_err=0.0095, test_h1=0.6025, test_l2=0.0481\n",
      "[6] time=0.14, avg_loss=0.0388, train_err=0.0095, test_h1=0.6016, test_l2=0.0451\n",
      "[7] time=0.14, avg_loss=0.0391, train_err=0.0095, test_h1=0.6010, test_l2=0.0456\n",
      "[8] time=0.14, avg_loss=0.0390, train_err=0.0095, test_h1=0.6004, test_l2=0.0396\n",
      "[9] time=0.14, avg_loss=0.0383, train_err=0.0094, test_h1=0.6001, test_l2=0.0450\n",
      "[10] time=0.14, avg_loss=0.0385, train_err=0.0094, test_h1=0.5996, test_l2=0.0369\n",
      "[11] time=0.14, avg_loss=0.0387, train_err=0.0094, test_h1=0.5994, test_l2=0.0361\n",
      "[12] time=0.14, avg_loss=0.0389, train_err=0.0095, test_h1=0.5994, test_l2=0.0394\n",
      "[13] time=0.14, avg_loss=0.0367, train_err=0.0090, test_h1=0.5988, test_l2=0.0337\n",
      "[14] time=0.14, avg_loss=0.0376, train_err=0.0092, test_h1=0.5985, test_l2=0.0379\n",
      "[15] time=0.14, avg_loss=0.0385, train_err=0.0094, test_h1=0.5981, test_l2=0.0338\n",
      "[16] time=0.14, avg_loss=0.0388, train_err=0.0095, test_h1=0.5984, test_l2=0.0345\n",
      "[17] time=0.14, avg_loss=0.0386, train_err=0.0094, test_h1=0.5986, test_l2=0.0616\n",
      "[18] time=0.15, avg_loss=0.0372, train_err=0.0091, test_h1=0.5979, test_l2=0.0441\n",
      "[19] time=0.14, avg_loss=0.0387, train_err=0.0095, test_h1=0.5980, test_l2=0.0450\n",
      "[20] time=0.15, avg_loss=0.0375, train_err=0.0091, test_h1=0.5982, test_l2=0.0613\n",
      "[21] time=0.15, avg_loss=0.0375, train_err=0.0091, test_h1=0.5975, test_l2=0.0306\n",
      "[22] time=0.15, avg_loss=0.0383, train_err=0.0093, test_h1=0.5975, test_l2=0.0540\n",
      "[23] time=0.14, avg_loss=0.0371, train_err=0.0091, test_h1=0.5985, test_l2=0.0924\n",
      "[24] time=0.14, avg_loss=0.0397, train_err=0.0097, test_h1=0.5970, test_l2=0.0291\n",
      "[25] time=0.14, avg_loss=0.0386, train_err=0.0094, test_h1=0.5976, test_l2=0.0752\n",
      "[26] time=0.14, avg_loss=0.0373, train_err=0.0091, test_h1=0.5977, test_l2=0.0859\n",
      "[27] time=0.16, avg_loss=0.0375, train_err=0.0092, test_h1=0.5971, test_l2=0.0382\n",
      "[28] time=0.14, avg_loss=0.0380, train_err=0.0093, test_h1=0.5968, test_l2=0.0488\n",
      "[29] time=0.16, avg_loss=0.0387, train_err=0.0094, test_h1=0.5965, test_l2=0.0309\n",
      "[30] time=0.15, avg_loss=0.0374, train_err=0.0091, test_h1=0.6003, test_l2=0.1536\n",
      "[31] time=0.15, avg_loss=0.0382, train_err=0.0093, test_h1=0.5962, test_l2=0.0327\n",
      "[32] time=0.14, avg_loss=0.0380, train_err=0.0093, test_h1=0.5973, test_l2=0.0849\n",
      "[33] time=0.15, avg_loss=0.0364, train_err=0.0089, test_h1=0.5971, test_l2=0.0771\n",
      "[34] time=0.15, avg_loss=0.0388, train_err=0.0095, test_h1=0.5977, test_l2=0.0939\n",
      "[35] time=0.14, avg_loss=0.0381, train_err=0.0093, test_h1=0.5962, test_l2=0.0453\n",
      "[36] time=0.14, avg_loss=0.0377, train_err=0.0092, test_h1=0.5960, test_l2=0.0315\n",
      "[37] time=0.15, avg_loss=0.0380, train_err=0.0093, test_h1=0.5958, test_l2=0.0289\n",
      "[38] time=0.15, avg_loss=0.0398, train_err=0.0097, test_h1=0.5973, test_l2=0.0917\n",
      "[39] time=0.17, avg_loss=0.0387, train_err=0.0095, test_h1=0.5958, test_l2=0.0265\n",
      "[40] time=0.15, avg_loss=0.0382, train_err=0.0093, test_h1=0.5958, test_l2=0.0455\n",
      "[41] time=0.15, avg_loss=0.0379, train_err=0.0092, test_h1=0.5956, test_l2=0.0253\n",
      "[42] time=0.14, avg_loss=0.0376, train_err=0.0092, test_h1=0.5959, test_l2=0.0505\n",
      "[43] time=0.15, avg_loss=0.0389, train_err=0.0095, test_h1=0.5960, test_l2=0.0480\n",
      "[44] time=0.15, avg_loss=0.0385, train_err=0.0094, test_h1=0.5966, test_l2=0.0775\n",
      "[45] time=0.15, avg_loss=0.0385, train_err=0.0094, test_h1=0.5973, test_l2=0.1006\n",
      "[46] time=0.15, avg_loss=0.0398, train_err=0.0097, test_h1=0.5958, test_l2=0.0390\n",
      "[47] time=0.15, avg_loss=0.0391, train_err=0.0095, test_h1=0.5955, test_l2=0.0297\n",
      "[48] time=0.15, avg_loss=0.0377, train_err=0.0092, test_h1=0.5968, test_l2=0.0896\n",
      "[49] time=0.14, avg_loss=0.0389, train_err=0.0095, test_h1=0.5956, test_l2=0.0339\n",
      "[50] time=0.15, avg_loss=0.0404, train_err=0.0099, test_h1=0.5954, test_l2=0.0270\n",
      "[51] time=0.15, avg_loss=0.0385, train_err=0.0094, test_h1=0.5991, test_l2=0.1453\n",
      "[52] time=0.15, avg_loss=0.0387, train_err=0.0095, test_h1=0.5955, test_l2=0.0304\n",
      "[53] time=0.15, avg_loss=0.0372, train_err=0.0091, test_h1=0.5954, test_l2=0.0302\n",
      "[54] time=0.15, avg_loss=0.0386, train_err=0.0094, test_h1=0.5952, test_l2=0.0309\n",
      "[55] time=0.15, avg_loss=0.0391, train_err=0.0095, test_h1=0.5983, test_l2=0.1221\n",
      "[56] time=0.15, avg_loss=0.0380, train_err=0.0093, test_h1=0.5955, test_l2=0.0561\n",
      "[57] time=0.14, avg_loss=0.0379, train_err=0.0093, test_h1=0.5957, test_l2=0.0629\n",
      "[58] time=0.14, avg_loss=0.0380, train_err=0.0093, test_h1=0.5954, test_l2=0.0515\n",
      "[59] time=0.15, avg_loss=0.0388, train_err=0.0095, test_h1=0.5953, test_l2=0.0303\n",
      "[60] time=0.15, avg_loss=0.0380, train_err=0.0093, test_h1=0.5952, test_l2=0.0280\n",
      "[61] time=0.15, avg_loss=0.0396, train_err=0.0097, test_h1=0.5958, test_l2=0.0579\n",
      "[62] time=0.15, avg_loss=0.0387, train_err=0.0095, test_h1=0.5955, test_l2=0.0528\n",
      "[63] time=0.15, avg_loss=0.0380, train_err=0.0093, test_h1=0.5952, test_l2=0.0315\n",
      "[64] time=0.15, avg_loss=0.0379, train_err=0.0093, test_h1=0.5953, test_l2=0.0385\n",
      "[65] time=0.15, avg_loss=0.0380, train_err=0.0093, test_h1=0.5955, test_l2=0.0428\n",
      "[66] time=0.15, avg_loss=0.0374, train_err=0.0091, test_h1=0.5951, test_l2=0.0260\n",
      "[67] time=0.14, avg_loss=0.0405, train_err=0.0099, test_h1=0.5955, test_l2=0.0601\n",
      "[68] time=0.14, avg_loss=0.0394, train_err=0.0096, test_h1=0.5972, test_l2=0.1138\n",
      "[69] time=0.16, avg_loss=0.0373, train_err=0.0091, test_h1=0.5987, test_l2=0.1387\n",
      "[70] time=0.14, avg_loss=0.0374, train_err=0.0091, test_h1=0.5968, test_l2=0.0890\n",
      "[71] time=0.14, avg_loss=0.0390, train_err=0.0095, test_h1=0.5960, test_l2=0.0635\n",
      "[72] time=0.14, avg_loss=0.0373, train_err=0.0091, test_h1=0.5947, test_l2=0.0297\n",
      "[73] time=0.14, avg_loss=0.0366, train_err=0.0089, test_h1=0.5947, test_l2=0.0242\n",
      "[74] time=0.14, avg_loss=0.0370, train_err=0.0090, test_h1=0.6010, test_l2=0.1672\n",
      "[75] time=0.14, avg_loss=0.0377, train_err=0.0092, test_h1=0.5951, test_l2=0.0386\n",
      "[76] time=0.14, avg_loss=0.0381, train_err=0.0093, test_h1=0.5972, test_l2=0.1154\n",
      "[77] time=0.14, avg_loss=0.0379, train_err=0.0092, test_h1=0.5950, test_l2=0.0507\n",
      "[78] time=0.14, avg_loss=0.0378, train_err=0.0092, test_h1=0.5950, test_l2=0.0355\n",
      "[79] time=0.14, avg_loss=0.0382, train_err=0.0093, test_h1=0.5950, test_l2=0.0337\n",
      "[80] time=0.14, avg_loss=0.0378, train_err=0.0092, test_h1=0.5949, test_l2=0.0303\n",
      "[81] time=0.14, avg_loss=0.0379, train_err=0.0092, test_h1=0.5948, test_l2=0.0339\n",
      "[82] time=0.14, avg_loss=0.0366, train_err=0.0089, test_h1=0.5947, test_l2=0.0311\n",
      "[83] time=0.14, avg_loss=0.0370, train_err=0.0090, test_h1=0.5959, test_l2=0.0545\n",
      "[84] time=0.14, avg_loss=0.0373, train_err=0.0091, test_h1=0.5948, test_l2=0.0289\n",
      "[85] time=0.14, avg_loss=0.0397, train_err=0.0097, test_h1=0.5944, test_l2=0.0281\n",
      "[86] time=0.16, avg_loss=0.0375, train_err=0.0092, test_h1=0.5947, test_l2=0.0343\n",
      "[87] time=0.14, avg_loss=0.0376, train_err=0.0092, test_h1=0.5969, test_l2=0.1107\n",
      "[88] time=0.14, avg_loss=0.0390, train_err=0.0095, test_h1=0.5961, test_l2=0.0788\n",
      "[89] time=0.14, avg_loss=0.0373, train_err=0.0091, test_h1=0.5954, test_l2=0.0616\n",
      "[90] time=0.14, avg_loss=0.0387, train_err=0.0094, test_h1=0.5945, test_l2=0.0243\n",
      "[91] time=0.14, avg_loss=0.0377, train_err=0.0092, test_h1=0.5943, test_l2=0.0251\n",
      "[92] time=0.14, avg_loss=0.0387, train_err=0.0094, test_h1=0.5993, test_l2=0.1473\n",
      "[93] time=0.15, avg_loss=0.0394, train_err=0.0096, test_h1=0.5944, test_l2=0.0249\n",
      "[94] time=0.15, avg_loss=0.0388, train_err=0.0095, test_h1=0.5944, test_l2=0.0362\n",
      "[95] time=0.15, avg_loss=0.0395, train_err=0.0097, test_h1=0.5946, test_l2=0.0266\n",
      "[96] time=0.14, avg_loss=0.0393, train_err=0.0096, test_h1=0.5945, test_l2=0.0238\n",
      "[97] time=0.14, avg_loss=0.0385, train_err=0.0094, test_h1=0.5954, test_l2=0.0722\n",
      "[98] time=0.15, avg_loss=0.0360, train_err=0.0088, test_h1=0.5945, test_l2=0.0338\n",
      "[99] time=0.15, avg_loss=0.0381, train_err=0.0093, test_h1=0.5971, test_l2=0.0426\n",
      "[100] time=0.15, avg_loss=0.0382, train_err=0.0093, test_h1=0.5936, test_l2=0.0213\n",
      "[101] time=0.15, avg_loss=0.0382, train_err=0.0093, test_h1=0.5936, test_l2=0.0211\n",
      "[102] time=0.15, avg_loss=0.0374, train_err=0.0091, test_h1=0.5936, test_l2=0.0215\n",
      "[103] time=0.14, avg_loss=0.0386, train_err=0.0094, test_h1=0.5936, test_l2=0.0208\n",
      "[104] time=0.15, avg_loss=0.0383, train_err=0.0093, test_h1=0.5936, test_l2=0.0294\n",
      "[105] time=0.15, avg_loss=0.0389, train_err=0.0095, test_h1=0.5937, test_l2=0.0216\n",
      "[106] time=0.14, avg_loss=0.0379, train_err=0.0093, test_h1=0.5935, test_l2=0.0222\n",
      "[107] time=0.14, avg_loss=0.0379, train_err=0.0093, test_h1=0.5937, test_l2=0.0273\n",
      "[108] time=0.14, avg_loss=0.0383, train_err=0.0093, test_h1=0.5938, test_l2=0.0243\n",
      "[109] time=0.14, avg_loss=0.0384, train_err=0.0094, test_h1=0.5935, test_l2=0.0214\n",
      "[110] time=0.15, avg_loss=0.0377, train_err=0.0092, test_h1=0.5936, test_l2=0.0282\n",
      "[111] time=0.15, avg_loss=0.0381, train_err=0.0093, test_h1=0.5934, test_l2=0.0206\n",
      "[112] time=0.15, avg_loss=0.0393, train_err=0.0096, test_h1=0.5933, test_l2=0.0212\n",
      "[113] time=0.15, avg_loss=0.0383, train_err=0.0094, test_h1=0.5939, test_l2=0.0250\n",
      "[114] time=0.15, avg_loss=0.0390, train_err=0.0095, test_h1=0.5934, test_l2=0.0224\n",
      "[115] time=0.16, avg_loss=0.0361, train_err=0.0088, test_h1=0.5938, test_l2=0.0257\n",
      "[116] time=0.15, avg_loss=0.0358, train_err=0.0087, test_h1=0.5939, test_l2=0.0234\n",
      "[117] time=0.15, avg_loss=0.0379, train_err=0.0092, test_h1=0.5939, test_l2=0.0398\n",
      "[118] time=0.14, avg_loss=0.0382, train_err=0.0093, test_h1=0.5935, test_l2=0.0234\n",
      "[119] time=0.16, avg_loss=0.0380, train_err=0.0093, test_h1=0.5935, test_l2=0.0219\n",
      "[120] time=0.15, avg_loss=0.0378, train_err=0.0092, test_h1=0.5939, test_l2=0.0293\n",
      "[121] time=0.15, avg_loss=0.0370, train_err=0.0090, test_h1=0.5938, test_l2=0.0305\n",
      "[122] time=0.15, avg_loss=0.0375, train_err=0.0092, test_h1=0.5938, test_l2=0.0324\n",
      "[123] time=0.14, avg_loss=0.0376, train_err=0.0092, test_h1=0.5946, test_l2=0.0276\n",
      "[124] time=0.14, avg_loss=0.0369, train_err=0.0090, test_h1=0.5957, test_l2=0.0476\n",
      "[125] time=0.14, avg_loss=0.0381, train_err=0.0093, test_h1=0.5936, test_l2=0.0276\n",
      "[126] time=0.15, avg_loss=0.0379, train_err=0.0093, test_h1=0.5937, test_l2=0.0246\n",
      "[127] time=0.14, avg_loss=0.0387, train_err=0.0094, test_h1=0.5935, test_l2=0.0288\n",
      "[128] time=0.15, avg_loss=26.6046, train_err=6.4953, test_h1=46.1617, test_l2=30.5203\n",
      "[129] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8958, test_l2=0.3091\n",
      "[130] time=0.14, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2999\n",
      "[131] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[132] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[133] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[134] time=0.14, avg_loss=0.0564, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[135] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[136] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[137] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[138] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[139] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[140] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[141] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[142] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[143] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[144] time=0.15, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[145] time=0.15, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[146] time=0.15, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[147] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[148] time=0.15, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[149] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[150] time=0.14, avg_loss=0.0560, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[151] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[152] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[153] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[154] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[155] time=0.15, avg_loss=0.0562, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[156] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[157] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[158] time=0.14, avg_loss=0.0584, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[159] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[160] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[161] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[162] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[163] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[164] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[165] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[166] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[167] time=0.14, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[168] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[169] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[170] time=0.15, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[171] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[172] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[173] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[174] time=0.14, avg_loss=0.0582, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[175] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[176] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[177] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[178] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[179] time=0.14, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[180] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[181] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[182] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[183] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[184] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[185] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[186] time=0.14, avg_loss=0.0585, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[187] time=0.14, avg_loss=0.0585, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[188] time=0.14, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[189] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[190] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[191] time=0.14, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[192] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[193] time=0.14, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[194] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[195] time=0.14, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[196] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[197] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[198] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[199] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[200] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[201] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[202] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[203] time=0.15, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[204] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[205] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[206] time=0.14, avg_loss=0.0582, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[207] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[208] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[209] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[210] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[211] time=0.17, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[212] time=0.15, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[213] time=0.14, avg_loss=0.0562, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[214] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[215] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[216] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[217] time=0.14, avg_loss=0.0582, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[218] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[219] time=0.15, avg_loss=0.0557, train_err=0.0136, test_h1=0.8953, test_l2=0.2998\n",
      "[220] time=0.15, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[221] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[222] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[223] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[224] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[225] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[226] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[227] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[228] time=0.15, avg_loss=0.0588, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[229] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[230] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[231] time=0.14, avg_loss=0.0582, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[232] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[233] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[234] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[235] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[236] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[237] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[238] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[239] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[240] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[241] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[242] time=0.16, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[243] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[244] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[245] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[246] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[247] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[248] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[249] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[250] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[251] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[252] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.3001\n",
      "[253] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[254] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[255] time=0.14, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2999\n",
      "[256] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[257] time=0.14, avg_loss=0.0583, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[258] time=0.14, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2999\n",
      "[259] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2999\n",
      "[260] time=0.14, avg_loss=0.0563, train_err=0.0138, test_h1=0.8953, test_l2=0.2999\n",
      "[261] time=0.16, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[262] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[263] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[264] time=0.15, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.3001\n",
      "[265] time=0.14, avg_loss=0.0583, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[266] time=0.14, avg_loss=0.0571, train_err=0.0140, test_h1=0.8953, test_l2=0.3001\n",
      "[267] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2999\n",
      "[268] time=0.15, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[269] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[270] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[271] time=0.15, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[272] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[273] time=0.15, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[274] time=0.15, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[275] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[276] time=0.15, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[277] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[278] time=0.15, avg_loss=0.0566, train_err=0.0138, test_h1=0.8955, test_l2=0.3032\n",
      "[279] time=0.15, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[280] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[281] time=0.15, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[282] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[283] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[284] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[285] time=0.15, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[286] time=0.14, avg_loss=0.0562, train_err=0.0137, test_h1=0.8953, test_l2=0.3005\n",
      "[287] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.3000\n",
      "[288] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2999\n",
      "[289] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[290] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[291] time=0.15, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[292] time=0.15, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[293] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[294] time=0.14, avg_loss=0.0584, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[295] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[296] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[297] time=0.15, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.3003\n",
      "[298] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[299] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[300] time=0.15, avg_loss=0.0586, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[301] time=0.15, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[302] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[303] time=0.15, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[304] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[305] time=0.15, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[306] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[307] time=0.14, avg_loss=0.0586, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[308] time=0.14, avg_loss=0.0562, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[309] time=0.14, avg_loss=0.0564, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[310] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[311] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[312] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[313] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[314] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[315] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[316] time=0.15, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[317] time=0.15, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[318] time=0.15, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[319] time=0.14, avg_loss=0.0585, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[320] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[321] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[322] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[323] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[324] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[325] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[326] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2999\n",
      "[327] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[328] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2999\n",
      "[329] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[330] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[331] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[332] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[333] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2999\n",
      "[334] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[335] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[336] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[337] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[338] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[339] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[340] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[341] time=0.14, avg_loss=0.0561, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[342] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[343] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2999\n",
      "[344] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[345] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[346] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[347] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2999\n",
      "[348] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[349] time=0.15, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[350] time=0.15, avg_loss=0.0567, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[351] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[352] time=0.15, avg_loss=0.0564, train_err=0.0138, test_h1=0.8953, test_l2=0.2999\n",
      "[353] time=0.15, avg_loss=0.0580, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[354] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[355] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[356] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[357] time=0.14, avg_loss=0.0583, train_err=0.0142, test_h1=0.8953, test_l2=0.2999\n",
      "[358] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2999\n",
      "[359] time=0.15, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[360] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[361] time=0.14, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[362] time=0.15, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[363] time=0.15, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[364] time=0.15, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2999\n",
      "[365] time=0.15, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2999\n",
      "[366] time=0.18, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[367] time=0.14, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[368] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[369] time=0.15, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[370] time=0.15, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[371] time=0.15, avg_loss=0.0563, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[372] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[373] time=0.15, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[374] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[375] time=0.15, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[376] time=0.15, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[377] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[378] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[379] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2999\n",
      "[380] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[381] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[382] time=0.15, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2999\n",
      "[383] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[384] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2999\n",
      "[385] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2999\n",
      "[386] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[387] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[388] time=0.15, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[389] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[390] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[391] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[392] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[393] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[394] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[395] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[396] time=0.15, avg_loss=0.0582, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[397] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[398] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[399] time=0.15, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[400] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[401] time=0.14, avg_loss=0.0578, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[402] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[403] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[404] time=0.14, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[405] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[406] time=0.14, avg_loss=0.0571, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[407] time=0.14, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[408] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[409] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[410] time=0.15, avg_loss=0.0571, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[411] time=0.15, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[412] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[413] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[414] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[415] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[416] time=0.15, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[417] time=0.14, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[418] time=0.14, avg_loss=0.0586, train_err=0.0143, test_h1=0.8953, test_l2=0.2998\n",
      "[419] time=0.14, avg_loss=0.0571, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[420] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[421] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[422] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[423] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[424] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[425] time=0.14, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[426] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[427] time=0.15, avg_loss=0.0583, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[428] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[429] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[430] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[431] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[432] time=0.15, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2999\n",
      "[433] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[434] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[435] time=0.15, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2999\n",
      "[436] time=0.15, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[437] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[438] time=0.14, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[439] time=0.16, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[440] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[441] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[442] time=0.14, avg_loss=0.0563, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[443] time=0.15, avg_loss=0.0564, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[444] time=0.16, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[445] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[446] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[447] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[448] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[449] time=0.14, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2999\n",
      "[450] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[451] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[452] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[453] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[454] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[455] time=0.16, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[456] time=0.14, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[457] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[458] time=0.14, avg_loss=0.0566, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[459] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[460] time=0.14, avg_loss=0.0562, train_err=0.0137, test_h1=0.8953, test_l2=0.2998\n",
      "[461] time=0.14, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[462] time=0.15, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[463] time=0.15, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[464] time=0.15, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[465] time=0.14, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2999\n",
      "[466] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2999\n",
      "[467] time=0.15, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[468] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[469] time=0.15, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[470] time=0.15, avg_loss=0.0567, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[471] time=0.15, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[472] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[473] time=0.16, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[474] time=0.15, avg_loss=0.0582, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[475] time=0.14, avg_loss=0.0582, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[476] time=0.15, avg_loss=0.0575, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[477] time=0.14, avg_loss=0.0565, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[478] time=0.14, avg_loss=0.0564, train_err=0.0138, test_h1=0.8953, test_l2=0.2998\n",
      "[479] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[480] time=0.14, avg_loss=0.0571, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[481] time=0.15, avg_loss=0.0567, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[482] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[483] time=0.14, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[484] time=0.14, avg_loss=0.0572, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[485] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[486] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[487] time=0.14, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[488] time=0.17, avg_loss=0.0576, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[489] time=0.15, avg_loss=0.0577, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[490] time=0.15, avg_loss=0.0581, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[491] time=0.14, avg_loss=0.0573, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[492] time=0.15, avg_loss=0.0569, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[493] time=0.15, avg_loss=0.0579, train_err=0.0141, test_h1=0.8953, test_l2=0.2998\n",
      "[494] time=0.15, avg_loss=0.0574, train_err=0.0140, test_h1=0.8953, test_l2=0.2998\n",
      "[495] time=0.14, avg_loss=0.0571, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[496] time=0.14, avg_loss=0.0568, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[497] time=0.14, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n",
      "[498] time=0.14, avg_loss=0.0570, train_err=0.0139, test_h1=0.8953, test_l2=0.2998\n",
      "[499] time=0.15, avg_loss=0.0580, train_err=0.0142, test_h1=0.8953, test_l2=0.2998\n"
     ]
    }
   ],
   "source": [
    "trainer.train(train_loader=train_loader,\n",
    "              test_loaders=test_loaders,\n",
    "              optimizer=optimizer, \n",
    "              scheduler=scheduler, \n",
    "              regularizer=False, \n",
    "              training_loss=train_loss, \n",
    "              eval_losses=eval_losses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "path = 'ckpt/Burgers/FNO_prod_simple1.pth'\n",
    "\n",
    "# Create directories if they do not exist\n",
    "dir_path = os.path.dirname(path)\n",
    "if not os.path.exists(dir_path):\n",
    "    os.makedirs(dir_path)\n",
    "\n",
    "# Save the model\n",
    "torch.save(trainer.model.state_dict(), path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "test",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
