{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "75858edf-817e-45d3-b9af-51123484dbd2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#!pip install torch torchvision torchaudio\n",
    "#!pip install pytorch-lightning\n",
    "#!pip install scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2b32bfa2-58bd-4e4a-ae99-8d428bf92674",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib64/python3.6/importlib/__init__.py:126: FutureWarning: MLflow support for Python 3.6 is deprecated and will be dropped in an upcoming release. At that point, existing Python 3.6 workflows that use MLflow will continue to work without modification, but Python 3.6 users will no longer get access to the latest MLflow features and bugfixes. We recommend that you upgrade to Python 3.7 or newer.\n",
      "  return _bootstrap._gcd_import(name[level:], package, level)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The available device is : cuda:0\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "from torch import nn \n",
    "from torchvision.datasets import FashionMNIST, MNIST, EMNIST, CIFAR100\n",
    "from torchvision import transforms\n",
    "from pytorch_lightning import Trainer\n",
    "from torch.utils.data import TensorDataset, DataLoader\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "import pytorch_lightning as pl\n",
    "from models import *\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from scipy.optimize import minimize\n",
    "from joblib import Parallel, delayed\n",
    "from tqdm import tqdm\n",
    "from collections import defaultdict\n",
    "\n",
    "\n",
    "import matplotlib as mpl\n",
    "mpl.style.use(\"classic\")\n",
    "mpl.rcParams[\"figure.figsize\"] = [5, 3]\n",
    "\n",
    "mpl.rcParams[\"axes.linewidth\"] = 0.75\n",
    "mpl.rcParams[\"figure.facecolor\"] = \"w\"\n",
    "mpl.rcParams[\"grid.linewidth\"] = 0.75\n",
    "mpl.rcParams[\"lines.linewidth\"] = 0.75\n",
    "mpl.rcParams[\"patch.linewidth\"] = 0.75\n",
    "mpl.rcParams[\"xtick.major.size\"] = 3\n",
    "mpl.rcParams[\"ytick.major.size\"] = 3\n",
    "\n",
    "mpl.rcParams[\"pdf.fonttype\"] = 42\n",
    "mpl.rcParams[\"ps.fonttype\"] = 42\n",
    "mpl.rcParams[\"font.size\"] = 9\n",
    "mpl.rcParams[\"axes.titlesize\"] = \"medium\"\n",
    "mpl.rcParams[\"legend.fontsize\"] = \"medium\"\n",
    "\n",
    "\n",
    "import os\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "\n",
    "# Reproducibility\n",
    "seed = 0\n",
    "np.random.seed(seed)\n",
    "torch.manual_seed(seed)\n",
    "\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "print('The available device is :', device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d1942cb8-c098-4c10-bf9d-d4aa438b3a16",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "dict_datasets = {'MNIST' : MNIST,\n",
    "                 'FashionMNIST': FashionMNIST,\n",
    "                 'EMNIST': EMNIST}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "be30e9be-b9f2-4d91-8d45-3540d8630c4d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dataset used is : FashionMNIST\n"
     ]
    }
   ],
   "source": [
    "name = 'FashionMNIST'\n",
    "print('dataset used is :', name)\n",
    "dataset = dict_datasets[name]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "833519a4-b3b8-484a-ae6f-7eb3a612838f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n",
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/train-images-idx3-ubyte.gz\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b56426e9c73548ddb1800a8dd7791867",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/26421880 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/train-images-idx3-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw\n",
      "\n",
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz\n",
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/train-labels-idx1-ubyte.gz\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8c917c3030234333b8a388d2b4e6ce8e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/29515 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw\n",
      "\n",
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz\n",
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/t10k-images-idx3-ubyte.gz\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d7baf5672d0341fc9e99c19610b06d80",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/4422102 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw\n",
      "\n",
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz\n",
      "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a7faace857794610922d7a25c8b0e44a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/5148 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /mnt/nfs/home/i.aouali/2024_uai/Figure 3 and 4/FashionMNIST/raw\n",
      "\n"
     ]
    }
   ],
   "source": [
    "if name == 'EMNIST':\n",
    "    mnist_train = dataset(os.getcwd(), train=True, split = 'balanced', download=True, transform=transforms.ToTensor())\n",
    "    mnist_test = dataset(os.getcwd(), train=False, split = 'balanced', download=True, transform=transforms.ToTensor())\n",
    "\n",
    "else:\n",
    "    mnist_train = dataset(os.getcwd(), train=True, download=True, transform=transforms.ToTensor())\n",
    "    mnist_test = dataset(os.getcwd(), train=False, download=True, transform=transforms.ToTensor())\n",
    "        \n",
    "X_train, Y_train = torch.tensor(mnist_train.data).float().reshape(len(mnist_train.data), -1), torch.tensor(mnist_train.targets)\n",
    "X_test, Y_test = torch.tensor(mnist_test.data).float().reshape(len(mnist_test.data), -1), torch.tensor(mnist_test.targets)\n",
    "\n",
    "# rescaling the contexts\n",
    "X_train /= torch.norm(X_train, p = 2, dim = -1, keepdim=True)\n",
    "X_test /= torch.norm(X_test, p = 2, dim = -1, keepdim=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8a7168fd-dcc4-4372-95f8-5cf3fbc3cd88",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train : dimension of X is : torch.Size([60000, 784]) dimension of Y is : 60000\n",
      "Test : dimension of X_test is : torch.Size([10000, 784]) dimension of Y is : 10000\n",
      "num_actions:  10\n"
     ]
    }
   ],
   "source": [
    "# data subsampling to learn a logging Policy\n",
    "\n",
    "x0, X_log, y0, Y_log = train_test_split(X_train, Y_train, train_size = 0.05)\n",
    "\n",
    "N = len(X_log)\n",
    "N_test = len(X_test)\n",
    "\n",
    "print('Train : dimension of X is :', X_train.shape, 'dimension of Y is :', len(X_train))\n",
    "print('Test : dimension of X_test is :', X_test.shape, 'dimension of Y is :', N_test)\n",
    "context_dim = X_log.shape[1]\n",
    "num_actions = len(np.unique(Y_log))\n",
    "print('num_actions: ', num_actions)\n",
    "\n",
    "subsample_pt = TensorDataset(x0, y0)\n",
    "subsample_dataloader = DataLoader(subsample_pt, batch_size=128, shuffle=True)\n",
    "\n",
    "# create the logging split\n",
    "logging_split = TensorDataset(X_log, Y_log)\n",
    "logging_split_dataloader = DataLoader(logging_split, batch_size=128, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5c26c6d8-c41e-4afb-a795-921b02621567",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "632a1b59f0c04e34a2b7ebb78aca2338",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Training a logging policy\n",
    "etas = np.round(np.linspace(-0.5, 0.5, 10), 2) # Inverse temperature parameter (the higher eta the better the performance of the logging policy)\n",
    "\n",
    "dict_results = defaultdict(list)\n",
    "epochs_logging = 5 # logging policy is trained using 10 epochs\n",
    "epochs = 20 # learning policies are trained using 20 epochs\n",
    "\n",
    "logging_policy = SupervisedPolicy(n_actions=num_actions, context_dim=context_dim, softmax = True, reg=0, device = device)\n",
    "trainer = Trainer(max_epochs=epochs_logging, gpus=1, checkpoint_callback=False, weights_summary=None, logger=None)\n",
    "trainer.fit(logging_policy, subsample_dataloader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "21033d3b-e204-41c9-aab6-db85a19c585a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "eta parameter :  -0.5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 198.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.0016548267943784594\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:00<00:00, 827.90it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.7386767864227295\n",
      "min 2.866013346647378e-05\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "01613e8270db4f5ca2619405bb4620c5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2251.13it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.07602058255076409\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3bc069550484462bb9892be51e20544b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 249.03it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.0017916135888546706\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "657765dcf95940c2aca242168e63fd44",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 258.74it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.09801436960697174\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c993e5ac6b744b9198f6bc83abb2f09a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 645.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.09796389709711074\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e822c127fdd44fa68c2030cba9f3a063",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 873.75it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.002764279397204518\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a20d1ee9bf934f66a5d52fc82bed04d2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 722.82it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.0018089630821719765\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "51a13d7e323941aaad2a8d8147059022",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 231.15it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0032764652632176878\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d19a82005c9c48af841f89c105eae606",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 764.27it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.024896203458309174\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "da71326941c14f01bf518efbcc8ad9fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1106.08it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0019891275491565464\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "901db8f5d419476e896e719a09e7530c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1526.79it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.0019703325336799023\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8a92e254331d4aa6b459b5bc29704990",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1236.64it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0020709035787731408\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "90a03ca43f234ca68aa67135a825b451",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1141.70it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.013822342406213283\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7fc9e0aac4aa42518149599dd24efe0b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1712.31it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.001880195019207895\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a9086223d2a54280a68446b54dda49e9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1434.59it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.0022611491721123457\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2b3681c94b7c4a35a7cc7ba336f5e14b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1817.65it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0019280875962227582\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1048a4d0781f4a5594464b1d1157b394",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1276.60it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.008395666573941708\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f5ab387d9a29412f9be79a3fb8fea6d6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1473.03it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0018060710359364747\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "88ae5ec9383846dbb41ad2deed5fae2a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1425.44it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.08659579053521156\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fb8e56a02448479eb2d48390d94df05a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 940.01it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.001910470351949334\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c184a61270494fa1a8ceb9cb83a70982",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 155.92it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.006533479517698288\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bc94bdd90beb485abdf1657824b27b5e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 43.72it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0018881696281954647\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2005ec700f3e4073adae32a3e80ca7c3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1092.67it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09125853109359741\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d63ddc3599894155914e579fd3c0dc3e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1269.31it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0018377723392099142\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6bd0de9e2b0842f990509c01cf6a5c08",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1218.58it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.00455253802575171\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bdecf86753e441e28d8e4581627dcfdd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 989.63it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0018513796916231513\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1e89015e699846ef9ce62b53b5f6abb0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1378.68it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09532553238868713\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "64727b5a82374edf8457b84f3b4d11ef",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1130.82it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0018130565382540227\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "efc08267906349ba89ce7c64e556fbd8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 900.75it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0040792821876704695\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "85b7977572ae47dc9ca5c10861bddd51",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1363.64it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0017630580589175224\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "54f3e396aaf44af496f183631e837092",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 890.70it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09584725234508515\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a51eebd70b3d45e3978e0918604907a1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1880.68it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0017185146749019622\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0f910b07384e4625a7e1631ec5a0c040",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 996.65it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0031308522544801235\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cb70fcaa528e49a2a8e85e339b2bb323",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1037.14it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0017270717725157737\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6294700110ac4005ba6e9bb4f40354d5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3603.98it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09801413385272026\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3f0dd1b5bd164cf59709bcd878a847c2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1295.48it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0017546431645751\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "679feec74e7246b6b1b3613af39ced7a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 423.00it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0026465058013796804\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f7f651bf75fe4fa7ac684b654f1fc2c6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1053.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0017377557314932346\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "93caa1c340f44fccb7eb329de50a3bfb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2415.58it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.07572916606664658\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2c915257f49a475285d12881f841d9fe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1046.05it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.001736136451922357\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9a75e0be80c64ee98d0508e0fc3a124e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1567.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0016548267943784594\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5], 'logging_reward': [0.0016548267943784594], 'ix_0': [0.07602058255076409], 'smoothing_0': [0.0017916135888546706], 'clipping_0': [0.09801436960697174], 'harmonic_0': [0.09796389709711074], 'ix_1': [0.002764279397204518], 'smoothing_1': [0.0018089630821719765], 'clipping_1': [0.0032764652632176878], 'harmonic_1': [0.024896203458309174], 'ix_2': [0.0019891275491565464], 'smoothing_2': [0.0019703325336799023], 'clipping_2': [0.0020709035787731408], 'harmonic_2': [0.013822342406213283], 'ix_3': [0.001880195019207895], 'smoothing_3': [0.0022611491721123457], 'clipping_3': [0.0019280875962227582], 'harmonic_3': [0.008395666573941708], 'ix_4': [0.0018060710359364747], 'smoothing_4': [0.08659579053521156], 'clipping_4': [0.001910470351949334], 'harmonic_4': [0.006533479517698288], 'ix_5': [0.0018881696281954647], 'smoothing_5': [0.09125853109359741], 'clipping_5': [0.0018377723392099142], 'harmonic_5': [0.00455253802575171], 'ix_6': [0.0018513796916231513], 'smoothing_6': [0.09532553238868713], 'clipping_6': [0.0018130565382540227], 'harmonic_6': [0.0040792821876704695], 'ix_7': [0.0017630580589175224], 'smoothing_7': [0.09584725234508515], 'clipping_7': [0.0017185146749019622], 'harmonic_7': [0.0031308522544801235], 'ix_8': [0.0017270717725157737], 'smoothing_8': [0.09801413385272026], 'clipping_8': [0.0017546431645751], 'harmonic_8': [0.0026465058013796804], 'ix_9': [0.0017377557314932346], 'smoothing_9': [0.07572916606664658], 'clipping_9': [0.001736136451922357], 'harmonic_9': [0.0016548267943784594]})\n",
      "eta parameter :  -0.39\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3070.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.004011169955134392\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:03<00:00, 127.14it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.6379942893981934\n",
      "min 0.00021061018924228847\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6c469a519ec84246b06fe56d6fd9dce5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 894.64it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.11107554304599762\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3bbfa66cbf01441cb7d6b43a872c0ba5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1012.38it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.004898103752732277\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fdf7397c15f04ab39bc0857a5c825c46",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2782.42it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.15997750718593598\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ccf96f153153473897bdf8a70e335a47",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1404.62it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.1492506049156189\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2d7f328844ca455e98035b87eb183a04",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1361.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.09274670981764793\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "37a7b41f9ba348b6834cd8a1d64a74e9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1131.05it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.00768945246860385\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ce11c239d3484b0596ad8bc29e49ea9d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3258.88it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.09048381167650223\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ba43becc09884bfeb5f061303720b1ab",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1246.18it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.07318411912918091\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ac13e472deaa47eda93717a0a8f42b17",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1205.40it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.08548013907074928\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "668b3fd542c24557967c354a87471b20",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1844.16it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.07846598596572876\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c262edba05c44bb2b13e5cb281f53117",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1068.42it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.0814400324523449\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "70b25b0947a44d1abe08cbfc974da36e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1081.43it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0406029051065445\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1a6d362fdf584f239760927ff008ef65",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1206.02it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0734175749361515\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "49f52363f447431e8f37039b714ac101",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1163.49it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09423755583167076\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4cbc0e7c5fae4c8e94ecd440f3bf2a3b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1452.87it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.07978782141208648\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4a2b5f4f55fb41d6a92c0d41aa4b8202",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 990.47it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.027062873888015745\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cd0236cf49564500932898b9db480302",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1104.57it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.07332371616959572\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e4eaf6034a374337b4a870be1bbdc6cd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1355.20it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09584876535534859\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78dc0479141b49d4a26958ac117af4ce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 979.94it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.06577393084168434\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "617be4326527406fbd6f3fa73e4b37dd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 954.65it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.020160479152202607\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aef1a5ee683f4c2a934a7114ab16ffce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1192.80it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.06749454309940338\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "44b3267357064c19ae5478b5c8f49fd6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 676.23it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09734195493459702\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "952417458a6d4c4fa07ccd66bedd2513",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2565.70it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.06436340960860253\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9b9d36f0a5ef41b59f38242e5aa4c686",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1454.41it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0147464190274477\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6b09e32449f34df687dbade339001cb5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1290.43it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.006424173071980476\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a5942c9137c347d8952309e772120847",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3490.65it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09844384480118752\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c12461781ff04ba1bb8ed2fe07c2f896",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1299.87it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.006666117901355028\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cc9610868ab0458dbcff3edac9b784be",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1112.72it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0127955187022686\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "265517c28308431f909fd475815a0d4c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 198.65it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.005618271762877703\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0f61b65729a3458186aae9338961bf81",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 47.62it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09909229539036751\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ea6f85a7cd844fb6a44145caf74ed1a0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 54.91it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.005599219207465649\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "631ff18a33414b27a7fc1de59c248eb2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 498.51it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.010182749785482883\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "db177f8c4be64bbaab3679d202516797",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1076.45it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.005078396270424127\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "33264fb92eaa4672ae3fe72ef97fcdcb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1545.37it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.0995167630314827\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2a553f59f08a4917871f62fd0076f9f9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1224.22it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.005111382631957531\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3ea32c28e8174958a8d49b4b10d8e2d7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1535.91it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.00698241161108017\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d1f66b290b714b8790816fb15bd4b31d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1269.09it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.005292286267876625\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9e1b2b2e103643a09e9bb94207247fe6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 799.28it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.15154042177200316\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e4c219ec0c9749d4b5b87da8493d8b12",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 826.25it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.004862686868011951\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "faca9dcc0e2f4e0cbd1948041d6a2b89",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1108.80it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.004011169977858662\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5, -0.39], 'logging_reward': [0.0016548267943784594, 0.004011169955134392], 'ix_0': [0.07602058255076409, 0.11107554304599762], 'smoothing_0': [0.0017916135888546706, 0.004898103752732277], 'clipping_0': [0.09801436960697174, 0.15997750718593598], 'harmonic_0': [0.09796389709711074, 0.1492506049156189], 'ix_1': [0.002764279397204518, 0.09274670981764793], 'smoothing_1': [0.0018089630821719765, 0.00768945246860385], 'clipping_1': [0.0032764652632176878, 0.09048381167650223], 'harmonic_1': [0.024896203458309174, 0.07318411912918091], 'ix_2': [0.0019891275491565464, 0.08548013907074928], 'smoothing_2': [0.0019703325336799023, 0.07846598596572876], 'clipping_2': [0.0020709035787731408, 0.0814400324523449], 'harmonic_2': [0.013822342406213283, 0.0406029051065445], 'ix_3': [0.001880195019207895, 0.0734175749361515], 'smoothing_3': [0.0022611491721123457, 0.09423755583167076], 'clipping_3': [0.0019280875962227582, 0.07978782141208648], 'harmonic_3': [0.008395666573941708, 0.027062873888015745], 'ix_4': [0.0018060710359364747, 0.07332371616959572], 'smoothing_4': [0.08659579053521156, 0.09584876535534859], 'clipping_4': [0.001910470351949334, 0.06577393084168434], 'harmonic_4': [0.006533479517698288, 0.020160479152202607], 'ix_5': [0.0018881696281954647, 0.06749454309940338], 'smoothing_5': [0.09125853109359741, 0.09734195493459702], 'clipping_5': [0.0018377723392099142, 0.06436340960860253], 'harmonic_5': [0.00455253802575171, 0.0147464190274477], 'ix_6': [0.0018513796916231513, 0.006424173071980476], 'smoothing_6': [0.09532553238868713, 0.09844384480118752], 'clipping_6': [0.0018130565382540227, 0.006666117901355028], 'harmonic_6': [0.0040792821876704695, 0.0127955187022686], 'ix_7': [0.0017630580589175224, 0.005618271762877703], 'smoothing_7': [0.09584725234508515, 0.09909229539036751], 'clipping_7': [0.0017185146749019622, 0.005599219207465649], 'harmonic_7': [0.0031308522544801235, 0.010182749785482883], 'ix_8': [0.0017270717725157737, 0.005078396270424127], 'smoothing_8': [0.09801413385272026, 0.0995167630314827], 'clipping_8': [0.0017546431645751, 0.005111382631957531], 'harmonic_8': [0.0026465058013796804, 0.00698241161108017], 'ix_9': [0.0017377557314932346, 0.005292286267876625], 'smoothing_9': [0.07572916606664658, 0.15154042177200316], 'clipping_9': [0.001736136451922357, 0.004862686868011951], 'harmonic_9': [0.0016548267943784594, 0.004011169977858662]})\n",
      "eta parameter :  -0.28\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2999.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.010384154437482357\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:00<00:00, 613.11it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.49770107865333557\n",
      "min 0.0014961070846766233\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d53f7411a02545768b404655692ed2c3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1039.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.26763565740585327\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "04f96254c1634006ac358c09abec19f6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 878.53it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.08902953031659126\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0c951df0d38942c1992233d78743eafb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1614.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.2676695785522461\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "54a7f573a93a407d92c178fc3282d5b8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1308.75it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.26604757924079897\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7ddd1b71907744e9bc0753bc21b9a13b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1068.28it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0976970940887928\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "daad84aab59b4d59b21a830305fe998b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1301.39it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.08966888378858566\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a6c30f468f354c9d96bedccc3cec86e3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1205.08it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.16116888937950136\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3f4bc8460e6a4108a6b7a02245c7868d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1543.22it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.22160015420913695\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "af567feae3644523b8980a14cc82c96e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1402.77it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.09644433104395866\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b07e88576d1d43debf0fbca1ee839969",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1220.11it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09405719060897827\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "edc85a47a92e4568964549161bfab348",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1135.98it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.09533011321425439\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e07d1799679642b78f695a962bf67527",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1482.43it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.13727533819675444\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8b8e6dfc11c24f618c6b738d7d911ce5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3587.20it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.09577681348919868\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e0cc2ddde58c48a9a5818c36f11563d4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 984.97it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09554980810284615\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "09edccf2b68945afa3932811f2265563",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1139.10it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.09516231610178948\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3dbb85d8fd2241b8b59517a3b886deab",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 935.15it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.0959237770318985\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "32307a2368d64165a8f0388372f8c740",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1175.00it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.0909283899128437\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "672d4f2ca0af459b937ac9f483714d88",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1201.04it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.09811607581973075\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3e93a78d46e147e6bf47a2ec6e6575b0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1325.23it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.09337690369486809\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f7b4d7e67e454bbea7beb929c6c97447",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1190.01it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.07036076868772506\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "89f39031442548cb9ffe0c4238a5369a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1213.06it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.09089929768443107\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c02ab1b49d0e4f6fa670290fe047e769",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1062.34it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.17065021793842317\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "46a4c324f33249c8b05a79ae4ee1a4b9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1300.05it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.090901534563303\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "458a06d753c44ecaa3dcd4df635e5c5c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1293.16it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.054786331856250765\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "22e7566731884347864641a751eb669e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3421.59it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.08812359779477119\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eb663c74b36449ef9e9a544a61b205d4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1266.63it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.1834357506752014\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e2e1f35012b344379f558cca4a8a57cc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2138.02it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.08943644857406616\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fb5a4be3b08f45deae96525d4c5265d7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 980.33it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.04222167618274689\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "53b5c9b5f0044e299b7608cb353201e0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3334.78it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.08624420105218887\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd3f61a3652d4be7bf2136bfaa545391",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3663.03it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.18935596718788147\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e658d2d834c64c12bcfed709478ac635",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1404.40it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.08860154327750205\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "136a47872a724c0d977e18de234071ff",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1961.36it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.03299144434332848\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "703d9d4a7c644c598815b57234c22e2e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1006.90it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.09209122355580329\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b9ca822b58bc420ab92808a0b988b6dd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1335.75it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.2754674090385437\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4deacff6b33e401c8d027b7ad1124ce5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 578.85it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.08758228687644005\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1d2352debaf64b53bbd3807afebde8bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 44.33it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.024612572491168976\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1a6292d2093841ce861484a8e13fe1ce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 43.08it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.08852998181581498\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8374110aacb049cbadb19db329b01294",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1288.80it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.2787570029020309\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5d2589b02818443e858e9eedfa622d0a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1350.35it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.08163812893033028\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "75f2fa1a816a47a098471ddc05fb330a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1440.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.010384154446423054\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5, -0.39, -0.28], 'logging_reward': [0.0016548267943784594, 0.004011169955134392, 0.010384154437482357], 'ix_0': [0.07602058255076409, 0.11107554304599762, 0.26763565740585327], 'smoothing_0': [0.0017916135888546706, 0.004898103752732277, 0.08902953031659126], 'clipping_0': [0.09801436960697174, 0.15997750718593598, 0.2676695785522461], 'harmonic_0': [0.09796389709711074, 0.1492506049156189, 0.26604757924079897], 'ix_1': [0.002764279397204518, 0.09274670981764793, 0.0976970940887928], 'smoothing_1': [0.0018089630821719765, 0.00768945246860385, 0.08966888378858566], 'clipping_1': [0.0032764652632176878, 0.09048381167650223, 0.16116888937950136], 'harmonic_1': [0.024896203458309174, 0.07318411912918091, 0.22160015420913695], 'ix_2': [0.0019891275491565464, 0.08548013907074928, 0.09644433104395866], 'smoothing_2': [0.0019703325336799023, 0.07846598596572876, 0.09405719060897827], 'clipping_2': [0.0020709035787731408, 0.0814400324523449, 0.09533011321425439], 'harmonic_2': [0.013822342406213283, 0.0406029051065445, 0.13727533819675444], 'ix_3': [0.001880195019207895, 0.0734175749361515, 0.09577681348919868], 'smoothing_3': [0.0022611491721123457, 0.09423755583167076, 0.09554980810284615], 'clipping_3': [0.0019280875962227582, 0.07978782141208648, 0.09516231610178948], 'harmonic_3': [0.008395666573941708, 0.027062873888015745, 0.0959237770318985], 'ix_4': [0.0018060710359364747, 0.07332371616959572, 0.0909283899128437], 'smoothing_4': [0.08659579053521156, 0.09584876535534859, 0.09811607581973075], 'clipping_4': [0.001910470351949334, 0.06577393084168434, 0.09337690369486809], 'harmonic_4': [0.006533479517698288, 0.020160479152202607, 0.07036076868772506], 'ix_5': [0.0018881696281954647, 0.06749454309940338, 0.09089929768443107], 'smoothing_5': [0.09125853109359741, 0.09734195493459702, 0.17065021793842317], 'clipping_5': [0.0018377723392099142, 0.06436340960860253, 0.090901534563303], 'harmonic_5': [0.00455253802575171, 0.0147464190274477, 0.054786331856250765], 'ix_6': [0.0018513796916231513, 0.006424173071980476, 0.08812359779477119], 'smoothing_6': [0.09532553238868713, 0.09844384480118752, 0.1834357506752014], 'clipping_6': [0.0018130565382540227, 0.006666117901355028, 0.08943644857406616], 'harmonic_6': [0.0040792821876704695, 0.0127955187022686, 0.04222167618274689], 'ix_7': [0.0017630580589175224, 0.005618271762877703, 0.08624420105218887], 'smoothing_7': [0.09584725234508515, 0.09909229539036751, 0.18935596718788147], 'clipping_7': [0.0017185146749019622, 0.005599219207465649, 0.08860154327750205], 'harmonic_7': [0.0031308522544801235, 0.010182749785482883, 0.03299144434332848], 'ix_8': [0.0017270717725157737, 0.005078396270424127, 0.09209122355580329], 'smoothing_8': [0.09801413385272026, 0.0995167630314827, 0.2754674090385437], 'clipping_8': [0.0017546431645751, 0.005111382631957531, 0.08758228687644005], 'harmonic_8': [0.0026465058013796804, 0.00698241161108017, 0.024612572491168976], 'ix_9': [0.0017377557314932346, 0.005292286267876625, 0.08852998181581498], 'smoothing_9': [0.07572916606664658, 0.15154042177200316, 0.2787570029020309], 'clipping_9': [0.001736136451922357, 0.004862686868011951, 0.08163812893033028], 'harmonic_9': [0.0016548267943784594, 0.004011169977858662, 0.010384154446423054]})\n",
      "eta parameter :  -0.17\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3152.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.026894244468212126\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:00<00:00, 618.17it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.32439476251602173\n",
      "min 0.009410472586750984\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b40c5e1e6f9f46329db0f9e878b15457",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3049.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.4850407739162445\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6bc22412f9c241538ae6947f92b6d3e8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 114.18it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.15568549218177796\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5852072d8c8443bb86136bca3ebed54f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3628.01it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.48730215854644776\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "070458c79179405f80380c0b06638649",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3640.73it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.4858971398830414\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fd04f87424f74e0b9c3592db2013631e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1077.68it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.28726141662597654\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4e9cbc6599fc40ddb5e5277f9c6b1589",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3528.15it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.16728602499961853\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b8c8fb1d54d64a4fbe7ea9784a1c8be5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3689.83it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.2905903428077698\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1d1e8bfe15f84742a3213a410b5123b8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1150.86it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.4566627492904663\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f86a69b62bc34b4d9dee796790b754f7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1227.75it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.27321705346107483\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "59b329abf9db44d6b9fc8b1291365718",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 971.44it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.17426096150875092\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "abd3c48d9e3747838faeebd740e3ee3c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1196.82it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.24878031051158905\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a2ad4ae2712a4c0e997cac79eb1c81ae",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1299.51it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.3448029336452484\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "71a0f94536194328bbfe4e3b2eeb5e33",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 50.49it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.17854156079292297\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b9ee2b45c2f7487298215161a965338f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3104.94it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.28599316325187685\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ad0742653258467bb5cc5126324c184b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1164.61it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.17966635494232178\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e93110a22f0e4b0b858e7333ad070f1f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1263.94it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.2612605007171631\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "94c2b931b5124870a0629fb3c84997ab",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3439.70it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.1739023714542389\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "71a7f170d42d444c8be239285e6e1671",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1167.77it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.28075808057785034\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "93e75c0d4c3c41b383d55434dcca575d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 934.68it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.1749490626335144\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8d63a93b4d6649ada61b527a0462230c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 108.54it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.208617009973526\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "80b50b23cf2f4aeca7e091b8aee1dc05",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1126.57it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.17032699229717255\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0e08db47ed63470f997eb32d2900275c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1099.85it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.3058421569824219\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1318910ff9834f99b13b7bec7602736d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1009.25it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.1715635729074478\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e4f98a5313074b35a3d7d95bd31d8d11",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1302.34it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.1651239861011505\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "74b780469c5342cbad3217b5bb1cd337",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1455.54it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.16684231171607972\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6e3708d2a52d43649cc0ecdd47a9d4e7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3614.71it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.38168789234161377\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bed4102dd582414ea71cd7d21bf92d30",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1293.56it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.16656060495376587\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fd994fa75ab1480aaa48845a7732e5e1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1014.77it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.13167021782398225\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "22a5cd5208604d3c92a8cef4ace03ba6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3587.40it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.16324604024887085\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d6f1966e6114402ea9f5a785548e4417",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3145.05it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.4697921859741211\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a4b0e5db1dce494ebb711dcce441a76c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1598.24it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.16241064960956572\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aeb4fa5c2d164e7e815177a97f1e1c5c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1115.71it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.10184911907911301\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "520c5718642e4dc2980750e8760c017b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1142.10it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.15787675924301148\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "241b2720c60f4e6fb1af008ab45504b7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1239.68it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.4810739656448364\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b22691e1c7ef46c09cb8b89b260e7426",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 814.17it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.16011671500205993\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0328bc158beb46af888de11c6133af81",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2094.96it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.07426629198789597\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f2e885be44184559b5833fd4e8bce80b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1068.50it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.15532025213241576\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "335bd7b75f614e47bf2c10ac1fe6d093",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1341.30it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.48718938760757446\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "303d1f64735446339502de47ce857b15",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1150.05it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.1570905291080475\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "20d66ba5e21b4f4daa1c9e3c071e03ac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1119.44it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.02689424432218075\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5, -0.39, -0.28, -0.17], 'logging_reward': [0.0016548267943784594, 0.004011169955134392, 0.010384154437482357, 0.026894244468212126], 'ix_0': [0.07602058255076409, 0.11107554304599762, 0.26763565740585327, 0.4850407739162445], 'smoothing_0': [0.0017916135888546706, 0.004898103752732277, 0.08902953031659126, 0.15568549218177796], 'clipping_0': [0.09801436960697174, 0.15997750718593598, 0.2676695785522461, 0.48730215854644776], 'harmonic_0': [0.09796389709711074, 0.1492506049156189, 0.26604757924079897, 0.4858971398830414], 'ix_1': [0.002764279397204518, 0.09274670981764793, 0.0976970940887928, 0.28726141662597654], 'smoothing_1': [0.0018089630821719765, 0.00768945246860385, 0.08966888378858566, 0.16728602499961853], 'clipping_1': [0.0032764652632176878, 0.09048381167650223, 0.16116888937950136, 0.2905903428077698], 'harmonic_1': [0.024896203458309174, 0.07318411912918091, 0.22160015420913695, 0.4566627492904663], 'ix_2': [0.0019891275491565464, 0.08548013907074928, 0.09644433104395866, 0.27321705346107483], 'smoothing_2': [0.0019703325336799023, 0.07846598596572876, 0.09405719060897827, 0.17426096150875092], 'clipping_2': [0.0020709035787731408, 0.0814400324523449, 0.09533011321425439, 0.24878031051158905], 'harmonic_2': [0.013822342406213283, 0.0406029051065445, 0.13727533819675444, 0.3448029336452484], 'ix_3': [0.001880195019207895, 0.0734175749361515, 0.09577681348919868, 0.17854156079292297], 'smoothing_3': [0.0022611491721123457, 0.09423755583167076, 0.09554980810284615, 0.28599316325187685], 'clipping_3': [0.0019280875962227582, 0.07978782141208648, 0.09516231610178948, 0.17966635494232178], 'harmonic_3': [0.008395666573941708, 0.027062873888015745, 0.0959237770318985, 0.2612605007171631], 'ix_4': [0.0018060710359364747, 0.07332371616959572, 0.0909283899128437, 0.1739023714542389], 'smoothing_4': [0.08659579053521156, 0.09584876535534859, 0.09811607581973075, 0.28075808057785034], 'clipping_4': [0.001910470351949334, 0.06577393084168434, 0.09337690369486809, 0.1749490626335144], 'harmonic_4': [0.006533479517698288, 0.020160479152202607, 0.07036076868772506, 0.208617009973526], 'ix_5': [0.0018881696281954647, 0.06749454309940338, 0.09089929768443107, 0.17032699229717255], 'smoothing_5': [0.09125853109359741, 0.09734195493459702, 0.17065021793842317, 0.3058421569824219], 'clipping_5': [0.0018377723392099142, 0.06436340960860253, 0.090901534563303, 0.1715635729074478], 'harmonic_5': [0.00455253802575171, 0.0147464190274477, 0.054786331856250765, 0.1651239861011505], 'ix_6': [0.0018513796916231513, 0.006424173071980476, 0.08812359779477119, 0.16684231171607972], 'smoothing_6': [0.09532553238868713, 0.09844384480118752, 0.1834357506752014, 0.38168789234161377], 'clipping_6': [0.0018130565382540227, 0.006666117901355028, 0.08943644857406616, 0.16656060495376587], 'harmonic_6': [0.0040792821876704695, 0.0127955187022686, 0.04222167618274689, 0.13167021782398225], 'ix_7': [0.0017630580589175224, 0.005618271762877703, 0.08624420105218887, 0.16324604024887085], 'smoothing_7': [0.09584725234508515, 0.09909229539036751, 0.18935596718788147, 0.4697921859741211], 'clipping_7': [0.0017185146749019622, 0.005599219207465649, 0.08860154327750205, 0.16241064960956572], 'harmonic_7': [0.0031308522544801235, 0.010182749785482883, 0.03299144434332848, 0.10184911907911301], 'ix_8': [0.0017270717725157737, 0.005078396270424127, 0.09209122355580329, 0.15787675924301148], 'smoothing_8': [0.09801413385272026, 0.0995167630314827, 0.2754674090385437, 0.4810739656448364], 'clipping_8': [0.0017546431645751, 0.005111382631957531, 0.08758228687644005, 0.16011671500205993], 'harmonic_8': [0.0026465058013796804, 0.00698241161108017, 0.024612572491168976, 0.07426629198789597], 'ix_9': [0.0017377557314932346, 0.005292286267876625, 0.08852998181581498, 0.15532025213241576], 'smoothing_9': [0.07572916606664658, 0.15154042177200316, 0.2787570029020309, 0.48718938760757446], 'clipping_9': [0.001736136451922357, 0.004862686868011951, 0.08163812893033028, 0.1570905291080475], 'harmonic_9': [0.0016548267943784594, 0.004011169977858662, 0.010384154446423054, 0.02689424432218075]})\n",
      "eta parameter :  -0.06\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3177.26it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.06505892275571823\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:00<00:00, 710.64it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.1650695651769638\n",
      "min 0.047008052468299866\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "05aa993f6f434d2aa549598b625e6a60",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 51.69it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6929360151290893\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "412a6ce47f3740cb943feaebb885773a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1460.36it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.534009139251709\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "27eb7e0c6c9a46bdaaa37b498f46a43e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2797.31it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6893377709388733\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d5d1ce818d140608921821e468e43f9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3632.43it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6924018848419189\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9cd5248955664d6c8fee2dde72932e4a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1085.71it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6263253059387207\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d7988e824d6d43209181d3bb5a2b2d5b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 50.86it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.558178358078003\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "12dc092eb03b4113b569f45dfcfb1a04",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3349.81it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6620609937667846\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "df7b53b750154a25af19058e8689a5c1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1181.77it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.624744464302063\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ef02b9ea063d4867a10adc25ba2c16d4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1472.83it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6099065008163452\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7f9e4a68b3ab47c6b1479a85b0bc3f02",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 730.19it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.5776187893867493\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3f035b5ddd234cba9e6a0004a687edb8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 822.68it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6188219171524048\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "40bc3c652993428fb5265db7f6f156ec",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 919.86it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.5498684995651245\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "485f9695cf2e42cc8ea5efec700fa6e7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1329.09it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.5955511766433715\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "625aed3d735e4b0aaa533218eb79305d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 956.70it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.595024810218811\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e7aee63fba974b87adede994d309990b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1899.18it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6033873224258423\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a485865b561d41c99d1283ee729e58ea",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3715.52it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.48536629514694213\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd66f73d26f3465aa849d9de656c9131",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 735.81it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.5825763339996338\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bca068b06dce48a0b5b29af6c878ab9f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1300.42it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6090133407592774\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "510c8649b9b440718bfe7dc2acd76ea2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1189.59it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.5899833444595337\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cb897a61783f4c40b6a1de75959e2550",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1404.18it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.4266245581626892\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2cb326bb78e44de196467e35d5f539bd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 943.87it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.5472121652603149\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "31b7393121ef484897a7656d493846cf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1307.34it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6197233964920044\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "87637ff21e2f450881459e7c96bf9245",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3181.77it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.5770972018241882\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fdb3b1f448bd47199f877891e11f7661",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 542.15it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.36930047016143797\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ab64d3168d9e4f778670a8dd982db320",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1290.04it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.5592613296508789\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ead78167c4624d8b9c0a6d53d3bebf8e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1327.83it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6504468092918396\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "622c9f6930714dcb8b1da9c3e36c4037",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1954.45it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.5427015423774719\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "56b3e1de796a401990559bc5c1deec1b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1079.38it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.3112030908584595\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6b9dd7e1ab1c4188b9396b29263301e9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1471.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.5287852444648743\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "847df774b53d4a59b5dd2107b4bb97c1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 968.82it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.667348966217041\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bf27bf394e874e128e646d852c15ac6c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3105.26it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.5329023422241211\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d5ac8191375049f38683e4524292bd55",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1275.97it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.25522911367416384\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "116bdbf9738946e9a432cbffd5f476df",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3149.54it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.5199557180404664\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f68f606e5862453390eccfa57d9fb620",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 980.84it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6821598964691162\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "697a038112fd497cb034f95ac9d39937",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3097.51it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.5434505724906922\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "07be324432da4eefb879554440456118",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1023.57it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.19195594923496245\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "06491b74ee5244a6a1fa26372e3d54fe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1305.96it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.5124221091270447\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eb43e633eee04c0194dbf7c47ae4d511",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1183.53it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.692459008026123\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cfbaf8f93155498c80546369b6e6fec9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 71.40it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.5168498191833496\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ecedd8774c7340879db1d5abf5954d58",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1385.90it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.06505892304182052\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5, -0.39, -0.28, -0.17, -0.06], 'logging_reward': [0.0016548267943784594, 0.004011169955134392, 0.010384154437482357, 0.026894244468212126, 0.06505892275571823], 'ix_0': [0.07602058255076409, 0.11107554304599762, 0.26763565740585327, 0.4850407739162445, 0.6929360151290893], 'smoothing_0': [0.0017916135888546706, 0.004898103752732277, 0.08902953031659126, 0.15568549218177796, 0.534009139251709], 'clipping_0': [0.09801436960697174, 0.15997750718593598, 0.2676695785522461, 0.48730215854644776, 0.6893377709388733], 'harmonic_0': [0.09796389709711074, 0.1492506049156189, 0.26604757924079897, 0.4858971398830414, 0.6924018848419189], 'ix_1': [0.002764279397204518, 0.09274670981764793, 0.0976970940887928, 0.28726141662597654, 0.6263253059387207], 'smoothing_1': [0.0018089630821719765, 0.00768945246860385, 0.08966888378858566, 0.16728602499961853, 0.558178358078003], 'clipping_1': [0.0032764652632176878, 0.09048381167650223, 0.16116888937950136, 0.2905903428077698, 0.6620609937667846], 'harmonic_1': [0.024896203458309174, 0.07318411912918091, 0.22160015420913695, 0.4566627492904663, 0.624744464302063], 'ix_2': [0.0019891275491565464, 0.08548013907074928, 0.09644433104395866, 0.27321705346107483, 0.6099065008163452], 'smoothing_2': [0.0019703325336799023, 0.07846598596572876, 0.09405719060897827, 0.17426096150875092, 0.5776187893867493], 'clipping_2': [0.0020709035787731408, 0.0814400324523449, 0.09533011321425439, 0.24878031051158905, 0.6188219171524048], 'harmonic_2': [0.013822342406213283, 0.0406029051065445, 0.13727533819675444, 0.3448029336452484, 0.5498684995651245], 'ix_3': [0.001880195019207895, 0.0734175749361515, 0.09577681348919868, 0.17854156079292297, 0.5955511766433715], 'smoothing_3': [0.0022611491721123457, 0.09423755583167076, 0.09554980810284615, 0.28599316325187685, 0.595024810218811], 'clipping_3': [0.0019280875962227582, 0.07978782141208648, 0.09516231610178948, 0.17966635494232178, 0.6033873224258423], 'harmonic_3': [0.008395666573941708, 0.027062873888015745, 0.0959237770318985, 0.2612605007171631, 0.48536629514694213], 'ix_4': [0.0018060710359364747, 0.07332371616959572, 0.0909283899128437, 0.1739023714542389, 0.5825763339996338], 'smoothing_4': [0.08659579053521156, 0.09584876535534859, 0.09811607581973075, 0.28075808057785034, 0.6090133407592774], 'clipping_4': [0.001910470351949334, 0.06577393084168434, 0.09337690369486809, 0.1749490626335144, 0.5899833444595337], 'harmonic_4': [0.006533479517698288, 0.020160479152202607, 0.07036076868772506, 0.208617009973526, 0.4266245581626892], 'ix_5': [0.0018881696281954647, 0.06749454309940338, 0.09089929768443107, 0.17032699229717255, 0.5472121652603149], 'smoothing_5': [0.09125853109359741, 0.09734195493459702, 0.17065021793842317, 0.3058421569824219, 0.6197233964920044], 'clipping_5': [0.0018377723392099142, 0.06436340960860253, 0.090901534563303, 0.1715635729074478, 0.5770972018241882], 'harmonic_5': [0.00455253802575171, 0.0147464190274477, 0.054786331856250765, 0.1651239861011505, 0.36930047016143797], 'ix_6': [0.0018513796916231513, 0.006424173071980476, 0.08812359779477119, 0.16684231171607972, 0.5592613296508789], 'smoothing_6': [0.09532553238868713, 0.09844384480118752, 0.1834357506752014, 0.38168789234161377, 0.6504468092918396], 'clipping_6': [0.0018130565382540227, 0.006666117901355028, 0.08943644857406616, 0.16656060495376587, 0.5427015423774719], 'harmonic_6': [0.0040792821876704695, 0.0127955187022686, 0.04222167618274689, 0.13167021782398225, 0.3112030908584595], 'ix_7': [0.0017630580589175224, 0.005618271762877703, 0.08624420105218887, 0.16324604024887085, 0.5287852444648743], 'smoothing_7': [0.09584725234508515, 0.09909229539036751, 0.18935596718788147, 0.4697921859741211, 0.667348966217041], 'clipping_7': [0.0017185146749019622, 0.005599219207465649, 0.08860154327750205, 0.16241064960956572, 0.5329023422241211], 'harmonic_7': [0.0031308522544801235, 0.010182749785482883, 0.03299144434332848, 0.10184911907911301, 0.25522911367416384], 'ix_8': [0.0017270717725157737, 0.005078396270424127, 0.09209122355580329, 0.15787675924301148, 0.5199557180404664], 'smoothing_8': [0.09801413385272026, 0.0995167630314827, 0.2754674090385437, 0.4810739656448364, 0.6821598964691162], 'clipping_8': [0.0017546431645751, 0.005111382631957531, 0.08758228687644005, 0.16011671500205993, 0.5434505724906922], 'harmonic_8': [0.0026465058013796804, 0.00698241161108017, 0.024612572491168976, 0.07426629198789597, 0.19195594923496245], 'ix_9': [0.0017377557314932346, 0.005292286267876625, 0.08852998181581498, 0.15532025213241576, 0.5124221091270447], 'smoothing_9': [0.07572916606664658, 0.15154042177200316, 0.2787570029020309, 0.48718938760757446, 0.692459008026123], 'clipping_9': [0.001736136451922357, 0.004862686868011951, 0.08163812893033028, 0.1570905291080475, 0.5168498191833496], 'harmonic_9': [0.0016548267943784594, 0.004011169977858662, 0.010384154446423054, 0.02689424432218075, 0.06505892304182052]})\n",
      "eta parameter :  0.06\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2728.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.14662103624343872\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:00<00:00, 552.28it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.19209200143814087\n",
      "min 0.054918836802244186\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2d6b0db904a449edba5cf6407657e01c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1115.25it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7155474009513855\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bcece00a2f7e45e2a0b05d1369f3992a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3032.79it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6513540760993958\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6e7bb9510c644421ae4356a5f17ae289",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 938.53it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7156016772270203\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7bb72ca4f8984da281c3ab4bcb783eb2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1162.20it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7151141323089599\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f5657dde8ddd40388aad12a851c91207",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3133.57it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7042145915031434\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "12199570332a40c58f03b4e20c3f5747",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 43.79it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6628212430953979\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3eb598a118404a5ca172ec99069bf6f6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2017.82it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7152411190032959\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "852043258f8b4c18bafafa4ea089f286",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3583.09it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6943077087402344\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5f31bf5c5eea43629ec41f8d117a37b3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1463.11it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.694782451248169\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "714c1f9d83574033b25af85d538da7e3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1092.10it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6729674991607666\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1e525f1f895f40759757c7d3b47c7bc8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 754.08it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7083725296020508\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2d58af07cbf842368b9357cac69dee55",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1375.66it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6604203813552857\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "317ecbe9cb594844981fa670a927d8f7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1353.27it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6856419325828552\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fa3f8fa2bce642aca919c25cd7f86f22",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1169.49it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6820794361114502\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1df62efedb894d01a820e573e62426e0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1270.62it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6976910985946655\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c90589492074421d858c7554ef977e0c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1868.10it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6253743698120117\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bda7c85e781f405dbdafc3d8a258d839",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1452.16it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.676638300704956\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b70bae865de5482c806b6dd5050badaa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1047.77it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6892951133728027\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "50c9539a880b44cd91c32273b179621e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1497.53it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6882037739753724\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1738b0ee5ab94d75bab3defb64c753f0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 177.62it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.5921717452049255\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b252a388d03e4ec29c78639528621c3a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1242.31it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6689618106842041\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1c201526bac0496d824cd2711ff3eca9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1367.24it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6960819185256958\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4773e5397bf146ccb6b7040583dabaff",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1512.94it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6798695580482483\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a6d95ca6db1543da8c3ac82221f6ed7c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1192.29it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.5552163663864136\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6cafa6d79d81409bb8f1e73c43bdc6f8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1311.75it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6624987567901611\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5feff94300de4274bfacd4c84f38c399",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1097.98it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7026664342880249\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e739dfcf81a14347847b99a9ccf7a5c6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3524.14it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6716056250572204\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "14607fb60bc34745b06f35d9303709f7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 902.88it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.5088063773155213\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "75616619de8748268d1f83e95c24e184",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 438.11it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6552499485015869\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "228cdec6d2554e748c7f302d80fc37a9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1239.86it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7075983729362488\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "672601b793ba41eabb2ce69460db4395",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1617.98it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6636212114334107\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1036b126c61c465080711bcc5d14f0ce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1219.80it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.4609757745742798\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d772ca5ece754eb591ea67f7ee45be76",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1100.38it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6493330085754394\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a79fdcb2184e4d2facaaa9252805d2ee",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1331.64it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7118321292877198\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78e5f13f6c84487595ff1100f9b6f664",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1560.17it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6576916046142578\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fb258bfaa0e343fcb072d0adbb41e5b0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 748.27it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.3861748868942261\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "97be4f474bb5452a943b7cfa55e26821",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1242.27it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6432015367507935\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "60d69cc415c6460a846cf281077a1671",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 936.69it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7155887610435486\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bcedb9f779cb401bb0f2ec0ffc180eed",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1071.98it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6513873134613037\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ddf467f699da46f0afbb53daa6eb29e9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1605.29it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.14662103662490844\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5, -0.39, -0.28, -0.17, -0.06, 0.06], 'logging_reward': [0.0016548267943784594, 0.004011169955134392, 0.010384154437482357, 0.026894244468212126, 0.06505892275571823, 0.14662103624343872], 'ix_0': [0.07602058255076409, 0.11107554304599762, 0.26763565740585327, 0.4850407739162445, 0.6929360151290893, 0.7155474009513855], 'smoothing_0': [0.0017916135888546706, 0.004898103752732277, 0.08902953031659126, 0.15568549218177796, 0.534009139251709, 0.6513540760993958], 'clipping_0': [0.09801436960697174, 0.15997750718593598, 0.2676695785522461, 0.48730215854644776, 0.6893377709388733, 0.7156016772270203], 'harmonic_0': [0.09796389709711074, 0.1492506049156189, 0.26604757924079897, 0.4858971398830414, 0.6924018848419189, 0.7151141323089599], 'ix_1': [0.002764279397204518, 0.09274670981764793, 0.0976970940887928, 0.28726141662597654, 0.6263253059387207, 0.7042145915031434], 'smoothing_1': [0.0018089630821719765, 0.00768945246860385, 0.08966888378858566, 0.16728602499961853, 0.558178358078003, 0.6628212430953979], 'clipping_1': [0.0032764652632176878, 0.09048381167650223, 0.16116888937950136, 0.2905903428077698, 0.6620609937667846, 0.7152411190032959], 'harmonic_1': [0.024896203458309174, 0.07318411912918091, 0.22160015420913695, 0.4566627492904663, 0.624744464302063, 0.6943077087402344], 'ix_2': [0.0019891275491565464, 0.08548013907074928, 0.09644433104395866, 0.27321705346107483, 0.6099065008163452, 0.694782451248169], 'smoothing_2': [0.0019703325336799023, 0.07846598596572876, 0.09405719060897827, 0.17426096150875092, 0.5776187893867493, 0.6729674991607666], 'clipping_2': [0.0020709035787731408, 0.0814400324523449, 0.09533011321425439, 0.24878031051158905, 0.6188219171524048, 0.7083725296020508], 'harmonic_2': [0.013822342406213283, 0.0406029051065445, 0.13727533819675444, 0.3448029336452484, 0.5498684995651245, 0.6604203813552857], 'ix_3': [0.001880195019207895, 0.0734175749361515, 0.09577681348919868, 0.17854156079292297, 0.5955511766433715, 0.6856419325828552], 'smoothing_3': [0.0022611491721123457, 0.09423755583167076, 0.09554980810284615, 0.28599316325187685, 0.595024810218811, 0.6820794361114502], 'clipping_3': [0.0019280875962227582, 0.07978782141208648, 0.09516231610178948, 0.17966635494232178, 0.6033873224258423, 0.6976910985946655], 'harmonic_3': [0.008395666573941708, 0.027062873888015745, 0.0959237770318985, 0.2612605007171631, 0.48536629514694213, 0.6253743698120117], 'ix_4': [0.0018060710359364747, 0.07332371616959572, 0.0909283899128437, 0.1739023714542389, 0.5825763339996338, 0.676638300704956], 'smoothing_4': [0.08659579053521156, 0.09584876535534859, 0.09811607581973075, 0.28075808057785034, 0.6090133407592774, 0.6892951133728027], 'clipping_4': [0.001910470351949334, 0.06577393084168434, 0.09337690369486809, 0.1749490626335144, 0.5899833444595337, 0.6882037739753724], 'harmonic_4': [0.006533479517698288, 0.020160479152202607, 0.07036076868772506, 0.208617009973526, 0.4266245581626892, 0.5921717452049255], 'ix_5': [0.0018881696281954647, 0.06749454309940338, 0.09089929768443107, 0.17032699229717255, 0.5472121652603149, 0.6689618106842041], 'smoothing_5': [0.09125853109359741, 0.09734195493459702, 0.17065021793842317, 0.3058421569824219, 0.6197233964920044, 0.6960819185256958], 'clipping_5': [0.0018377723392099142, 0.06436340960860253, 0.090901534563303, 0.1715635729074478, 0.5770972018241882, 0.6798695580482483], 'harmonic_5': [0.00455253802575171, 0.0147464190274477, 0.054786331856250765, 0.1651239861011505, 0.36930047016143797, 0.5552163663864136], 'ix_6': [0.0018513796916231513, 0.006424173071980476, 0.08812359779477119, 0.16684231171607972, 0.5592613296508789, 0.6624987567901611], 'smoothing_6': [0.09532553238868713, 0.09844384480118752, 0.1834357506752014, 0.38168789234161377, 0.6504468092918396, 0.7026664342880249], 'clipping_6': [0.0018130565382540227, 0.006666117901355028, 0.08943644857406616, 0.16656060495376587, 0.5427015423774719, 0.6716056250572204], 'harmonic_6': [0.0040792821876704695, 0.0127955187022686, 0.04222167618274689, 0.13167021782398225, 0.3112030908584595, 0.5088063773155213], 'ix_7': [0.0017630580589175224, 0.005618271762877703, 0.08624420105218887, 0.16324604024887085, 0.5287852444648743, 0.6552499485015869], 'smoothing_7': [0.09584725234508515, 0.09909229539036751, 0.18935596718788147, 0.4697921859741211, 0.667348966217041, 0.7075983729362488], 'clipping_7': [0.0017185146749019622, 0.005599219207465649, 0.08860154327750205, 0.16241064960956572, 0.5329023422241211, 0.6636212114334107], 'harmonic_7': [0.0031308522544801235, 0.010182749785482883, 0.03299144434332848, 0.10184911907911301, 0.25522911367416384, 0.4609757745742798], 'ix_8': [0.0017270717725157737, 0.005078396270424127, 0.09209122355580329, 0.15787675924301148, 0.5199557180404664, 0.6493330085754394], 'smoothing_8': [0.09801413385272026, 0.0995167630314827, 0.2754674090385437, 0.4810739656448364, 0.6821598964691162, 0.7118321292877198], 'clipping_8': [0.0017546431645751, 0.005111382631957531, 0.08758228687644005, 0.16011671500205993, 0.5434505724906922, 0.6576916046142578], 'harmonic_8': [0.0026465058013796804, 0.00698241161108017, 0.024612572491168976, 0.07426629198789597, 0.19195594923496245, 0.3861748868942261], 'ix_9': [0.0017377557314932346, 0.005292286267876625, 0.08852998181581498, 0.15532025213241576, 0.5124221091270447, 0.6432015367507935], 'smoothing_9': [0.07572916606664658, 0.15154042177200316, 0.2787570029020309, 0.48718938760757446, 0.692459008026123, 0.7155887610435486], 'clipping_9': [0.001736136451922357, 0.004862686868011951, 0.08163812893033028, 0.1570905291080475, 0.5168498191833496, 0.6513873134613037], 'harmonic_9': [0.0016548267943784594, 0.004011169977858662, 0.010384154446423054, 0.02689424432218075, 0.06505892304182052, 0.14662103662490844]})\n",
      "eta parameter :  0.17\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3170.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.25754143171310423\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:00<00:00, 707.94it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.4556087851524353\n",
      "min 0.014511926099658012\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7ce0c64edbb44f03919243d24205d8e3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1011.16it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7385340782165527\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9ef81dd3fb684c8bb8d128a65f054d7e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2886.45it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.6898782144546509\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6688228046a2433ab51ed60e8d8dab63",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1428.16it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7384583802223206\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f6a65734b2a74b85acc8edd611f06811",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1192.83it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7392931810379029\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "793b0021b2e14a6599f9bfef5d8320e0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1322.71it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7304380737304688\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "399bba45396c4682ae0a47b04b5ca618",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1098.21it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.69566157913208\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "037de56dffa649d091dd75e83636640d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1153.08it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7391100519180298\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0fd16feb67c944a79bb66fc3d4d82dc6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1490.92it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7230437522888183\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b13b52fcf8f44082988f3a4067036e4c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1029.01it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7132274128913879\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2a8573e5fbc845c1b37656f3463c3498",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3077.12it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7008888475418091\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7a956f21c0704e42862ddbcf3bb8cb9c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 52.18it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7383727118492126\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d1a0748527e64040b812aec26b9f0e85",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1039.59it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7033374090194702\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2c75ea3dac6e4d91941f7a197b3793dd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1362.17it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7076138130187988\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5c937374387f4606bc44e099db0ceff8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 201.71it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7056313723564148\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d4dc8c544f44f5a90467f4a1cd05b07",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 364.35it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7213058994293213\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "355cf679491f490e8c7fa1a3b2d6d1c8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 220.86it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.683599383354187\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "862d2c4e39604385a2988426efae4778",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1084.85it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7022604940414429\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "94a46f5211db48d2a4c0f94d97c5626b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1580.62it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.709959956741333\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5c3c18bfc3c848f898546df166fefdf8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3552.59it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7144672473907471\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0ecc6ccb74cb4b80a38a8372d788779f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1653.84it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6630746486663819\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4e886634f8ed495c89ec4602e40f0196",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 68.93it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6976556070327758\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "89726f60ad774cd088b0958ba14dd4a5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1128.59it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7136844974517822\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f927f63efbcd47cc81a6f1fad86659a5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3628.81it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7092524438858032\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d3862f50071d4935ab99d910844d2e78",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3443.74it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6366729828834534\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fa0185849ee64c36bf90467016aa4861",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1333.42it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6926921096801758\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "24048ed849e1451d8504645fd0f62775",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 2892.35it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7170760981559754\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c3493707b58d4dac9cb255ad9e753041",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1265.06it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7037381764411926\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "107f458c35f7429991b6e96cda353d7a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1457.85it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.611887191772461\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4866f948cde941e0b7816407a99bdb80",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1494.81it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6886401908874512\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a6bbe91983664b3d93a06ed755d6a039",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 941.96it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7297152667999267\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "15f7fbfd4c2b428489e6be7460223478",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1298.35it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6987341665267944\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "01918edfc36c45eba38ad3a6863277fb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1121.71it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.57460381565094\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d0db973df4024b8c906c46b5c37b8eb4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1279.52it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6838704275131225\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9ae96b5a5d734ce08281af274ea90d92",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1071.75it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7358313503265381\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e24ca4f304eb40b7bbe93711cfea169e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1258.88it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6942304333686828\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b1bb367ccf264b7fbf42f1563a8efda1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1172.88it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.5197342924118042\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f28f700a81f0405e8ca6333d84ed300d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3661.97it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.6800552347183227\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7e22c414ff694edcbe00d62214c7f51a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1184.78it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7385059205055237\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7bb24f250586470f8dd5d31003f3fa71",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1866.99it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.6899493388175965\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3da14f6c10b247d38ca16b2e9792e21f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1370.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.2575413046836853\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5, -0.39, -0.28, -0.17, -0.06, 0.06, 0.17], 'logging_reward': [0.0016548267943784594, 0.004011169955134392, 0.010384154437482357, 0.026894244468212126, 0.06505892275571823, 0.14662103624343872, 0.25754143171310423], 'ix_0': [0.07602058255076409, 0.11107554304599762, 0.26763565740585327, 0.4850407739162445, 0.6929360151290893, 0.7155474009513855, 0.7385340782165527], 'smoothing_0': [0.0017916135888546706, 0.004898103752732277, 0.08902953031659126, 0.15568549218177796, 0.534009139251709, 0.6513540760993958, 0.6898782144546509], 'clipping_0': [0.09801436960697174, 0.15997750718593598, 0.2676695785522461, 0.48730215854644776, 0.6893377709388733, 0.7156016772270203, 0.7384583802223206], 'harmonic_0': [0.09796389709711074, 0.1492506049156189, 0.26604757924079897, 0.4858971398830414, 0.6924018848419189, 0.7151141323089599, 0.7392931810379029], 'ix_1': [0.002764279397204518, 0.09274670981764793, 0.0976970940887928, 0.28726141662597654, 0.6263253059387207, 0.7042145915031434, 0.7304380737304688], 'smoothing_1': [0.0018089630821719765, 0.00768945246860385, 0.08966888378858566, 0.16728602499961853, 0.558178358078003, 0.6628212430953979, 0.69566157913208], 'clipping_1': [0.0032764652632176878, 0.09048381167650223, 0.16116888937950136, 0.2905903428077698, 0.6620609937667846, 0.7152411190032959, 0.7391100519180298], 'harmonic_1': [0.024896203458309174, 0.07318411912918091, 0.22160015420913695, 0.4566627492904663, 0.624744464302063, 0.6943077087402344, 0.7230437522888183], 'ix_2': [0.0019891275491565464, 0.08548013907074928, 0.09644433104395866, 0.27321705346107483, 0.6099065008163452, 0.694782451248169, 0.7132274128913879], 'smoothing_2': [0.0019703325336799023, 0.07846598596572876, 0.09405719060897827, 0.17426096150875092, 0.5776187893867493, 0.6729674991607666, 0.7008888475418091], 'clipping_2': [0.0020709035787731408, 0.0814400324523449, 0.09533011321425439, 0.24878031051158905, 0.6188219171524048, 0.7083725296020508, 0.7383727118492126], 'harmonic_2': [0.013822342406213283, 0.0406029051065445, 0.13727533819675444, 0.3448029336452484, 0.5498684995651245, 0.6604203813552857, 0.7033374090194702], 'ix_3': [0.001880195019207895, 0.0734175749361515, 0.09577681348919868, 0.17854156079292297, 0.5955511766433715, 0.6856419325828552, 0.7076138130187988], 'smoothing_3': [0.0022611491721123457, 0.09423755583167076, 0.09554980810284615, 0.28599316325187685, 0.595024810218811, 0.6820794361114502, 0.7056313723564148], 'clipping_3': [0.0019280875962227582, 0.07978782141208648, 0.09516231610178948, 0.17966635494232178, 0.6033873224258423, 0.6976910985946655, 0.7213058994293213], 'harmonic_3': [0.008395666573941708, 0.027062873888015745, 0.0959237770318985, 0.2612605007171631, 0.48536629514694213, 0.6253743698120117, 0.683599383354187], 'ix_4': [0.0018060710359364747, 0.07332371616959572, 0.0909283899128437, 0.1739023714542389, 0.5825763339996338, 0.676638300704956, 0.7022604940414429], 'smoothing_4': [0.08659579053521156, 0.09584876535534859, 0.09811607581973075, 0.28075808057785034, 0.6090133407592774, 0.6892951133728027, 0.709959956741333], 'clipping_4': [0.001910470351949334, 0.06577393084168434, 0.09337690369486809, 0.1749490626335144, 0.5899833444595337, 0.6882037739753724, 0.7144672473907471], 'harmonic_4': [0.006533479517698288, 0.020160479152202607, 0.07036076868772506, 0.208617009973526, 0.4266245581626892, 0.5921717452049255, 0.6630746486663819], 'ix_5': [0.0018881696281954647, 0.06749454309940338, 0.09089929768443107, 0.17032699229717255, 0.5472121652603149, 0.6689618106842041, 0.6976556070327758], 'smoothing_5': [0.09125853109359741, 0.09734195493459702, 0.17065021793842317, 0.3058421569824219, 0.6197233964920044, 0.6960819185256958, 0.7136844974517822], 'clipping_5': [0.0018377723392099142, 0.06436340960860253, 0.090901534563303, 0.1715635729074478, 0.5770972018241882, 0.6798695580482483, 0.7092524438858032], 'harmonic_5': [0.00455253802575171, 0.0147464190274477, 0.054786331856250765, 0.1651239861011505, 0.36930047016143797, 0.5552163663864136, 0.6366729828834534], 'ix_6': [0.0018513796916231513, 0.006424173071980476, 0.08812359779477119, 0.16684231171607972, 0.5592613296508789, 0.6624987567901611, 0.6926921096801758], 'smoothing_6': [0.09532553238868713, 0.09844384480118752, 0.1834357506752014, 0.38168789234161377, 0.6504468092918396, 0.7026664342880249, 0.7170760981559754], 'clipping_6': [0.0018130565382540227, 0.006666117901355028, 0.08943644857406616, 0.16656060495376587, 0.5427015423774719, 0.6716056250572204, 0.7037381764411926], 'harmonic_6': [0.0040792821876704695, 0.0127955187022686, 0.04222167618274689, 0.13167021782398225, 0.3112030908584595, 0.5088063773155213, 0.611887191772461], 'ix_7': [0.0017630580589175224, 0.005618271762877703, 0.08624420105218887, 0.16324604024887085, 0.5287852444648743, 0.6552499485015869, 0.6886401908874512], 'smoothing_7': [0.09584725234508515, 0.09909229539036751, 0.18935596718788147, 0.4697921859741211, 0.667348966217041, 0.7075983729362488, 0.7297152667999267], 'clipping_7': [0.0017185146749019622, 0.005599219207465649, 0.08860154327750205, 0.16241064960956572, 0.5329023422241211, 0.6636212114334107, 0.6987341665267944], 'harmonic_7': [0.0031308522544801235, 0.010182749785482883, 0.03299144434332848, 0.10184911907911301, 0.25522911367416384, 0.4609757745742798, 0.57460381565094], 'ix_8': [0.0017270717725157737, 0.005078396270424127, 0.09209122355580329, 0.15787675924301148, 0.5199557180404664, 0.6493330085754394, 0.6838704275131225], 'smoothing_8': [0.09801413385272026, 0.0995167630314827, 0.2754674090385437, 0.4810739656448364, 0.6821598964691162, 0.7118321292877198, 0.7358313503265381], 'clipping_8': [0.0017546431645751, 0.005111382631957531, 0.08758228687644005, 0.16011671500205993, 0.5434505724906922, 0.6576916046142578, 0.6942304333686828], 'harmonic_8': [0.0026465058013796804, 0.00698241161108017, 0.024612572491168976, 0.07426629198789597, 0.19195594923496245, 0.3861748868942261, 0.5197342924118042], 'ix_9': [0.0017377557314932346, 0.005292286267876625, 0.08852998181581498, 0.15532025213241576, 0.5124221091270447, 0.6432015367507935, 0.6800552347183227], 'smoothing_9': [0.07572916606664658, 0.15154042177200316, 0.2787570029020309, 0.48718938760757446, 0.692459008026123, 0.7155887610435486, 0.7385059205055237], 'clipping_9': [0.001736136451922357, 0.004862686868011951, 0.08163812893033028, 0.1570905291080475, 0.5168498191833496, 0.6513873134613037, 0.6899493388175965], 'harmonic_9': [0.0016548267943784594, 0.004011169977858662, 0.010384154446423054, 0.02689424432218075, 0.06505892304182052, 0.14662103662490844, 0.2575413046836853]})\n",
      "eta parameter :  0.28\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3219.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reward of the logging policy:  0.378658740901947\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 446/446 [00:00<00:00, 715.47it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max 0.7068174481391907\n",
      "min 0.002894290955737233\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "868dae7a01f441faa1a3ff97c902700c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1046.14it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7482165531158447\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "71253615cfe04522b8f3493919d0386c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1181.70it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7226865338325501\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c3d3c229acdb46e68f8aa8d54279ae0a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1253.21it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7489852600097656\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6e4f9ff21f774b4ab3fa00da3ecd86c1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1913.17it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7490078105926513\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0099b3ae17b845a9ad6376c573ec8f07",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 610.37it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7440269391059875\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "56598aee44884231969467c11b3c4e91",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1454.69it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7270801427841187\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "860211f88d62409c85b372a76a85fc04",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 973.00it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.748695439529419\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d9947729b1b14fbbb762a5fe757e2bdd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 943.37it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7377732810974121\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d12f022034e94d1da18ab249aef32184",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1515.94it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.738999019908905\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "055c76af6e444c54b1faf99a5a7b69e7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 883.22it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7296944313049316\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5a2bad1e985842b892613e806e86cc5b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 895.16it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7494883547782898\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "92e42b79051e4a059533c3dae63f51b4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1731.21it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7260039847373962\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ceb371b7f99d41eaacec79f043547c4a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1075.73it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7344950178146362\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "351f2da095304a58924cd4fe4b47faec",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 905.42it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7336730207443237\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "23b0a5eab45741b4938fc4c8cad78487",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 3577.56it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7486856609344482\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3bd46a14bbf4455c8ce4b9f9d8e66290",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1304.47it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7121050464630126\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "df01aec7f9df4449bcb47d2430546742",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 973.80it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7300612436294556\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9e93af2479d64a1ab0c64656233a5344",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1509.91it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7367172898292541\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a1bc44f719be4ec2b32e18f792861956",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 47.96it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7445915666580201\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd4fadfb369842e8b5ccda1ee41b7180",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1344.56it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6985026326179504\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "851994bc43cf42bab45d7924725aec38",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 499.04it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7262249588012696\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "698224567bcb4abcbc0bafe1598cf6bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1109.92it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7400088214874267\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "db6797c01eab49a891bc2aab9aba4af3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 764.42it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7399940269470214\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "366026ae51c245b29a4384f02a92638d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1144.18it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6816568311691285\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5636369a822c4b3a8adfa815e83f20f8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1311.32it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7223827383995056\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b23e32ba783e49a58acedc15af4ae79b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 976.14it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7424555212974548\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "02130a9fad774f469baf2c61d227292e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1453.26it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7350932571411133\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b56c84e1f295469788abf81ab4c185af",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 950.49it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6613696160316467\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "579e7b921e104df195e1b32c38b9e4bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "79it [00:00, 3177.72it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7185563278198243\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f8706e8f46f54f9aa9589cb1dd20726c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "79it [00:00, 2327.27it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.746955673122406\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a009afed7417414495a31c9b27d4fd43",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "79it [00:00, 3685.07it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7231306160926819\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d08b609a19274e0fac3d2b364da1671d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "79it [00:00, 1686.68it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.7551824083328247\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d8ae9c15705540f0a09a977c94a40cf3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "IOPub message rate exceeded.\n",
      "The Jupyter server will temporarily stop sending output\n",
      "to the client in order to avoid crashing it.\n",
      "To change this limit, set the config variable\n",
      "`--ServerApp.iopub_msg_rate_limit`.\n",
      "\n",
      "Current values:\n",
      "ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
      "ServerApp.rate_limit_window=3.0 (secs)\n",
      "\n",
      "79it [00:00, 1330.35it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6981331605911255\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "64f911af468c4c6e960707f8a497171d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 709.37it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7398407636642456\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4ceadcdc1be44b089a8759c81e74c82f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1329.54it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7593320260047912\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "345250ff7f1c4d349fcd26f884280795",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:01, 42.54it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.7504988132476806\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "abd8fb517fa243b8b50bb05628d5731e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1032.70it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.6750947231292724\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dac3f38c72ec400bbbbc72fa77843ec1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1551.12it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic IX after training  : 0.7379974435806275\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b6d148dd97f14470b54eb19eaea21999",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1523.34it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of Heuristic smoothing after training  : 0.7604377698898316\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1f942bec482c4883b010d998c0129aac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1225.79it/s]\n",
      "GPU available: True, used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "IPU available: False, using: 0 IPUs\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of clipping after training  : 0.747860771560669\n",
      "################################################################################\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a215ae4d70e54e0a942757a29358abef",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: 0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "79it [00:00, 1246.23it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reward of harmonic after training  : 0.5624167359352111\n",
      "################################################################################\n",
      "defaultdict(<class 'list'>, {'eta': [-0.5, -0.39, -0.28, -0.17, -0.06, 0.06, 0.17, 0.28, 0.39, 0.5], 'logging_reward': [0.0016548267943784594, 0.004011169955134392, 0.010384154437482357, 0.026894244468212126, 0.06505892275571823, 0.14662103624343872, 0.25754143171310423, 0.378658740901947, 0.4833284400939941, 0.5624169057846069], 'ix_0': [0.07602058255076409, 0.11107554304599762, 0.26763565740585327, 0.4850407739162445, 0.6929360151290893, 0.7155474009513855, 0.7385340782165527, 0.7482165531158447, 0.7551396533966065, 0.7604418991088867], 'smoothing_0': [0.0017916135888546706, 0.004898103752732277, 0.08902953031659126, 0.15568549218177796, 0.534009139251709, 0.6513540760993958, 0.6898782144546509, 0.7226865338325501, 0.7387379315376281, 0.7478337938308716], 'clipping_0': [0.09801436960697174, 0.15997750718593598, 0.2676695785522461, 0.48730215854644776, 0.6893377709388733, 0.7156016772270203, 0.7384583802223206, 0.7489852600097656, 0.7552642223358155, 0.7604539568901062], 'harmonic_0': [0.09796389709711074, 0.1492506049156189, 0.26604757924079897, 0.4858971398830414, 0.6924018848419189, 0.7151141323089599, 0.7392931810379029, 0.7490078105926513, 0.7551824083328247, 0.7602114471435547], 'ix_1': [0.002764279397204518, 0.09274670981764793, 0.0976970940887928, 0.28726141662597654, 0.6263253059387207, 0.7042145915031434, 0.7304380737304688, 0.7440269391059875, 0.7521163496017456, 0.75811737575531], 'smoothing_1': [0.0018089630821719765, 0.00768945246860385, 0.08966888378858566, 0.16728602499961853, 0.558178358078003, 0.6628212430953979, 0.69566157913208, 0.7270801427841187, 0.740999501991272, 0.7496441310882568], 'clipping_1': [0.0032764652632176878, 0.09048381167650223, 0.16116888937950136, 0.2905903428077698, 0.6620609937667846, 0.7152411190032959, 0.7391100519180298, 0.748695439529419, 0.7552359489440919, 0.7604776706695556], 'harmonic_1': [0.024896203458309174, 0.07318411912918091, 0.22160015420913695, 0.4566627492904663, 0.624744464302063, 0.6943077087402344, 0.7230437522888183, 0.7377732810974121, 0.747099260520935, 0.7551865658760071], 'ix_2': [0.0019891275491565464, 0.08548013907074928, 0.09644433104395866, 0.27321705346107483, 0.6099065008163452, 0.694782451248169, 0.7132274128913879, 0.738999019908905, 0.7481182841300964, 0.7550241352081298], 'smoothing_2': [0.0019703325336799023, 0.07846598596572876, 0.09405719060897827, 0.17426096150875092, 0.5776187893867493, 0.6729674991607666, 0.7008888475418091, 0.7296944313049316, 0.7431597419738769, 0.7514238683700561], 'clipping_2': [0.0020709035787731408, 0.0814400324523449, 0.09533011321425439, 0.24878031051158905, 0.6188219171524048, 0.7083725296020508, 0.7383727118492126, 0.7494883547782898, 0.755126240158081, 0.7611199060440064], 'harmonic_2': [0.013822342406213283, 0.0406029051065445, 0.13727533819675444, 0.3448029336452484, 0.5498684995651245, 0.6604203813552857, 0.7033374090194702, 0.7260039847373962, 0.7387559295654297, 0.7479087420463562], 'ix_3': [0.001880195019207895, 0.0734175749361515, 0.09577681348919868, 0.17854156079292297, 0.5955511766433715, 0.6856419325828552, 0.7076138130187988, 0.7344950178146362, 0.7447900149345398, 0.7519753253936767], 'smoothing_3': [0.0022611491721123457, 0.09423755583167076, 0.09554980810284615, 0.28599316325187685, 0.595024810218811, 0.6820794361114502, 0.7056313723564148, 0.7336730207443237, 0.7453339960098266, 0.752904368686676], 'clipping_3': [0.0019280875962227582, 0.07978782141208648, 0.09516231610178948, 0.17966635494232178, 0.6033873224258423, 0.6976910985946655, 0.7213058994293213, 0.7486856609344482, 0.7560139099121094, 0.7612861700057983], 'harmonic_3': [0.008395666573941708, 0.027062873888015745, 0.0959237770318985, 0.2612605007171631, 0.48536629514694213, 0.6253743698120117, 0.683599383354187, 0.7121050464630126, 0.7291703439712525, 0.7408998455047607], 'ix_4': [0.0018060710359364747, 0.07332371616959572, 0.0909283899128437, 0.1739023714542389, 0.5825763339996338, 0.676638300704956, 0.7022604940414429, 0.7300612436294556, 0.7414654068946839, 0.7495346579551697], 'smoothing_4': [0.08659579053521156, 0.09584876535534859, 0.09811607581973075, 0.28075808057785034, 0.6090133407592774, 0.6892951133728027, 0.709959956741333, 0.7367172898292541, 0.747354458808899, 0.7547884197235107], 'clipping_4': [0.001910470351949334, 0.06577393084168434, 0.09337690369486809, 0.1749490626335144, 0.5899833444595337, 0.6882037739753724, 0.7144672473907471, 0.7445915666580201, 0.7545861951828003, 0.7609322025299072], 'harmonic_4': [0.006533479517698288, 0.020160479152202607, 0.07036076868772506, 0.208617009973526, 0.4266245581626892, 0.5921717452049255, 0.6630746486663819, 0.6985026326179504, 0.7191289154052735, 0.7327496525764465], 'ix_5': [0.0018881696281954647, 0.06749454309940338, 0.09089929768443107, 0.17032699229717255, 0.5472121652603149, 0.6689618106842041, 0.6976556070327758, 0.7262249588012696, 0.7383245094299317, 0.7465670944213867], 'smoothing_5': [0.09125853109359741, 0.09734195493459702, 0.17065021793842317, 0.3058421569824219, 0.6197233964920044, 0.6960819185256958, 0.7136844974517822, 0.7400088214874267, 0.7491568984985352, 0.7563619568824768], 'clipping_5': [0.0018377723392099142, 0.06436340960860253, 0.090901534563303, 0.1715635729074478, 0.5770972018241882, 0.6798695580482483, 0.7092524438858032, 0.7399940269470214, 0.7512603630065918, 0.7584972422599793], 'harmonic_5': [0.00455253802575171, 0.0147464190274477, 0.054786331856250765, 0.1651239861011505, 0.36930047016143797, 0.5552163663864136, 0.6366729828834534, 0.6816568311691285, 0.7066064432144165, 0.7231969673156738], 'ix_6': [0.0018513796916231513, 0.006424173071980476, 0.08812359779477119, 0.16684231171607972, 0.5592613296508789, 0.6624987567901611, 0.6926921096801758, 0.7223827383995056, 0.7353470172882081, 0.7439873777389526], 'smoothing_6': [0.09532553238868713, 0.09844384480118752, 0.1834357506752014, 0.38168789234161377, 0.6504468092918396, 0.7026664342880249, 0.7170760981559754, 0.7424555212974548, 0.7511673959732056, 0.7572425559997559], 'clipping_6': [0.0018130565382540227, 0.006666117901355028, 0.08943644857406616, 0.16656060495376587, 0.5427015423774719, 0.6716056250572204, 0.7037381764411926, 0.7350932571411133, 0.7480906522750854, 0.7560662410736084], 'harmonic_6': [0.0040792821876704695, 0.0127955187022686, 0.04222167618274689, 0.13167021782398225, 0.3112030908584595, 0.5088063773155213, 0.611887191772461, 0.6613696160316467, 0.6929211921691895, 0.7126577625274658], 'ix_7': [0.0017630580589175224, 0.005618271762877703, 0.08624420105218887, 0.16324604024887085, 0.5287852444648743, 0.6552499485015869, 0.6886401908874512, 0.7185563278198243, 0.7328234691619873, 0.742367220401764], 'smoothing_7': [0.09584725234508515, 0.09909229539036751, 0.18935596718788147, 0.4697921859741211, 0.667348966217041, 0.7075983729362488, 0.7297152667999267, 0.7450560174942017, 0.7527983173370362, 0.7590292125701904], 'clipping_7': [0.0017185146749019622, 0.005599219207465649, 0.08860154327750205, 0.16241064960956572, 0.5329023422241211, 0.6636212114334107, 0.6987341665267944, 0.731136946105957, 0.7447757094383239, 0.7535488828659057], 'harmonic_7': [0.0031308522544801235, 0.010182749785482883, 0.03299144434332848, 0.10184911907911301, 0.25522911367416384, 0.4609757745742798, 0.57460381565094, 0.6358141391754151, 0.673604790878296, 0.6981331605911255], 'ix_8': [0.0017270717725157737, 0.005078396270424127, 0.09209122355580329, 0.15787675924301148, 0.5199557180404664, 0.6493330085754394, 0.6838704275131225, 0.7155983480453492, 0.7301174398422241, 0.7398407636642456], 'smoothing_8': [0.09801413385272026, 0.0995167630314827, 0.2754674090385437, 0.4810739656448364, 0.6821598964691162, 0.7118321292877198, 0.7358313503265381, 0.746955673122406, 0.7541034955024719, 0.7593320260047912], 'clipping_8': [0.0017546431645751, 0.005111382631957531, 0.08758228687644005, 0.16011671500205993, 0.5434505724906922, 0.6576916046142578, 0.6942304333686828, 0.7267897221565247, 0.7416191524505615, 0.7504988132476806], 'harmonic_8': [0.0026465058013796804, 0.00698241161108017, 0.024612572491168976, 0.07426629198789597, 0.19195594923496245, 0.3861748868942261, 0.5197342924118042, 0.5962837358474732, 0.6444370343208313, 0.6750947231292724], 'ix_9': [0.0017377557314932346, 0.005292286267876625, 0.08852998181581498, 0.15532025213241576, 0.5124221091270447, 0.6432015367507935, 0.6800552347183227, 0.7118742380142212, 0.7276782861709594, 0.7379974435806275], 'smoothing_9': [0.07572916606664658, 0.15154042177200316, 0.2787570029020309, 0.48718938760757446, 0.692459008026123, 0.7155887610435486, 0.7385059205055237, 0.7492112977981568, 0.7549063985824584, 0.7604377698898316], 'clipping_9': [0.001736136451922357, 0.004862686868011951, 0.08163812893033028, 0.1570905291080475, 0.5168498191833496, 0.6513873134613037, 0.6899493388175965, 0.7231306160926819, 0.7383649604797363, 0.747860771560669], 'harmonic_9': [0.0016548267943784594, 0.004011169977858662, 0.010384154446423054, 0.02689424432218075, 0.06505892304182052, 0.14662103662490844, 0.2575413046836853, 0.37865876812934873, 0.4833285276412964, 0.5624167359352111]})\n",
      "    eta  logging_reward      ix_0  smoothing_0  clipping_0  harmonic_0  \\\n",
      "0 -0.50        0.001655  0.076021     0.001792    0.098014    0.097964   \n",
      "1 -0.39        0.004011  0.111076     0.004898    0.159978    0.149251   \n",
      "2 -0.28        0.010384  0.267636     0.089030    0.267670    0.266048   \n",
      "3 -0.17        0.026894  0.485041     0.155685    0.487302    0.485897   \n",
      "4 -0.06        0.065059  0.692936     0.534009    0.689338    0.692402   \n",
      "5  0.06        0.146621  0.715547     0.651354    0.715602    0.715114   \n",
      "6  0.17        0.257541  0.738534     0.689878    0.738458    0.739293   \n",
      "7  0.28        0.378659  0.748217     0.722687    0.748985    0.749008   \n",
      "8  0.39        0.483328  0.755140     0.738738    0.755264    0.755182   \n",
      "9  0.50        0.562417  0.760442     0.747834    0.760454    0.760211   \n",
      "\n",
      "       ix_1  smoothing_1  clipping_1  harmonic_1  ...  clipping_7  harmonic_7  \\\n",
      "0  0.002764     0.001809    0.003276    0.024896  ...    0.001719    0.003131   \n",
      "1  0.092747     0.007689    0.090484    0.073184  ...    0.005599    0.010183   \n",
      "2  0.097697     0.089669    0.161169    0.221600  ...    0.088602    0.032991   \n",
      "3  0.287261     0.167286    0.290590    0.456663  ...    0.162411    0.101849   \n",
      "4  0.626325     0.558178    0.662061    0.624744  ...    0.532902    0.255229   \n",
      "5  0.704215     0.662821    0.715241    0.694308  ...    0.663621    0.460976   \n",
      "6  0.730438     0.695662    0.739110    0.723044  ...    0.698734    0.574604   \n",
      "7  0.744027     0.727080    0.748695    0.737773  ...    0.731137    0.635814   \n",
      "8  0.752116     0.741000    0.755236    0.747099  ...    0.744776    0.673605   \n",
      "9  0.758117     0.749644    0.760478    0.755187  ...    0.753549    0.698133   \n",
      "\n",
      "       ix_8  smoothing_8  clipping_8  harmonic_8      ix_9  smoothing_9  \\\n",
      "0  0.001727     0.098014    0.001755    0.002647  0.001738     0.075729   \n",
      "1  0.005078     0.099517    0.005111    0.006982  0.005292     0.151540   \n",
      "2  0.092091     0.275467    0.087582    0.024613  0.088530     0.278757   \n",
      "3  0.157877     0.481074    0.160117    0.074266  0.155320     0.487189   \n",
      "4  0.519956     0.682160    0.543451    0.191956  0.512422     0.692459   \n",
      "5  0.649333     0.711832    0.657692    0.386175  0.643202     0.715589   \n",
      "6  0.683870     0.735831    0.694230    0.519734  0.680055     0.738506   \n",
      "7  0.715598     0.746956    0.726790    0.596284  0.711874     0.749211   \n",
      "8  0.730117     0.754103    0.741619    0.644437  0.727678     0.754906   \n",
      "9  0.739841     0.759332    0.750499    0.675095  0.737997     0.760438   \n",
      "\n",
      "   clipping_9  harmonic_9  \n",
      "0    0.001736    0.001655  \n",
      "1    0.004863    0.004011  \n",
      "2    0.081638    0.010384  \n",
      "3    0.157091    0.026894  \n",
      "4    0.516850    0.065059  \n",
      "5    0.651387    0.146621  \n",
      "6    0.689949    0.257541  \n",
      "7    0.723131    0.378659  \n",
      "8    0.738365    0.483329  \n",
      "9    0.747861    0.562417  \n",
      "\n",
      "[10 rows x 42 columns]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "for i, eta in enumerate(etas):\n",
    "    logging_policy = logging_policy.to(device)\n",
    "    print('eta parameter : ', eta)  \n",
    "    logging_policy.alpha = eta\n",
    "    risk_logging = test_risk_exact_probit(X_test, Y_test, logging_policy)\n",
    "    print('The reward of the logging policy: ', -risk_logging)\n",
    "    \n",
    "    dict_results['eta'].append(eta)\n",
    "    dict_results['logging_reward'].append(-risk_logging)\n",
    "    \n",
    "    # Collect a bandit dataset\n",
    "    f, a, p, c = build_bandit_dataset(logging_split_dataloader, logging_policy, replay_count = 1)\n",
    "\n",
    "    print('max', p.max(dim = 0)[0].mean().item())\n",
    "    print('min', p.min(dim = 0)[0].mean().item())\n",
    "\n",
    "    bandit_train_posterior = TensorDataset(f, a, p, c)\n",
    "    bandit_train_posterior_dataloader = DataLoader(bandit_train_posterior, batch_size=128, shuffle=True)\n",
    "\n",
    "    mu_0 = eta * logging_policy.linear.weight.data\n",
    "        \n",
    "    taus = np.linspace(1e-5, 1, 10) #np.logspace(-10, 0, 20) #np.linspace(1e-10, 1, 10)\n",
    "    #taus_bis = np.array([0.1, 0.5, 1, 10, 100, 500, 1000, 1e5, 1e7, 1e10]) # for shrinkage whose hyperparmater is between 0 and infitnity\n",
    "\n",
    "    for j, tau in enumerate(taus):\n",
    "        \n",
    "        #####################################################################################################################\n",
    "        #####################################################################################################################\n",
    "        ######### Implicit Exploration\n",
    "        #######################################\n",
    "        model = HeuristicIX(n_actions=num_actions, context_dim=context_dim, tau=tau, N=N, lmbd1=1e-5, lmbd2=1e-5, lmbd3=1e-5, loc_weight=mu_0, device=device)\n",
    "\n",
    "        trainer = Trainer(max_epochs=epochs, gpus=1, checkpoint_callback=False, weights_summary=None, logger=None)\n",
    "        trainer.fit(model, bandit_train_posterior_dataloader)\n",
    "\n",
    "        model = model.to(device)\n",
    "        with torch.no_grad():\n",
    "            risk_after_train = test_risk_exact_probit(X_test, Y_test, model)\n",
    "\n",
    "        print('Reward of Heuristic IX after training  :', -risk_after_train)\n",
    "        print('################################################################################')\n",
    "\n",
    "        dict_results['ix_' + str(j)].append(-risk_after_train)\n",
    "    \n",
    "        #####################################################################################################################\n",
    "        #####################################################################################################################\n",
    "        ######### Exponential Smoothing\n",
    "        #######################################\n",
    "        model = HeuristicSmoothing(n_actions=num_actions, context_dim=context_dim, tau=tau, N=N, lmbd1=1e-5, lmbd2=1e-5, lmbd3=1e-5, loc_weight=mu_0, device=device)\n",
    "\n",
    "        trainer = Trainer(max_epochs=epochs, gpus=1, checkpoint_callback=False, weights_summary=None, logger=None)\n",
    "        trainer.fit(model, bandit_train_posterior_dataloader)\n",
    "\n",
    "        model = model.to(device)\n",
    "        with torch.no_grad():\n",
    "            risk_after_train = test_risk_exact_probit(X_test, Y_test, model)\n",
    "\n",
    "        print('Reward of Heuristic smoothing after training  :', -risk_after_train)\n",
    "        print('################################################################################')\n",
    "\n",
    "        dict_results['smoothing_' + str(j)].append(-risk_after_train)    \n",
    "\n",
    "        #####################################################################################################################\n",
    "        #####################################################################################################################\n",
    "        ######### Clipping\n",
    "        #######################################\n",
    "\n",
    "        model = HeuristicClipping(n_actions=num_actions, context_dim=context_dim, tau=tau, N=N, lmbd1=1e-5, lmbd2=1e-5, lmbd3=1e-5, loc_weight=mu_0, device=device)\n",
    "\n",
    "        trainer = Trainer(max_epochs=epochs, gpus=1, checkpoint_callback=False, weights_summary=None, logger=None)\n",
    "        trainer.fit(model, bandit_train_posterior_dataloader)\n",
    "\n",
    "        model = model.to(device)\n",
    "        with torch.no_grad():\n",
    "            risk_after_train = test_risk_exact_probit(X_test, Y_test, model)\n",
    "\n",
    "        print('Reward of clipping after training  :', -risk_after_train)\n",
    "        print('################################################################################')\n",
    "        dict_results['clipping_' + str(j)].append(-risk_after_train)    \n",
    "\n",
    "\n",
    "        #####################################################################################################################\n",
    "        #####################################################################################################################\n",
    "        ######### Harmonic\n",
    "        #######################################\n",
    "\n",
    "        model = HeuristicHarmonic(n_actions=num_actions, context_dim=context_dim, tau=tau, N=N, lmbd1=1e-5, lmbd2=1e-5, lmbd3=1e-5, loc_weight=mu_0, device=device)\n",
    "        \n",
    "        trainer = Trainer(max_epochs=epochs, gpus=1, checkpoint_callback=False, weights_summary=None, logger=None)\n",
    "        trainer.fit(model, bandit_train_posterior_dataloader)\n",
    "\n",
    "        model = model.to(device)\n",
    "        with torch.no_grad():\n",
    "            risk_after_train = test_risk_exact_probit(X_test, Y_test, model)\n",
    "\n",
    "        print('Reward of harmonic after training  :', -risk_after_train)\n",
    "        print('################################################################################')\n",
    "\n",
    "        dict_results['harmonic_' + str(j)].append(-risk_after_train)    \n",
    "\n",
    "    print(dict_results)\n",
    "\n",
    "df = pd.DataFrame(dict_results)\n",
    "print(df)\n",
    "df.to_csv('results/heuristic_optimization/varying_' + name +'.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9192cfa5-2f00-452f-8eda-c3680fc7a116",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python Kernel (MOAB #58857)",
   "language": "python",
   "name": "python-kernel-58857"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
