Logging to logs/HalfCheetahFH-v0/exp-16/pagar_fkl/2024_08_11_06_02_54
--2024-08-11 06:05:18.538988 UTC--
| Itration            | 0        |
| PAGAR Loss          | 47.1     |
| Real Det Return     | -22.3    |
| Real Sto Return     | -148     |
| Reward Loss         | 2.84e+06 |
| Running Env Steps   | 0        |
| Running Forward KL  | 32       |
| Running Reverse KL  | 11.9     |
| Running Update Time | 0        |
----------------------------------
--2024-08-11 06:07:45.991363 UTC--
| Itration            | 1        |
| PAGAR Loss          | 18.8     |
| Real Det Return     | -30.5    |
| Real Sto Return     | -116     |
| Reward Loss         | 2.94e+06 |
| Running Env Steps   | 5000     |
| Running Forward KL  | 32       |
| Running Reverse KL  | 12       |
| Running Update Time | 1        |
----------------------------------
--2024-08-11 06:10:15.921697 UTC--
| Itration            | 2        |
| PAGAR Loss          | 56.3     |
| Real Det Return     | 5.29     |
| Real Sto Return     | -72.5    |
| Reward Loss         | 2.84e+06 |
| Running Env Steps   | 10000    |
| Running Forward KL  | 31.9     |
| Running Reverse KL  | 12       |
| Running Update Time | 2        |
----------------------------------
--2024-08-11 06:12:47.608705 UTC--
| Itration            | 3        |
| PAGAR Loss          | -28.2    |
| Real Det Return     | 1.04     |
| Real Sto Return     | -70.3    |
| Reward Loss         | 2.66e+06 |
| Running Env Steps   | 15000    |
| Running Forward KL  | 32.4     |
| Running Reverse KL  | 12.9     |
| Running Update Time | 3        |
----------------------------------
--2024-08-11 06:15:18.521898 UTC--
| Itration            | 4        |
| PAGAR Loss          | 41.5     |
| Real Det Return     | -0.38    |
| Real Sto Return     | -60.5    |
| Reward Loss         | 2.55e+06 |
| Running Env Steps   | 20000    |
| Running Forward KL  | 32.4     |
| Running Reverse KL  | 12.8     |
| Running Update Time | 4        |
----------------------------------
--2024-08-11 06:17:47.781008 UTC--
| Itration            | 5        |
| PAGAR Loss          | -12.6    |
| Real Det Return     | 1.72     |
| Real Sto Return     | -47.7    |
| Reward Loss         | 2.4e+06  |
| Running Env Steps   | 25000    |
| Running Forward KL  | 32.3     |
| Running Reverse KL  | 12.7     |
| Running Update Time | 5        |
----------------------------------
--2024-08-11 06:20:19.574457 UTC--
| Itration            | 6        |
| PAGAR Loss          | -53.8    |
| Real Det Return     | -2.4     |
| Real Sto Return     | -56.7    |
| Reward Loss         | 2.12e+06 |
| Running Env Steps   | 30000    |
| Running Forward KL  | 32.3     |
| Running Reverse KL  | 12.8     |
| Running Update Time | 6        |
----------------------------------
--2024-08-11 06:22:50.036107 UTC--
| Itration            | 7        |
| PAGAR Loss          | 13.1     |
| Real Det Return     | 0.18     |
| Real Sto Return     | -46.7    |
| Reward Loss         | 2.01e+06 |
| Running Env Steps   | 35000    |
| Running Forward KL  | 32.1     |
| Running Reverse KL  | 12.7     |
| Running Update Time | 7        |
----------------------------------
--2024-08-11 06:25:21.491235 UTC--
| Itration            | 8        |
| PAGAR Loss          | -12.5    |
| Real Det Return     | 1.35     |
| Real Sto Return     | -58.4    |
| Reward Loss         | 1.77e+06 |
| Running Env Steps   | 40000    |
| Running Forward KL  | 32.1     |
| Running Reverse KL  | 12.1     |
| Running Update Time | 8        |
----------------------------------
--2024-08-11 06:27:51.994910 UTC--
| Itration            | 9        |
| PAGAR Loss          | 69.6     |
| Real Det Return     | 1.89     |
| Real Sto Return     | -51.8    |
| Reward Loss         | 1.59e+06 |
| Running Env Steps   | 45000    |
| Running Forward KL  | 32.4     |
| Running Reverse KL  | 12.9     |
| Running Update Time | 9        |
----------------------------------
--2024-08-11 06:30:23.034864 UTC--
| Itration            | 10       |
| PAGAR Loss          | -3.4     |
| Real Det Return     | 2.55     |
| Real Sto Return     | -42.7    |
| Reward Loss         | 1.4e+06  |
| Running Env Steps   | 50000    |
| Running Forward KL  | 32.3     |
| Running Reverse KL  | 12.8     |
| Running Update Time | 10       |
----------------------------------
--2024-08-11 06:32:54.803759 UTC--
| Itration            | 11       |
| PAGAR Loss          | 18       |
| Real Det Return     | 2.18     |
| Real Sto Return     | -34.1    |
| Reward Loss         | 1.25e+06 |
| Running Env Steps   | 55000    |
| Running Forward KL  | 32.1     |
| Running Reverse KL  | 12.2     |
| Running Update Time | 11       |
----------------------------------
--2024-08-11 06:35:26.288583 UTC--
| Itration            | 12       |
| PAGAR Loss          | 55.3     |
| Real Det Return     | -0.42    |
| Real Sto Return     | -38.1    |
| Reward Loss         | 1.1e+06  |
| Running Env Steps   | 60000    |
| Running Forward KL  | 32.3     |
| Running Reverse KL  | 12.9     |
| Running Update Time | 12       |
----------------------------------
--2024-08-11 06:37:57.849841 UTC--
| Itration            | 13       |
| PAGAR Loss          | -8.08    |
| Real Det Return     | 0.34     |
| Real Sto Return     | -33.4    |
| Reward Loss         | 8.66e+05 |
| Running Env Steps   | 65000    |
| Running Forward KL  | 32       |
| Running Reverse KL  | 12.3     |
| Running Update Time | 13       |
----------------------------------
--2024-08-11 06:40:29.196267 UTC--
| Itration            | 14       |
| PAGAR Loss          | 39.5     |
| Real Det Return     | 3.75     |
| Real Sto Return     | -42.9    |
| Reward Loss         | 7.44e+05 |
| Running Env Steps   | 70000    |
| Running Forward KL  | 32.1     |
| Running Reverse KL  | 12.4     |
| Running Update Time | 14       |
----------------------------------
--2024-08-11 06:43:00.602371 UTC--
| Itration            | 15       |
| PAGAR Loss          | -13.7    |
| Real Det Return     | 2.39     |
| Real Sto Return     | -37.6    |
| Reward Loss         | 5.28e+05 |
| Running Env Steps   | 75000    |
| Running Forward KL  | 32.4     |
| Running Reverse KL  | 12.9     |
| Running Update Time | 15       |
----------------------------------
--2024-08-11 06:45:31.981000 UTC--
| Itration            | 16       |
| PAGAR Loss          | 14.2     |
| Real Det Return     | -0.87    |
| Real Sto Return     | -43.3    |
| Reward Loss         | 3.64e+05 |
| Running Env Steps   | 80000    |
| Running Forward KL  | 32.3     |
| Running Reverse KL  | 12.9     |
| Running Update Time | 16       |
----------------------------------
--2024-08-11 06:48:00.384411 UTC--
| Itration            | 17       |
| PAGAR Loss          | -39      |
| Real Det Return     | 3.68     |
| Real Sto Return     | -47.5    |
| Reward Loss         | 1.59e+05 |
| Running Env Steps   | 85000    |
| Running Forward KL  | 32.2     |
| Running Reverse KL  | 12.6     |
| Running Update Time | 17       |
----------------------------------
--2024-08-11 06:50:31.343453 UTC---
| Itration            | 18        |
| PAGAR Loss          | 24.7      |
| Real Det Return     | 3.93      |
| Real Sto Return     | -35.7     |
| Reward Loss         | -2.99e+04 |
| Running Env Steps   | 90000     |
| Running Forward KL  | 32.2      |
| Running Reverse KL  | 12.5      |
| Running Update Time | 18        |
-----------------------------------
--2024-08-11 06:53:05.661633 UTC---
| Itration            | 19        |
| PAGAR Loss          | 4.97      |
| Real Det Return     | 4.79      |
| Real Sto Return     | -19.9     |
| Reward Loss         | -2.13e+05 |
| Running Env Steps   | 95000     |
| Running Forward KL  | 32.2      |
| Running Reverse KL  | 12.6      |
| Running Update Time | 19        |
-----------------------------------
--2024-08-11 06:55:36.733516 UTC---
| Itration            | 20        |
| PAGAR Loss          | 93.3      |
| Real Det Return     | 3.75      |
| Real Sto Return     | -50.8     |
| Reward Loss         | -3.38e+05 |
| Running Env Steps   | 100000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12.1      |
| Running Update Time | 20        |
-----------------------------------
--2024-08-11 06:58:07.690921 UTC---
| Itration            | 21        |
| PAGAR Loss          | -30.7     |
| Real Det Return     | 3.83      |
| Real Sto Return     | -31.2     |
| Reward Loss         | -5.43e+05 |
| Running Env Steps   | 105000    |
| Running Forward KL  | 32.4      |
| Running Reverse KL  | 12.8      |
| Running Update Time | 21        |
-----------------------------------
--2024-08-11 07:00:38.321439 UTC---
| Itration            | 22        |
| PAGAR Loss          | -12.4     |
| Real Det Return     | 1.92      |
| Real Sto Return     | -31.1     |
| Reward Loss         | -6.97e+05 |
| Running Env Steps   | 110000    |
| Running Forward KL  | 32.3      |
| Running Reverse KL  | 12.6      |
| Running Update Time | 22        |
-----------------------------------
--2024-08-11 07:03:09.242707 UTC--
| Itration            | 23       |
| PAGAR Loss          | 4.28     |
| Real Det Return     | 3.41     |
| Real Sto Return     | -30.8    |
| Reward Loss         | -8.8e+05 |
| Running Env Steps   | 115000   |
| Running Forward KL  | 32.1     |
| Running Reverse KL  | 12.1     |
| Running Update Time | 23       |
----------------------------------
--2024-08-11 07:05:41.268561 UTC---
| Itration            | 24        |
| PAGAR Loss          | 18.4      |
| Real Det Return     | 4.73      |
| Real Sto Return     | -38.6     |
| Reward Loss         | -1.09e+06 |
| Running Env Steps   | 120000    |
| Running Forward KL  | 32.2      |
| Running Reverse KL  | 12.7      |
| Running Update Time | 24        |
-----------------------------------
--2024-08-11 07:08:11.150462 UTC---
| Itration            | 25        |
| PAGAR Loss          | 16.3      |
| Real Det Return     | 4.06      |
| Real Sto Return     | -35.7     |
| Reward Loss         | -1.23e+06 |
| Running Env Steps   | 125000    |
| Running Forward KL  | 32        |
| Running Reverse KL  | 12.1      |
| Running Update Time | 25        |
-----------------------------------
--2024-08-11 07:10:43.485346 UTC---
| Itration            | 26        |
| PAGAR Loss          | 20.2      |
| Real Det Return     | 4.24      |
| Real Sto Return     | -33       |
| Reward Loss         | -1.41e+06 |
| Running Env Steps   | 130000    |
| Running Forward KL  | 32.2      |
| Running Reverse KL  | 12.6      |
| Running Update Time | 26        |
-----------------------------------
--2024-08-11 07:13:14.233743 UTC--
| Itration            | 27       |
| PAGAR Loss          | 6.85     |
| Real Det Return     | 4.24     |
| Real Sto Return     | -30.6    |
| Reward Loss         | -1.6e+06 |
| Running Env Steps   | 135000   |
| Running Forward KL  | 32.1     |
| Running Reverse KL  | 12.6     |
| Running Update Time | 27       |
----------------------------------
--2024-08-11 07:15:46.052762 UTC---
| Itration            | 28        |
| PAGAR Loss          | -6.07     |
| Real Det Return     | 3.97      |
| Real Sto Return     | -26.1     |
| Reward Loss         | -1.75e+06 |
| Running Env Steps   | 140000    |
| Running Forward KL  | 32.1      |
| Running Reverse KL  | 12.3      |
| Running Update Time | 28        |
-----------------------------------
--2024-08-11 07:18:18.383337 UTC---
| Itration            | 29        |
| PAGAR Loss          | 40.6      |
| Real Det Return     | 5.45      |
| Real Sto Return     | -37       |
| Reward Loss         | -1.96e+06 |
| Running Env Steps   | 145000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12.1      |
| Running Update Time | 29        |
-----------------------------------
--2024-08-11 07:20:48.241100 UTC---
| Itration            | 30        |
| PAGAR Loss          | 46.8      |
| Real Det Return     | 3.6       |
| Real Sto Return     | -30.1     |
| Reward Loss         | -2.14e+06 |
| Running Env Steps   | 150000    |
| Running Forward KL  | 32.2      |
| Running Reverse KL  | 12.6      |
| Running Update Time | 30        |
-----------------------------------
--2024-08-11 07:23:19.431334 UTC---
| Itration            | 31        |
| PAGAR Loss          | 42.1      |
| Real Det Return     | 2.52      |
| Real Sto Return     | -30.5     |
| Reward Loss         | -2.29e+06 |
| Running Env Steps   | 155000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12        |
| Running Update Time | 31        |
-----------------------------------
--2024-08-11 07:25:51.336592 UTC---
| Itration            | 32        |
| PAGAR Loss          | 33.6      |
| Real Det Return     | 5.61      |
| Real Sto Return     | -45.8     |
| Reward Loss         | -2.45e+06 |
| Running Env Steps   | 160000    |
| Running Forward KL  | 32.1      |
| Running Reverse KL  | 12.5      |
| Running Update Time | 32        |
-----------------------------------
--2024-08-11 07:28:22.466807 UTC--
| Itration            | 33       |
| PAGAR Loss          | -8.13    |
| Real Det Return     | 4.88     |
| Real Sto Return     | -37      |
| Reward Loss         | -2.7e+06 |
| Running Env Steps   | 165000   |
| Running Forward KL  | 32       |
| Running Reverse KL  | 12       |
| Running Update Time | 33       |
----------------------------------
--2024-08-11 07:30:53.704304 UTC---
| Itration            | 34        |
| PAGAR Loss          | 11.4      |
| Real Det Return     | 5.94      |
| Real Sto Return     | -21.3     |
| Reward Loss         | -2.83e+06 |
| Running Env Steps   | 170000    |
| Running Forward KL  | 32        |
| Running Reverse KL  | 12.2      |
| Running Update Time | 34        |
-----------------------------------
--2024-08-11 07:33:24.853138 UTC---
| Itration            | 35        |
| PAGAR Loss          | -18       |
| Real Det Return     | 5.4       |
| Real Sto Return     | -19.5     |
| Reward Loss         | -3.02e+06 |
| Running Env Steps   | 175000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12.3      |
| Running Update Time | 35        |
-----------------------------------
--2024-08-11 07:35:55.853074 UTC---
| Itration            | 36        |
| PAGAR Loss          | -9.13     |
| Real Det Return     | 4.68      |
| Real Sto Return     | -30.2     |
| Reward Loss         | -3.25e+06 |
| Running Env Steps   | 180000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12.1      |
| Running Update Time | 36        |
-----------------------------------
--2024-08-11 07:38:27.977770 UTC---
| Itration            | 37        |
| PAGAR Loss          | -41.8     |
| Real Det Return     | 5.31      |
| Real Sto Return     | -24.5     |
| Reward Loss         | -3.32e+06 |
| Running Env Steps   | 185000    |
| Running Forward KL  | 32.1      |
| Running Reverse KL  | 12.4      |
| Running Update Time | 37        |
-----------------------------------
--2024-08-11 07:40:57.292195 UTC---
| Itration            | 38        |
| PAGAR Loss          | -10       |
| Real Det Return     | 5.21      |
| Real Sto Return     | -22.2     |
| Reward Loss         | -3.49e+06 |
| Running Env Steps   | 190000    |
| Running Forward KL  | 32.1      |
| Running Reverse KL  | 12.5      |
| Running Update Time | 38        |
-----------------------------------
--2024-08-11 07:43:27.925296 UTC---
| Itration            | 39        |
| PAGAR Loss          | 20.3      |
| Real Det Return     | 5.39      |
| Real Sto Return     | -47       |
| Reward Loss         | -3.66e+06 |
| Running Env Steps   | 195000    |
| Running Forward KL  | 32        |
| Running Reverse KL  | 12.2      |
| Running Update Time | 39        |
-----------------------------------
--2024-08-11 07:45:58.328999 UTC---
| Itration            | 40        |
| PAGAR Loss          | 53.8      |
| Real Det Return     | 4.51      |
| Real Sto Return     | -30.4     |
| Reward Loss         | -3.79e+06 |
| Running Env Steps   | 200000    |
| Running Forward KL  | 31.7      |
| Running Reverse KL  | 11.6      |
| Running Update Time | 40        |
-----------------------------------
--2024-08-11 07:48:28.702954 UTC---
| Itration            | 41        |
| PAGAR Loss          | -14.2     |
| Real Det Return     | 6.08      |
| Real Sto Return     | -34.3     |
| Reward Loss         | -4.14e+06 |
| Running Env Steps   | 205000    |
| Running Forward KL  | 32        |
| Running Reverse KL  | 12.2      |
| Running Update Time | 41        |
-----------------------------------
--2024-08-11 07:50:59.208550 UTC---
| Itration            | 42        |
| PAGAR Loss          | -32.6     |
| Real Det Return     | 6.04      |
| Real Sto Return     | -37.8     |
| Reward Loss         | -4.17e+06 |
| Running Env Steps   | 210000    |
| Running Forward KL  | 31.8      |
| Running Reverse KL  | 12        |
| Running Update Time | 42        |
-----------------------------------
--2024-08-11 07:53:27.242175 UTC---
| Itration            | 43        |
| PAGAR Loss          | 25        |
| Real Det Return     | 5.74      |
| Real Sto Return     | -32       |
| Reward Loss         | -4.34e+06 |
| Running Env Steps   | 215000    |
| Running Forward KL  | 31.6      |
| Running Reverse KL  | 11.6      |
| Running Update Time | 43        |
-----------------------------------
--2024-08-11 07:55:57.192220 UTC---
| Itration            | 44        |
| PAGAR Loss          | 51.5      |
| Real Det Return     | 6.37      |
| Real Sto Return     | -24.7     |
| Reward Loss         | -4.53e+06 |
| Running Env Steps   | 220000    |
| Running Forward KL  | 31.8      |
| Running Reverse KL  | 12        |
| Running Update Time | 44        |
-----------------------------------
--2024-08-11 07:58:27.866698 UTC---
| Itration            | 45        |
| PAGAR Loss          | 10.1      |
| Real Det Return     | 5.05      |
| Real Sto Return     | -17.6     |
| Reward Loss         | -4.72e+06 |
| Running Env Steps   | 225000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12.2      |
| Running Update Time | 45        |
-----------------------------------
--2024-08-11 08:00:57.994784 UTC--
| Itration            | 46       |
| PAGAR Loss          | 4.69     |
| Real Det Return     | 4.25     |
| Real Sto Return     | -44.9    |
| Reward Loss         | -4.9e+06 |
| Running Env Steps   | 230000   |
| Running Forward KL  | 31.7     |
| Running Reverse KL  | 11.5     |
| Running Update Time | 46       |
----------------------------------
--2024-08-11 08:03:29.006621 UTC---
| Itration            | 47        |
| PAGAR Loss          | 30.6      |
| Real Det Return     | 5.06      |
| Real Sto Return     | -17.3     |
| Reward Loss         | -5.06e+06 |
| Running Env Steps   | 235000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12        |
| Running Update Time | 47        |
-----------------------------------
--2024-08-11 08:05:56.497019 UTC--
| Itration            | 48       |
| PAGAR Loss          | 63.7     |
| Real Det Return     | 5.43     |
| Real Sto Return     | -29.3    |
| Reward Loss         | -5.3e+06 |
| Running Env Steps   | 240000   |
| Running Forward KL  | 32       |
| Running Reverse KL  | 12.3     |
| Running Update Time | 48       |
----------------------------------
--2024-08-11 08:08:26.395452 UTC---
| Itration            | 49        |
| PAGAR Loss          | -147      |
| Real Det Return     | 5.27      |
| Real Sto Return     | -57.8     |
| Reward Loss         | -5.54e+06 |
| Running Env Steps   | 245000    |
| Running Forward KL  | 31.5      |
| Running Reverse KL  | 11.5      |
| Running Update Time | 49        |
-----------------------------------
--2024-08-11 08:10:58.081685 UTC---
| Itration            | 50        |
| PAGAR Loss          | 103       |
| Real Det Return     | 5.55      |
| Real Sto Return     | -40.3     |
| Reward Loss         | -5.71e+06 |
| Running Env Steps   | 250000    |
| Running Forward KL  | 31.8      |
| Running Reverse KL  | 11.9      |
| Running Update Time | 50        |
-----------------------------------
--2024-08-11 08:13:28.833318 UTC---
| Itration            | 51        |
| PAGAR Loss          | -75.7     |
| Real Det Return     | 6.11      |
| Real Sto Return     | -36.6     |
| Reward Loss         | -5.88e+06 |
| Running Env Steps   | 255000    |
| Running Forward KL  | 31.8      |
| Running Reverse KL  | 12        |
| Running Update Time | 51        |
-----------------------------------
--2024-08-11 08:16:01.599406 UTC---
| Itration            | 52        |
| PAGAR Loss          | 15.4      |
| Real Det Return     | 3.85      |
| Real Sto Return     | -11.8     |
| Reward Loss         | -5.89e+06 |
| Running Env Steps   | 260000    |
| Running Forward KL  | 31.8      |
| Running Reverse KL  | 11.9      |
| Running Update Time | 52        |
-----------------------------------
--2024-08-11 08:18:30.065665 UTC---
| Itration            | 53        |
| PAGAR Loss          | 11.3      |
| Real Det Return     | 5.84      |
| Real Sto Return     | -14.1     |
| Reward Loss         | -6.08e+06 |
| Running Env Steps   | 265000    |
| Running Forward KL  | 31.9      |
| Running Reverse KL  | 12.1      |
| Running Update Time | 53        |
-----------------------------------
--2024-08-11 08:21:01.086675 UTC---
| Itration            | 54        |
| PAGAR Loss          | 1.37      |
| Real Det Return     | 4.9       |
| Real Sto Return     | -15.8     |
| Reward Loss         | -6.29e+06 |
| Running Env Steps   | 270000    |
| Running Forward KL  | 31.6      |
| Running Reverse KL  | 11.3      |
| Running Update Time | 54        |
-----------------------------------
--2024-08-11 08:23:32.845094 UTC---
| Itration            | 55        |
| PAGAR Loss          | 68.1      |
| Real Det Return     | 6.54      |
| Real Sto Return     | -41.5     |
| Reward Loss         | -6.63e+06 |
| Running Env Steps   | 275000    |
| Running Forward KL  | 31.6      |
| Running Reverse KL  | 11.9      |
| Running Update Time | 55        |
-----------------------------------
--2024-08-11 08:26:04.469958 UTC---
| Itration            | 56        |
| PAGAR Loss          | -16.2     |
| Real Det Return     | 5.1       |
| Real Sto Return     | -33.5     |
| Reward Loss         | -6.59e+06 |
| Running Env Steps   | 280000    |
| Running Forward KL  | 31.5      |
| Running Reverse KL  | 11.2      |
| Running Update Time | 56        |
-----------------------------------
--2024-08-11 08:28:36.670209 UTC---
| Itration            | 57        |
| PAGAR Loss          | 63.8      |
| Real Det Return     | 6.15      |
| Real Sto Return     | -44.9     |
| Reward Loss         | -6.78e+06 |
| Running Env Steps   | 285000    |
| Running Forward KL  | 31.6      |
| Running Reverse KL  | 11.7      |
| Running Update Time | 57        |
-----------------------------------
--2024-08-11 08:31:06.219964 UTC---
| Itration            | 58        |
| PAGAR Loss          | -81.4     |
| Real Det Return     | 5.65      |
| Real Sto Return     | -13.1     |
| Reward Loss         | -6.94e+06 |
| Running Env Steps   | 290000    |
| Running Forward KL  | 31.3      |
| Running Reverse KL  | 11.3      |
| Running Update Time | 58        |
-----------------------------------
--2024-08-11 08:33:37.387864 UTC---
| Itration            | 59        |
| PAGAR Loss          | 189       |
| Real Det Return     | 6.23      |
| Real Sto Return     | -19.8     |
| Reward Loss         | -7.02e+06 |
| Running Env Steps   | 295000    |
| Running Forward KL  | 31.1      |
| Running Reverse KL  | 10.9      |
| Running Update Time | 59        |
-----------------------------------
--2024-08-11 08:36:08.702850 UTC---
| Itration            | 60        |
| PAGAR Loss          | 52.1      |
| Real Det Return     | 6.02      |
| Real Sto Return     | -47       |
| Reward Loss         | -7.28e+06 |
| Running Env Steps   | 300000    |
| Running Forward KL  | 31.2      |
| Running Reverse KL  | 11.4      |
| Running Update Time | 60        |
-----------------------------------
--2024-08-11 08:38:39.588118 UTC---
| Itration            | 61        |
| PAGAR Loss          | 532       |
| Real Det Return     | 5.38      |
| Real Sto Return     | -62.5     |
| Reward Loss         | -7.34e+06 |
| Running Env Steps   | 305000    |
| Running Forward KL  | 30.7      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 61        |
-----------------------------------
--2024-08-11 08:41:12.280221 UTC---
| Itration            | 62        |
| PAGAR Loss          | 765       |
| Real Det Return     | 6.09      |
| Real Sto Return     | -39.3     |
| Reward Loss         | -7.58e+06 |
| Running Env Steps   | 310000    |
| Running Forward KL  | 30.7      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 62        |
-----------------------------------
--2024-08-11 08:43:42.423789 UTC---
| Itration            | 63        |
| PAGAR Loss          | 1.23e+03  |
| Real Det Return     | 5.75      |
| Real Sto Return     | -19.4     |
| Reward Loss         | -7.49e+06 |
| Running Env Steps   | 315000    |
| Running Forward KL  | 30.3      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 63        |
-----------------------------------
--2024-08-11 08:46:13.500143 UTC---
| Itration            | 64        |
| PAGAR Loss          | -602      |
| Real Det Return     | 5.18      |
| Real Sto Return     | -21.5     |
| Reward Loss         | -7.17e+06 |
| Running Env Steps   | 320000    |
| Running Forward KL  | 30.2      |
| Running Reverse KL  | 9.77      |
| Running Update Time | 64        |
-----------------------------------
--2024-08-11 08:48:43.457254 UTC---
| Itration            | 65        |
| PAGAR Loss          | -5.08     |
| Real Det Return     | 4.99      |
| Real Sto Return     | -16       |
| Reward Loss         | -7.09e+06 |
| Running Env Steps   | 325000    |
| Running Forward KL  | 30        |
| Running Reverse KL  | 9.61      |
| Running Update Time | 65        |
-----------------------------------
--2024-08-11 08:51:13.568219 UTC---
| Itration            | 66        |
| PAGAR Loss          | -142      |
| Real Det Return     | 3.13      |
| Real Sto Return     | -5.1      |
| Reward Loss         | -6.68e+06 |
| Running Env Steps   | 330000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10        |
| Running Update Time | 66        |
-----------------------------------
--2024-08-11 08:53:43.642382 UTC--
| Itration            | 67       |
| PAGAR Loss          | -442     |
| Real Det Return     | 133      |
| Real Sto Return     | -35.1    |
| Reward Loss         | -6.5e+06 |
| Running Env Steps   | 335000   |
| Running Forward KL  | 30       |
| Running Reverse KL  | 10.6     |
| Running Update Time | 67       |
----------------------------------
--2024-08-11 08:56:13.059191 UTC---
| Itration            | 68        |
| PAGAR Loss          | 2.43e+05  |
| Real Det Return     | 153       |
| Real Sto Return     | -40       |
| Reward Loss         | -6.81e+06 |
| Running Env Steps   | 340000    |
| Running Forward KL  | 30.1      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 68        |
-----------------------------------
--2024-08-11 08:58:42.703181 UTC---
| Itration            | 69        |
| PAGAR Loss          | nan       |
| Real Det Return     | 3.4       |
| Real Sto Return     | 34        |
| Reward Loss         | -8.97e+06 |
| Running Env Steps   | 345000    |
| Running Forward KL  | 30.6      |
| Running Reverse KL  | 11.4      |
| Running Update Time | 69        |
-----------------------------------
--2024-08-11 09:01:16.102902 UTC---
| Itration            | 70        |
| PAGAR Loss          | -489      |
| Real Det Return     | 158       |
| Real Sto Return     | -33.3     |
| Reward Loss         | -6.92e+06 |
| Running Env Steps   | 350000    |
| Running Forward KL  | 30        |
| Running Reverse KL  | 10.3      |
| Running Update Time | 70        |
-----------------------------------
--2024-08-11 09:03:46.127340 UTC---
| Itration            | 71        |
| PAGAR Loss          | 2.7e+03   |
| Real Det Return     | 134       |
| Real Sto Return     | -32.4     |
| Reward Loss         | -6.85e+06 |
| Running Env Steps   | 355000    |
| Running Forward KL  | 30        |
| Running Reverse KL  | 10.6      |
| Running Update Time | 71        |
-----------------------------------
--2024-08-11 09:06:16.773562 UTC---
| Itration            | 72        |
| PAGAR Loss          | -506      |
| Real Det Return     | 155       |
| Real Sto Return     | -47       |
| Reward Loss         | -7.34e+06 |
| Running Env Steps   | 360000    |
| Running Forward KL  | 30        |
| Running Reverse KL  | 10.6      |
| Running Update Time | 72        |
-----------------------------------
--2024-08-11 09:08:46.720220 UTC---
| Itration            | 73        |
| PAGAR Loss          | -6.37e+05 |
| Real Det Return     | 210       |
| Real Sto Return     | -23.3     |
| Reward Loss         | -7.48e+06 |
| Running Env Steps   | 365000    |
| Running Forward KL  | 29.4      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 73        |
-----------------------------------
--2024-08-11 09:11:15.394289 UTC---
| Itration            | 74        |
| PAGAR Loss          | -1.42e+03 |
| Real Det Return     | 172       |
| Real Sto Return     | -13.2     |
| Reward Loss         | -7.19e+06 |
| Running Env Steps   | 370000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 74        |
-----------------------------------
--2024-08-11 09:13:47.291634 UTC---
| Itration            | 75        |
| PAGAR Loss          | 1.84e+03  |
| Real Det Return     | 11.5      |
| Real Sto Return     | -8.26     |
| Reward Loss         | -7.68e+06 |
| Running Env Steps   | 375000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 75        |
-----------------------------------
--2024-08-11 09:16:16.804770 UTC---
| Itration            | 76        |
| PAGAR Loss          | 836       |
| Real Det Return     | 198       |
| Real Sto Return     | -27.8     |
| Reward Loss         | -7.37e+06 |
| Running Env Steps   | 380000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 76        |
-----------------------------------
--2024-08-11 09:18:48.174226 UTC---
| Itration            | 77        |
| PAGAR Loss          | -448      |
| Real Det Return     | 192       |
| Real Sto Return     | -55       |
| Reward Loss         | -7.93e+06 |
| Running Env Steps   | 385000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 77        |
-----------------------------------
--2024-08-11 09:21:18.263977 UTC---
| Itration            | 78        |
| PAGAR Loss          | -247      |
| Real Det Return     | 209       |
| Real Sto Return     | -19.1     |
| Reward Loss         | -7.65e+06 |
| Running Env Steps   | 390000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 78        |
-----------------------------------
--2024-08-11 09:23:48.443156 UTC---
| Itration            | 79        |
| PAGAR Loss          | -1.77e+03 |
| Real Det Return     | 154       |
| Real Sto Return     | -16.9     |
| Reward Loss         | -7.91e+06 |
| Running Env Steps   | 395000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 79        |
-----------------------------------
--2024-08-11 09:26:20.497063 UTC--
| Itration            | 80       |
| PAGAR Loss          | 1.05e+03 |
| Real Det Return     | 253      |
| Real Sto Return     | -6.88    |
| Reward Loss         | -8.1e+06 |
| Running Env Steps   | 400000   |
| Running Forward KL  | 29.8     |
| Running Reverse KL  | 10.7     |
| Running Update Time | 80       |
----------------------------------
--2024-08-11 09:28:50.871383 UTC---
| Itration            | 81        |
| PAGAR Loss          | -1.16e+03 |
| Real Det Return     | 178       |
| Real Sto Return     | -30       |
| Reward Loss         | -8.09e+06 |
| Running Env Steps   | 405000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 81        |
-----------------------------------
--2024-08-11 09:31:19.702392 UTC---
| Itration            | 82        |
| PAGAR Loss          | -1.27e+03 |
| Real Det Return     | 193       |
| Real Sto Return     | -33.8     |
| Reward Loss         | -8.16e+06 |
| Running Env Steps   | 410000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 82        |
-----------------------------------
--2024-08-11 09:33:48.347705 UTC---
| Itration            | 83        |
| PAGAR Loss          | 1.31e+03  |
| Real Det Return     | 232       |
| Real Sto Return     | -18.8     |
| Reward Loss         | -8.34e+06 |
| Running Env Steps   | 415000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 83        |
-----------------------------------
--2024-08-11 09:36:16.422309 UTC---
| Itration            | 84        |
| PAGAR Loss          | -859      |
| Real Det Return     | 221       |
| Real Sto Return     | -13.6     |
| Reward Loss         | -8.32e+06 |
| Running Env Steps   | 420000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 84        |
-----------------------------------
--2024-08-11 09:38:46.693483 UTC---
| Itration            | 85        |
| PAGAR Loss          | -1.41e+03 |
| Real Det Return     | 190       |
| Real Sto Return     | -37       |
| Reward Loss         | -8.6e+06  |
| Running Env Steps   | 425000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 85        |
-----------------------------------
--2024-08-11 09:41:18.083314 UTC---
| Itration            | 86        |
| PAGAR Loss          | -1.27e+03 |
| Real Det Return     | 194       |
| Real Sto Return     | -46.6     |
| Reward Loss         | -9.12e+06 |
| Running Env Steps   | 430000    |
| Running Forward KL  | 30        |
| Running Reverse KL  | 10.6      |
| Running Update Time | 86        |
-----------------------------------
--2024-08-11 09:43:48.739403 UTC---
| Itration            | 87        |
| PAGAR Loss          | -2.02e+03 |
| Real Det Return     | 214       |
| Real Sto Return     | -20.7     |
| Reward Loss         | -9.33e+06 |
| Running Env Steps   | 435000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 87        |
-----------------------------------
--2024-08-11 09:46:16.965074 UTC---
| Itration            | 88        |
| PAGAR Loss          | -1.71e+03 |
| Real Det Return     | 206       |
| Real Sto Return     | -17.4     |
| Reward Loss         | -8.85e+06 |
| Running Env Steps   | 440000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 88        |
-----------------------------------
--2024-08-11 09:48:49.937973 UTC---
| Itration            | 89        |
| PAGAR Loss          | 3.54e+03  |
| Real Det Return     | 191       |
| Real Sto Return     | -26.7     |
| Reward Loss         | -9.37e+06 |
| Running Env Steps   | 445000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 89        |
-----------------------------------
--2024-08-11 09:51:22.882027 UTC---
| Itration            | 90        |
| PAGAR Loss          | 772       |
| Real Det Return     | 227       |
| Real Sto Return     | -24.3     |
| Reward Loss         | -9.81e+06 |
| Running Env Steps   | 450000    |
| Running Forward KL  | 30        |
| Running Reverse KL  | 10.5      |
| Running Update Time | 90        |
-----------------------------------
--2024-08-11 09:53:52.928951 UTC---
| Itration            | 91        |
| PAGAR Loss          | 1.25e+03  |
| Real Det Return     | 221       |
| Real Sto Return     | -11.8     |
| Reward Loss         | -9.57e+06 |
| Running Env Steps   | 455000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 91        |
-----------------------------------
--2024-08-11 09:56:23.302362 UTC---
| Itration            | 92        |
| PAGAR Loss          | 645       |
| Real Det Return     | 220       |
| Real Sto Return     | -10.8     |
| Reward Loss         | -9.59e+06 |
| Running Env Steps   | 460000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 92        |
-----------------------------------
--2024-08-11 09:58:51.897322 UTC---
| Itration            | 93        |
| PAGAR Loss          | -644      |
| Real Det Return     | 235       |
| Real Sto Return     | -27.6     |
| Reward Loss         | -9.78e+06 |
| Running Env Steps   | 465000    |
| Running Forward KL  | 30        |
| Running Reverse KL  | 10.5      |
| Running Update Time | 93        |
-----------------------------------
--2024-08-11 10:01:20.434449 UTC---
| Itration            | 94        |
| PAGAR Loss          | 2.18e+03  |
| Real Det Return     | 264       |
| Real Sto Return     | 1.49      |
| Reward Loss         | -9.76e+06 |
| Running Env Steps   | 470000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 94        |
-----------------------------------
--2024-08-11 10:03:51.472361 UTC---
| Itration            | 95        |
| PAGAR Loss          | 2.27e+03  |
| Real Det Return     | 217       |
| Real Sto Return     | -30.1     |
| Reward Loss         | -1.04e+07 |
| Running Env Steps   | 475000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 95        |
-----------------------------------
--2024-08-11 10:06:23.298818 UTC---
| Itration            | 96        |
| PAGAR Loss          | 89.8      |
| Real Det Return     | 237       |
| Real Sto Return     | -12.9     |
| Reward Loss         | -1.07e+07 |
| Running Env Steps   | 480000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 96        |
-----------------------------------
--2024-08-11 10:08:55.290457 UTC---
| Itration            | 97        |
| PAGAR Loss          | 774       |
| Real Det Return     | 292       |
| Real Sto Return     | 2.56      |
| Reward Loss         | -1.03e+07 |
| Running Env Steps   | 485000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 97        |
-----------------------------------
--2024-08-11 10:11:23.027110 UTC--
| Itration            | 98       |
| PAGAR Loss          | 868      |
| Real Det Return     | 271      |
| Real Sto Return     | -8.23    |
| Reward Loss         | -1.1e+07 |
| Running Env Steps   | 490000   |
| Running Forward KL  | 29.9     |
| Running Reverse KL  | 10.5     |
| Running Update Time | 98       |
----------------------------------
--2024-08-11 10:13:52.004946 UTC---
| Itration            | 99        |
| PAGAR Loss          | 644       |
| Real Det Return     | 243       |
| Real Sto Return     | -10.4     |
| Reward Loss         | -1.09e+07 |
| Running Env Steps   | 495000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 99        |
-----------------------------------
--2024-08-11 10:16:22.501419 UTC---
| Itration            | 100       |
| PAGAR Loss          | 1.71e+03  |
| Real Det Return     | 251       |
| Real Sto Return     | -7.15     |
| Reward Loss         | -1.07e+07 |
| Running Env Steps   | 500000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 100       |
-----------------------------------
--2024-08-11 10:18:52.678397 UTC---
| Itration            | 101       |
| PAGAR Loss          | 1.1e+03   |
| Real Det Return     | 305       |
| Real Sto Return     | 11.8      |
| Reward Loss         | -1.13e+07 |
| Running Env Steps   | 505000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 101       |
-----------------------------------
--2024-08-11 10:21:23.850107 UTC---
| Itration            | 102       |
| PAGAR Loss          | 1.15e+03  |
| Real Det Return     | 239       |
| Real Sto Return     | -18       |
| Reward Loss         | -1.12e+07 |
| Running Env Steps   | 510000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 102       |
-----------------------------------
--2024-08-11 10:23:51.586078 UTC---
| Itration            | 103       |
| PAGAR Loss          | -1.48e+03 |
| Real Det Return     | 255       |
| Real Sto Return     | -1.53     |
| Reward Loss         | -1.12e+07 |
| Running Env Steps   | 515000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 103       |
-----------------------------------
--2024-08-11 10:26:22.857425 UTC---
| Itration            | 104       |
| PAGAR Loss          | -739      |
| Real Det Return     | 269       |
| Real Sto Return     | -3.7      |
| Reward Loss         | -1.13e+07 |
| Running Env Steps   | 520000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 104       |
-----------------------------------
--2024-08-11 10:28:53.211972 UTC---
| Itration            | 105       |
| PAGAR Loss          | 2.3e+03   |
| Real Det Return     | 276       |
| Real Sto Return     | -9        |
| Reward Loss         | -1.17e+07 |
| Running Env Steps   | 525000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 105       |
-----------------------------------
--2024-08-11 10:31:23.713296 UTC---
| Itration            | 106       |
| PAGAR Loss          | 210       |
| Real Det Return     | 230       |
| Real Sto Return     | -24.3     |
| Reward Loss         | -1.21e+07 |
| Running Env Steps   | 530000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 106       |
-----------------------------------
--2024-08-11 10:33:55.643696 UTC---
| Itration            | 107       |
| PAGAR Loss          | -1.77e+03 |
| Real Det Return     | 245       |
| Real Sto Return     | 4.74      |
| Reward Loss         | -1.17e+07 |
| Running Env Steps   | 535000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 107       |
-----------------------------------
--2024-08-11 10:36:22.364390 UTC---
| Itration            | 108       |
| PAGAR Loss          | -882      |
| Real Det Return     | 266       |
| Real Sto Return     | -11.2     |
| Reward Loss         | -1.21e+07 |
| Running Env Steps   | 540000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 108       |
-----------------------------------
--2024-08-11 10:38:52.093090 UTC---
| Itration            | 109       |
| PAGAR Loss          | -718      |
| Real Det Return     | 262       |
| Real Sto Return     | 9.1       |
| Reward Loss         | -1.21e+07 |
| Running Env Steps   | 545000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 109       |
-----------------------------------
--2024-08-11 10:41:22.112456 UTC---
| Itration            | 110       |
| PAGAR Loss          | 1.78e+03  |
| Real Det Return     | 273       |
| Real Sto Return     | 15.1      |
| Reward Loss         | -1.23e+07 |
| Running Env Steps   | 550000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 110       |
-----------------------------------
--2024-08-11 10:43:52.914697 UTC---
| Itration            | 111       |
| PAGAR Loss          | 766       |
| Real Det Return     | 251       |
| Real Sto Return     | 6.38      |
| Reward Loss         | -1.22e+07 |
| Running Env Steps   | 555000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 111       |
-----------------------------------
--2024-08-11 10:46:24.542256 UTC---
| Itration            | 112       |
| PAGAR Loss          | 595       |
| Real Det Return     | 275       |
| Real Sto Return     | 7.38      |
| Reward Loss         | -1.29e+07 |
| Running Env Steps   | 560000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 112       |
-----------------------------------
--2024-08-11 10:48:53.363612 UTC---
| Itration            | 113       |
| PAGAR Loss          | 2.11e+03  |
| Real Det Return     | 284       |
| Real Sto Return     | 7.78      |
| Reward Loss         | -1.28e+07 |
| Running Env Steps   | 565000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 113       |
-----------------------------------
--2024-08-11 10:51:22.724811 UTC---
| Itration            | 114       |
| PAGAR Loss          | 3.45e+03  |
| Real Det Return     | 302       |
| Real Sto Return     | 2.78      |
| Reward Loss         | -1.29e+07 |
| Running Env Steps   | 570000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 114       |
-----------------------------------
--2024-08-11 10:53:52.956153 UTC--
| Itration            | 115      |
| PAGAR Loss          | 776      |
| Real Det Return     | 269      |
| Real Sto Return     | 29.4     |
| Reward Loss         | -1.3e+07 |
| Running Env Steps   | 575000   |
| Running Forward KL  | 29.8     |
| Running Reverse KL  | 10.7     |
| Running Update Time | 115      |
----------------------------------
--2024-08-11 10:56:23.316293 UTC---
| Itration            | 116       |
| PAGAR Loss          | 427       |
| Real Det Return     | 301       |
| Real Sto Return     | 34.5      |
| Reward Loss         | -1.35e+07 |
| Running Env Steps   | 580000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 116       |
-----------------------------------
--2024-08-11 10:58:54.037034 UTC---
| Itration            | 117       |
| PAGAR Loss          | -933      |
| Real Det Return     | 260       |
| Real Sto Return     | -4.83     |
| Reward Loss         | -1.35e+07 |
| Running Env Steps   | 585000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 117       |
-----------------------------------
--2024-08-11 11:01:21.695657 UTC---
| Itration            | 118       |
| PAGAR Loss          | 606       |
| Real Det Return     | 256       |
| Real Sto Return     | -0.29     |
| Reward Loss         | -1.39e+07 |
| Running Env Steps   | 590000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 118       |
-----------------------------------
--2024-08-11 11:03:50.891214 UTC---
| Itration            | 119       |
| PAGAR Loss          | 4.73e+03  |
| Real Det Return     | 287       |
| Real Sto Return     | 13.2      |
| Reward Loss         | -1.36e+07 |
| Running Env Steps   | 595000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 119       |
-----------------------------------
--2024-08-11 11:06:22.629904 UTC---
| Itration            | 120       |
| PAGAR Loss          | -240      |
| Real Det Return     | 284       |
| Real Sto Return     | 31.7      |
| Reward Loss         | -1.38e+07 |
| Running Env Steps   | 600000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 120       |
-----------------------------------
--2024-08-11 11:08:53.432242 UTC--
| Itration            | 121      |
| PAGAR Loss          | 252      |
| Real Det Return     | 281      |
| Real Sto Return     | 9.34     |
| Reward Loss         | -1.4e+07 |
| Running Env Steps   | 605000   |
| Running Forward KL  | 29.9     |
| Running Reverse KL  | 10.6     |
| Running Update Time | 121      |
----------------------------------
--2024-08-11 11:11:24.927183 UTC---
| Itration            | 122       |
| PAGAR Loss          | 3.22e+03  |
| Real Det Return     | 274       |
| Real Sto Return     | 6.11      |
| Reward Loss         | -1.42e+07 |
| Running Env Steps   | 610000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 122       |
-----------------------------------
--2024-08-11 11:13:53.476091 UTC---
| Itration            | 123       |
| PAGAR Loss          | -41       |
| Real Det Return     | 316       |
| Real Sto Return     | 37.5      |
| Reward Loss         | -1.46e+07 |
| Running Env Steps   | 615000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 123       |
-----------------------------------
--2024-08-11 11:16:21.409659 UTC---
| Itration            | 124       |
| PAGAR Loss          | -1.65e+03 |
| Real Det Return     | 285       |
| Real Sto Return     | 21.7      |
| Reward Loss         | -1.45e+07 |
| Running Env Steps   | 620000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 124       |
-----------------------------------
--2024-08-11 11:18:49.130099 UTC---
| Itration            | 125       |
| PAGAR Loss          | -570      |
| Real Det Return     | 266       |
| Real Sto Return     | -5.49     |
| Reward Loss         | -1.47e+07 |
| Running Env Steps   | 625000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 125       |
-----------------------------------
--2024-08-11 11:21:16.512666 UTC---
| Itration            | 126       |
| PAGAR Loss          | -784      |
| Real Det Return     | 268       |
| Real Sto Return     | 24.1      |
| Reward Loss         | -1.48e+07 |
| Running Env Steps   | 630000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 126       |
-----------------------------------
--2024-08-11 11:23:47.446620 UTC--
| Itration            | 127      |
| PAGAR Loss          | -337     |
| Real Det Return     | 292      |
| Real Sto Return     | 29.5     |
| Reward Loss         | -1.5e+07 |
| Running Env Steps   | 635000   |
| Running Forward KL  | 29.9     |
| Running Reverse KL  | 10.6     |
| Running Update Time | 127      |
----------------------------------
--2024-08-11 11:26:18.359621 UTC---
| Itration            | 128       |
| PAGAR Loss          | 1.86e+03  |
| Real Det Return     | 303       |
| Real Sto Return     | 42.8      |
| Reward Loss         | -1.51e+07 |
| Running Env Steps   | 640000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 128       |
-----------------------------------
--2024-08-11 11:28:48.300874 UTC---
| Itration            | 129       |
| PAGAR Loss          | -2.03e+03 |
| Real Det Return     | 287       |
| Real Sto Return     | 23.1      |
| Reward Loss         | -1.53e+07 |
| Running Env Steps   | 645000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 129       |
-----------------------------------
--2024-08-11 11:31:16.517065 UTC---
| Itration            | 130       |
| PAGAR Loss          | 2.02e+03  |
| Real Det Return     | 281       |
| Real Sto Return     | 21.7      |
| Reward Loss         | -1.53e+07 |
| Running Env Steps   | 650000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 130       |
-----------------------------------
--2024-08-11 11:33:45.424674 UTC---
| Itration            | 131       |
| PAGAR Loss          | 1.29e+03  |
| Real Det Return     | 281       |
| Real Sto Return     | 22.9      |
| Reward Loss         | -1.63e+07 |
| Running Env Steps   | 655000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 131       |
-----------------------------------
--2024-08-11 11:36:15.820842 UTC---
| Itration            | 132       |
| PAGAR Loss          | -1.79e+03 |
| Real Det Return     | 284       |
| Real Sto Return     | 16.3      |
| Reward Loss         | -1.58e+07 |
| Running Env Steps   | 660000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 132       |
-----------------------------------
--2024-08-11 11:38:45.693062 UTC---
| Itration            | 133       |
| PAGAR Loss          | 760       |
| Real Det Return     | 249       |
| Real Sto Return     | 23        |
| Reward Loss         | -1.59e+07 |
| Running Env Steps   | 665000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 133       |
-----------------------------------
--2024-08-11 11:41:15.529601 UTC--
| Itration            | 134      |
| PAGAR Loss          | 1.78e+03 |
| Real Det Return     | 302      |
| Real Sto Return     | 51.2     |
| Reward Loss         | -1.6e+07 |
| Running Env Steps   | 670000   |
| Running Forward KL  | 29.9     |
| Running Reverse KL  | 10.6     |
| Running Update Time | 134      |
----------------------------------
--2024-08-11 11:43:44.208582 UTC---
| Itration            | 135       |
| PAGAR Loss          | -401      |
| Real Det Return     | 304       |
| Real Sto Return     | 55.7      |
| Reward Loss         | -1.63e+07 |
| Running Env Steps   | 675000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 135       |
-----------------------------------
--2024-08-11 11:46:13.870919 UTC---
| Itration            | 136       |
| PAGAR Loss          | 465       |
| Real Det Return     | 290       |
| Real Sto Return     | 49.8      |
| Reward Loss         | -1.64e+07 |
| Running Env Steps   | 680000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 136       |
-----------------------------------
--2024-08-11 11:48:42.364096 UTC--
| Itration            | 137      |
| PAGAR Loss          | 900      |
| Real Det Return     | 274      |
| Real Sto Return     | 34.1     |
| Reward Loss         | -1.7e+07 |
| Running Env Steps   | 685000   |
| Running Forward KL  | 29.8     |
| Running Reverse KL  | 10.6     |
| Running Update Time | 137      |
----------------------------------
--2024-08-11 11:51:11.372636 UTC---
| Itration            | 138       |
| PAGAR Loss          | 1.09e+03  |
| Real Det Return     | 280       |
| Real Sto Return     | 46.7      |
| Reward Loss         | -1.67e+07 |
| Running Env Steps   | 690000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 138       |
-----------------------------------
--2024-08-11 11:53:42.810682 UTC---
| Itration            | 139       |
| PAGAR Loss          | -1.44e+03 |
| Real Det Return     | 271       |
| Real Sto Return     | 35.5      |
| Reward Loss         | -1.7e+07  |
| Running Env Steps   | 695000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 139       |
-----------------------------------
--2024-08-11 11:56:11.202652 UTC---
| Itration            | 140       |
| PAGAR Loss          | 3.06e+03  |
| Real Det Return     | 263       |
| Real Sto Return     | 42.9      |
| Reward Loss         | -1.72e+07 |
| Running Env Steps   | 700000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 140       |
-----------------------------------
--2024-08-11 11:58:41.457332 UTC---
| Itration            | 141       |
| PAGAR Loss          | 37.7      |
| Real Det Return     | 259       |
| Real Sto Return     | 32.3      |
| Reward Loss         | -1.72e+07 |
| Running Env Steps   | 705000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 141       |
-----------------------------------
--2024-08-11 12:01:12.074632 UTC---
| Itration            | 142       |
| PAGAR Loss          | -270      |
| Real Det Return     | 340       |
| Real Sto Return     | 69.9      |
| Reward Loss         | -1.82e+07 |
| Running Env Steps   | 710000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 142       |
-----------------------------------
--2024-08-11 12:03:42.414735 UTC---
| Itration            | 143       |
| PAGAR Loss          | 3.04e+03  |
| Real Det Return     | 281       |
| Real Sto Return     | 48.7      |
| Reward Loss         | -1.76e+07 |
| Running Env Steps   | 715000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 143       |
-----------------------------------
--2024-08-11 12:06:13.776552 UTC---
| Itration            | 144       |
| PAGAR Loss          | -546      |
| Real Det Return     | 272       |
| Real Sto Return     | 49.9      |
| Reward Loss         | -1.77e+07 |
| Running Env Steps   | 720000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 144       |
-----------------------------------
--2024-08-11 12:08:43.602411 UTC---
| Itration            | 145       |
| PAGAR Loss          | -28.4     |
| Real Det Return     | 364       |
| Real Sto Return     | 91.5      |
| Reward Loss         | -1.86e+07 |
| Running Env Steps   | 725000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 145       |
-----------------------------------
--2024-08-11 12:11:11.415873 UTC---
| Itration            | 146       |
| PAGAR Loss          | 410       |
| Real Det Return     | 316       |
| Real Sto Return     | 73.7      |
| Reward Loss         | -1.89e+07 |
| Running Env Steps   | 730000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 146       |
-----------------------------------
--2024-08-11 12:13:44.986845 UTC---
| Itration            | 147       |
| PAGAR Loss          | 3.98e+03  |
| Real Det Return     | 304       |
| Real Sto Return     | 94.7      |
| Reward Loss         | -1.91e+07 |
| Running Env Steps   | 735000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 9.99      |
| Running Update Time | 147       |
-----------------------------------
--2024-08-11 12:16:14.957384 UTC---
| Itration            | 148       |
| PAGAR Loss          | -1.85e+03 |
| Real Det Return     | 337       |
| Real Sto Return     | 54.9      |
| Reward Loss         | -1.85e+07 |
| Running Env Steps   | 740000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 148       |
-----------------------------------
--2024-08-11 12:18:45.384709 UTC---
| Itration            | 149       |
| PAGAR Loss          | 1.53e+03  |
| Real Det Return     | 400       |
| Real Sto Return     | 100       |
| Reward Loss         | -1.85e+07 |
| Running Env Steps   | 745000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 149       |
-----------------------------------
--2024-08-11 12:21:17.167447 UTC---
| Itration            | 150       |
| PAGAR Loss          | -1.46e+03 |
| Real Det Return     | 308       |
| Real Sto Return     | 109       |
| Reward Loss         | -1.95e+07 |
| Running Env Steps   | 750000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10        |
| Running Update Time | 150       |
-----------------------------------
--2024-08-11 12:23:47.597706 UTC---
| Itration            | 151       |
| PAGAR Loss          | 2.46e+03  |
| Real Det Return     | 476       |
| Real Sto Return     | 117       |
| Reward Loss         | -1.94e+07 |
| Running Env Steps   | 755000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 151       |
-----------------------------------
--2024-08-11 12:26:19.179617 UTC---
| Itration            | 152       |
| PAGAR Loss          | -1.45e+03 |
| Real Det Return     | 295       |
| Real Sto Return     | 157       |
| Reward Loss         | -1.98e+07 |
| Running Env Steps   | 760000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 9.95      |
| Running Update Time | 152       |
-----------------------------------
--2024-08-11 12:28:50.681182 UTC---
| Itration            | 153       |
| PAGAR Loss          | 186       |
| Real Det Return     | 295       |
| Real Sto Return     | 114       |
| Reward Loss         | -1.97e+07 |
| Running Env Steps   | 765000    |
| Running Forward KL  | 29.9      |
| Running Reverse KL  | 9.72      |
| Running Update Time | 153       |
-----------------------------------
--2024-08-11 12:31:22.805749 UTC---
| Itration            | 154       |
| PAGAR Loss          | 3.13e+03  |
| Real Det Return     | 296       |
| Real Sto Return     | 145       |
| Reward Loss         | -2.03e+07 |
| Running Env Steps   | 770000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 9.88      |
| Running Update Time | 154       |
-----------------------------------
--2024-08-11 12:33:53.793749 UTC---
| Itration            | 155       |
| PAGAR Loss          | 810       |
| Real Det Return     | 596       |
| Real Sto Return     | 96.6      |
| Reward Loss         | -1.97e+07 |
| Running Env Steps   | 775000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 155       |
-----------------------------------
--2024-08-11 12:36:26.256994 UTC---
| Itration            | 156       |
| PAGAR Loss          | 1.49e+03  |
| Real Det Return     | 566       |
| Real Sto Return     | 131       |
| Reward Loss         | -2.04e+07 |
| Running Env Steps   | 780000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 156       |
-----------------------------------
--2024-08-11 12:38:58.533674 UTC---
| Itration            | 157       |
| PAGAR Loss          | -4.48e+03 |
| Real Det Return     | 598       |
| Real Sto Return     | 79        |
| Reward Loss         | -2.07e+07 |
| Running Env Steps   | 785000    |
| Running Forward KL  | 29.8      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 157       |
-----------------------------------
--2024-08-11 12:41:30.989468 UTC---
| Itration            | 158       |
| PAGAR Loss          | 2.51e+03  |
| Real Det Return     | 482       |
| Real Sto Return     | 105       |
| Reward Loss         | -2.02e+07 |
| Running Env Steps   | 790000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 158       |
-----------------------------------
--2024-08-11 12:44:00.886691 UTC---
| Itration            | 159       |
| PAGAR Loss          | 1.69e+03  |
| Real Det Return     | 431       |
| Real Sto Return     | 144       |
| Reward Loss         | -2.07e+07 |
| Running Env Steps   | 795000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 159       |
-----------------------------------
--2024-08-11 12:46:31.277512 UTC---
| Itration            | 160       |
| PAGAR Loss          | -2.61e+03 |
| Real Det Return     | 676       |
| Real Sto Return     | 101       |
| Reward Loss         | -2.05e+07 |
| Running Env Steps   | 800000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 160       |
-----------------------------------
--2024-08-11 12:49:03.506241 UTC---
| Itration            | 161       |
| PAGAR Loss          | -1.34e+03 |
| Real Det Return     | 450       |
| Real Sto Return     | 144       |
| Reward Loss         | -2.1e+07  |
| Running Env Steps   | 805000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 161       |
-----------------------------------
--2024-08-11 12:51:33.708334 UTC---
| Itration            | 162       |
| PAGAR Loss          | 328       |
| Real Det Return     | 385       |
| Real Sto Return     | 151       |
| Reward Loss         | -2.13e+07 |
| Running Env Steps   | 810000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 162       |
-----------------------------------
--2024-08-11 12:54:03.793358 UTC---
| Itration            | 163       |
| PAGAR Loss          | 3.79e+03  |
| Real Det Return     | 392       |
| Real Sto Return     | 134       |
| Reward Loss         | -2.13e+07 |
| Running Env Steps   | 815000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 163       |
-----------------------------------
--2024-08-11 12:56:34.473716 UTC---
| Itration            | 164       |
| PAGAR Loss          | 1.13e+03  |
| Real Det Return     | 465       |
| Real Sto Return     | 137       |
| Reward Loss         | -2.15e+07 |
| Running Env Steps   | 820000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 164       |
-----------------------------------
--2024-08-11 12:59:03.172476 UTC---
| Itration            | 165       |
| PAGAR Loss          | 1.03e+03  |
| Real Det Return     | 608       |
| Real Sto Return     | 129       |
| Reward Loss         | -2.17e+07 |
| Running Env Steps   | 825000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 165       |
-----------------------------------
--2024-08-11 13:01:33.323664 UTC---
| Itration            | 166       |
| PAGAR Loss          | 3.56e+03  |
| Real Det Return     | 309       |
| Real Sto Return     | 151       |
| Reward Loss         | -2.17e+07 |
| Running Env Steps   | 830000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 166       |
-----------------------------------
--2024-08-11 13:04:01.521759 UTC---
| Itration            | 167       |
| PAGAR Loss          | 1.61e+03  |
| Real Det Return     | 428       |
| Real Sto Return     | 150       |
| Reward Loss         | -2.28e+07 |
| Running Env Steps   | 835000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 167       |
-----------------------------------
--2024-08-11 13:06:30.566158 UTC---
| Itration            | 168       |
| PAGAR Loss          | 1.02e+04  |
| Real Det Return     | 631       |
| Real Sto Return     | 228       |
| Reward Loss         | -2.24e+07 |
| Running Env Steps   | 840000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 168       |
-----------------------------------
--2024-08-11 13:09:01.438341 UTC---
| Itration            | 169       |
| PAGAR Loss          | -6.01e+03 |
| Real Det Return     | 647       |
| Real Sto Return     | 206       |
| Reward Loss         | -2.22e+07 |
| Running Env Steps   | 845000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 169       |
-----------------------------------
--2024-08-11 13:11:29.594290 UTC---
| Itration            | 170       |
| PAGAR Loss          | 6.85e+03  |
| Real Det Return     | 339       |
| Real Sto Return     | 180       |
| Reward Loss         | -2.24e+07 |
| Running Env Steps   | 850000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 170       |
-----------------------------------
--2024-08-11 13:13:58.039232 UTC---
| Itration            | 171       |
| PAGAR Loss          | -2.97e+03 |
| Real Det Return     | 680       |
| Real Sto Return     | 215       |
| Reward Loss         | -2.24e+07 |
| Running Env Steps   | 855000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 171       |
-----------------------------------
--2024-08-11 13:16:30.710318 UTC---
| Itration            | 172       |
| PAGAR Loss          | -4.91e+03 |
| Real Det Return     | 687       |
| Real Sto Return     | 175       |
| Reward Loss         | -2.27e+07 |
| Running Env Steps   | 860000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 172       |
-----------------------------------
--2024-08-11 13:19:00.921065 UTC---
| Itration            | 173       |
| PAGAR Loss          | 1.64e+03  |
| Real Det Return     | 681       |
| Real Sto Return     | 158       |
| Reward Loss         | -2.31e+07 |
| Running Env Steps   | 865000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 173       |
-----------------------------------
--2024-08-11 13:21:31.413276 UTC---
| Itration            | 174       |
| PAGAR Loss          | -1.79e+03 |
| Real Det Return     | 749       |
| Real Sto Return     | 146       |
| Reward Loss         | -2.34e+07 |
| Running Env Steps   | 870000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 174       |
-----------------------------------
--2024-08-11 13:24:01.602006 UTC---
| Itration            | 175       |
| PAGAR Loss          | -8.31e+03 |
| Real Det Return     | 798       |
| Real Sto Return     | 164       |
| Reward Loss         | -2.41e+07 |
| Running Env Steps   | 875000    |
| Running Forward KL  | 29.7      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 175       |
-----------------------------------
--2024-08-11 13:26:31.514451 UTC---
| Itration            | 176       |
| PAGAR Loss          | 4.79e+03  |
| Real Det Return     | 945       |
| Real Sto Return     | 177       |
| Reward Loss         | -2.33e+07 |
| Running Env Steps   | 880000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 11        |
| Running Update Time | 176       |
-----------------------------------
--2024-08-11 13:29:01.880894 UTC---
| Itration            | 177       |
| PAGAR Loss          | 5.85e+03  |
| Real Det Return     | 906       |
| Real Sto Return     | 156       |
| Reward Loss         | -2.45e+07 |
| Running Env Steps   | 885000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 11        |
| Running Update Time | 177       |
-----------------------------------
--2024-08-11 13:31:30.795861 UTC---
| Itration            | 178       |
| PAGAR Loss          | 6.03e+03  |
| Real Det Return     | 931       |
| Real Sto Return     | 168       |
| Reward Loss         | -2.49e+07 |
| Running Env Steps   | 890000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 178       |
-----------------------------------
--2024-08-11 13:34:01.317670 UTC---
| Itration            | 179       |
| PAGAR Loss          | -2.07e+03 |
| Real Det Return     | 739       |
| Real Sto Return     | 202       |
| Reward Loss         | -2.52e+07 |
| Running Env Steps   | 895000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 179       |
-----------------------------------
--2024-08-11 13:36:36.414476 UTC---
| Itration            | 180       |
| PAGAR Loss          | 4.41e+03  |
| Real Det Return     | 674       |
| Real Sto Return     | 189       |
| Reward Loss         | -2.44e+07 |
| Running Env Steps   | 900000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 180       |
-----------------------------------
--2024-08-11 13:39:08.057789 UTC---
| Itration            | 181       |
| PAGAR Loss          | -1.6e+03  |
| Real Det Return     | 936       |
| Real Sto Return     | 294       |
| Reward Loss         | -2.45e+07 |
| Running Env Steps   | 905000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 181       |
-----------------------------------
--2024-08-11 13:41:39.371840 UTC---
| Itration            | 182       |
| PAGAR Loss          | -2.43e+03 |
| Real Det Return     | 1.18e+03  |
| Real Sto Return     | 219       |
| Reward Loss         | -2.55e+07 |
| Running Env Steps   | 910000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.9      |
| Running Update Time | 182       |
-----------------------------------
--2024-08-11 13:44:07.484515 UTC--
| Itration            | 183      |
| PAGAR Loss          | 2.26e+03 |
| Real Det Return     | 985      |
| Real Sto Return     | 221      |
| Reward Loss         | -2.6e+07 |
| Running Env Steps   | 915000   |
| Running Forward KL  | 29.6     |
| Running Reverse KL  | 10.6     |
| Running Update Time | 183      |
----------------------------------
--2024-08-11 13:46:35.895757 UTC---
| Itration            | 184       |
| PAGAR Loss          | 1.16e+03  |
| Real Det Return     | 904       |
| Real Sto Return     | 382       |
| Reward Loss         | -2.64e+07 |
| Running Env Steps   | 920000    |
| Running Forward KL  | 29.5      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 184       |
-----------------------------------
--2024-08-11 13:49:06.374989 UTC---
| Itration            | 185       |
| PAGAR Loss          | 8.15e+03  |
| Real Det Return     | 1.05e+03  |
| Real Sto Return     | 413       |
| Reward Loss         | -2.56e+07 |
| Running Env Steps   | 925000    |
| Running Forward KL  | 29.6      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 185       |
-----------------------------------
--2024-08-11 13:51:36.719424 UTC---
| Itration            | 186       |
| PAGAR Loss          | 3.37e+03  |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 403       |
| Reward Loss         | -2.68e+07 |
| Running Env Steps   | 930000    |
| Running Forward KL  | 29.5      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 186       |
-----------------------------------
--2024-08-11 13:54:07.386618 UTC---
| Itration            | 187       |
| PAGAR Loss          | 1.29e+04  |
| Real Det Return     | 710       |
| Real Sto Return     | 529       |
| Reward Loss         | -2.72e+07 |
| Running Env Steps   | 935000    |
| Running Forward KL  | 30.8      |
| Running Reverse KL  | 12.1      |
| Running Update Time | 187       |
-----------------------------------
--2024-08-11 13:56:35.826901 UTC---
| Itration            | 188       |
| PAGAR Loss          | 4.08e+03  |
| Real Det Return     | 1.18e+03  |
| Real Sto Return     | 406       |
| Reward Loss         | -2.62e+07 |
| Running Env Steps   | 940000    |
| Running Forward KL  | 29.5      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 188       |
-----------------------------------
--2024-08-11 13:59:03.812470 UTC---
| Itration            | 189       |
| PAGAR Loss          | -3.78e+03 |
| Real Det Return     | 1.14e+03  |
| Real Sto Return     | 588       |
| Reward Loss         | -2.66e+07 |
| Running Env Steps   | 945000    |
| Running Forward KL  | 29.4      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 189       |
-----------------------------------
--2024-08-11 14:01:35.507812 UTC---
| Itration            | 190       |
| PAGAR Loss          | 1.96e+04  |
| Real Det Return     | 857       |
| Real Sto Return     | 647       |
| Reward Loss         | -2.69e+07 |
| Running Env Steps   | 950000    |
| Running Forward KL  | 30.7      |
| Running Reverse KL  | 12.1      |
| Running Update Time | 190       |
-----------------------------------
--2024-08-11 14:04:04.379691 UTC---
| Itration            | 191       |
| PAGAR Loss          | -1.02e+04 |
| Real Det Return     | 1.28e+03  |
| Real Sto Return     | 551       |
| Reward Loss         | -2.71e+07 |
| Running Env Steps   | 955000    |
| Running Forward KL  | 29.5      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 191       |
-----------------------------------
--2024-08-11 14:06:33.285074 UTC--
| Itration            | 192      |
| PAGAR Loss          | 6.45e+03 |
| Real Det Return     | 1.35e+03 |
| Real Sto Return     | 543      |
| Reward Loss         | -2.8e+07 |
| Running Env Steps   | 960000   |
| Running Forward KL  | 29.5     |
| Running Reverse KL  | 10.9     |
| Running Update Time | 192      |
----------------------------------
--2024-08-11 14:09:02.490566 UTC---
| Itration            | 193       |
| PAGAR Loss          | 3.56e+03  |
| Real Det Return     | 1.46e+03  |
| Real Sto Return     | 651       |
| Reward Loss         | -2.71e+07 |
| Running Env Steps   | 965000    |
| Running Forward KL  | 29.3      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 193       |
-----------------------------------
--2024-08-11 14:11:31.123411 UTC---
| Itration            | 194       |
| PAGAR Loss          | 1.21e+04  |
| Real Det Return     | 1.53e+03  |
| Real Sto Return     | 526       |
| Reward Loss         | -2.75e+07 |
| Running Env Steps   | 970000    |
| Running Forward KL  | 29.3      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 194       |
-----------------------------------
--2024-08-11 14:14:00.168210 UTC---
| Itration            | 195       |
| PAGAR Loss          | 1.75e+04  |
| Real Det Return     | 1.41e+03  |
| Real Sto Return     | 324       |
| Reward Loss         | -2.74e+07 |
| Running Env Steps   | 975000    |
| Running Forward KL  | 29.3      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 195       |
-----------------------------------
--2024-08-11 14:16:30.169236 UTC---
| Itration            | 196       |
| PAGAR Loss          | 2.36e+03  |
| Real Det Return     | 529       |
| Real Sto Return     | 280       |
| Reward Loss         | -2.71e+07 |
| Running Env Steps   | 980000    |
| Running Forward KL  | 29.4      |
| Running Reverse KL  | 11.7      |
| Running Update Time | 196       |
-----------------------------------
--2024-08-11 14:18:59.586847 UTC---
| Itration            | 197       |
| PAGAR Loss          | 8.01e+03  |
| Real Det Return     | 1.37e+03  |
| Real Sto Return     | 342       |
| Reward Loss         | -2.86e+07 |
| Running Env Steps   | 985000    |
| Running Forward KL  | 29.2      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 197       |
-----------------------------------
--2024-08-11 14:21:29.144035 UTC---
| Itration            | 198       |
| PAGAR Loss          | -2.25e+03 |
| Real Det Return     | 449       |
| Real Sto Return     | 243       |
| Reward Loss         | -2.88e+07 |
| Running Env Steps   | 990000    |
| Running Forward KL  | 29.3      |
| Running Reverse KL  | 11.4      |
| Running Update Time | 198       |
-----------------------------------
--2024-08-11 14:23:58.052810 UTC---
| Itration            | 199       |
| PAGAR Loss          | -2.82e+03 |
| Real Det Return     | 462       |
| Real Sto Return     | 245       |
| Reward Loss         | -2.81e+07 |
| Running Env Steps   | 995000    |
| Running Forward KL  | 29.2      |
| Running Reverse KL  | 11.6      |
| Running Update Time | 199       |
-----------------------------------
--2024-08-11 14:26:28.856884 UTC---
| Itration            | 200       |
| PAGAR Loss          | -4.28e+03 |
| Real Det Return     | 481       |
| Real Sto Return     | 256       |
| Reward Loss         | -2.9e+07  |
| Running Env Steps   | 1000000   |
| Running Forward KL  | 29.3      |
| Running Reverse KL  | 11.7      |
| Running Update Time | 200       |
-----------------------------------
--2024-08-11 14:28:58.697192 UTC---
| Itration            | 201       |
| PAGAR Loss          | 3.69e+03  |
| Real Det Return     | 385       |
| Real Sto Return     | 202       |
| Reward Loss         | -2.82e+07 |
| Running Env Steps   | 1005000   |
| Running Forward KL  | 29.2      |
| Running Reverse KL  | 11.8      |
| Running Update Time | 201       |
-----------------------------------
--2024-08-11 14:31:28.710895 UTC---
| Itration            | 202       |
| PAGAR Loss          | -9.87e+03 |
| Real Det Return     | 442       |
| Real Sto Return     | 240       |
| Reward Loss         | -2.85e+07 |
| Running Env Steps   | 1010000   |
| Running Forward KL  | 29.1      |
| Running Reverse KL  | 11.6      |
| Running Update Time | 202       |
-----------------------------------
--2024-08-11 14:33:55.681197 UTC---
| Itration            | 203       |
| PAGAR Loss          | -1.75e+03 |
| Real Det Return     | 435       |
| Real Sto Return     | 271       |
| Reward Loss         | -2.98e+07 |
| Running Env Steps   | 1015000   |
| Running Forward KL  | 29.1      |
| Running Reverse KL  | 11.4      |
| Running Update Time | 203       |
-----------------------------------
--2024-08-11 14:36:24.558154 UTC---
| Itration            | 204       |
| PAGAR Loss          | -2.78e+04 |
| Real Det Return     | 482       |
| Real Sto Return     | 278       |
| Reward Loss         | -2.86e+07 |
| Running Env Steps   | 1020000   |
| Running Forward KL  | 29.2      |
| Running Reverse KL  | 11.8      |
| Running Update Time | 204       |
-----------------------------------
--2024-08-11 14:38:54.033183 UTC---
| Itration            | 205       |
| PAGAR Loss          | -1.59e+04 |
| Real Det Return     | 545       |
| Real Sto Return     | 271       |
| Reward Loss         | -2.94e+07 |
| Running Env Steps   | 1025000   |
| Running Forward KL  | 29.2      |
| Running Reverse KL  | 11.2      |
| Running Update Time | 205       |
-----------------------------------
--2024-08-11 14:41:24.208520 UTC---
| Itration            | 206       |
| PAGAR Loss          | -1.28e+03 |
| Real Det Return     | 497       |
| Real Sto Return     | 268       |
| Reward Loss         | -2.96e+07 |
| Running Env Steps   | 1030000   |
| Running Forward KL  | 29        |
| Running Reverse KL  | 11.3      |
| Running Update Time | 206       |
-----------------------------------
--2024-08-11 14:43:53.939034 UTC---
| Itration            | 207       |
| PAGAR Loss          | 1.33e+04  |
| Real Det Return     | 666       |
| Real Sto Return     | 274       |
| Reward Loss         | -3.08e+07 |
| Running Env Steps   | 1035000   |
| Running Forward KL  | 29.2      |
| Running Reverse KL  | 11.6      |
| Running Update Time | 207       |
-----------------------------------
--2024-08-11 14:46:21.565742 UTC---
| Itration            | 208       |
| PAGAR Loss          | 7.01e+03  |
| Real Det Return     | 570       |
| Real Sto Return     | 324       |
| Reward Loss         | -3.13e+07 |
| Running Env Steps   | 1040000   |
| Running Forward KL  | 29.1      |
| Running Reverse KL  | 11.2      |
| Running Update Time | 208       |
-----------------------------------
--2024-08-11 14:48:50.634665 UTC---
| Itration            | 209       |
| PAGAR Loss          | -1.44e+04 |
| Real Det Return     | 948       |
| Real Sto Return     | 376       |
| Reward Loss         | -3.09e+07 |
| Running Env Steps   | 1045000   |
| Running Forward KL  | 28.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 209       |
-----------------------------------
--2024-08-11 14:51:19.853900 UTC---
| Itration            | 210       |
| PAGAR Loss          | -6.34e+03 |
| Real Det Return     | 1.31e+03  |
| Real Sto Return     | 663       |
| Reward Loss         | -3.09e+07 |
| Running Env Steps   | 1050000   |
| Running Forward KL  | 28.7      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 210       |
-----------------------------------
--2024-08-11 14:53:48.854531 UTC---
| Itration            | 211       |
| PAGAR Loss          | -2.69e+03 |
| Real Det Return     | 1.63e+03  |
| Real Sto Return     | 1.01e+03  |
| Reward Loss         | -3.02e+07 |
| Running Env Steps   | 1055000   |
| Running Forward KL  | 28.7      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 211       |
-----------------------------------
--2024-08-11 14:56:19.365240 UTC---
| Itration            | 212       |
| PAGAR Loss          | -3.68e+04 |
| Real Det Return     | 1.64e+03  |
| Real Sto Return     | 1.39e+03  |
| Reward Loss         | -3.02e+07 |
| Running Env Steps   | 1060000   |
| Running Forward KL  | 28.7      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 212       |
-----------------------------------
--2024-08-11 14:58:47.791185 UTC---
| Itration            | 213       |
| PAGAR Loss          | 1.47e+04  |
| Real Det Return     | 1.65e+03  |
| Real Sto Return     | 1.36e+03  |
| Reward Loss         | -3.06e+07 |
| Running Env Steps   | 1065000   |
| Running Forward KL  | 28.7      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 213       |
-----------------------------------
--2024-08-11 15:01:16.406152 UTC---
| Itration            | 214       |
| PAGAR Loss          | -8.64e+03 |
| Real Det Return     | 2.27e+03  |
| Real Sto Return     | 1.72e+03  |
| Reward Loss         | -2.95e+07 |
| Running Env Steps   | 1070000   |
| Running Forward KL  | 29.4      |
| Running Reverse KL  | 11.2      |
| Running Update Time | 214       |
-----------------------------------
--2024-08-11 15:03:46.005392 UTC---
| Itration            | 215       |
| PAGAR Loss          | -2.44e+08 |
| Real Det Return     | 2.35e+03  |
| Real Sto Return     | 1.7e+03   |
| Reward Loss         | -3.06e+07 |
| Running Env Steps   | 1075000   |
| Running Forward KL  | 28.2      |
| Running Reverse KL  | 11.4      |
| Running Update Time | 215       |
-----------------------------------
--2024-08-11 15:06:15.345716 UTC---
| Itration            | 216       |
| PAGAR Loss          | -9.11e+03 |
| Real Det Return     | 2.75e+03  |
| Real Sto Return     | 1.82e+03  |
| Reward Loss         | -3.01e+07 |
| Running Env Steps   | 1080000   |
| Running Forward KL  | 28.4      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 216       |
-----------------------------------
--2024-08-11 15:08:45.147025 UTC---
| Itration            | 217       |
| PAGAR Loss          | -7.65e+04 |
| Real Det Return     | 3.14e+03  |
| Real Sto Return     | 2.24e+03  |
| Reward Loss         | -2.98e+07 |
| Running Env Steps   | 1085000   |
| Running Forward KL  | 28.4      |
| Running Reverse KL  | 11.1      |
| Running Update Time | 217       |
-----------------------------------
--2024-08-11 15:11:12.411077 UTC---
| Itration            | 218       |
| PAGAR Loss          | 2.12e+05  |
| Real Det Return     | 3.2e+03   |
| Real Sto Return     | 2.41e+03  |
| Reward Loss         | -2.86e+07 |
| Running Env Steps   | 1090000   |
| Running Forward KL  | 28        |
| Running Reverse KL  | 11.4      |
| Running Update Time | 218       |
-----------------------------------
--2024-08-11 15:13:42.308902 UTC---
| Itration            | 219       |
| PAGAR Loss          | nan       |
| Real Det Return     | 3.4e+03   |
| Real Sto Return     | 2.49e+03  |
| Reward Loss         | -2.86e+07 |
| Running Env Steps   | 1095000   |
| Running Forward KL  | 28        |
| Running Reverse KL  | 11.5      |
| Running Update Time | 219       |
-----------------------------------
--2024-08-11 15:16:12.668545 UTC---
| Itration            | 220       |
| PAGAR Loss          | -2.19e+05 |
| Real Det Return     | 3.47e+03  |
| Real Sto Return     | 2.57e+03  |
| Reward Loss         | -2.8e+07  |
| Running Env Steps   | 1100000   |
| Running Forward KL  | 27.4      |
| Running Reverse KL  | 10.9      |
| Running Update Time | 220       |
-----------------------------------
--2024-08-11 15:18:42.882125 UTC---
| Itration            | 221       |
| PAGAR Loss          | -1.27e+05 |
| Real Det Return     | 3.52e+03  |
| Real Sto Return     | 2.71e+03  |
| Reward Loss         | -2.79e+07 |
| Running Env Steps   | 1105000   |
| Running Forward KL  | 27        |
| Running Reverse KL  | 10.8      |
| Running Update Time | 221       |
-----------------------------------
--2024-08-11 15:21:13.386468 UTC---
| Itration            | 222       |
| PAGAR Loss          | -3.52e+05 |
| Real Det Return     | 3.8e+03   |
| Real Sto Return     | 2.91e+03  |
| Reward Loss         | -2.77e+07 |
| Running Env Steps   | 1110000   |
| Running Forward KL  | 27.4      |
| Running Reverse KL  | 11.3      |
| Running Update Time | 222       |
-----------------------------------
--2024-08-11 15:23:41.697450 UTC---
| Itration            | 223       |
| PAGAR Loss          | 1.19e+05  |
| Real Det Return     | 3.76e+03  |
| Real Sto Return     | 2.9e+03   |
| Reward Loss         | -2.78e+07 |
| Running Env Steps   | 1115000   |
| Running Forward KL  | 27.2      |
| Running Reverse KL  | 10.9      |
| Running Update Time | 223       |
-----------------------------------
--2024-08-11 15:26:11.019099 UTC--
| Itration            | 224      |
| PAGAR Loss          | -9.4e+04 |
| Real Det Return     | 4.2e+03  |
| Real Sto Return     | 3.31e+03 |
| Reward Loss         | -2.7e+07 |
| Running Env Steps   | 1120000  |
| Running Forward KL  | 26.9     |
| Running Reverse KL  | 11.4     |
| Running Update Time | 224      |
----------------------------------
--2024-08-11 15:28:40.065605 UTC---
| Itration            | 225       |
| PAGAR Loss          | -4.36e+04 |
| Real Det Return     | 4.46e+03  |
| Real Sto Return     | 3.61e+03  |
| Reward Loss         | -2.67e+07 |
| Running Env Steps   | 1125000   |
| Running Forward KL  | 25.9      |
| Running Reverse KL  | 11.1      |
| Running Update Time | 225       |
-----------------------------------
--2024-08-11 15:31:06.922647 UTC---
| Itration            | 226       |
| PAGAR Loss          | 1.43e+05  |
| Real Det Return     | 4.51e+03  |
| Real Sto Return     | 3.48e+03  |
| Reward Loss         | -2.68e+07 |
| Running Env Steps   | 1130000   |
| Running Forward KL  | 26.8      |
| Running Reverse KL  | 11.4      |
| Running Update Time | 226       |
-----------------------------------
--2024-08-11 15:33:39.660733 UTC---
| Itration            | 227       |
| PAGAR Loss          | 1.52e+04  |
| Real Det Return     | 4.45e+03  |
| Real Sto Return     | 3.71e+03  |
| Reward Loss         | -2.67e+07 |
| Running Env Steps   | 1135000   |
| Running Forward KL  | 25.6      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 227       |
-----------------------------------
--2024-08-11 15:36:08.902384 UTC---
| Itration            | 228       |
| PAGAR Loss          | 6.8e+04   |
| Real Det Return     | 4.9e+03   |
| Real Sto Return     | 4.05e+03  |
| Reward Loss         | -2.64e+07 |
| Running Env Steps   | 1140000   |
| Running Forward KL  | 25.3      |
| Running Reverse KL  | 11.2      |
| Running Update Time | 228       |
-----------------------------------
--2024-08-11 15:38:38.009597 UTC---
| Itration            | 229       |
| PAGAR Loss          | -2.6e+05  |
| Real Det Return     | 5.1e+03   |
| Real Sto Return     | 4.09e+03  |
| Reward Loss         | -2.58e+07 |
| Running Env Steps   | 1145000   |
| Running Forward KL  | 25.5      |
| Running Reverse KL  | 11.2      |
| Running Update Time | 229       |
-----------------------------------
--2024-08-11 15:41:07.408509 UTC---
| Itration            | 230       |
| PAGAR Loss          | -2.72e+05 |
| Real Det Return     | 4.47e+03  |
| Real Sto Return     | 3.62e+03  |
| Reward Loss         | -2.72e+07 |
| Running Env Steps   | 1150000   |
| Running Forward KL  | 26.1      |
| Running Reverse KL  | 11        |
| Running Update Time | 230       |
-----------------------------------
--2024-08-11 15:43:35.091836 UTC---
| Itration            | 231       |
| PAGAR Loss          | -7.15e+05 |
| Real Det Return     | 5.22e+03  |
| Real Sto Return     | 4.18e+03  |
| Reward Loss         | -2.64e+07 |
| Running Env Steps   | 1155000   |
| Running Forward KL  | 25.1      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 231       |
-----------------------------------
--2024-08-11 15:46:03.338714 UTC---
| Itration            | 232       |
| PAGAR Loss          | 6.75e+05  |
| Real Det Return     | 5.05e+03  |
| Real Sto Return     | 4.26e+03  |
| Reward Loss         | -2.54e+07 |
| Running Env Steps   | 1160000   |
| Running Forward KL  | 24.9      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 232       |
-----------------------------------
--2024-08-11 15:48:32.924610 UTC---
| Itration            | 233       |
| PAGAR Loss          | -4.5e+05  |
| Real Det Return     | 5.36e+03  |
| Real Sto Return     | 4.63e+03  |
| Reward Loss         | -2.47e+07 |
| Running Env Steps   | 1165000   |
| Running Forward KL  | 24.3      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 233       |
-----------------------------------
--2024-08-11 15:51:02.152499 UTC---
| Itration            | 234       |
| PAGAR Loss          | 6.15e+04  |
| Real Det Return     | 5.51e+03  |
| Real Sto Return     | 4.81e+03  |
| Reward Loss         | -2.41e+07 |
| Running Env Steps   | 1170000   |
| Running Forward KL  | 23.8      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 234       |
-----------------------------------
--2024-08-11 15:53:31.577832 UTC---
| Itration            | 235       |
| PAGAR Loss          | -6.1e+04  |
| Real Det Return     | 5.33e+03  |
| Real Sto Return     | 4.83e+03  |
| Reward Loss         | -2.44e+07 |
| Running Env Steps   | 1175000   |
| Running Forward KL  | 24        |
| Running Reverse KL  | 10.8      |
| Running Update Time | 235       |
-----------------------------------
--2024-08-11 15:56:03.511551 UTC---
| Itration            | 236       |
| PAGAR Loss          | 9.41e+05  |
| Real Det Return     | 5.3e+03   |
| Real Sto Return     | 4.84e+03  |
| Reward Loss         | -2.47e+07 |
| Running Env Steps   | 1180000   |
| Running Forward KL  | 24        |
| Running Reverse KL  | 10.6      |
| Running Update Time | 236       |
-----------------------------------
--2024-08-11 15:58:34.128502 UTC---
| Itration            | 237       |
| PAGAR Loss          | -3.69e+06 |
| Real Det Return     | 6.15e+03  |
| Real Sto Return     | 5.22e+03  |
| Reward Loss         | -2.37e+07 |
| Running Env Steps   | 1185000   |
| Running Forward KL  | 22.7      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 237       |
-----------------------------------
--2024-08-11 16:01:04.170068 UTC---
| Itration            | 238       |
| PAGAR Loss          | -2.61e+06 |
| Real Det Return     | 6.45e+03  |
| Real Sto Return     | 5.34e+03  |
| Reward Loss         | -2.32e+07 |
| Running Env Steps   | 1190000   |
| Running Forward KL  | 22.8      |
| Running Reverse KL  | 9.93      |
| Running Update Time | 238       |
-----------------------------------
--2024-08-11 16:03:33.202552 UTC---
| Itration            | 239       |
| PAGAR Loss          | -1.25e+06 |
| Real Det Return     | 5.8e+03   |
| Real Sto Return     | 5.38e+03  |
| Reward Loss         | -2.33e+07 |
| Running Env Steps   | 1195000   |
| Running Forward KL  | 23.1      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 239       |
-----------------------------------
--2024-08-11 16:06:01.580122 UTC---
| Itration            | 240       |
| PAGAR Loss          | -3.4e+06  |
| Real Det Return     | 6.5e+03   |
| Real Sto Return     | 5.57e+03  |
| Reward Loss         | -2.36e+07 |
| Running Env Steps   | 1200000   |
| Running Forward KL  | 22.6      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 240       |
-----------------------------------
--2024-08-11 16:08:32.307644 UTC---
| Itration            | 241       |
| PAGAR Loss          | -5.48e+05 |
| Real Det Return     | 6.62e+03  |
| Real Sto Return     | 5.65e+03  |
| Reward Loss         | -2.22e+07 |
| Running Env Steps   | 1205000   |
| Running Forward KL  | 22.2      |
| Running Reverse KL  | 9.66      |
| Running Update Time | 241       |
-----------------------------------
--2024-08-11 16:11:00.813042 UTC---
| Itration            | 242       |
| PAGAR Loss          | 6.84e+05  |
| Real Det Return     | 6.14e+03  |
| Real Sto Return     | 5.57e+03  |
| Reward Loss         | -2.38e+07 |
| Running Env Steps   | 1210000   |
| Running Forward KL  | 22.8      |
| Running Reverse KL  | 9.6       |
| Running Update Time | 242       |
-----------------------------------
--2024-08-11 16:13:28.838061 UTC---
| Itration            | 243       |
| PAGAR Loss          | 1.33e+05  |
| Real Det Return     | 6.94e+03  |
| Real Sto Return     | 5.36e+03  |
| Reward Loss         | -2.32e+07 |
| Running Env Steps   | 1215000   |
| Running Forward KL  | 22.3      |
| Running Reverse KL  | 10        |
| Running Update Time | 243       |
-----------------------------------
--2024-08-11 16:15:57.636213 UTC---
| Itration            | 244       |
| PAGAR Loss          | -3.31e+06 |
| Real Det Return     | 6.63e+03  |
| Real Sto Return     | 5.84e+03  |
| Reward Loss         | -2.23e+07 |
| Running Env Steps   | 1220000   |
| Running Forward KL  | 22.2      |
| Running Reverse KL  | 9.91      |
| Running Update Time | 244       |
-----------------------------------
--2024-08-11 16:18:26.569542 UTC---
| Itration            | 245       |
| PAGAR Loss          | 1.71e+05  |
| Real Det Return     | 6.7e+03   |
| Real Sto Return     | 5.94e+03  |
| Reward Loss         | -2.13e+07 |
| Running Env Steps   | 1225000   |
| Running Forward KL  | 21.8      |
| Running Reverse KL  | 9.96      |
| Running Update Time | 245       |
-----------------------------------
--2024-08-11 16:20:55.267338 UTC---
| Itration            | 246       |
| PAGAR Loss          | -1.49e+06 |
| Real Det Return     | 6.48e+03  |
| Real Sto Return     | 6e+03     |
| Reward Loss         | -2.03e+07 |
| Running Env Steps   | 1230000   |
| Running Forward KL  | 21.2      |
| Running Reverse KL  | 10        |
| Running Update Time | 246       |
-----------------------------------
--2024-08-11 16:23:23.600349 UTC---
| Itration            | 247       |
| PAGAR Loss          | 6.17e+05  |
| Real Det Return     | 7.14e+03  |
| Real Sto Return     | 6.08e+03  |
| Reward Loss         | -2.19e+07 |
| Running Env Steps   | 1235000   |
| Running Forward KL  | 21.4      |
| Running Reverse KL  | 9.66      |
| Running Update Time | 247       |
-----------------------------------
--2024-08-11 16:25:52.800884 UTC---
| Itration            | 248       |
| PAGAR Loss          | 8.73e+05  |
| Real Det Return     | 7.44e+03  |
| Real Sto Return     | 6.51e+03  |
| Reward Loss         | -2.08e+07 |
| Running Env Steps   | 1240000   |
| Running Forward KL  | 21.2      |
| Running Reverse KL  | 10        |
| Running Update Time | 248       |
-----------------------------------
--2024-08-11 16:28:20.288581 UTC---
| Itration            | 249       |
| PAGAR Loss          | -3.26e+06 |
| Real Det Return     | 7.64e+03  |
| Real Sto Return     | 6.49e+03  |
| Reward Loss         | -2.06e+07 |
| Running Env Steps   | 1245000   |
| Running Forward KL  | 21.2      |
| Running Reverse KL  | 10        |
| Running Update Time | 249       |
-----------------------------------
--2024-08-11 16:30:49.104820 UTC---
| Itration            | 250       |
| PAGAR Loss          | 1.13e+05  |
| Real Det Return     | 6.96e+03  |
| Real Sto Return     | 6.22e+03  |
| Reward Loss         | -2.12e+07 |
| Running Env Steps   | 1250000   |
| Running Forward KL  | 21.5      |
| Running Reverse KL  | 9.64      |
| Running Update Time | 250       |
-----------------------------------
--2024-08-11 16:33:17.608241 UTC--
| Itration            | 251      |
| PAGAR Loss          | 1.64e+06 |
| Real Det Return     | 6.76e+03 |
| Real Sto Return     | 6.28e+03 |
| Reward Loss         | -2.1e+07 |
| Running Env Steps   | 1255000  |
| Running Forward KL  | 21.6     |
| Running Reverse KL  | 10       |
| Running Update Time | 251      |
----------------------------------
--2024-08-11 16:35:46.047399 UTC---
| Itration            | 252       |
| PAGAR Loss          | -1.36e+06 |
| Real Det Return     | 6.91e+03  |
| Real Sto Return     | 6.33e+03  |
| Reward Loss         | -2.16e+07 |
| Running Env Steps   | 1260000   |
| Running Forward KL  | 21.7      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 252       |
-----------------------------------
--2024-08-11 16:38:15.122057 UTC---
| Itration            | 253       |
| PAGAR Loss          | 1.6e+06   |
| Real Det Return     | 7.73e+03  |
| Real Sto Return     | 6.62e+03  |
| Reward Loss         | -1.94e+07 |
| Running Env Steps   | 1265000   |
| Running Forward KL  | 20.9      |
| Running Reverse KL  | 9.94      |
| Running Update Time | 253       |
-----------------------------------
--2024-08-11 16:40:42.306347 UTC---
| Itration            | 254       |
| PAGAR Loss          | -3.28e+06 |
| Real Det Return     | 7.91e+03  |
| Real Sto Return     | 6.74e+03  |
| Reward Loss         | -1.92e+07 |
| Running Env Steps   | 1270000   |
| Running Forward KL  | 20.1      |
| Running Reverse KL  | 9.58      |
| Running Update Time | 254       |
-----------------------------------
--2024-08-11 16:43:10.122420 UTC---
| Itration            | 255       |
| PAGAR Loss          | -1.83e+06 |
| Real Det Return     | 8.36e+03  |
| Real Sto Return     | 7.01e+03  |
| Reward Loss         | -1.81e+07 |
| Running Env Steps   | 1275000   |
| Running Forward KL  | 19.5      |
| Running Reverse KL  | 9.75      |
| Running Update Time | 255       |
-----------------------------------
--2024-08-11 16:45:38.531099 UTC---
| Itration            | 256       |
| PAGAR Loss          | -1.2e+07  |
| Real Det Return     | 8.3e+03   |
| Real Sto Return     | 6.89e+03  |
| Reward Loss         | -1.86e+07 |
| Running Env Steps   | 1280000   |
| Running Forward KL  | 19.6      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 256       |
-----------------------------------
--2024-08-11 16:48:07.064712 UTC---
| Itration            | 257       |
| PAGAR Loss          | 3.95e+06  |
| Real Det Return     | 8.23e+03  |
| Real Sto Return     | 7.15e+03  |
| Reward Loss         | -1.95e+07 |
| Running Env Steps   | 1285000   |
| Running Forward KL  | 20.3      |
| Running Reverse KL  | 10        |
| Running Update Time | 257       |
-----------------------------------
--2024-08-11 16:50:36.956343 UTC---
| Itration            | 258       |
| PAGAR Loss          | 4.19e+05  |
| Real Det Return     | 8.11e+03  |
| Real Sto Return     | 6.39e+03  |
| Reward Loss         | -1.92e+07 |
| Running Env Steps   | 1290000   |
| Running Forward KL  | 19.9      |
| Running Reverse KL  | 9.83      |
| Running Update Time | 258       |
-----------------------------------
--2024-08-11 16:53:04.553603 UTC---
| Itration            | 259       |
| PAGAR Loss          | -3.7e+06  |
| Real Det Return     | 7.92e+03  |
| Real Sto Return     | 6.79e+03  |
| Reward Loss         | -2.05e+07 |
| Running Env Steps   | 1295000   |
| Running Forward KL  | 20.2      |
| Running Reverse KL  | 9.97      |
| Running Update Time | 259       |
-----------------------------------
--2024-08-11 16:55:32.789465 UTC---
| Itration            | 260       |
| PAGAR Loss          | -7.18e+05 |
| Real Det Return     | 7.66e+03  |
| Real Sto Return     | 7.1e+03   |
| Reward Loss         | -1.93e+07 |
| Running Env Steps   | 1300000   |
| Running Forward KL  | 20.6      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 260       |
-----------------------------------
--2024-08-11 16:58:00.306469 UTC---
| Itration            | 261       |
| PAGAR Loss          | -6.88e+06 |
| Real Det Return     | 8.85e+03  |
| Real Sto Return     | 7.86e+03  |
| Reward Loss         | -1.91e+07 |
| Running Env Steps   | 1305000   |
| Running Forward KL  | 19.5      |
| Running Reverse KL  | 9.72      |
| Running Update Time | 261       |
-----------------------------------
--2024-08-11 17:00:28.800893 UTC---
| Itration            | 262       |
| PAGAR Loss          | 3.4e+06   |
| Real Det Return     | 7.42e+03  |
| Real Sto Return     | 6.75e+03  |
| Reward Loss         | -1.97e+07 |
| Running Env Steps   | 1310000   |
| Running Forward KL  | 20.6      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 262       |
-----------------------------------
--2024-08-11 17:02:57.893961 UTC---
| Itration            | 263       |
| PAGAR Loss          | -5.89e+06 |
| Real Det Return     | 8.9e+03   |
| Real Sto Return     | 7.81e+03  |
| Reward Loss         | -1.77e+07 |
| Running Env Steps   | 1315000   |
| Running Forward KL  | 19.6      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 263       |
-----------------------------------
--2024-08-11 17:05:25.182957 UTC---
| Itration            | 264       |
| PAGAR Loss          | 2.38e+06  |
| Real Det Return     | 8.92e+03  |
| Real Sto Return     | 7.78e+03  |
| Reward Loss         | -1.71e+07 |
| Running Env Steps   | 1320000   |
| Running Forward KL  | 19.4      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 264       |
-----------------------------------
--2024-08-11 17:07:53.377150 UTC---
| Itration            | 265       |
| PAGAR Loss          | -3.22e+06 |
| Real Det Return     | 8.83e+03  |
| Real Sto Return     | 7.74e+03  |
| Reward Loss         | -1.74e+07 |
| Running Env Steps   | 1325000   |
| Running Forward KL  | 19.2      |
| Running Reverse KL  | 9.72      |
| Running Update Time | 265       |
-----------------------------------
--2024-08-11 17:10:21.525145 UTC---
| Itration            | 266       |
| PAGAR Loss          | -7.03e+06 |
| Real Det Return     | 8.89e+03  |
| Real Sto Return     | 7.94e+03  |
| Reward Loss         | -1.76e+07 |
| Running Env Steps   | 1330000   |
| Running Forward KL  | 19.3      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 266       |
-----------------------------------
--2024-08-11 17:12:48.154578 UTC---
| Itration            | 267       |
| PAGAR Loss          | -4.87e+06 |
| Real Det Return     | 8.7e+03   |
| Real Sto Return     | 7.62e+03  |
| Reward Loss         | -1.79e+07 |
| Running Env Steps   | 1335000   |
| Running Forward KL  | 19.8      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 267       |
-----------------------------------
--2024-08-11 17:15:16.689606 UTC---
| Itration            | 268       |
| PAGAR Loss          | -4.51e+06 |
| Real Det Return     | 8.76e+03  |
| Real Sto Return     | 7.8e+03   |
| Reward Loss         | -1.7e+07  |
| Running Env Steps   | 1340000   |
| Running Forward KL  | 19.4      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 268       |
-----------------------------------
--2024-08-11 17:17:44.306589 UTC---
| Itration            | 269       |
| PAGAR Loss          | -6.09e+06 |
| Real Det Return     | 8.77e+03  |
| Real Sto Return     | 7.63e+03  |
| Reward Loss         | -1.8e+07  |
| Running Env Steps   | 1345000   |
| Running Forward KL  | 20        |
| Running Reverse KL  | 9.96      |
| Running Update Time | 269       |
-----------------------------------
--2024-08-11 17:20:12.130247 UTC---
| Itration            | 270       |
| PAGAR Loss          | -2.24e+06 |
| Real Det Return     | 9.4e+03   |
| Real Sto Return     | 8.11e+03  |
| Reward Loss         | -1.69e+07 |
| Running Env Steps   | 1350000   |
| Running Forward KL  | 18.9      |
| Running Reverse KL  | 9.88      |
| Running Update Time | 270       |
-----------------------------------
--2024-08-11 17:22:38.047209 UTC---
| Itration            | 271       |
| PAGAR Loss          | 6.29e+05  |
| Real Det Return     | 9.06e+03  |
| Real Sto Return     | 8.05e+03  |
| Reward Loss         | -1.79e+07 |
| Running Env Steps   | 1355000   |
| Running Forward KL  | 19        |
| Running Reverse KL  | 9.75      |
| Running Update Time | 271       |
-----------------------------------
--2024-08-11 17:25:05.083878 UTC---
| Itration            | 272       |
| PAGAR Loss          | -1.22e+07 |
| Real Det Return     | 8.91e+03  |
| Real Sto Return     | 7.93e+03  |
| Reward Loss         | -1.61e+07 |
| Running Env Steps   | 1360000   |
| Running Forward KL  | 18.7      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 272       |
-----------------------------------
--2024-08-11 17:27:33.276834 UTC---
| Itration            | 273       |
| PAGAR Loss          | nan       |
| Real Det Return     | 9.5e+03   |
| Real Sto Return     | 8.19e+03  |
| Reward Loss         | -1.68e+07 |
| Running Env Steps   | 1365000   |
| Running Forward KL  | 18.4      |
| Running Reverse KL  | 9.84      |
| Running Update Time | 273       |
-----------------------------------
--2024-08-11 17:30:00.535129 UTC---
| Itration            | 274       |
| PAGAR Loss          | nan       |
| Real Det Return     | 9.15e+03  |
| Real Sto Return     | 8.14e+03  |
| Reward Loss         | -1.81e+07 |
| Running Env Steps   | 1370000   |
| Running Forward KL  | 18.9      |
| Running Reverse KL  | 9.82      |
| Running Update Time | 274       |
-----------------------------------
--2024-08-11 17:32:28.348446 UTC---
| Itration            | 275       |
| PAGAR Loss          | -1.27e+07 |
| Real Det Return     | 8.9e+03   |
| Real Sto Return     | 7.89e+03  |
| Reward Loss         | -1.82e+07 |
| Running Env Steps   | 1375000   |
| Running Forward KL  | 18.9      |
| Running Reverse KL  | 9.9       |
| Running Update Time | 275       |
-----------------------------------
--2024-08-11 17:34:54.735613 UTC---
| Itration            | 276       |
| PAGAR Loss          | -2.27e+07 |
| Real Det Return     | 8.98e+03  |
| Real Sto Return     | 7.9e+03   |
| Reward Loss         | -1.69e+07 |
| Running Env Steps   | 1380000   |
| Running Forward KL  | 19.2      |
| Running Reverse KL  | 10        |
| Running Update Time | 276       |
-----------------------------------
--2024-08-11 17:37:21.528318 UTC---
| Itration            | 277       |
| PAGAR Loss          | 3.08e+06  |
| Real Det Return     | 9.62e+03  |
| Real Sto Return     | 7.89e+03  |
| Reward Loss         | -1.56e+07 |
| Running Env Steps   | 1385000   |
| Running Forward KL  | 18.4      |
| Running Reverse KL  | 9.55      |
| Running Update Time | 277       |
-----------------------------------
--2024-08-11 17:39:50.096893 UTC---
| Itration            | 278       |
| PAGAR Loss          | -4.76e+06 |
| Real Det Return     | 9.37e+03  |
| Real Sto Return     | 8.11e+03  |
| Reward Loss         | -1.65e+07 |
| Running Env Steps   | 1390000   |
| Running Forward KL  | 19        |
| Running Reverse KL  | 10        |
| Running Update Time | 278       |
-----------------------------------
--2024-08-11 17:42:17.895732 UTC---
| Itration            | 279       |
| PAGAR Loss          | -5.29e+06 |
| Real Det Return     | 9.32e+03  |
| Real Sto Return     | 8.51e+03  |
| Reward Loss         | -1.57e+07 |
| Running Env Steps   | 1395000   |
| Running Forward KL  | 18.2      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 279       |
-----------------------------------
--2024-08-11 17:44:45.697027 UTC---
| Itration            | 280       |
| PAGAR Loss          | -3.23e+06 |
| Real Det Return     | 9.23e+03  |
| Real Sto Return     | 8.4e+03   |
| Reward Loss         | -1.64e+07 |
| Running Env Steps   | 1400000   |
| Running Forward KL  | 18.5      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 280       |
-----------------------------------
--2024-08-11 17:47:12.969726 UTC---
| Itration            | 281       |
| PAGAR Loss          | -8.08e+06 |
| Real Det Return     | 9.52e+03  |
| Real Sto Return     | 8.16e+03  |
| Reward Loss         | -1.72e+07 |
| Running Env Steps   | 1405000   |
| Running Forward KL  | 18.1      |
| Running Reverse KL  | 9.37      |
| Running Update Time | 281       |
-----------------------------------
--2024-08-11 17:49:40.278316 UTC---
| Itration            | 282       |
| PAGAR Loss          | 8.69e+06  |
| Real Det Return     | 9.71e+03  |
| Real Sto Return     | 8.75e+03  |
| Reward Loss         | -1.55e+07 |
| Running Env Steps   | 1410000   |
| Running Forward KL  | 18.3      |
| Running Reverse KL  | 9.77      |
| Running Update Time | 282       |
-----------------------------------
--2024-08-11 17:52:11.668304 UTC---
| Itration            | 283       |
| PAGAR Loss          | 333       |
| Real Det Return     | 9.48e+03  |
| Real Sto Return     | 8.5e+03   |
| Reward Loss         | -1.61e+07 |
| Running Env Steps   | 1415000   |
| Running Forward KL  | 18.4      |
| Running Reverse KL  | 9.68      |
| Running Update Time | 283       |
-----------------------------------
--2024-08-11 17:54:40.066099 UTC---
| Itration            | 284       |
| PAGAR Loss          | 5.67e+05  |
| Real Det Return     | 9.98e+03  |
| Real Sto Return     | 8.73e+03  |
| Reward Loss         | -1.53e+07 |
| Running Env Steps   | 1420000   |
| Running Forward KL  | 17.6      |
| Running Reverse KL  | 9.48      |
| Running Update Time | 284       |
-----------------------------------
--2024-08-11 17:57:07.788583 UTC---
| Itration            | 285       |
| PAGAR Loss          | 5.25e+06  |
| Real Det Return     | 9.92e+03  |
| Real Sto Return     | 8.73e+03  |
| Reward Loss         | -1.54e+07 |
| Running Env Steps   | 1425000   |
| Running Forward KL  | 18.2      |
| Running Reverse KL  | 9.83      |
| Running Update Time | 285       |
-----------------------------------
--2024-08-11 17:59:35.532243 UTC---
| Itration            | 286       |
| PAGAR Loss          | nan       |
| Real Det Return     | 9.98e+03  |
| Real Sto Return     | 8.58e+03  |
| Reward Loss         | -1.49e+07 |
| Running Env Steps   | 1430000   |
| Running Forward KL  | 17.9      |
| Running Reverse KL  | 9.76      |
| Running Update Time | 286       |
-----------------------------------
--2024-08-11 18:02:03.995523 UTC---
| Itration            | 287       |
| PAGAR Loss          | 1.51e+06  |
| Real Det Return     | 9.51e+03  |
| Real Sto Return     | 8.6e+03   |
| Reward Loss         | -1.71e+07 |
| Running Env Steps   | 1435000   |
| Running Forward KL  | 18        |
| Running Reverse KL  | 9.97      |
| Running Update Time | 287       |
-----------------------------------
--2024-08-11 18:04:33.430309 UTC---
| Itration            | 288       |
| PAGAR Loss          | 7.39e+06  |
| Real Det Return     | 9.81e+03  |
| Real Sto Return     | 8.93e+03  |
| Reward Loss         | -1.49e+07 |
| Running Env Steps   | 1440000   |
| Running Forward KL  | 18.3      |
| Running Reverse KL  | 9.55      |
| Running Update Time | 288       |
-----------------------------------
--2024-08-11 18:07:02.460782 UTC---
| Itration            | 289       |
| PAGAR Loss          | 2.43e+06  |
| Real Det Return     | 9.61e+03  |
| Real Sto Return     | 8.88e+03  |
| Reward Loss         | -1.43e+07 |
| Running Env Steps   | 1445000   |
| Running Forward KL  | 18        |
| Running Reverse KL  | 9.8       |
| Running Update Time | 289       |
-----------------------------------
--2024-08-11 18:09:30.760754 UTC---
| Itration            | 290       |
| PAGAR Loss          | -8.07e+05 |
| Real Det Return     | 1.02e+04  |
| Real Sto Return     | 8.72e+03  |
| Reward Loss         | -1.54e+07 |
| Running Env Steps   | 1450000   |
| Running Forward KL  | 17.6      |
| Running Reverse KL  | 9.89      |
| Running Update Time | 290       |
-----------------------------------
--2024-08-11 18:11:58.234150 UTC---
| Itration            | 291       |
| PAGAR Loss          | -1.68e+06 |
| Real Det Return     | 1.02e+04  |
| Real Sto Return     | 9.03e+03  |
| Reward Loss         | -1.42e+07 |
| Running Env Steps   | 1455000   |
| Running Forward KL  | 17.8      |
| Running Reverse KL  | 9.83      |
| Running Update Time | 291       |
-----------------------------------
--2024-08-11 18:14:27.171791 UTC---
| Itration            | 292       |
| PAGAR Loss          | 3.53e+06  |
| Real Det Return     | 1.02e+04  |
| Real Sto Return     | 9.17e+03  |
| Reward Loss         | -1.45e+07 |
| Running Env Steps   | 1460000   |
| Running Forward KL  | 18.3      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 292       |
-----------------------------------
--2024-08-11 18:16:56.631191 UTC---
| Itration            | 293       |
| PAGAR Loss          | -7.74e+06 |
| Real Det Return     | 1e+04     |
| Real Sto Return     | 8.97e+03  |
| Reward Loss         | -1.34e+07 |
| Running Env Steps   | 1465000   |
| Running Forward KL  | 17.5      |
| Running Reverse KL  | 9.83      |
| Running Update Time | 293       |
-----------------------------------
--2024-08-11 18:19:24.332606 UTC---
| Itration            | 294       |
| PAGAR Loss          | -8.13e+06 |
| Real Det Return     | 1.01e+04  |
| Real Sto Return     | 9.06e+03  |
| Reward Loss         | -1.44e+07 |
| Running Env Steps   | 1470000   |
| Running Forward KL  | 17.7      |
| Running Reverse KL  | 9.9       |
| Running Update Time | 294       |
-----------------------------------
--2024-08-11 18:21:53.017122 UTC---
| Itration            | 295       |
| PAGAR Loss          | 5.98e+06  |
| Real Det Return     | 9.46e+03  |
| Real Sto Return     | 8.77e+03  |
| Reward Loss         | -1.37e+07 |
| Running Env Steps   | 1475000   |
| Running Forward KL  | 17.5      |
| Running Reverse KL  | 8.98      |
| Running Update Time | 295       |
-----------------------------------
--2024-08-11 18:24:20.953976 UTC---
| Itration            | 296       |
| PAGAR Loss          | 7.73e+06  |
| Real Det Return     | 9.86e+03  |
| Real Sto Return     | 8.98e+03  |
| Reward Loss         | -1.48e+07 |
| Running Env Steps   | 1480000   |
| Running Forward KL  | 17.8      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 296       |
-----------------------------------
--2024-08-11 18:26:52.082323 UTC---
| Itration            | 297       |
| PAGAR Loss          | 9.31e+06  |
| Real Det Return     | 9.96e+03  |
| Real Sto Return     | 9.16e+03  |
| Reward Loss         | -1.42e+07 |
| Running Env Steps   | 1485000   |
| Running Forward KL  | 18        |
| Running Reverse KL  | 10.7      |
| Running Update Time | 297       |
-----------------------------------
--2024-08-11 18:29:21.451130 UTC--
| Itration            | 298      |
| PAGAR Loss          | 1.2e+06  |
| Real Det Return     | 1.04e+04 |
| Real Sto Return     | 9.29e+03 |
| Reward Loss         | -1.4e+07 |
| Running Env Steps   | 1490000  |
| Running Forward KL  | 17.2     |
| Running Reverse KL  | 9.04     |
| Running Update Time | 298      |
----------------------------------
--2024-08-11 18:31:48.711336 UTC---
| Itration            | 299       |
| PAGAR Loss          | -9.16e+03 |
| Real Det Return     | 1.03e+04  |
| Real Sto Return     | 9.21e+03  |
| Reward Loss         | -1.39e+07 |
| Running Env Steps   | 1495000   |
| Running Forward KL  | 17.8      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 299       |
-----------------------------------
--2024-08-11 18:34:17.278355 UTC---
| Itration            | 300       |
| PAGAR Loss          | 7.98e+06  |
| Real Det Return     | 1.04e+04  |
| Real Sto Return     | 9.4e+03   |
| Reward Loss         | -1.24e+07 |
| Running Env Steps   | 1500000   |
| Running Forward KL  | 17        |
| Running Reverse KL  | 9.64      |
| Running Update Time | 300       |
-----------------------------------
--2024-08-11 18:36:45.278538 UTC---
| Itration            | 301       |
| PAGAR Loss          | -1.07e+06 |
| Real Det Return     | 1.03e+04  |
| Real Sto Return     | 9.09e+03  |
| Reward Loss         | -1.53e+07 |
| Running Env Steps   | 1505000   |
| Running Forward KL  | 17.3      |
| Running Reverse KL  | 9.45      |
| Running Update Time | 301       |
-----------------------------------
--2024-08-11 18:39:13.174912 UTC---
| Itration            | 302       |
| PAGAR Loss          | 2.54e+06  |
| Real Det Return     | 1.01e+04  |
| Real Sto Return     | 6.96e+03  |
| Reward Loss         | -1.99e+07 |
| Running Env Steps   | 1510000   |
| Running Forward KL  | 17        |
| Running Reverse KL  | 8.26      |
| Running Update Time | 302       |
-----------------------------------
--2024-08-11 18:41:41.260823 UTC---
| Itration            | 303       |
| PAGAR Loss          | 3.32e+06  |
| Real Det Return     | 1e+04     |
| Real Sto Return     | 9.13e+03  |
| Reward Loss         | -1.42e+07 |
| Running Env Steps   | 1515000   |
| Running Forward KL  | 17.4      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 303       |
-----------------------------------
--2024-08-11 18:44:08.234709 UTC--
| Itration            | 304      |
| PAGAR Loss          | 3.05e+07 |
| Real Det Return     | 9.94e+03 |
| Real Sto Return     | 9.15e+03 |
| Reward Loss         | -1.5e+07 |
| Running Env Steps   | 1520000  |
| Running Forward KL  | 16.9     |
| Running Reverse KL  | 9.38     |
| Running Update Time | 304      |
----------------------------------
--2024-08-11 18:46:35.684266 UTC---
| Itration            | 305       |
| PAGAR Loss          | 1.37e+07  |
| Real Det Return     | 1.02e+04  |
| Real Sto Return     | 9.48e+03  |
| Reward Loss         | -1.28e+07 |
| Running Env Steps   | 1525000   |
| Running Forward KL  | 17.1      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 305       |
-----------------------------------
--2024-08-11 18:49:03.134598 UTC---
| Itration            | 306       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.06e+04  |
| Real Sto Return     | 9.2e+03   |
| Reward Loss         | -1.45e+07 |
| Running Env Steps   | 1530000   |
| Running Forward KL  | 16.7      |
| Running Reverse KL  | 8.85      |
| Running Update Time | 306       |
-----------------------------------
--2024-08-11 18:51:30.981816 UTC---
| Itration            | 307       |
| PAGAR Loss          | -1.38e+07 |
| Real Det Return     | 1.02e+04  |
| Real Sto Return     | 9.3e+03   |
| Reward Loss         | -1.38e+07 |
| Running Env Steps   | 1535000   |
| Running Forward KL  | 17        |
| Running Reverse KL  | 9.41      |
| Running Update Time | 307       |
-----------------------------------
--2024-08-11 18:53:59.389749 UTC--
| Itration            | 308      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.07e+04 |
| Real Sto Return     | 9.32e+03 |
| Reward Loss         | -1.3e+07 |
| Running Env Steps   | 1540000  |
| Running Forward KL  | 16.9     |
| Running Reverse KL  | 9.49     |
| Running Update Time | 308      |
----------------------------------
--2024-08-11 18:56:27.244588 UTC---
| Itration            | 309       |
| PAGAR Loss          | nan       |
| Real Det Return     | 9.84e+03  |
| Real Sto Return     | 9.01e+03  |
| Reward Loss         | -1.52e+07 |
| Running Env Steps   | 1545000   |
| Running Forward KL  | 17.1      |
| Running Reverse KL  | 9.28      |
| Running Update Time | 309       |
-----------------------------------
--2024-08-11 18:58:55.504377 UTC---
| Itration            | 310       |
| PAGAR Loss          | -3.14e+06 |
| Real Det Return     | 1.05e+04  |
| Real Sto Return     | 9.13e+03  |
| Reward Loss         | -1.32e+07 |
| Running Env Steps   | 1550000   |
| Running Forward KL  | 16.3      |
| Running Reverse KL  | 9.1       |
| Running Update Time | 310       |
-----------------------------------
--2024-08-11 19:01:24.371618 UTC--
| Itration            | 311      |
| PAGAR Loss          | 1.31e+07 |
| Real Det Return     | 1.03e+04 |
| Real Sto Return     | 9.83e+03 |
| Reward Loss         | -1.2e+07 |
| Running Env Steps   | 1555000  |
| Running Forward KL  | 17.1     |
| Running Reverse KL  | 10.1     |
| Running Update Time | 311      |
----------------------------------
--2024-08-11 19:03:52.210915 UTC---
| Itration            | 312       |
| PAGAR Loss          | 1.25e+07  |
| Real Det Return     | 1.02e+04  |
| Real Sto Return     | 9.23e+03  |
| Reward Loss         | -1.26e+07 |
| Running Env Steps   | 1560000   |
| Running Forward KL  | 16.6      |
| Running Reverse KL  | 9.7       |
| Running Update Time | 312       |
-----------------------------------
--2024-08-11 19:06:20.994369 UTC---
| Itration            | 313       |
| PAGAR Loss          | 3.91e+06  |
| Real Det Return     | 1.03e+04  |
| Real Sto Return     | 9.39e+03  |
| Reward Loss         | -1.39e+07 |
| Running Env Steps   | 1565000   |
| Running Forward KL  | 17        |
| Running Reverse KL  | 10        |
| Running Update Time | 313       |
-----------------------------------
--2024-08-11 19:08:48.056072 UTC---
| Itration            | 314       |
| PAGAR Loss          | 1.66e+07  |
| Real Det Return     | 1.07e+04  |
| Real Sto Return     | 9.74e+03  |
| Reward Loss         | -1.32e+07 |
| Running Env Steps   | 1570000   |
| Running Forward KL  | 16.4      |
| Running Reverse KL  | 9.7       |
| Running Update Time | 314       |
-----------------------------------
--2024-08-11 19:11:15.664009 UTC---
| Itration            | 315       |
| PAGAR Loss          | -5.76e+06 |
| Real Det Return     | 1.06e+04  |
| Real Sto Return     | 9.62e+03  |
| Reward Loss         | -1.22e+07 |
| Running Env Steps   | 1575000   |
| Running Forward KL  | 17.1      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 315       |
-----------------------------------
--2024-08-11 19:13:41.653403 UTC---
| Itration            | 316       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.07e+04  |
| Real Sto Return     | 9.66e+03  |
| Reward Loss         | -1.21e+07 |
| Running Env Steps   | 1580000   |
| Running Forward KL  | 17.2      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 316       |
-----------------------------------
--2024-08-11 19:16:08.629041 UTC---
| Itration            | 317       |
| PAGAR Loss          | 1.86e+07  |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.01e+04  |
| Reward Loss         | -1.31e+07 |
| Running Env Steps   | 1585000   |
| Running Forward KL  | 16.8      |
| Running Reverse KL  | 9.49      |
| Running Update Time | 317       |
-----------------------------------
--2024-08-11 19:18:36.168240 UTC--
| Itration            | 318      |
| PAGAR Loss          | 3.4e+06  |
| Real Det Return     | 1.06e+04 |
| Real Sto Return     | 9.95e+03 |
| Reward Loss         | -1.3e+07 |
| Running Env Steps   | 1590000  |
| Running Forward KL  | 16.3     |
| Running Reverse KL  | 8.91     |
| Running Update Time | 318      |
----------------------------------
--2024-08-11 19:21:02.400248 UTC---
| Itration            | 319       |
| PAGAR Loss          | 8.43e+06  |
| Real Det Return     | 1.13e+04  |
| Real Sto Return     | 9.08e+03  |
| Reward Loss         | -1.22e+07 |
| Running Env Steps   | 1595000   |
| Running Forward KL  | 16.1      |
| Running Reverse KL  | 9.34      |
| Running Update Time | 319       |
-----------------------------------
--2024-08-11 19:23:29.048871 UTC---
| Itration            | 320       |
| PAGAR Loss          | -5.11e+06 |
| Real Det Return     | 1.04e+04  |
| Real Sto Return     | 9.84e+03  |
| Reward Loss         | -1.21e+07 |
| Running Env Steps   | 1600000   |
| Running Forward KL  | 16.9      |
| Running Reverse KL  | 9.73      |
| Running Update Time | 320       |
-----------------------------------
--2024-08-11 19:25:54.813998 UTC---
| Itration            | 321       |
| PAGAR Loss          | 8.04e+06  |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 9.97e+03  |
| Reward Loss         | -1.15e+07 |
| Running Env Steps   | 1605000   |
| Running Forward KL  | 16.8      |
| Running Reverse KL  | 9.91      |
| Running Update Time | 321       |
-----------------------------------
--2024-08-11 19:28:23.014826 UTC---
| Itration            | 322       |
| PAGAR Loss          | 2.21e+06  |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 9.5e+03   |
| Reward Loss         | -1.13e+07 |
| Running Env Steps   | 1610000   |
| Running Forward KL  | 16.6      |
| Running Reverse KL  | 9.82      |
| Running Update Time | 322       |
-----------------------------------
--2024-08-11 19:30:51.761379 UTC---
| Itration            | 323       |
| PAGAR Loss          | 6.8e+06   |
| Real Det Return     | 1.08e+04  |
| Real Sto Return     | 1.02e+04  |
| Reward Loss         | -1.14e+07 |
| Running Env Steps   | 1615000   |
| Running Forward KL  | 16.7      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 323       |
-----------------------------------
--2024-08-11 19:33:20.091648 UTC--
| Itration            | 324      |
| PAGAR Loss          | 1.05e+06 |
| Real Det Return     | 1.1e+04  |
| Real Sto Return     | 9.9e+03  |
| Reward Loss         | -1.2e+07 |
| Running Env Steps   | 1620000  |
| Running Forward KL  | 17       |
| Running Reverse KL  | 9.68     |
| Running Update Time | 324      |
----------------------------------
--2024-08-11 19:35:48.201175 UTC---
| Itration            | 325       |
| PAGAR Loss          | 2.69e+07  |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.03e+04  |
| Reward Loss         | -1.08e+07 |
| Running Env Steps   | 1625000   |
| Running Forward KL  | 16.8      |
| Running Reverse KL  | 9.94      |
| Running Update Time | 325       |
-----------------------------------
--2024-08-11 19:38:16.195359 UTC---
| Itration            | 326       |
| PAGAR Loss          | 5.1e+06   |
| Real Det Return     | 1.01e+04  |
| Real Sto Return     | 9.72e+03  |
| Reward Loss         | -1.27e+07 |
| Running Env Steps   | 1630000   |
| Running Forward KL  | 16.1      |
| Running Reverse KL  | 9.65      |
| Running Update Time | 326       |
-----------------------------------
--2024-08-11 19:40:44.328470 UTC---
| Itration            | 327       |
| PAGAR Loss          | 3.27e+07  |
| Real Det Return     | 1.05e+04  |
| Real Sto Return     | 9.98e+03  |
| Reward Loss         | -1.25e+07 |
| Running Env Steps   | 1635000   |
| Running Forward KL  | 16.7      |
| Running Reverse KL  | 9.8       |
| Running Update Time | 327       |
-----------------------------------
--2024-08-11 19:43:14.215349 UTC---
| Itration            | 328       |
| PAGAR Loss          | 1.95e+07  |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.03e+04  |
| Reward Loss         | -1.04e+07 |
| Running Env Steps   | 1640000   |
| Running Forward KL  | 16.1      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 328       |
-----------------------------------
--2024-08-11 19:45:43.318300 UTC---
| Itration            | 329       |
| PAGAR Loss          | 7.98e+06  |
| Real Det Return     | 1.05e+04  |
| Real Sto Return     | 9.74e+03  |
| Reward Loss         | -1.24e+07 |
| Running Env Steps   | 1645000   |
| Running Forward KL  | 16.7      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 329       |
-----------------------------------
--2024-08-11 19:48:12.410269 UTC---
| Itration            | 330       |
| PAGAR Loss          | 6.95e+06  |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.03e+04  |
| Reward Loss         | -1.04e+07 |
| Running Env Steps   | 1650000   |
| Running Forward KL  | 16.3      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 330       |
-----------------------------------
--2024-08-11 19:50:41.707198 UTC---
| Itration            | 331       |
| PAGAR Loss          | 4.41e+07  |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 1e+04     |
| Reward Loss         | -1.05e+07 |
| Running Env Steps   | 1655000   |
| Running Forward KL  | 16.6      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 331       |
-----------------------------------
--2024-08-11 19:53:11.814860 UTC---
| Itration            | 332       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.08e+04  |
| Real Sto Return     | 1.03e+04  |
| Reward Loss         | -1.25e+07 |
| Running Env Steps   | 1660000   |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 8.89      |
| Running Update Time | 332       |
-----------------------------------
--2024-08-11 19:55:42.807440 UTC--
| Itration            | 333      |
| PAGAR Loss          | 1.87e+07 |
| Real Det Return     | 1.12e+04 |
| Real Sto Return     | 1.04e+04 |
| Reward Loss         | -9.3e+06 |
| Running Env Steps   | 1665000  |
| Running Forward KL  | 15.9     |
| Running Reverse KL  | 9.32     |
| Running Update Time | 333      |
----------------------------------
--2024-08-11 19:58:10.737352 UTC---
| Itration            | 334       |
| PAGAR Loss          | 2.04e+07  |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -1.09e+07 |
| Running Env Steps   | 1670000   |
| Running Forward KL  | 16.2      |
| Running Reverse KL  | 9.85      |
| Running Update Time | 334       |
-----------------------------------
--2024-08-11 20:00:39.145394 UTC---
| Itration            | 335       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.13e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -9.15e+06 |
| Running Env Steps   | 1675000   |
| Running Forward KL  | 16.4      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 335       |
-----------------------------------
--2024-08-11 20:03:06.453413 UTC---
| Itration            | 336       |
| PAGAR Loss          | -1.21e+06 |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -1.02e+07 |
| Running Env Steps   | 1680000   |
| Running Forward KL  | 16.1      |
| Running Reverse KL  | 9.76      |
| Running Update Time | 336       |
-----------------------------------
--2024-08-11 20:05:35.165357 UTC--
| Itration            | 337      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.13e+04 |
| Real Sto Return     | 1.07e+04 |
| Reward Loss         | -1.1e+07 |
| Running Env Steps   | 1685000  |
| Running Forward KL  | 16.3     |
| Running Reverse KL  | 9.61     |
| Running Update Time | 337      |
----------------------------------
--2024-08-11 20:08:04.051657 UTC---
| Itration            | 338       |
| PAGAR Loss          | 2.93e+07  |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -1.07e+07 |
| Running Env Steps   | 1690000   |
| Running Forward KL  | 15.9      |
| Running Reverse KL  | 9.36      |
| Running Update Time | 338       |
-----------------------------------
--2024-08-11 20:10:31.953824 UTC---
| Itration            | 339       |
| PAGAR Loss          | 1.89e+07  |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.06e+04  |
| Reward Loss         | -9.36e+06 |
| Running Env Steps   | 1695000   |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 9.53      |
| Running Update Time | 339       |
-----------------------------------
--2024-08-11 20:13:00.321210 UTC---
| Itration            | 340       |
| PAGAR Loss          | 5.09e+07  |
| Real Det Return     | 1.09e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -9.94e+06 |
| Running Env Steps   | 1700000   |
| Running Forward KL  | 16.7      |
| Running Reverse KL  | 9.82      |
| Running Update Time | 340       |
-----------------------------------
--2024-08-11 20:15:28.307927 UTC---
| Itration            | 341       |
| PAGAR Loss          | 1.75e+07  |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 1.04e+04  |
| Reward Loss         | -9.33e+06 |
| Running Env Steps   | 1705000   |
| Running Forward KL  | 16.4      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 341       |
-----------------------------------
--2024-08-11 20:17:56.046332 UTC---
| Itration            | 342       |
| PAGAR Loss          | 3.44e+07  |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -8.96e+06 |
| Running Env Steps   | 1710000   |
| Running Forward KL  | 15.9      |
| Running Reverse KL  | 9.53      |
| Running Update Time | 342       |
-----------------------------------
--2024-08-11 20:20:25.261483 UTC---
| Itration            | 343       |
| PAGAR Loss          | 1.77e+06  |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 9.82e+03  |
| Reward Loss         | -1.01e+07 |
| Running Env Steps   | 1715000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 9.2       |
| Running Update Time | 343       |
-----------------------------------
--2024-08-11 20:22:53.256743 UTC---
| Itration            | 344       |
| PAGAR Loss          | -1.49e+06 |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -9.09e+06 |
| Running Env Steps   | 1720000   |
| Running Forward KL  | 15.9      |
| Running Reverse KL  | 9.78      |
| Running Update Time | 344       |
-----------------------------------
--2024-08-11 20:25:21.101753 UTC---
| Itration            | 345       |
| PAGAR Loss          | -9.6e+05  |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -1.03e+07 |
| Running Env Steps   | 1725000   |
| Running Forward KL  | 16        |
| Running Reverse KL  | 9.42      |
| Running Update Time | 345       |
-----------------------------------
--2024-08-11 20:27:50.713867 UTC---
| Itration            | 346       |
| PAGAR Loss          | 1.92e+07  |
| Real Det Return     | 1.07e+04  |
| Real Sto Return     | 1.02e+04  |
| Reward Loss         | -1.13e+07 |
| Running Env Steps   | 1730000   |
| Running Forward KL  | 16.2      |
| Running Reverse KL  | 9.56      |
| Running Update Time | 346       |
-----------------------------------
--2024-08-11 20:30:18.704428 UTC---
| Itration            | 347       |
| PAGAR Loss          | 3.01e+06  |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 9.45e+03  |
| Reward Loss         | -9.44e+06 |
| Running Env Steps   | 1735000   |
| Running Forward KL  | 16.2      |
| Running Reverse KL  | 9.9       |
| Running Update Time | 347       |
-----------------------------------
--2024-08-11 20:32:46.856936 UTC---
| Itration            | 348       |
| PAGAR Loss          | -1.67e+07 |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -9.51e+06 |
| Running Env Steps   | 1740000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 9.97      |
| Running Update Time | 348       |
-----------------------------------
--2024-08-11 20:35:13.842633 UTC---
| Itration            | 349       |
| PAGAR Loss          | 3.3e+07   |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 9.76e+03  |
| Reward Loss         | -8.69e+06 |
| Running Env Steps   | 1745000   |
| Running Forward KL  | 16.4      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 349       |
-----------------------------------
--2024-08-11 20:37:42.463150 UTC---
| Itration            | 350       |
| PAGAR Loss          | 3.45e+07  |
| Real Det Return     | 1.08e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -1.19e+07 |
| Running Env Steps   | 1750000   |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 9.64      |
| Running Update Time | 350       |
-----------------------------------
--2024-08-11 20:40:10.876591 UTC---
| Itration            | 351       |
| PAGAR Loss          | 5.45e+07  |
| Real Det Return     | 1.09e+04  |
| Real Sto Return     | 1.04e+04  |
| Reward Loss         | -1.04e+07 |
| Running Env Steps   | 1755000   |
| Running Forward KL  | 16.4      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 351       |
-----------------------------------
--2024-08-11 20:42:40.148841 UTC---
| Itration            | 352       |
| PAGAR Loss          | -7.85e+05 |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.02e+04  |
| Reward Loss         | -8.48e+06 |
| Running Env Steps   | 1760000   |
| Running Forward KL  | 16.1      |
| Running Reverse KL  | 9.87      |
| Running Update Time | 352       |
-----------------------------------
--2024-08-11 20:45:09.474328 UTC---
| Itration            | 353       |
| PAGAR Loss          | 3.76e+07  |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 9.86e+03  |
| Reward Loss         | -1.23e+07 |
| Running Env Steps   | 1765000   |
| Running Forward KL  | 16.5      |
| Running Reverse KL  | 9.4       |
| Running Update Time | 353       |
-----------------------------------
--2024-08-11 20:47:38.110643 UTC---
| Itration            | 354       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -8.01e+06 |
| Running Env Steps   | 1770000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 9.82      |
| Running Update Time | 354       |
-----------------------------------
--2024-08-11 20:50:06.011318 UTC---
| Itration            | 355       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -8.16e+06 |
| Running Env Steps   | 1775000   |
| Running Forward KL  | 16.2      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 355       |
-----------------------------------
--2024-08-11 20:52:34.801408 UTC---
| Itration            | 356       |
| PAGAR Loss          | 9.77e+06  |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 9.68e+03  |
| Reward Loss         | -9.36e+06 |
| Running Env Steps   | 1780000   |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 9.73      |
| Running Update Time | 356       |
-----------------------------------
--2024-08-11 20:55:03.561207 UTC---
| Itration            | 357       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -9.06e+06 |
| Running Env Steps   | 1785000   |
| Running Forward KL  | 15.9      |
| Running Reverse KL  | 9.53      |
| Running Update Time | 357       |
-----------------------------------
--2024-08-11 20:57:33.669075 UTC---
| Itration            | 358       |
| PAGAR Loss          | -5.05e+06 |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -8.98e+06 |
| Running Env Steps   | 1790000   |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 9.58      |
| Running Update Time | 358       |
-----------------------------------
--2024-08-11 21:00:03.054390 UTC---
| Itration            | 359       |
| PAGAR Loss          | -1.08e+07 |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.03e+04  |
| Reward Loss         | -9.85e+06 |
| Running Env Steps   | 1795000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 8.72      |
| Running Update Time | 359       |
-----------------------------------
--2024-08-11 21:02:32.425625 UTC---
| Itration            | 360       |
| PAGAR Loss          | 1.96e+07  |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -9.51e+06 |
| Running Env Steps   | 1800000   |
| Running Forward KL  | 16.1      |
| Running Reverse KL  | 9.93      |
| Running Update Time | 360       |
-----------------------------------
--2024-08-11 21:05:02.312773 UTC---
| Itration            | 361       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -8.24e+06 |
| Running Env Steps   | 1805000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 9.43      |
| Running Update Time | 361       |
-----------------------------------
--2024-08-11 21:07:32.548118 UTC---
| Itration            | 362       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.01e+04  |
| Reward Loss         | -1.55e+07 |
| Running Env Steps   | 1810000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 8.17      |
| Running Update Time | 362       |
-----------------------------------
--2024-08-11 21:10:02.009187 UTC---
| Itration            | 363       |
| PAGAR Loss          | -1.47e+06 |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -9.33e+06 |
| Running Env Steps   | 1815000   |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 9.67      |
| Running Update Time | 363       |
-----------------------------------
--2024-08-11 21:12:29.129253 UTC---
| Itration            | 364       |
| PAGAR Loss          | 5.05e+05  |
| Real Det Return     | 9.91e+03  |
| Real Sto Return     | 9.97e+03  |
| Reward Loss         | -1.04e+07 |
| Running Env Steps   | 1820000   |
| Running Forward KL  | 16.5      |
| Running Reverse KL  | 9.93      |
| Running Update Time | 364       |
-----------------------------------
--2024-08-11 21:14:57.427905 UTC---
| Itration            | 365       |
| PAGAR Loss          | -1.13e+07 |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 9.27e+03  |
| Reward Loss         | -7.49e+06 |
| Running Env Steps   | 1825000   |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 9.58      |
| Running Update Time | 365       |
-----------------------------------
--2024-08-11 21:17:26.404034 UTC---
| Itration            | 366       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.06e+04  |
| Real Sto Return     | 1.03e+04  |
| Reward Loss         | -1.03e+07 |
| Running Env Steps   | 1830000   |
| Running Forward KL  | 16.2      |
| Running Reverse KL  | 9.56      |
| Running Update Time | 366       |
-----------------------------------
--2024-08-11 21:19:55.394318 UTC---
| Itration            | 367       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.09e+04  |
| Real Sto Return     | 7.59e+03  |
| Reward Loss         | -2.21e+07 |
| Running Env Steps   | 1835000   |
| Running Forward KL  | 16.2      |
| Running Reverse KL  | 7.56      |
| Running Update Time | 367       |
-----------------------------------
--2024-08-11 21:22:23.942422 UTC---
| Itration            | 368       |
| PAGAR Loss          | -1.78e+07 |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 9.54e+03  |
| Reward Loss         | -9.06e+06 |
| Running Env Steps   | 1840000   |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 9.29      |
| Running Update Time | 368       |
-----------------------------------
--2024-08-11 21:24:44.033815 UTC---
| Itration            | 369       |
| PAGAR Loss          | -1.78e+07 |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -1.43e+07 |
| Running Env Steps   | 1845000   |
| Running Forward KL  | 15.9      |
| Running Reverse KL  | 8.52      |
| Running Update Time | 369       |
-----------------------------------
--2024-08-11 21:26:25.678190 UTC---
| Itration            | 370       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 9.13e+03  |
| Reward Loss         | -9.56e+06 |
| Running Env Steps   | 1850000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 8.63      |
| Running Update Time | 370       |
-----------------------------------
--2024-08-11 21:28:08.460215 UTC---
| Itration            | 371       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -8.31e+06 |
| Running Env Steps   | 1855000   |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 9.67      |
| Running Update Time | 371       |
-----------------------------------
--2024-08-11 21:29:50.379671 UTC---
| Itration            | 372       |
| PAGAR Loss          | -4.68e+07 |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -8.12e+06 |
| Running Env Steps   | 1860000   |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 372       |
-----------------------------------
--2024-08-11 21:31:31.747203 UTC---
| Itration            | 373       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -7.24e+06 |
| Running Env Steps   | 1865000   |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 9.13      |
| Running Update Time | 373       |
-----------------------------------
--2024-08-11 21:33:12.520037 UTC---
| Itration            | 374       |
| PAGAR Loss          | -5.98e+06 |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -7.64e+06 |
| Running Env Steps   | 1870000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 9.8       |
| Running Update Time | 374       |
-----------------------------------
--2024-08-11 21:34:54.014911 UTC---
| Itration            | 375       |
| PAGAR Loss          | -2.67e+07 |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -8.04e+06 |
| Running Env Steps   | 1875000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 375       |
-----------------------------------
--2024-08-11 21:36:36.119855 UTC---
| Itration            | 376       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -9.04e+06 |
| Running Env Steps   | 1880000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 9.42      |
| Running Update Time | 376       |
-----------------------------------
--2024-08-11 21:38:17.533408 UTC---
| Itration            | 377       |
| PAGAR Loss          | -6.07e+06 |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -7.48e+06 |
| Running Env Steps   | 1885000   |
| Running Forward KL  | 16        |
| Running Reverse KL  | 9.83      |
| Running Update Time | 377       |
-----------------------------------
--2024-08-11 21:39:59.403102 UTC---
| Itration            | 378       |
| PAGAR Loss          | -5.82e+08 |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.04e+04  |
| Reward Loss         | -7.65e+06 |
| Running Env Steps   | 1890000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 9.54      |
| Running Update Time | 378       |
-----------------------------------
--2024-08-11 21:41:41.008703 UTC---
| Itration            | 379       |
| PAGAR Loss          | -1.46e+07 |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -8.26e+06 |
| Running Env Steps   | 1895000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 10        |
| Running Update Time | 379       |
-----------------------------------
--2024-08-11 21:43:22.605231 UTC---
| Itration            | 380       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -6.59e+06 |
| Running Env Steps   | 1900000   |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 9.3       |
| Running Update Time | 380       |
-----------------------------------
--2024-08-11 21:45:04.270356 UTC---
| Itration            | 381       |
| PAGAR Loss          | -1.97e+07 |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.55e+06 |
| Running Env Steps   | 1905000   |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 381       |
-----------------------------------
--2024-08-11 21:46:44.929046 UTC--
| Itration            | 382      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.18e+04 |
| Real Sto Return     | 1.09e+04 |
| Reward Loss         | -5.8e+06 |
| Running Env Steps   | 1910000  |
| Running Forward KL  | 15.5     |
| Running Reverse KL  | 9.97     |
| Running Update Time | 382      |
----------------------------------
--2024-08-11 21:48:26.825285 UTC---
| Itration            | 383       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -1.32e+07 |
| Running Env Steps   | 1915000   |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 8.62      |
| Running Update Time | 383       |
-----------------------------------
--2024-08-11 21:50:07.771819 UTC---
| Itration            | 384       |
| PAGAR Loss          | 4.04e+07  |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.04e+04  |
| Reward Loss         | -1.17e+07 |
| Running Env Steps   | 1920000   |
| Running Forward KL  | 16.2      |
| Running Reverse KL  | 9.24      |
| Running Update Time | 384       |
-----------------------------------
--2024-08-11 21:51:48.591276 UTC---
| Itration            | 385       |
| PAGAR Loss          | -3.53e+07 |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -8.32e+06 |
| Running Env Steps   | 1925000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 385       |
-----------------------------------
--2024-08-11 21:53:31.504192 UTC---
| Itration            | 386       |
| PAGAR Loss          | -3.45e+07 |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -6.39e+06 |
| Running Env Steps   | 1930000   |
| Running Forward KL  | 14.9      |
| Running Reverse KL  | 9.81      |
| Running Update Time | 386       |
-----------------------------------
--2024-08-11 21:55:13.120530 UTC---
| Itration            | 387       |
| PAGAR Loss          | -1.78e+07 |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -7.21e+06 |
| Running Env Steps   | 1935000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 387       |
-----------------------------------
--2024-08-11 21:56:53.771047 UTC---
| Itration            | 388       |
| PAGAR Loss          | 2.44e+07  |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.53e+06 |
| Running Env Steps   | 1940000   |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 388       |
-----------------------------------
--2024-08-11 21:58:35.882769 UTC---
| Itration            | 389       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -8.49e+06 |
| Running Env Steps   | 1945000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 9.78      |
| Running Update Time | 389       |
-----------------------------------
--2024-08-11 22:00:17.331101 UTC---
| Itration            | 390       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -7.37e+06 |
| Running Env Steps   | 1950000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 9.43      |
| Running Update Time | 390       |
-----------------------------------
--2024-08-11 22:01:59.167881 UTC---
| Itration            | 391       |
| PAGAR Loss          | -2.22e+06 |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.05e+06 |
| Running Env Steps   | 1955000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 9.78      |
| Running Update Time | 391       |
-----------------------------------
--2024-08-11 22:03:39.667331 UTC---
| Itration            | 392       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.13e+04  |
| Real Sto Return     | 1.06e+04  |
| Reward Loss         | -1.04e+07 |
| Running Env Steps   | 1960000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 9.73      |
| Running Update Time | 392       |
-----------------------------------
--2024-08-11 22:05:20.578570 UTC---
| Itration            | 393       |
| PAGAR Loss          | -4.8e+07  |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -7.23e+06 |
| Running Env Steps   | 1965000   |
| Running Forward KL  | 15        |
| Running Reverse KL  | 9.61      |
| Running Update Time | 393       |
-----------------------------------
--2024-08-11 22:07:02.077105 UTC---
| Itration            | 394       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -7.33e+06 |
| Running Env Steps   | 1970000   |
| Running Forward KL  | 15.5      |
| Running Reverse KL  | 9.85      |
| Running Update Time | 394       |
-----------------------------------
--2024-08-11 22:08:42.986884 UTC--
| Itration            | 395      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.21e+04 |
| Real Sto Return     | 1.13e+04 |
| Reward Loss         | -7.3e+06 |
| Running Env Steps   | 1975000  |
| Running Forward KL  | 15.3     |
| Running Reverse KL  | 9.6      |
| Running Update Time | 395      |
----------------------------------
--2024-08-11 22:10:25.852513 UTC---
| Itration            | 396       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -1.25e+07 |
| Running Env Steps   | 1980000   |
| Running Forward KL  | 15.5      |
| Running Reverse KL  | 8.56      |
| Running Update Time | 396       |
-----------------------------------
--2024-08-11 22:12:09.112912 UTC---
| Itration            | 397       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.91e+06 |
| Running Env Steps   | 1985000   |
| Running Forward KL  | 15        |
| Running Reverse KL  | 9.8       |
| Running Update Time | 397       |
-----------------------------------
--2024-08-11 22:14:01.846268 UTC---
| Itration            | 398       |
| PAGAR Loss          | 3.88e+07  |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.41e+06 |
| Running Env Steps   | 1990000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 398       |
-----------------------------------
--2024-08-11 22:16:01.963050 UTC---
| Itration            | 399       |
| PAGAR Loss          | 9.08e+06  |
| Real Det Return     | 1.13e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -8.35e+06 |
| Running Env Steps   | 1995000   |
| Running Forward KL  | 15.7      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 399       |
-----------------------------------
--2024-08-11 22:17:59.390232 UTC---
| Itration            | 400       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -6.95e+06 |
| Running Env Steps   | 2000000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 9.88      |
| Running Update Time | 400       |
-----------------------------------
--2024-08-11 22:20:00.380270 UTC---
| Itration            | 401       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -7.97e+06 |
| Running Env Steps   | 2005000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 401       |
-----------------------------------
--2024-08-11 22:22:00.459157 UTC---
| Itration            | 402       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.06e+04  |
| Real Sto Return     | 1.03e+04  |
| Reward Loss         | -1.74e+07 |
| Running Env Steps   | 2010000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 7.72      |
| Running Update Time | 402       |
-----------------------------------
--2024-08-11 22:23:56.111306 UTC---
| Itration            | 403       |
| PAGAR Loss          | -4.16e+06 |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -8.11e+06 |
| Running Env Steps   | 2015000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 403       |
-----------------------------------
--2024-08-11 22:25:51.101860 UTC---
| Itration            | 404       |
| PAGAR Loss          | -7.93e+07 |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 9.66e+03  |
| Reward Loss         | -9.03e+06 |
| Running Env Steps   | 2020000   |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 9.88      |
| Running Update Time | 404       |
-----------------------------------
--2024-08-11 22:27:45.802603 UTC---
| Itration            | 405       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -1.04e+07 |
| Running Env Steps   | 2025000   |
| Running Forward KL  | 15.5      |
| Running Reverse KL  | 8.99      |
| Running Update Time | 405       |
-----------------------------------
--2024-08-11 22:29:32.873258 UTC---
| Itration            | 406       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.68e+06 |
| Running Env Steps   | 2030000   |
| Running Forward KL  | 14.9      |
| Running Reverse KL  | 9.88      |
| Running Update Time | 406       |
-----------------------------------
--2024-08-11 22:31:19.405231 UTC--
| Itration            | 407      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.14e+04 |
| Real Sto Return     | 1.1e+04  |
| Reward Loss         | -8.8e+06 |
| Running Env Steps   | 2035000  |
| Running Forward KL  | 15.3     |
| Running Reverse KL  | 9.39     |
| Running Update Time | 407      |
----------------------------------
--2024-08-11 22:33:04.922390 UTC---
| Itration            | 408       |
| PAGAR Loss          | 3.08e+07  |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -6.08e+06 |
| Running Env Steps   | 2040000   |
| Running Forward KL  | 14.9      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 408       |
-----------------------------------
--2024-08-11 22:35:06.499191 UTC---
| Itration            | 409       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 9.66e+03  |
| Reward Loss         | -1.17e+07 |
| Running Env Steps   | 2045000   |
| Running Forward KL  | 15        |
| Running Reverse KL  | 8.11      |
| Running Update Time | 409       |
-----------------------------------
--2024-08-11 22:36:54.530240 UTC---
| Itration            | 410       |
| PAGAR Loss          | 5.71e+06  |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.84e+06 |
| Running Env Steps   | 2050000   |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 9.31      |
| Running Update Time | 410       |
-----------------------------------
--2024-08-11 22:38:39.235506 UTC---
| Itration            | 411       |
| PAGAR Loss          | 2.53e+07  |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.76e+06 |
| Running Env Steps   | 2055000   |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 9.53      |
| Running Update Time | 411       |
-----------------------------------
--2024-08-11 22:40:25.119596 UTC---
| Itration            | 412       |
| PAGAR Loss          | 9.94e+05  |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -5.84e+06 |
| Running Env Steps   | 2060000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 9.84      |
| Running Update Time | 412       |
-----------------------------------
--2024-08-11 22:42:08.578619 UTC---
| Itration            | 413       |
| PAGAR Loss          | 2.93e+07  |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.07e+06 |
| Running Env Steps   | 2065000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 9.79      |
| Running Update Time | 413       |
-----------------------------------
--2024-08-11 22:43:55.149189 UTC---
| Itration            | 414       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -2.43e+06 |
| Running Env Steps   | 2070000   |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 9.58      |
| Running Update Time | 414       |
-----------------------------------
--2024-08-11 22:45:39.861296 UTC---
| Itration            | 415       |
| PAGAR Loss          | 3.99e+07  |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.36e+06 |
| Running Env Steps   | 2075000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 415       |
-----------------------------------
--2024-08-11 22:47:26.939357 UTC---
| Itration            | 416       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -5.09e+06 |
| Running Env Steps   | 2080000   |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 9.81      |
| Running Update Time | 416       |
-----------------------------------
--2024-08-11 22:49:11.640272 UTC---
| Itration            | 417       |
| PAGAR Loss          | -2.07e+07 |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.85e+06 |
| Running Env Steps   | 2085000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.29      |
| Running Update Time | 417       |
-----------------------------------
--2024-08-11 22:50:56.148327 UTC---
| Itration            | 418       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 9.15e+03  |
| Reward Loss         | -1.08e+07 |
| Running Env Steps   | 2090000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 8.4       |
| Running Update Time | 418       |
-----------------------------------
--2024-08-11 22:52:42.097068 UTC---
| Itration            | 419       |
| PAGAR Loss          | 6.46e+07  |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 1.04e+04  |
| Reward Loss         | -8.39e+06 |
| Running Env Steps   | 2095000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 10        |
| Running Update Time | 419       |
-----------------------------------
--2024-08-11 22:54:27.503490 UTC---
| Itration            | 420       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -6.85e+06 |
| Running Env Steps   | 2100000   |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 9.85      |
| Running Update Time | 420       |
-----------------------------------
--2024-08-11 22:56:11.460157 UTC---
| Itration            | 421       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 1e+04     |
| Reward Loss         | -1.29e+07 |
| Running Env Steps   | 2105000   |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 8.74      |
| Running Update Time | 421       |
-----------------------------------
--2024-08-11 22:57:55.527537 UTC---
| Itration            | 422       |
| PAGAR Loss          | 3e+07     |
| Real Det Return     | 1.13e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -7.38e+06 |
| Running Env Steps   | 2110000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 10.9      |
| Running Update Time | 422       |
-----------------------------------
--2024-08-11 22:59:54.648159 UTC---
| Itration            | 423       |
| PAGAR Loss          | 1.3e+05   |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.68e+06 |
| Running Env Steps   | 2115000   |
| Running Forward KL  | 15        |
| Running Reverse KL  | 9.89      |
| Running Update Time | 423       |
-----------------------------------
--2024-08-11 23:01:39.289291 UTC---
| Itration            | 424       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -7.43e+06 |
| Running Env Steps   | 2120000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 9.99      |
| Running Update Time | 424       |
-----------------------------------
--2024-08-11 23:03:23.817553 UTC---
| Itration            | 425       |
| PAGAR Loss          | -2.85e+07 |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -5.94e+06 |
| Running Env Steps   | 2125000   |
| Running Forward KL  | 14.8      |
| Running Reverse KL  | 8.94      |
| Running Update Time | 425       |
-----------------------------------
--2024-08-11 23:05:07.141226 UTC---
| Itration            | 426       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.27e+06 |
| Running Env Steps   | 2130000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.31      |
| Running Update Time | 426       |
-----------------------------------
--2024-08-11 23:06:51.671283 UTC---
| Itration            | 427       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.05e+06 |
| Running Env Steps   | 2135000   |
| Running Forward KL  | 14.8      |
| Running Reverse KL  | 9.97      |
| Running Update Time | 427       |
-----------------------------------
--2024-08-11 23:08:40.723440 UTC---
| Itration            | 428       |
| PAGAR Loss          | 4.62e+07  |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -6.91e+06 |
| Running Env Steps   | 2140000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.22      |
| Running Update Time | 428       |
-----------------------------------
--2024-08-11 23:10:30.126202 UTC---
| Itration            | 429       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.06e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -8.26e+06 |
| Running Env Steps   | 2145000   |
| Running Forward KL  | 14.9      |
| Running Reverse KL  | 9.52      |
| Running Update Time | 429       |
-----------------------------------
--2024-08-11 23:12:15.310763 UTC--
| Itration            | 430      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.2e+04  |
| Real Sto Return     | 1.14e+04 |
| Reward Loss         | -6.1e+06 |
| Running Env Steps   | 2150000  |
| Running Forward KL  | 14.6     |
| Running Reverse KL  | 9.72     |
| Running Update Time | 430      |
----------------------------------
--2024-08-11 23:13:59.092758 UTC---
| Itration            | 431       |
| PAGAR Loss          | -1.66e+07 |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -6.58e+06 |
| Running Env Steps   | 2155000   |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 9.37      |
| Running Update Time | 431       |
-----------------------------------
--2024-08-11 23:15:42.662651 UTC---
| Itration            | 432       |
| PAGAR Loss          | -1.29e+08 |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.38e+06 |
| Running Env Steps   | 2160000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 9.23      |
| Running Update Time | 432       |
-----------------------------------
--2024-08-11 23:17:23.985090 UTC---
| Itration            | 433       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -5.07e+06 |
| Running Env Steps   | 2165000   |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 9.16      |
| Running Update Time | 433       |
-----------------------------------
--2024-08-11 23:19:04.998042 UTC---
| Itration            | 434       |
| PAGAR Loss          | -1.28e+07 |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -8.1e+06  |
| Running Env Steps   | 2170000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 9.7       |
| Running Update Time | 434       |
-----------------------------------
--2024-08-11 23:20:46.768857 UTC--
| Itration            | 435      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.11e+04 |
| Real Sto Return     | 8.92e+03 |
| Reward Loss         | -2.3e+07 |
| Running Env Steps   | 2175000  |
| Running Forward KL  | 15.2     |
| Running Reverse KL  | 7.57     |
| Running Update Time | 435      |
----------------------------------
--2024-08-11 23:22:27.717500 UTC---
| Itration            | 436       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.18e+06 |
| Running Env Steps   | 2180000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.6       |
| Running Update Time | 436       |
-----------------------------------
--2024-08-11 23:24:11.002274 UTC---
| Itration            | 437       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -6.14e+06 |
| Running Env Steps   | 2185000   |
| Running Forward KL  | 14.4      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 437       |
-----------------------------------
--2024-08-11 23:25:54.822197 UTC---
| Itration            | 438       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.73e+06 |
| Running Env Steps   | 2190000   |
| Running Forward KL  | 14.1      |
| Running Reverse KL  | 9.61      |
| Running Update Time | 438       |
-----------------------------------
--2024-08-11 23:27:38.777613 UTC---
| Itration            | 439       |
| PAGAR Loss          | 4.2e+07   |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -6.33e+06 |
| Running Env Steps   | 2195000   |
| Running Forward KL  | 14.9      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 439       |
-----------------------------------
--2024-08-11 23:29:22.637904 UTC---
| Itration            | 440       |
| PAGAR Loss          | -7.08e+06 |
| Real Det Return     | 1.03e+04  |
| Real Sto Return     | 1.02e+04  |
| Reward Loss         | -6.12e+06 |
| Running Env Steps   | 2200000   |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 9.34      |
| Running Update Time | 440       |
-----------------------------------
--2024-08-11 23:31:04.337851 UTC--
| Itration            | 441      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.21e+04 |
| Real Sto Return     | 1.14e+04 |
| Reward Loss         | -8e+06   |
| Running Env Steps   | 2205000  |
| Running Forward KL  | 14.6     |
| Running Reverse KL  | 9.13     |
| Running Update Time | 441      |
----------------------------------
--2024-08-11 23:32:47.300778 UTC---
| Itration            | 442       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -6.26e+06 |
| Running Env Steps   | 2210000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 9.14      |
| Running Update Time | 442       |
-----------------------------------
--2024-08-11 23:34:29.424544 UTC---
| Itration            | 443       |
| PAGAR Loss          | -2.72e+07 |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -6.38e+06 |
| Running Env Steps   | 2215000   |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 9.5       |
| Running Update Time | 443       |
-----------------------------------
--2024-08-11 23:36:13.017047 UTC---
| Itration            | 444       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -8.55e+06 |
| Running Env Steps   | 2220000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 8.62      |
| Running Update Time | 444       |
-----------------------------------
--2024-08-11 23:37:55.561300 UTC---
| Itration            | 445       |
| PAGAR Loss          | 2.33e+08  |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -7.74e+06 |
| Running Env Steps   | 2225000   |
| Running Forward KL  | 14.8      |
| Running Reverse KL  | 10.3      |
| Running Update Time | 445       |
-----------------------------------
--2024-08-11 23:39:37.168606 UTC---
| Itration            | 446       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.06e+04  |
| Reward Loss         | -1.24e+07 |
| Running Env Steps   | 2230000   |
| Running Forward KL  | 15        |
| Running Reverse KL  | 9.22      |
| Running Update Time | 446       |
-----------------------------------
--2024-08-11 23:41:20.938353 UTC---
| Itration            | 447       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -9.02e+06 |
| Running Env Steps   | 2235000   |
| Running Forward KL  | 15        |
| Running Reverse KL  | 9.51      |
| Running Update Time | 447       |
-----------------------------------
--2024-08-11 23:43:03.313595 UTC--
| Itration            | 448      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.15e+04 |
| Real Sto Return     | 1.08e+04 |
| Reward Loss         | -7.8e+06 |
| Running Env Steps   | 2240000  |
| Running Forward KL  | 15.2     |
| Running Reverse KL  | 10.5     |
| Running Update Time | 448      |
----------------------------------
--2024-08-11 23:44:48.000156 UTC---
| Itration            | 449       |
| PAGAR Loss          | 2.34e+07  |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 1.05e+04  |
| Reward Loss         | -8.91e+06 |
| Running Env Steps   | 2245000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 9.61      |
| Running Update Time | 449       |
-----------------------------------
--2024-08-11 23:46:29.629839 UTC--
| Itration            | 450      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.18e+04 |
| Real Sto Return     | 1.15e+04 |
| Reward Loss         | -6e+06   |
| Running Env Steps   | 2250000  |
| Running Forward KL  | 14.5     |
| Running Reverse KL  | 10.5     |
| Running Update Time | 450      |
----------------------------------
--2024-08-11 23:48:13.183923 UTC---
| Itration            | 451       |
| PAGAR Loss          | 6.17e+07  |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -6.69e+06 |
| Running Env Steps   | 2255000   |
| Running Forward KL  | 14.4      |
| Running Reverse KL  | 9.76      |
| Running Update Time | 451       |
-----------------------------------
--2024-08-11 23:49:54.298347 UTC--
| Itration            | 452      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.17e+04 |
| Real Sto Return     | 1.06e+04 |
| Reward Loss         | -9.3e+06 |
| Running Env Steps   | 2260000  |
| Running Forward KL  | 14.4     |
| Running Reverse KL  | 8.67     |
| Running Update Time | 452      |
----------------------------------
--2024-08-11 23:51:36.723460 UTC---
| Itration            | 453       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -5.91e+06 |
| Running Env Steps   | 2265000   |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 9.71      |
| Running Update Time | 453       |
-----------------------------------
--2024-08-11 23:53:18.444715 UTC---
| Itration            | 454       |
| PAGAR Loss          | 5.66e+07  |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.06e+06 |
| Running Env Steps   | 2270000   |
| Running Forward KL  | 15.2      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 454       |
-----------------------------------
--2024-08-11 23:55:00.010487 UTC--
| Itration            | 455      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.21e+04 |
| Real Sto Return     | 9.97e+03 |
| Reward Loss         | -7.9e+06 |
| Running Env Steps   | 2275000  |
| Running Forward KL  | 14.2     |
| Running Reverse KL  | 9.52     |
| Running Update Time | 455      |
----------------------------------
--2024-08-11 23:56:44.138514 UTC---
| Itration            | 456       |
| PAGAR Loss          | -5.53e+07 |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -6.44e+06 |
| Running Env Steps   | 2280000   |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 9.23      |
| Running Update Time | 456       |
-----------------------------------
--2024-08-11 23:58:27.497456 UTC---
| Itration            | 457       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.73e+06 |
| Running Env Steps   | 2285000   |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 457       |
-----------------------------------
--2024-08-12 00:00:10.395024 UTC---
| Itration            | 458       |
| PAGAR Loss          | -2.13e+07 |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -5.58e+06 |
| Running Env Steps   | 2290000   |
| Running Forward KL  | 14.1      |
| Running Reverse KL  | 9.81      |
| Running Update Time | 458       |
-----------------------------------
--2024-08-12 00:01:50.536958 UTC---
| Itration            | 459       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -6.43e+06 |
| Running Env Steps   | 2295000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 9.96      |
| Running Update Time | 459       |
-----------------------------------
--2024-08-12 00:03:32.410935 UTC---
| Itration            | 460       |
| PAGAR Loss          | -2.95e+08 |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -1.08e+07 |
| Running Env Steps   | 2300000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 8.89      |
| Running Update Time | 460       |
-----------------------------------
--2024-08-12 00:05:14.826415 UTC---
| Itration            | 461       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.49e+06 |
| Running Env Steps   | 2305000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.76      |
| Running Update Time | 461       |
-----------------------------------
--2024-08-12 00:06:55.400902 UTC---
| Itration            | 462       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -7.79e+06 |
| Running Env Steps   | 2310000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 8.72      |
| Running Update Time | 462       |
-----------------------------------
--2024-08-12 00:08:36.235234 UTC---
| Itration            | 463       |
| PAGAR Loss          | -9.58e+06 |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -7.4e+06  |
| Running Env Steps   | 2315000   |
| Running Forward KL  | 14.4      |
| Running Reverse KL  | 9.32      |
| Running Update Time | 463       |
-----------------------------------
--2024-08-12 00:10:17.187443 UTC---
| Itration            | 464       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -5.98e+06 |
| Running Env Steps   | 2320000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 9.84      |
| Running Update Time | 464       |
-----------------------------------
--2024-08-12 00:11:58.868546 UTC---
| Itration            | 465       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.86e+06 |
| Running Env Steps   | 2325000   |
| Running Forward KL  | 14.8      |
| Running Reverse KL  | 8.88      |
| Running Update Time | 465       |
-----------------------------------
--2024-08-12 00:13:39.878477 UTC---
| Itration            | 466       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.15e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -7.37e+06 |
| Running Env Steps   | 2330000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 9.97      |
| Running Update Time | 466       |
-----------------------------------
--2024-08-12 00:15:19.791023 UTC---
| Itration            | 467       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -6.92e+06 |
| Running Env Steps   | 2335000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.99      |
| Running Update Time | 467       |
-----------------------------------
--2024-08-12 00:17:00.986967 UTC---
| Itration            | 468       |
| PAGAR Loss          | 4.17e+06  |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -9.73e+06 |
| Running Env Steps   | 2340000   |
| Running Forward KL  | 14.8      |
| Running Reverse KL  | 9.37      |
| Running Update Time | 468       |
-----------------------------------
--2024-08-12 00:18:41.909947 UTC---
| Itration            | 469       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -6.96e+06 |
| Running Env Steps   | 2345000   |
| Running Forward KL  | 13.8      |
| Running Reverse KL  | 8.59      |
| Running Update Time | 469       |
-----------------------------------
--2024-08-12 00:20:24.714229 UTC---
| Itration            | 470       |
| PAGAR Loss          | -2.63e+07 |
| Real Det Return     | 1.13e+04  |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -9.41e+06 |
| Running Env Steps   | 2350000   |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 9.94      |
| Running Update Time | 470       |
-----------------------------------
--2024-08-12 00:22:06.248456 UTC---
| Itration            | 471       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.19e+04  |
| Reward Loss         | -5.85e+06 |
| Running Env Steps   | 2355000   |
| Running Forward KL  | 13.9      |
| Running Reverse KL  | 9.15      |
| Running Update Time | 471       |
-----------------------------------
--2024-08-12 00:23:46.556687 UTC---
| Itration            | 472       |
| PAGAR Loss          | nan       |
| Real Det Return     | 4.97e+03  |
| Real Sto Return     | 5.09e+03  |
| Reward Loss         | -2.34e+07 |
| Running Env Steps   | 2360000   |
| Running Forward KL  | 15.4      |
| Running Reverse KL  | 7.33      |
| Running Update Time | 472       |
-----------------------------------
--2024-08-12 00:25:28.045256 UTC---
| Itration            | 473       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.47e+06 |
| Running Env Steps   | 2365000   |
| Running Forward KL  | 13.9      |
| Running Reverse KL  | 9.92      |
| Running Update Time | 473       |
-----------------------------------
--2024-08-12 00:27:10.084334 UTC---
| Itration            | 474       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 9.94e+03  |
| Reward Loss         | -1.33e+07 |
| Running Env Steps   | 2370000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 8.3       |
| Running Update Time | 474       |
-----------------------------------
--2024-08-12 00:28:51.500849 UTC---
| Itration            | 475       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.91e+06 |
| Running Env Steps   | 2375000   |
| Running Forward KL  | 14        |
| Running Reverse KL  | 9.09      |
| Running Update Time | 475       |
-----------------------------------
--2024-08-12 00:30:31.521465 UTC---
| Itration            | 476       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -7.78e+06 |
| Running Env Steps   | 2380000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 9.6       |
| Running Update Time | 476       |
-----------------------------------
--2024-08-12 00:32:13.450965 UTC---
| Itration            | 477       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.13e+06 |
| Running Env Steps   | 2385000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.66      |
| Running Update Time | 477       |
-----------------------------------
--2024-08-12 00:33:56.600016 UTC---
| Itration            | 478       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -4.08e+06 |
| Running Env Steps   | 2390000   |
| Running Forward KL  | 14        |
| Running Reverse KL  | 10        |
| Running Update Time | 478       |
-----------------------------------
--2024-08-12 00:35:39.604001 UTC---
| Itration            | 479       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.09e+04  |
| Real Sto Return     | 9.6e+03   |
| Reward Loss         | -9.43e+06 |
| Running Env Steps   | 2395000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 9.5       |
| Running Update Time | 479       |
-----------------------------------
--2024-08-12 00:37:21.109530 UTC---
| Itration            | 480       |
| PAGAR Loss          | 3.73e+09  |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -4.93e+06 |
| Running Env Steps   | 2400000   |
| Running Forward KL  | 14        |
| Running Reverse KL  | 10        |
| Running Update Time | 480       |
-----------------------------------
--2024-08-12 00:39:06.435386 UTC---
| Itration            | 481       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -6.67e+06 |
| Running Env Steps   | 2405000   |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 9.2       |
| Running Update Time | 481       |
-----------------------------------
--2024-08-12 00:40:49.560460 UTC---
| Itration            | 482       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -5.83e+06 |
| Running Env Steps   | 2410000   |
| Running Forward KL  | 13.8      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 482       |
-----------------------------------
--2024-08-12 00:42:31.958283 UTC--
| Itration            | 483      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.17e+04 |
| Real Sto Return     | 1.07e+04 |
| Reward Loss         | -6.6e+06 |
| Running Env Steps   | 2415000  |
| Running Forward KL  | 13.7     |
| Running Reverse KL  | 8.94     |
| Running Update Time | 483      |
----------------------------------
--2024-08-12 00:44:15.009041 UTC---
| Itration            | 484       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -4.46e+06 |
| Running Env Steps   | 2420000   |
| Running Forward KL  | 14        |
| Running Reverse KL  | 9.56      |
| Running Update Time | 484       |
-----------------------------------
--2024-08-12 00:45:57.000940 UTC---
| Itration            | 485       |
| PAGAR Loss          | 5.76e+07  |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -4.78e+06 |
| Running Env Steps   | 2425000   |
| Running Forward KL  | 14.1      |
| Running Reverse KL  | 9.7       |
| Running Update Time | 485       |
-----------------------------------
--2024-08-12 00:47:40.442129 UTC--
| Itration            | 486      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.18e+04 |
| Real Sto Return     | 1.1e+04  |
| Reward Loss         | -6.3e+06 |
| Running Env Steps   | 2430000  |
| Running Forward KL  | 14.4     |
| Running Reverse KL  | 10       |
| Running Update Time | 486      |
----------------------------------
--2024-08-12 00:49:22.055953 UTC---
| Itration            | 487       |
| PAGAR Loss          | 1.17e+10  |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -5.05e+06 |
| Running Env Steps   | 2435000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.71      |
| Running Update Time | 487       |
-----------------------------------
--2024-08-12 00:51:03.763144 UTC---
| Itration            | 488       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.2e+04   |
| Reward Loss         | -5.44e+06 |
| Running Env Steps   | 2440000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 10.2      |
| Running Update Time | 488       |
-----------------------------------
--2024-08-12 00:52:46.500138 UTC---
| Itration            | 489       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -5.31e+06 |
| Running Env Steps   | 2445000   |
| Running Forward KL  | 14        |
| Running Reverse KL  | 9.41      |
| Running Update Time | 489       |
-----------------------------------
--2024-08-12 00:54:28.534143 UTC---
| Itration            | 490       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.19e+04  |
| Reward Loss         | -4.61e+06 |
| Running Env Steps   | 2450000   |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 9.14      |
| Running Update Time | 490       |
-----------------------------------
--2024-08-12 00:56:11.479622 UTC---
| Itration            | 491       |
| PAGAR Loss          | 1.75e+06  |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -5.52e+06 |
| Running Env Steps   | 2455000   |
| Running Forward KL  | 14        |
| Running Reverse KL  | 9.4       |
| Running Update Time | 491       |
-----------------------------------
--2024-08-12 00:57:54.548767 UTC--
| Itration            | 492      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.25e+04 |
| Real Sto Return     | 1.18e+04 |
| Reward Loss         | -5.2e+06 |
| Running Env Steps   | 2460000  |
| Running Forward KL  | 14.1     |
| Running Reverse KL  | 9.5      |
| Running Update Time | 492      |
----------------------------------
--2024-08-12 00:59:38.172735 UTC---
| Itration            | 493       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.2e+04   |
| Reward Loss         | -4.83e+06 |
| Running Env Steps   | 2465000   |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 9.25      |
| Running Update Time | 493       |
-----------------------------------
--2024-08-12 01:01:20.859603 UTC---
| Itration            | 494       |
| PAGAR Loss          | -2.86e+07 |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -7.63e+06 |
| Running Env Steps   | 2470000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 9.12      |
| Running Update Time | 494       |
-----------------------------------
--2024-08-12 01:03:04.465052 UTC---
| Itration            | 495       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -8.27e+06 |
| Running Env Steps   | 2475000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 8.91      |
| Running Update Time | 495       |
-----------------------------------
--2024-08-12 01:04:46.554215 UTC---
| Itration            | 496       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -5.74e+06 |
| Running Env Steps   | 2480000   |
| Running Forward KL  | 14.4      |
| Running Reverse KL  | 9.46      |
| Running Update Time | 496       |
-----------------------------------
--2024-08-12 01:06:28.466863 UTC---
| Itration            | 497       |
| PAGAR Loss          | -2.49e+08 |
| Real Det Return     | 1.26e+04  |
| Real Sto Return     | 1.21e+04  |
| Reward Loss         | -5.31e+06 |
| Running Env Steps   | 2485000   |
| Running Forward KL  | 13.5      |
| Running Reverse KL  | 8.77      |
| Running Update Time | 497       |
-----------------------------------
--2024-08-12 01:08:10.322178 UTC---
| Itration            | 498       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -5.24e+06 |
| Running Env Steps   | 2490000   |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 9.51      |
| Running Update Time | 498       |
-----------------------------------
--2024-08-12 01:09:53.005276 UTC--
| Itration            | 499      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.2e+04  |
| Real Sto Return     | 1.14e+04 |
| Reward Loss         | -5.8e+06 |
| Running Env Steps   | 2495000  |
| Running Forward KL  | 14.1     |
| Running Reverse KL  | 8.96     |
| Running Update Time | 499      |
----------------------------------
--2024-08-12 01:11:34.825439 UTC--
| Itration            | 500      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.22e+04 |
| Real Sto Return     | 1.18e+04 |
| Reward Loss         | -4.8e+06 |
| Running Env Steps   | 2500000  |
| Running Forward KL  | 14.1     |
| Running Reverse KL  | 10       |
| Running Update Time | 500      |
----------------------------------
--2024-08-12 01:13:17.585633 UTC--
| Itration            | 501      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.12e+04 |
| Real Sto Return     | 8.93e+03 |
| Reward Loss         | -9.6e+06 |
| Running Env Steps   | 2505000  |
| Running Forward KL  | 14.8     |
| Running Reverse KL  | 9.64     |
| Running Update Time | 501      |
----------------------------------
--2024-08-12 01:14:59.830138 UTC---
| Itration            | 502       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -7.56e+06 |
| Running Env Steps   | 2510000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 9.96      |
| Running Update Time | 502       |
-----------------------------------
--2024-08-12 01:16:42.314405 UTC--
| Itration            | 503      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.18e+04 |
| Real Sto Return     | 1.12e+04 |
| Reward Loss         | -5.6e+06 |
| Running Env Steps   | 2515000  |
| Running Forward KL  | 13.9     |
| Running Reverse KL  | 9.32     |
| Running Update Time | 503      |
----------------------------------
--2024-08-12 01:18:26.587076 UTC---
| Itration            | 504       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.85e+06 |
| Running Env Steps   | 2520000   |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 9.22      |
| Running Update Time | 504       |
-----------------------------------
--2024-08-12 01:20:09.906408 UTC---
| Itration            | 505       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.03e+04  |
| Real Sto Return     | 9.02e+03  |
| Reward Loss         | -8.45e+06 |
| Running Env Steps   | 2525000   |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 9.38      |
| Running Update Time | 505       |
-----------------------------------
--2024-08-12 01:21:52.908672 UTC---
| Itration            | 506       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -6.49e+06 |
| Running Env Steps   | 2530000   |
| Running Forward KL  | 13.3      |
| Running Reverse KL  | 8.24      |
| Running Update Time | 506       |
-----------------------------------
--2024-08-12 01:23:37.202311 UTC---
| Itration            | 507       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -6.26e+06 |
| Running Env Steps   | 2535000   |
| Running Forward KL  | 14        |
| Running Reverse KL  | 9.27      |
| Running Update Time | 507       |
-----------------------------------
--2024-08-12 01:25:18.985255 UTC---
| Itration            | 508       |
| PAGAR Loss          | -2.42e+07 |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -6.43e+06 |
| Running Env Steps   | 2540000   |
| Running Forward KL  | 13.8      |
| Running Reverse KL  | 9.31      |
| Running Update Time | 508       |
-----------------------------------
--2024-08-12 01:27:00.908242 UTC---
| Itration            | 509       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -4.29e+06 |
| Running Env Steps   | 2545000   |
| Running Forward KL  | 13.3      |
| Running Reverse KL  | 8.51      |
| Running Update Time | 509       |
-----------------------------------
--2024-08-12 01:28:43.679518 UTC---
| Itration            | 510       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -7.61e+06 |
| Running Env Steps   | 2550000   |
| Running Forward KL  | 13.8      |
| Running Reverse KL  | 8.84      |
| Running Update Time | 510       |
-----------------------------------
--2024-08-12 01:30:25.183344 UTC---
| Itration            | 511       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.14e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -6.82e+06 |
| Running Env Steps   | 2555000   |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 9.1       |
| Running Update Time | 511       |
-----------------------------------
--2024-08-12 01:32:06.586630 UTC---
| Itration            | 512       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -4.61e+06 |
| Running Env Steps   | 2560000   |
| Running Forward KL  | 13.8      |
| Running Reverse KL  | 9.43      |
| Running Update Time | 512       |
-----------------------------------
--2024-08-12 01:33:50.535131 UTC---
| Itration            | 513       |
| PAGAR Loss          | 9.36e+07  |
| Real Det Return     | 1.13e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -7.32e+06 |
| Running Env Steps   | 2565000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 9.6       |
| Running Update Time | 513       |
-----------------------------------
--2024-08-12 01:35:32.347256 UTC---
| Itration            | 514       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -1.05e+07 |
| Running Env Steps   | 2570000   |
| Running Forward KL  | 14.1      |
| Running Reverse KL  | 7.89      |
| Running Update Time | 514       |
-----------------------------------
--2024-08-12 01:37:12.032534 UTC---
| Itration            | 515       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -7.75e+06 |
| Running Env Steps   | 2575000   |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 9.4       |
| Running Update Time | 515       |
-----------------------------------
--2024-08-12 01:38:51.180697 UTC---
| Itration            | 516       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -4.65e+06 |
| Running Env Steps   | 2580000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 9.01      |
| Running Update Time | 516       |
-----------------------------------
--2024-08-12 01:40:32.779001 UTC---
| Itration            | 517       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.04e+06 |
| Running Env Steps   | 2585000   |
| Running Forward KL  | 13.7      |
| Running Reverse KL  | 9.17      |
| Running Update Time | 517       |
-----------------------------------
--2024-08-12 01:42:14.480576 UTC---
| Itration            | 518       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -6.08e+06 |
| Running Env Steps   | 2590000   |
| Running Forward KL  | 13.1      |
| Running Reverse KL  | 8.53      |
| Running Update Time | 518       |
-----------------------------------
--2024-08-12 01:43:56.228752 UTC---
| Itration            | 519       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.08e+04  |
| Reward Loss         | -5.96e+06 |
| Running Env Steps   | 2595000   |
| Running Forward KL  | 13.3      |
| Running Reverse KL  | 9.1       |
| Running Update Time | 519       |
-----------------------------------
--2024-08-12 01:45:37.982604 UTC--
| Itration            | 520      |
| PAGAR Loss          | 2.25e+08 |
| Real Det Return     | 1.17e+04 |
| Real Sto Return     | 1.12e+04 |
| Reward Loss         | -6.6e+06 |
| Running Env Steps   | 2600000  |
| Running Forward KL  | 13.9     |
| Running Reverse KL  | 9.55     |
| Running Update Time | 520      |
----------------------------------
--2024-08-12 01:47:19.731722 UTC---
| Itration            | 521       |
| PAGAR Loss          | 7.05e+07  |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -6.61e+06 |
| Running Env Steps   | 2605000   |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 10        |
| Running Update Time | 521       |
-----------------------------------
--2024-08-12 01:49:01.695330 UTC---
| Itration            | 522       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -4.82e+06 |
| Running Env Steps   | 2610000   |
| Running Forward KL  | 13.5      |
| Running Reverse KL  | 8.98      |
| Running Update Time | 522       |
-----------------------------------
--2024-08-12 01:50:43.382358 UTC---
| Itration            | 523       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -6.18e+06 |
| Running Env Steps   | 2615000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 9.55      |
| Running Update Time | 523       |
-----------------------------------
--2024-08-12 01:52:25.334520 UTC---
| Itration            | 524       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.19e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.53e+06 |
| Running Env Steps   | 2620000   |
| Running Forward KL  | 13.7      |
| Running Reverse KL  | 8.85      |
| Running Update Time | 524       |
-----------------------------------
--2024-08-12 01:54:11.208939 UTC---
| Itration            | 525       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -5.65e+06 |
| Running Env Steps   | 2625000   |
| Running Forward KL  | 13.8      |
| Running Reverse KL  | 9.37      |
| Running Update Time | 525       |
-----------------------------------
--2024-08-12 01:55:54.551815 UTC---
| Itration            | 526       |
| PAGAR Loss          | 3.09e+07  |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.11e+04  |
| Reward Loss         | -7.62e+06 |
| Running Env Steps   | 2630000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 9.77      |
| Running Update Time | 526       |
-----------------------------------
--2024-08-12 01:57:39.642027 UTC---
| Itration            | 527       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1e+04     |
| Real Sto Return     | 9.24e+03  |
| Reward Loss         | -1.64e+07 |
| Running Env Steps   | 2635000   |
| Running Forward KL  | 14.3      |
| Running Reverse KL  | 7.94      |
| Running Update Time | 527       |
-----------------------------------
--2024-08-12 01:59:22.189874 UTC---
| Itration            | 528       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.1e+04   |
| Real Sto Return     | 1.04e+04  |
| Reward Loss         | -8.69e+06 |
| Running Env Steps   | 2640000   |
| Running Forward KL  | 13.9      |
| Running Reverse KL  | 9.08      |
| Running Update Time | 528       |
-----------------------------------
--2024-08-12 02:01:04.182552 UTC---
| Itration            | 529       |
| PAGAR Loss          | 2.16e+08  |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -4.77e+06 |
| Running Env Steps   | 2645000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 8.82      |
| Running Update Time | 529       |
-----------------------------------
--2024-08-12 02:02:46.392583 UTC--
| Itration            | 530      |
| PAGAR Loss          | 3.78e+07 |
| Real Det Return     | 1.18e+04 |
| Real Sto Return     | 1.11e+04 |
| Reward Loss         | -5.5e+06 |
| Running Env Steps   | 2650000  |
| Running Forward KL  | 13.8     |
| Running Reverse KL  | 9.25     |
| Running Update Time | 530      |
----------------------------------
--2024-08-12 02:04:30.194855 UTC---
| Itration            | 531       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.26e+06 |
| Running Env Steps   | 2655000   |
| Running Forward KL  | 13        |
| Running Reverse KL  | 8.71      |
| Running Update Time | 531       |
-----------------------------------
--2024-08-12 02:06:12.079669 UTC---
| Itration            | 532       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.92e+06 |
| Running Env Steps   | 2660000   |
| Running Forward KL  | 13.1      |
| Running Reverse KL  | 8.98      |
| Running Update Time | 532       |
-----------------------------------
--2024-08-12 02:07:53.502378 UTC---
| Itration            | 533       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.64e+06 |
| Running Env Steps   | 2665000   |
| Running Forward KL  | 13.4      |
| Running Reverse KL  | 8.84      |
| Running Update Time | 533       |
-----------------------------------
--2024-08-12 02:09:35.107521 UTC---
| Itration            | 534       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -6.09e+06 |
| Running Env Steps   | 2670000   |
| Running Forward KL  | 13.4      |
| Running Reverse KL  | 9.22      |
| Running Update Time | 534       |
-----------------------------------
--2024-08-12 02:11:16.910596 UTC---
| Itration            | 535       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -4.79e+06 |
| Running Env Steps   | 2675000   |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 9.03      |
| Running Update Time | 535       |
-----------------------------------
--2024-08-12 02:12:58.663517 UTC---
| Itration            | 536       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.46e+06 |
| Running Env Steps   | 2680000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 9.11      |
| Running Update Time | 536       |
-----------------------------------
--2024-08-12 02:14:40.605320 UTC---
| Itration            | 537       |
| PAGAR Loss          | 2.48e+07  |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.92e+06 |
| Running Env Steps   | 2685000   |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 8.89      |
| Running Update Time | 537       |
-----------------------------------
--2024-08-12 02:16:22.131316 UTC---
| Itration            | 538       |
| PAGAR Loss          | -1.54e+08 |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.08e+06 |
| Running Env Steps   | 2690000   |
| Running Forward KL  | 12.9      |
| Running Reverse KL  | 9.2       |
| Running Update Time | 538       |
-----------------------------------
--2024-08-12 02:18:04.065060 UTC---
| Itration            | 539       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.42e+06 |
| Running Env Steps   | 2695000   |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 9.26      |
| Running Update Time | 539       |
-----------------------------------
--2024-08-12 02:19:46.917988 UTC---
| Itration            | 540       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -5.52e+06 |
| Running Env Steps   | 2700000   |
| Running Forward KL  | 12.9      |
| Running Reverse KL  | 7.88      |
| Running Update Time | 540       |
-----------------------------------
--2024-08-12 02:21:28.048052 UTC--
| Itration            | 541      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.03e+04 |
| Real Sto Return     | 1.02e+04 |
| Reward Loss         | -1.3e+07 |
| Running Env Steps   | 2705000  |
| Running Forward KL  | 13.3     |
| Running Reverse KL  | 7.7      |
| Running Update Time | 541      |
----------------------------------
--2024-08-12 02:23:07.660832 UTC--
| Itration            | 542      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.13e+04 |
| Real Sto Return     | 1.08e+04 |
| Reward Loss         | -7e+06   |
| Running Env Steps   | 2710000  |
| Running Forward KL  | 13.5     |
| Running Reverse KL  | 9.07     |
| Running Update Time | 542      |
----------------------------------
--2024-08-12 02:24:48.594640 UTC---
| Itration            | 543       |
| PAGAR Loss          | 3.13e+07  |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.46e+06 |
| Running Env Steps   | 2715000   |
| Running Forward KL  | 13.3      |
| Running Reverse KL  | 8.99      |
| Running Update Time | 543       |
-----------------------------------
--2024-08-12 02:26:29.925867 UTC---
| Itration            | 544       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -3.92e+06 |
| Running Env Steps   | 2720000   |
| Running Forward KL  | 12.8      |
| Running Reverse KL  | 9.03      |
| Running Update Time | 544       |
-----------------------------------
--2024-08-12 02:28:12.469011 UTC---
| Itration            | 545       |
| PAGAR Loss          | -6.01e+05 |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.19e+04  |
| Reward Loss         | -4.42e+06 |
| Running Env Steps   | 2725000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 9.35      |
| Running Update Time | 545       |
-----------------------------------
--2024-08-12 02:29:53.976219 UTC---
| Itration            | 546       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -5.66e+06 |
| Running Env Steps   | 2730000   |
| Running Forward KL  | 13.1      |
| Running Reverse KL  | 8.72      |
| Running Update Time | 546       |
-----------------------------------
--2024-08-12 02:31:35.227356 UTC---
| Itration            | 547       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -4.59e+06 |
| Running Env Steps   | 2735000   |
| Running Forward KL  | 12.8      |
| Running Reverse KL  | 8.91      |
| Running Update Time | 547       |
-----------------------------------
--2024-08-12 02:33:16.794561 UTC---
| Itration            | 548       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -5.53e+06 |
| Running Env Steps   | 2740000   |
| Running Forward KL  | 13.3      |
| Running Reverse KL  | 9.45      |
| Running Update Time | 548       |
-----------------------------------
--2024-08-12 02:34:58.243820 UTC---
| Itration            | 549       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -5.15e+06 |
| Running Env Steps   | 2745000   |
| Running Forward KL  | 12.8      |
| Running Reverse KL  | 9.04      |
| Running Update Time | 549       |
-----------------------------------
--2024-08-12 02:36:39.846803 UTC--
| Itration            | 550      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.21e+04 |
| Real Sto Return     | 1.15e+04 |
| Reward Loss         | -5.6e+06 |
| Running Env Steps   | 2750000  |
| Running Forward KL  | 13       |
| Running Reverse KL  | 8.75     |
| Running Update Time | 550      |
----------------------------------
--2024-08-12 02:38:20.391425 UTC---
| Itration            | 551       |
| PAGAR Loss          | 2.3e+08   |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.83e+06 |
| Running Env Steps   | 2755000   |
| Running Forward KL  | 13        |
| Running Reverse KL  | 9.19      |
| Running Update Time | 551       |
-----------------------------------
--2024-08-12 02:40:01.883615 UTC---
| Itration            | 552       |
| PAGAR Loss          | -1.47e+07 |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -6.5e+06  |
| Running Env Steps   | 2760000   |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 9.34      |
| Running Update Time | 552       |
-----------------------------------
--2024-08-12 02:41:43.522017 UTC---
| Itration            | 553       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.83e+06 |
| Running Env Steps   | 2765000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 9.49      |
| Running Update Time | 553       |
-----------------------------------
--2024-08-12 02:43:25.341630 UTC---
| Itration            | 554       |
| PAGAR Loss          | -1.38e+08 |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -6.15e+06 |
| Running Env Steps   | 2770000   |
| Running Forward KL  | 13.3      |
| Running Reverse KL  | 9.3       |
| Running Update Time | 554       |
-----------------------------------
--2024-08-12 02:45:07.071667 UTC---
| Itration            | 555       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 1.04e+04  |
| Reward Loss         | -7.02e+06 |
| Running Env Steps   | 2775000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 9.05      |
| Running Update Time | 555       |
-----------------------------------
--2024-08-12 02:46:48.471850 UTC---
| Itration            | 556       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -4.67e+06 |
| Running Env Steps   | 2780000   |
| Running Forward KL  | 12.9      |
| Running Reverse KL  | 9.31      |
| Running Update Time | 556       |
-----------------------------------
--2024-08-12 02:48:30.650879 UTC---
| Itration            | 557       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -5.18e+06 |
| Running Env Steps   | 2785000   |
| Running Forward KL  | 12.9      |
| Running Reverse KL  | 8.69      |
| Running Update Time | 557       |
-----------------------------------
--2024-08-12 02:50:12.082519 UTC---
| Itration            | 558       |
| PAGAR Loss          | 6.84e+07  |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -6.36e+06 |
| Running Env Steps   | 2790000   |
| Running Forward KL  | 13        |
| Running Reverse KL  | 9.32      |
| Running Update Time | 558       |
-----------------------------------
--2024-08-12 02:51:53.927305 UTC---
| Itration            | 559       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -5.42e+06 |
| Running Env Steps   | 2795000   |
| Running Forward KL  | 13.5      |
| Running Reverse KL  | 9.36      |
| Running Update Time | 559       |
-----------------------------------
--2024-08-12 02:53:35.625145 UTC---
| Itration            | 560       |
| PAGAR Loss          | -1.87e+08 |
| Real Det Return     | 1.18e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.58e+06 |
| Running Env Steps   | 2800000   |
| Running Forward KL  | 12.2      |
| Running Reverse KL  | 8.5       |
| Running Update Time | 560       |
-----------------------------------
--2024-08-12 02:55:17.735413 UTC---
| Itration            | 561       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1e+04     |
| Real Sto Return     | 9.59e+03  |
| Reward Loss         | -1.18e+07 |
| Running Env Steps   | 2805000   |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 9.72      |
| Running Update Time | 561       |
-----------------------------------
--2024-08-12 02:57:01.520487 UTC---
| Itration            | 562       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.25e+06 |
| Running Env Steps   | 2810000   |
| Running Forward KL  | 12.6      |
| Running Reverse KL  | 8.95      |
| Running Update Time | 562       |
-----------------------------------
--2024-08-12 02:58:44.825126 UTC---
| Itration            | 563       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -6.58e+06 |
| Running Env Steps   | 2815000   |
| Running Forward KL  | 13.3      |
| Running Reverse KL  | 9.6       |
| Running Update Time | 563       |
-----------------------------------
--2024-08-12 03:00:26.159458 UTC---
| Itration            | 564       |
| PAGAR Loss          | 8.4e+07   |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -6.38e+06 |
| Running Env Steps   | 2820000   |
| Running Forward KL  | 12.8      |
| Running Reverse KL  | 9.13      |
| Running Update Time | 564       |
-----------------------------------
--2024-08-12 03:02:07.735839 UTC---
| Itration            | 565       |
| PAGAR Loss          | 1.08e+09  |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.87e+06 |
| Running Env Steps   | 2825000   |
| Running Forward KL  | 12.5      |
| Running Reverse KL  | 8.54      |
| Running Update Time | 565       |
-----------------------------------
--2024-08-12 03:03:49.694197 UTC---
| Itration            | 566       |
| PAGAR Loss          | 1.1e+07   |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -7.37e+06 |
| Running Env Steps   | 2830000   |
| Running Forward KL  | 12.5      |
| Running Reverse KL  | 7.62      |
| Running Update Time | 566       |
-----------------------------------
--2024-08-12 03:05:30.995314 UTC---
| Itration            | 567       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -4.93e+06 |
| Running Env Steps   | 2835000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 8.66      |
| Running Update Time | 567       |
-----------------------------------
--2024-08-12 03:07:13.729027 UTC---
| Itration            | 568       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -6.12e+06 |
| Running Env Steps   | 2840000   |
| Running Forward KL  | 12.5      |
| Running Reverse KL  | 7.6       |
| Running Update Time | 568       |
-----------------------------------
--2024-08-12 03:08:55.051501 UTC---
| Itration            | 569       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.16e+04  |
| Real Sto Return     | 1.15e+04  |
| Reward Loss         | -5.01e+06 |
| Running Env Steps   | 2845000   |
| Running Forward KL  | 12.7      |
| Running Reverse KL  | 8.95      |
| Running Update Time | 569       |
-----------------------------------
--2024-08-12 03:10:36.555701 UTC---
| Itration            | 570       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.2e+04   |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -7.05e+06 |
| Running Env Steps   | 2850000   |
| Running Forward KL  | 12.9      |
| Running Reverse KL  | 8.35      |
| Running Update Time | 570       |
-----------------------------------
--2024-08-12 03:12:18.481564 UTC---
| Itration            | 571       |
| PAGAR Loss          | 7.41e+05  |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -4.92e+06 |
| Running Env Steps   | 2855000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 9.2       |
| Running Update Time | 571       |
-----------------------------------
--2024-08-12 03:13:58.013750 UTC---
| Itration            | 572       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -5.57e+06 |
| Running Env Steps   | 2860000   |
| Running Forward KL  | 13        |
| Running Reverse KL  | 8.73      |
| Running Update Time | 572       |
-----------------------------------
--2024-08-12 03:15:39.890893 UTC---
| Itration            | 573       |
| PAGAR Loss          | 9.04e+07  |
| Real Det Return     | 1.11e+04  |
| Real Sto Return     | 1.07e+04  |
| Reward Loss         | -7.28e+06 |
| Running Env Steps   | 2865000   |
| Running Forward KL  | 13.5      |
| Running Reverse KL  | 8.71      |
| Running Update Time | 573       |
-----------------------------------
--2024-08-12 03:17:24.259701 UTC---
| Itration            | 574       |
| PAGAR Loss          | -2.28e+08 |
| Real Det Return     | 1.27e+04  |
| Real Sto Return     | 1.2e+04   |
| Reward Loss         | -4.31e+06 |
| Running Env Steps   | 2870000   |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 8.85      |
| Running Update Time | 574       |
-----------------------------------
--2024-08-12 03:19:07.779216 UTC--
| Itration            | 575      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.15e+04 |
| Real Sto Return     | 1.11e+04 |
| Reward Loss         | -7.9e+06 |
| Running Env Steps   | 2875000  |
| Running Forward KL  | 12.5     |
| Running Reverse KL  | 8.21     |
| Running Update Time | 575      |
----------------------------------
--2024-08-12 03:20:50.210002 UTC---
| Itration            | 576       |
| PAGAR Loss          | -1.15e+07 |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.18e+04  |
| Reward Loss         | -5.7e+06  |
| Running Env Steps   | 2880000   |
| Running Forward KL  | 13.2      |
| Running Reverse KL  | 9.13      |
| Running Update Time | 576       |
-----------------------------------
--2024-08-12 03:22:35.512201 UTC---
| Itration            | 577       |
| PAGAR Loss          | 5.39e+07  |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -5.14e+06 |
| Running Env Steps   | 2885000   |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 8.85      |
| Running Update Time | 577       |
-----------------------------------
--2024-08-12 03:24:19.768198 UTC---
| Itration            | 578       |
| PAGAR Loss          | 2.25e+07  |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -4.21e+06 |
| Running Env Steps   | 2890000   |
| Running Forward KL  | 12.8      |
| Running Reverse KL  | 8.73      |
| Running Update Time | 578       |
-----------------------------------
--2024-08-12 03:26:04.809299 UTC---
| Itration            | 579       |
| PAGAR Loss          | 1.24e+08  |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -7.98e+06 |
| Running Env Steps   | 2895000   |
| Running Forward KL  | 12.3      |
| Running Reverse KL  | 8.2       |
| Running Update Time | 579       |
-----------------------------------
--2024-08-12 03:27:47.877657 UTC---
| Itration            | 580       |
| PAGAR Loss          | 8.84e+07  |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.13e+04  |
| Reward Loss         | -5.49e+06 |
| Running Env Steps   | 2900000   |
| Running Forward KL  | 12.6      |
| Running Reverse KL  | 8.91      |
| Running Update Time | 580       |
-----------------------------------
--2024-08-12 03:29:29.678088 UTC---
| Itration            | 581       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.21e+04  |
| Reward Loss         | -3.82e+06 |
| Running Env Steps   | 2905000   |
| Running Forward KL  | 12        |
| Running Reverse KL  | 8.62      |
| Running Update Time | 581       |
-----------------------------------
--2024-08-12 03:31:09.556583 UTC---
| Itration            | 582       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.19e+04  |
| Reward Loss         | -4.56e+06 |
| Running Env Steps   | 2910000   |
| Running Forward KL  | 13        |
| Running Reverse KL  | 9.38      |
| Running Update Time | 582       |
-----------------------------------
--2024-08-12 03:32:48.786743 UTC---
| Itration            | 583       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.12e+04  |
| Real Sto Return     | 1.1e+04   |
| Reward Loss         | -6.64e+06 |
| Running Env Steps   | 2915000   |
| Running Forward KL  | 12.5      |
| Running Reverse KL  | 8.16      |
| Running Update Time | 583       |
-----------------------------------
--2024-08-12 03:34:28.011872 UTC---
| Itration            | 584       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.26e+04  |
| Real Sto Return     | 1.19e+04  |
| Reward Loss         | -4.97e+06 |
| Running Env Steps   | 2920000   |
| Running Forward KL  | 11.9      |
| Running Reverse KL  | 8.67      |
| Running Update Time | 584       |
-----------------------------------
--2024-08-12 03:36:07.303870 UTC---
| Itration            | 585       |
| PAGAR Loss          | -1.73e+08 |
| Real Det Return     | 1.26e+04  |
| Real Sto Return     | 1.21e+04  |
| Reward Loss         | -4.04e+06 |
| Running Env Steps   | 2925000   |
| Running Forward KL  | 12.5      |
| Running Reverse KL  | 8.83      |
| Running Update Time | 585       |
-----------------------------------
--2024-08-12 03:37:46.610017 UTC---
| Itration            | 586       |
| PAGAR Loss          | 6.12e+07  |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -4.26e+06 |
| Running Env Steps   | 2930000   |
| Running Forward KL  | 12.6      |
| Running Reverse KL  | 8.48      |
| Running Update Time | 586       |
-----------------------------------
--2024-08-12 03:39:30.929089 UTC--
| Itration            | 587      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.26e+04 |
| Real Sto Return     | 1.2e+04  |
| Reward Loss         | -4.3e+06 |
| Running Env Steps   | 2935000  |
| Running Forward KL  | 12.2     |
| Running Reverse KL  | 8.54     |
| Running Update Time | 587      |
----------------------------------
--2024-08-12 03:41:11.655702 UTC---
| Itration            | 588       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.26e+04  |
| Real Sto Return     | 1.19e+04  |
| Reward Loss         | -4.14e+06 |
| Running Env Steps   | 2940000   |
| Running Forward KL  | 12.1      |
| Running Reverse KL  | 8.42      |
| Running Update Time | 588       |
-----------------------------------
--2024-08-12 03:42:53.564970 UTC---
| Itration            | 589       |
| PAGAR Loss          | 3.34e+07  |
| Real Det Return     | 1.25e+04  |
| Real Sto Return     | 1.19e+04  |
| Reward Loss         | -4.53e+06 |
| Running Env Steps   | 2945000   |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 9.13      |
| Running Update Time | 589       |
-----------------------------------
--2024-08-12 03:44:35.294134 UTC---
| Itration            | 590       |
| PAGAR Loss          | -4.2e+07  |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -6.68e+06 |
| Running Env Steps   | 2950000   |
| Running Forward KL  | 12.1      |
| Running Reverse KL  | 8.03      |
| Running Update Time | 590       |
-----------------------------------
--2024-08-12 03:46:17.348267 UTC---
| Itration            | 591       |
| PAGAR Loss          | 1.1e+08   |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.12e+04  |
| Reward Loss         | -4.65e+06 |
| Running Env Steps   | 2955000   |
| Running Forward KL  | 12.5      |
| Running Reverse KL  | 8.51      |
| Running Update Time | 591       |
-----------------------------------
--2024-08-12 03:47:59.735760 UTC---
| Itration            | 592       |
| PAGAR Loss          | 1.76e+07  |
| Real Det Return     | 1.26e+04  |
| Real Sto Return     | 1.2e+04   |
| Reward Loss         | -5.21e+06 |
| Running Env Steps   | 2960000   |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 8.73      |
| Running Update Time | 592       |
-----------------------------------
--2024-08-12 03:49:41.204372 UTC---
| Itration            | 593       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.17e+04  |
| Real Sto Return     | 1.09e+04  |
| Reward Loss         | -9.53e+06 |
| Running Env Steps   | 2965000   |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 8.2       |
| Running Update Time | 593       |
-----------------------------------
--2024-08-12 03:51:22.918487 UTC--
| Itration            | 594      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.23e+04 |
| Real Sto Return     | 1.17e+04 |
| Reward Loss         | -4.7e+06 |
| Running Env Steps   | 2970000  |
| Running Forward KL  | 12.1     |
| Running Reverse KL  | 9.12     |
| Running Update Time | 594      |
----------------------------------
--2024-08-12 03:53:04.671084 UTC---
| Itration            | 595       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.22e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -4.73e+06 |
| Running Env Steps   | 2975000   |
| Running Forward KL  | 12.9      |
| Running Reverse KL  | 9.11      |
| Running Update Time | 595       |
-----------------------------------
--2024-08-12 03:54:45.731061 UTC---
| Itration            | 596       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.21e+04  |
| Real Sto Return     | 1.16e+04  |
| Reward Loss         | -5.16e+06 |
| Running Env Steps   | 2980000   |
| Running Forward KL  | 12.2      |
| Running Reverse KL  | 8.42      |
| Running Update Time | 596       |
-----------------------------------
--2024-08-12 03:56:27.032473 UTC--
| Itration            | 597      |
| PAGAR Loss          | nan      |
| Real Det Return     | 1.27e+04 |
| Real Sto Return     | 1.2e+04  |
| Reward Loss         | -4.6e+06 |
| Running Env Steps   | 2985000  |
| Running Forward KL  | 12.7     |
| Running Reverse KL  | 8.7      |
| Running Update Time | 597      |
----------------------------------
--2024-08-12 03:58:09.685536 UTC---
| Itration            | 598       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.24e+04  |
| Real Sto Return     | 1.14e+04  |
| Reward Loss         | -4.07e+06 |
| Running Env Steps   | 2990000   |
| Running Forward KL  | 11.8      |
| Running Reverse KL  | 8.99      |
| Running Update Time | 598       |
-----------------------------------
--2024-08-12 03:59:53.429839 UTC---
| Itration            | 599       |
| PAGAR Loss          | nan       |
| Real Det Return     | 1.23e+04  |
| Real Sto Return     | 1.17e+04  |
| Reward Loss         | -4.72e+06 |
| Running Env Steps   | 2995000   |
| Running Forward KL  | 12.3      |
| Running Reverse KL  | 8.53      |
| Running Update Time | 599       |
-----------------------------------
