Logging to logs/HopperFH-v0/exp-4/pagar_fkl/2024_08_11_06_05_53
--2024-08-11 06:08:02.011826 UTC--
| Itration            | 0        |
| PAGAR Loss          | 4.02e+03 |
| Real Det Return     | 235      |
| Real Sto Return     | 227      |
| Reward Loss         | 4.53e+05 |
| Running Env Steps   | 0        |
| Running Forward KL  | 19.6     |
| Running Reverse KL  | 241      |
| Running Update Time | 0        |
----------------------------------
--2024-08-11 06:10:18.938857 UTC--
| Itration            | 1        |
| PAGAR Loss          | -304     |
| Real Det Return     | 284      |
| Real Sto Return     | 284      |
| Reward Loss         | 2.69e+05 |
| Running Env Steps   | 5000     |
| Running Forward KL  | 19.3     |
| Running Reverse KL  | 233      |
| Running Update Time | 1        |
----------------------------------
--2024-08-11 06:12:33.947525 UTC--
| Itration            | 2        |
| PAGAR Loss          | -649     |
| Real Det Return     | 281      |
| Real Sto Return     | 274      |
| Reward Loss         | 1.94e+06 |
| Running Env Steps   | 10000    |
| Running Forward KL  | 18.8     |
| Running Reverse KL  | 238      |
| Running Update Time | 2        |
----------------------------------
--2024-08-11 06:14:53.410333 UTC--
| Itration            | 3        |
| PAGAR Loss          | -496     |
| Real Det Return     | 442      |
| Real Sto Return     | 317      |
| Reward Loss         | 1.26e+06 |
| Running Env Steps   | 15000    |
| Running Forward KL  | 19       |
| Running Reverse KL  | 233      |
| Running Update Time | 3        |
----------------------------------
--2024-08-11 06:17:51.385641 UTC--
| Itration            | 4        |
| PAGAR Loss          | -568     |
| Real Det Return     | 1.42e+03 |
| Real Sto Return     | 444      |
| Reward Loss         | 1.05e+06 |
| Running Env Steps   | 20000    |
| Running Forward KL  | 19.5     |
| Running Reverse KL  | 204      |
| Running Update Time | 4        |
----------------------------------
--2024-08-11 06:21:31.380995 UTC--
| Itration            | 5        |
| PAGAR Loss          | 192      |
| Real Det Return     | 1.26e+03 |
| Real Sto Return     | 1.33e+03 |
| Reward Loss         | 7.9e+05  |
| Running Env Steps   | 25000    |
| Running Forward KL  | 16.5     |
| Running Reverse KL  | 70.1     |
| Running Update Time | 5        |
----------------------------------
--2024-08-11 06:25:05.732257 UTC---
| Itration            | 6         |
| PAGAR Loss          | -2.08e+03 |
| Real Det Return     | 1.18e+03  |
| Real Sto Return     | 1.2e+03   |
| Reward Loss         | 7e+05     |
| Running Env Steps   | 30000     |
| Running Forward KL  | 16.9      |
| Running Reverse KL  | 38.9      |
| Running Update Time | 6         |
-----------------------------------
--2024-08-11 06:28:44.599930 UTC--
| Itration            | 7        |
| PAGAR Loss          | -508     |
| Real Det Return     | 1.04e+03 |
| Real Sto Return     | 1.11e+03 |
| Reward Loss         | 4.03e+05 |
| Running Env Steps   | 35000    |
| Running Forward KL  | 17.8     |
| Running Reverse KL  | 44.2     |
| Running Update Time | 7        |
----------------------------------
--2024-08-11 06:32:23.611745 UTC--
| Itration            | 8        |
| PAGAR Loss          | 472      |
| Real Det Return     | 1.04e+03 |
| Real Sto Return     | 1.04e+03 |
| Reward Loss         | 4.4e+05  |
| Running Env Steps   | 40000    |
| Running Forward KL  | 17.2     |
| Running Reverse KL  | 28.6     |
| Running Update Time | 8        |
----------------------------------
--2024-08-11 06:35:55.736769 UTC---
| Itration            | 9         |
| PAGAR Loss          | -1.35e+03 |
| Real Det Return     | 1.24e+03  |
| Real Sto Return     | 1.09e+03  |
| Reward Loss         | 6.99e+05  |
| Running Env Steps   | 45000     |
| Running Forward KL  | 18.4      |
| Running Reverse KL  | 85.8      |
| Running Update Time | 9         |
-----------------------------------
--2024-08-11 06:39:15.194219 UTC--
| Itration            | 10       |
| PAGAR Loss          | -1.3e+03 |
| Real Det Return     | 515      |
| Real Sto Return     | 1.02e+03 |
| Reward Loss         | 2e+05    |
| Running Env Steps   | 50000    |
| Running Forward KL  | 18.6     |
| Running Reverse KL  | 114      |
| Running Update Time | 10       |
----------------------------------
--2024-08-11 06:42:52.136619 UTC--
| Itration            | 11       |
| PAGAR Loss          | -393     |
| Real Det Return     | 1.06e+03 |
| Real Sto Return     | 1.34e+03 |
| Reward Loss         | 1e+06    |
| Running Env Steps   | 55000    |
| Running Forward KL  | 16.8     |
| Running Reverse KL  | 59.3     |
| Running Update Time | 11       |
----------------------------------
--2024-08-11 06:46:28.170424 UTC--
| Itration            | 12       |
| PAGAR Loss          | -972     |
| Real Det Return     | 1.04e+03 |
| Real Sto Return     | 1.27e+03 |
| Reward Loss         | 7.42e+05 |
| Running Env Steps   | 60000    |
| Running Forward KL  | 17       |
| Running Reverse KL  | 73.3     |
| Running Update Time | 12       |
----------------------------------
--2024-08-11 06:50:04.828679 UTC--
| Itration            | 13       |
| PAGAR Loss          | -195     |
| Real Det Return     | 1.04e+03 |
| Real Sto Return     | 1.25e+03 |
| Reward Loss         | 8.62e+05 |
| Running Env Steps   | 65000    |
| Running Forward KL  | 18.2     |
| Running Reverse KL  | 39.1     |
| Running Update Time | 13       |
----------------------------------
--2024-08-11 06:53:49.467816 UTC--
| Itration            | 14       |
| PAGAR Loss          | -457     |
| Real Det Return     | 1.03e+03 |
| Real Sto Return     | 1.24e+03 |
| Reward Loss         | 3.55e+05 |
| Running Env Steps   | 70000    |
| Running Forward KL  | 17.1     |
| Running Reverse KL  | 20.1     |
| Running Update Time | 14       |
----------------------------------
--2024-08-11 06:57:25.562730 UTC--
| Itration            | 15       |
| PAGAR Loss          | -231     |
| Real Det Return     | 1.04e+03 |
| Real Sto Return     | 1.03e+03 |
| Reward Loss         | 5.42e+05 |
| Running Env Steps   | 75000    |
| Running Forward KL  | 19.1     |
| Running Reverse KL  | 32.8     |
| Running Update Time | 15       |
----------------------------------
--2024-08-11 07:01:00.452378 UTC--
| Itration            | 16       |
| PAGAR Loss          | -1e+03   |
| Real Det Return     | 1.04e+03 |
| Real Sto Return     | 901      |
| Reward Loss         | 4.4e+05  |
| Running Env Steps   | 80000    |
| Running Forward KL  | 18.7     |
| Running Reverse KL  | 76.3     |
| Running Update Time | 16       |
----------------------------------
--2024-08-11 07:04:39.132724 UTC--
| Itration            | 17       |
| PAGAR Loss          | 106      |
| Real Det Return     | 1.03e+03 |
| Real Sto Return     | 1.34e+03 |
| Reward Loss         | 2.62e+05 |
| Running Env Steps   | 85000    |
| Running Forward KL  | 18.6     |
| Running Reverse KL  | 12.1     |
| Running Update Time | 17       |
----------------------------------
--2024-08-11 07:08:21.402585 UTC--
| Itration            | 18       |
| PAGAR Loss          | 303      |
| Real Det Return     | 1.16e+03 |
| Real Sto Return     | 1.34e+03 |
| Reward Loss         | 2.3e+05  |
| Running Env Steps   | 90000    |
| Running Forward KL  | 18.9     |
| Running Reverse KL  | 12.7     |
| Running Update Time | 18       |
----------------------------------
--2024-08-11 07:12:02.794756 UTC--
| Itration            | 19       |
| PAGAR Loss          | -766     |
| Real Det Return     | 1.04e+03 |
| Real Sto Return     | 1.29e+03 |
| Reward Loss         | 2.43e+05 |
| Running Env Steps   | 95000    |
| Running Forward KL  | 18.2     |
| Running Reverse KL  | 12.7     |
| Running Update Time | 19       |
----------------------------------
--2024-08-11 07:15:42.500678 UTC--
| Itration            | 20       |
| PAGAR Loss          | 323      |
| Real Det Return     | 1.01e+03 |
| Real Sto Return     | 1.04e+03 |
| Reward Loss         | 6.64e+04 |
| Running Env Steps   | 100000   |
| Running Forward KL  | 20.9     |
| Running Reverse KL  | 14.8     |
| Running Update Time | 20       |
----------------------------------
--2024-08-11 07:19:26.325635 UTC--
| Itration            | 21       |
| PAGAR Loss          | 532      |
| Real Det Return     | 1.02e+03 |
| Real Sto Return     | 1.12e+03 |
| Reward Loss         | 2.74e+04 |
| Running Env Steps   | 105000   |
| Running Forward KL  | 20.3     |
| Running Reverse KL  | 14.1     |
| Running Update Time | 21       |
----------------------------------
--2024-08-11 07:23:02.099809 UTC--
| Itration            | 22       |
| PAGAR Loss          | -206     |
| Real Det Return     | 1.03e+03 |
| Real Sto Return     | 946      |
| Reward Loss         | 1.65e+05 |
| Running Env Steps   | 110000   |
| Running Forward KL  | 20.2     |
| Running Reverse KL  | 53       |
| Running Update Time | 22       |
----------------------------------
--2024-08-11 07:26:44.238045 UTC--
| Itration            | 23       |
| PAGAR Loss          | 139      |
| Real Det Return     | 1.03e+03 |
| Real Sto Return     | 1.25e+03 |
| Reward Loss         | 3.7e+04  |
| Running Env Steps   | 115000   |
| Running Forward KL  | 19.3     |
| Running Reverse KL  | 13.1     |
| Running Update Time | 23       |
----------------------------------
--2024-08-11 07:30:24.369832 UTC---
| Itration            | 24        |
| PAGAR Loss          | -2.57     |
| Real Det Return     | 1.03e+03  |
| Real Sto Return     | 1.14e+03  |
| Reward Loss         | -2.14e+03 |
| Running Env Steps   | 120000    |
| Running Forward KL  | 18.2      |
| Running Reverse KL  | 24.6      |
| Running Update Time | 24        |
-----------------------------------
--2024-08-11 07:34:05.371337 UTC---
| Itration            | 25        |
| PAGAR Loss          | 3.99e+03  |
| Real Det Return     | 1.02e+03  |
| Real Sto Return     | 1.32e+03  |
| Reward Loss         | -6.04e+04 |
| Running Env Steps   | 125000    |
| Running Forward KL  | 18        |
| Running Reverse KL  | 12.3      |
| Running Update Time | 25        |
-----------------------------------
--2024-08-11 07:37:45.983690 UTC---
| Itration            | 26        |
| PAGAR Loss          | 192       |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.2e+03   |
| Reward Loss         | -1.15e+05 |
| Running Env Steps   | 130000    |
| Running Forward KL  | 18.8      |
| Running Reverse KL  | 27.3      |
| Running Update Time | 26        |
-----------------------------------
--2024-08-11 07:41:23.027535 UTC--
| Itration            | 27       |
| PAGAR Loss          | -691     |
| Real Det Return     | 1.03e+03 |
| Real Sto Return     | 1.02e+03 |
| Reward Loss         | -1.9e+05 |
| Running Env Steps   | 135000   |
| Running Forward KL  | 19.9     |
| Running Reverse KL  | 25.7     |
| Running Update Time | 27       |
----------------------------------
--2024-08-11 07:45:02.259549 UTC---
| Itration            | 28        |
| PAGAR Loss          | 240       |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.31e+03  |
| Reward Loss         | -1.66e+05 |
| Running Env Steps   | 140000    |
| Running Forward KL  | 18.6      |
| Running Reverse KL  | 12.8      |
| Running Update Time | 28        |
-----------------------------------
--2024-08-11 07:48:38.604696 UTC---
| Itration            | 29        |
| PAGAR Loss          | 145       |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.11e+03  |
| Reward Loss         | -2.71e+05 |
| Running Env Steps   | 145000    |
| Running Forward KL  | 19.1      |
| Running Reverse KL  | 25.6      |
| Running Update Time | 29        |
-----------------------------------
--2024-08-11 07:52:16.770266 UTC---
| Itration            | 30        |
| PAGAR Loss          | 107       |
| Real Det Return     | 1.03e+03  |
| Real Sto Return     | 1.32e+03  |
| Reward Loss         | -1.94e+05 |
| Running Env Steps   | 150000    |
| Running Forward KL  | 17.7      |
| Running Reverse KL  | 26.3      |
| Running Update Time | 30        |
-----------------------------------
--2024-08-11 07:55:56.383968 UTC---
| Itration            | 31        |
| PAGAR Loss          | 2.43e+03  |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.33e+03  |
| Reward Loss         | -1.94e+05 |
| Running Env Steps   | 155000    |
| Running Forward KL  | 17.5      |
| Running Reverse KL  | 12.1      |
| Running Update Time | 31        |
-----------------------------------
--2024-08-11 07:59:33.579731 UTC---
| Itration            | 32        |
| PAGAR Loss          | -203      |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.33e+03  |
| Reward Loss         | -2.84e+05 |
| Running Env Steps   | 160000    |
| Running Forward KL  | 17.7      |
| Running Reverse KL  | 16.2      |
| Running Update Time | 32        |
-----------------------------------
--2024-08-11 08:03:14.082844 UTC---
| Itration            | 33        |
| PAGAR Loss          | 753       |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.43e+03  |
| Reward Loss         | -2.89e+05 |
| Running Env Steps   | 165000    |
| Running Forward KL  | 17.6      |
| Running Reverse KL  | 12        |
| Running Update Time | 33        |
-----------------------------------
--2024-08-11 08:06:52.946948 UTC---
| Itration            | 34        |
| PAGAR Loss          | -30.1     |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.63e+03  |
| Reward Loss         | -2.65e+05 |
| Running Env Steps   | 170000    |
| Running Forward KL  | 16.5      |
| Running Reverse KL  | 12        |
| Running Update Time | 34        |
-----------------------------------
--2024-08-11 08:10:34.474664 UTC---
| Itration            | 35        |
| PAGAR Loss          | 27.8      |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.29e+03  |
| Reward Loss         | -4.17e+05 |
| Running Env Steps   | 175000    |
| Running Forward KL  | 17.5      |
| Running Reverse KL  | 26.8      |
| Running Update Time | 35        |
-----------------------------------
--2024-08-11 08:14:12.703716 UTC---
| Itration            | 36        |
| PAGAR Loss          | 232       |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.3e+03   |
| Reward Loss         | -4.66e+05 |
| Running Env Steps   | 180000    |
| Running Forward KL  | 18.1      |
| Running Reverse KL  | 12.5      |
| Running Update Time | 36        |
-----------------------------------
--2024-08-11 08:17:52.850168 UTC---
| Itration            | 37        |
| PAGAR Loss          | 35        |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.32e+03  |
| Reward Loss         | -4.84e+05 |
| Running Env Steps   | 185000    |
| Running Forward KL  | 18.5      |
| Running Reverse KL  | 12.5      |
| Running Update Time | 37        |
-----------------------------------
--2024-08-11 08:21:29.934368 UTC---
| Itration            | 38        |
| PAGAR Loss          | -270      |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.48e+03  |
| Reward Loss         | -4.43e+05 |
| Running Env Steps   | 190000    |
| Running Forward KL  | 16.9      |
| Running Reverse KL  | 31.9      |
| Running Update Time | 38        |
-----------------------------------
--2024-08-11 08:25:11.713862 UTC---
| Itration            | 39        |
| PAGAR Loss          | -308      |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.58e+03  |
| Reward Loss         | -4.52e+05 |
| Running Env Steps   | 195000    |
| Running Forward KL  | 16.7      |
| Running Reverse KL  | 11.5      |
| Running Update Time | 39        |
-----------------------------------
--2024-08-11 08:28:50.429997 UTC---
| Itration            | 40        |
| PAGAR Loss          | -126      |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.4e+03   |
| Reward Loss         | -5.76e+05 |
| Running Env Steps   | 200000    |
| Running Forward KL  | 17.7      |
| Running Reverse KL  | 11.9      |
| Running Update Time | 40        |
-----------------------------------
--2024-08-11 08:32:30.883098 UTC---
| Itration            | 41        |
| PAGAR Loss          | -25.3     |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.41e+03  |
| Reward Loss         | -5.46e+05 |
| Running Env Steps   | 205000    |
| Running Forward KL  | 17.7      |
| Running Reverse KL  | 11.9      |
| Running Update Time | 41        |
-----------------------------------
--2024-08-11 08:36:12.690978 UTC---
| Itration            | 42        |
| PAGAR Loss          | -296      |
| Real Det Return     | 1.06e+03  |
| Real Sto Return     | 1.47e+03  |
| Reward Loss         | -5.56e+05 |
| Running Env Steps   | 210000    |
| Running Forward KL  | 16.5      |
| Running Reverse KL  | 11.6      |
| Running Update Time | 42        |
-----------------------------------
--2024-08-11 08:39:50.548742 UTC---
| Itration            | 43        |
| PAGAR Loss          | -630      |
| Real Det Return     | 1.05e+03  |
| Real Sto Return     | 1.58e+03  |
| Reward Loss         | -5.45e+05 |
| Running Env Steps   | 215000    |
| Running Forward KL  | 16.5      |
| Running Reverse KL  | 11.5      |
| Running Update Time | 43        |
-----------------------------------
--2024-08-11 08:43:31.707054 UTC---
| Itration            | 44        |
| PAGAR Loss          | 187       |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.33e+03  |
| Reward Loss         | -6.81e+05 |
| Running Env Steps   | 220000    |
| Running Forward KL  | 17.1      |
| Running Reverse KL  | 11        |
| Running Update Time | 44        |
-----------------------------------
--2024-08-11 08:47:10.770164 UTC---
| Itration            | 45        |
| PAGAR Loss          | -6.33     |
| Real Det Return     | 1.05e+03  |
| Real Sto Return     | 1.61e+03  |
| Reward Loss         | -6.17e+05 |
| Running Env Steps   | 225000    |
| Running Forward KL  | 16.6      |
| Running Reverse KL  | 11        |
| Running Update Time | 45        |
-----------------------------------
--2024-08-11 08:50:55.604942 UTC---
| Itration            | 46        |
| PAGAR Loss          | 2.92e+03  |
| Real Det Return     | 1.05e+03  |
| Real Sto Return     | 1.65e+03  |
| Reward Loss         | -6.13e+05 |
| Running Env Steps   | 230000    |
| Running Forward KL  | 15.6      |
| Running Reverse KL  | 13.1      |
| Running Update Time | 46        |
-----------------------------------
--2024-08-11 08:54:36.815691 UTC---
| Itration            | 47        |
| PAGAR Loss          | -2.35e+05 |
| Real Det Return     | 1.12e+03  |
| Real Sto Return     | 1.47e+03  |
| Reward Loss         | -7.84e+05 |
| Running Env Steps   | 235000    |
| Running Forward KL  | 16.9      |
| Running Reverse KL  | 21.8      |
| Running Update Time | 47        |
-----------------------------------
--2024-08-11 08:58:18.442518 UTC---
| Itration            | 48        |
| PAGAR Loss          | -129      |
| Real Det Return     | 1.38e+03  |
| Real Sto Return     | 1.79e+03  |
| Reward Loss         | -5.91e+05 |
| Running Env Steps   | 240000    |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 48        |
-----------------------------------
--2024-08-11 09:02:00.877885 UTC---
| Itration            | 49        |
| PAGAR Loss          | -356      |
| Real Det Return     | 1.05e+03  |
| Real Sto Return     | 1.7e+03   |
| Reward Loss         | -6.36e+05 |
| Running Env Steps   | 245000    |
| Running Forward KL  | 15.5      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 49        |
-----------------------------------
--2024-08-11 09:05:38.019291 UTC---
| Itration            | 50        |
| PAGAR Loss          | -316      |
| Real Det Return     | 1.05e+03  |
| Real Sto Return     | 1.64e+03  |
| Reward Loss         | -6.94e+05 |
| Running Env Steps   | 250000    |
| Running Forward KL  | 15.9      |
| Running Reverse KL  | 11        |
| Running Update Time | 50        |
-----------------------------------
--2024-08-11 09:09:17.842644 UTC---
| Itration            | 51        |
| PAGAR Loss          | -6.52e+04 |
| Real Det Return     | 1.04e+03  |
| Real Sto Return     | 1.56e+03  |
| Reward Loss         | -8.39e+05 |
| Running Env Steps   | 255000    |
| Running Forward KL  | 16.5      |
| Running Reverse KL  | 32.7      |
| Running Update Time | 51        |
-----------------------------------
--2024-08-11 09:12:58.708889 UTC---
| Itration            | 52        |
| PAGAR Loss          | -359      |
| Real Det Return     | 1.31e+03  |
| Real Sto Return     | 1.66e+03  |
| Reward Loss         | -7.25e+05 |
| Running Env Steps   | 260000    |
| Running Forward KL  | 15.3      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 52        |
-----------------------------------
--2024-08-11 09:16:41.469638 UTC---
| Itration            | 53        |
| PAGAR Loss          | -212      |
| Real Det Return     | 1.26e+03  |
| Real Sto Return     | 1.72e+03  |
| Reward Loss         | -7.12e+05 |
| Running Env Steps   | 265000    |
| Running Forward KL  | 15.1      |
| Running Reverse KL  | 25.3      |
| Running Update Time | 53        |
-----------------------------------
--2024-08-11 09:20:21.946445 UTC---
| Itration            | 54        |
| PAGAR Loss          | 162       |
| Real Det Return     | 1.45e+03  |
| Real Sto Return     | 1.69e+03  |
| Reward Loss         | -8.18e+05 |
| Running Env Steps   | 270000    |
| Running Forward KL  | 15.9      |
| Running Reverse KL  | 10.9      |
| Running Update Time | 54        |
-----------------------------------
--2024-08-11 09:24:03.878886 UTC---
| Itration            | 55        |
| PAGAR Loss          | -406      |
| Real Det Return     | 1.42e+03  |
| Real Sto Return     | 1.72e+03  |
| Reward Loss         | -8.09e+05 |
| Running Env Steps   | 275000    |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 55        |
-----------------------------------
--2024-08-11 09:27:42.807536 UTC--
| Itration            | 56       |
| PAGAR Loss          | -176     |
| Real Det Return     | 1.7e+03  |
| Real Sto Return     | 1.81e+03 |
| Reward Loss         | -7.9e+05 |
| Running Env Steps   | 280000   |
| Running Forward KL  | 15.1     |
| Running Reverse KL  | 10.4     |
| Running Update Time | 56       |
----------------------------------
--2024-08-11 09:31:25.435651 UTC---
| Itration            | 57        |
| PAGAR Loss          | 166       |
| Real Det Return     | 1.67e+03  |
| Real Sto Return     | 1.85e+03  |
| Reward Loss         | -7.61e+05 |
| Running Env Steps   | 285000    |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 13.3      |
| Running Update Time | 57        |
-----------------------------------
--2024-08-11 09:35:02.923912 UTC---
| Itration            | 58        |
| PAGAR Loss          | -242      |
| Real Det Return     | 1.57e+03  |
| Real Sto Return     | 1.74e+03  |
| Reward Loss         | -8.73e+05 |
| Running Env Steps   | 290000    |
| Running Forward KL  | 15.8      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 58        |
-----------------------------------
--2024-08-11 09:38:42.651807 UTC---
| Itration            | 59        |
| PAGAR Loss          | 204       |
| Real Det Return     | 1.7e+03   |
| Real Sto Return     | 1.72e+03  |
| Reward Loss         | -8.05e+05 |
| Running Env Steps   | 295000    |
| Running Forward KL  | 15        |
| Running Reverse KL  | 11        |
| Running Update Time | 59        |
-----------------------------------
--2024-08-11 09:42:19.791348 UTC---
| Itration            | 60        |
| PAGAR Loss          | 486       |
| Real Det Return     | 1.72e+03  |
| Real Sto Return     | 1.73e+03  |
| Reward Loss         | -8.14e+05 |
| Running Env Steps   | 300000    |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 60        |
-----------------------------------
--2024-08-11 09:45:58.238978 UTC---
| Itration            | 61        |
| PAGAR Loss          | -347      |
| Real Det Return     | 1.88e+03  |
| Real Sto Return     | 1.79e+03  |
| Reward Loss         | -7.84e+05 |
| Running Env Steps   | 305000    |
| Running Forward KL  | 14.7      |
| Running Reverse KL  | 17.6      |
| Running Update Time | 61        |
-----------------------------------
--2024-08-11 09:49:40.858117 UTC---
| Itration            | 62        |
| PAGAR Loss          | 312       |
| Real Det Return     | 1.83e+03  |
| Real Sto Return     | 1.97e+03  |
| Reward Loss         | -8.06e+05 |
| Running Env Steps   | 310000    |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 10.5      |
| Running Update Time | 62        |
-----------------------------------
--2024-08-11 09:53:23.178977 UTC---
| Itration            | 63        |
| PAGAR Loss          | -100      |
| Real Det Return     | 1.91e+03  |
| Real Sto Return     | 1.99e+03  |
| Reward Loss         | -8.33e+05 |
| Running Env Steps   | 315000    |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 13.4      |
| Running Update Time | 63        |
-----------------------------------
--2024-08-11 09:57:01.995551 UTC---
| Itration            | 64        |
| PAGAR Loss          | -244      |
| Real Det Return     | 1.97e+03  |
| Real Sto Return     | 2.11e+03  |
| Reward Loss         | -7.98e+05 |
| Running Env Steps   | 320000    |
| Running Forward KL  | 13.8      |
| Running Reverse KL  | 9.97      |
| Running Update Time | 64        |
-----------------------------------
--2024-08-11 10:00:42.867830 UTC---
| Itration            | 65        |
| PAGAR Loss          | -241      |
| Real Det Return     | 2.07e+03  |
| Real Sto Return     | 1.99e+03  |
| Reward Loss         | -7.48e+05 |
| Running Env Steps   | 325000    |
| Running Forward KL  | 14        |
| Running Reverse KL  | 10.5      |
| Running Update Time | 65        |
-----------------------------------
--2024-08-11 10:04:20.250149 UTC--
| Itration            | 66       |
| PAGAR Loss          | -10.2    |
| Real Det Return     | 1.87e+03 |
| Real Sto Return     | 1.84e+03 |
| Reward Loss         | -6.6e+05 |
| Running Env Steps   | 330000   |
| Running Forward KL  | 14.3     |
| Running Reverse KL  | 42.6     |
| Running Update Time | 66       |
----------------------------------
--2024-08-11 10:08:02.030692 UTC--
| Itration            | 67       |
| PAGAR Loss          | 826      |
| Real Det Return     | 1.83e+03 |
| Real Sto Return     | 2.04e+03 |
| Reward Loss         | -8.7e+05 |
| Running Env Steps   | 335000   |
| Running Forward KL  | 14.4     |
| Running Reverse KL  | 10.8     |
| Running Update Time | 67       |
----------------------------------
--2024-08-11 10:11:42.535568 UTC---
| Itration            | 68        |
| PAGAR Loss          | -241      |
| Real Det Return     | 1.77e+03  |
| Real Sto Return     | 1.93e+03  |
| Reward Loss         | -8.01e+05 |
| Running Env Steps   | 340000    |
| Running Forward KL  | 14.5      |
| Running Reverse KL  | 11.2      |
| Running Update Time | 68        |
-----------------------------------
--2024-08-11 10:15:19.752335 UTC---
| Itration            | 69        |
| PAGAR Loss          | -3.43e+04 |
| Real Det Return     | 2e+03     |
| Real Sto Return     | 1.92e+03  |
| Reward Loss         | -9.59e+05 |
| Running Env Steps   | 345000    |
| Running Forward KL  | 13.9      |
| Running Reverse KL  | 27.9      |
| Running Update Time | 69        |
-----------------------------------
--2024-08-11 10:18:57.816089 UTC---
| Itration            | 70        |
| PAGAR Loss          | -1.83e+03 |
| Real Det Return     | 2.02e+03  |
| Real Sto Return     | 1.77e+03  |
| Reward Loss         | -8.96e+05 |
| Running Env Steps   | 350000    |
| Running Forward KL  | 13.7      |
| Running Reverse KL  | 25.7      |
| Running Update Time | 70        |
-----------------------------------
--2024-08-11 10:22:33.229410 UTC---
| Itration            | 71        |
| PAGAR Loss          | -2.42e+05 |
| Real Det Return     | 1.85e+03  |
| Real Sto Return     | 1.9e+03   |
| Reward Loss         | -1.04e+06 |
| Running Env Steps   | 355000    |
| Running Forward KL  | 14.6      |
| Running Reverse KL  | 41        |
| Running Update Time | 71        |
-----------------------------------
--2024-08-11 10:26:15.620384 UTC---
| Itration            | 72        |
| PAGAR Loss          | 959       |
| Real Det Return     | 2e+03     |
| Real Sto Return     | 2.17e+03  |
| Reward Loss         | -7.76e+05 |
| Running Env Steps   | 360000    |
| Running Forward KL  | 13.7      |
| Running Reverse KL  | 11.3      |
| Running Update Time | 72        |
-----------------------------------
--2024-08-11 10:29:52.530146 UTC---
| Itration            | 73        |
| PAGAR Loss          | 171       |
| Real Det Return     | 1.95e+03  |
| Real Sto Return     | 2.09e+03  |
| Reward Loss         | -8.07e+05 |
| Running Env Steps   | 365000    |
| Running Forward KL  | 13.7      |
| Running Reverse KL  | 10.7      |
| Running Update Time | 73        |
-----------------------------------
--2024-08-11 10:33:35.457953 UTC---
| Itration            | 74        |
| PAGAR Loss          | -571      |
| Real Det Return     | 1.97e+03  |
| Real Sto Return     | 2.11e+03  |
| Reward Loss         | -8.17e+05 |
| Running Env Steps   | 370000    |
| Running Forward KL  | 13.6      |
| Running Reverse KL  | 29.5      |
| Running Update Time | 74        |
-----------------------------------
--2024-08-11 10:37:14.060354 UTC---
| Itration            | 75        |
| PAGAR Loss          | -44.6     |
| Real Det Return     | 2.09e+03  |
| Real Sto Return     | 2.05e+03  |
| Reward Loss         | -8.75e+05 |
| Running Env Steps   | 375000    |
| Running Forward KL  | 13.5      |
| Running Reverse KL  | 19.8      |
| Running Update Time | 75        |
-----------------------------------
--2024-08-11 10:40:55.165708 UTC---
| Itration            | 76        |
| PAGAR Loss          | -2.97e+03 |
| Real Det Return     | 2.04e+03  |
| Real Sto Return     | 2.2e+03   |
| Reward Loss         | -9.48e+05 |
| Running Env Steps   | 380000    |
| Running Forward KL  | 13.9      |
| Running Reverse KL  | 18.8      |
| Running Update Time | 76        |
-----------------------------------
--2024-08-11 10:44:37.522795 UTC---
| Itration            | 77        |
| PAGAR Loss          | -1.6e+03  |
| Real Det Return     | 1.92e+03  |
| Real Sto Return     | 2.04e+03  |
| Reward Loss         | -9.81e+05 |
| Running Env Steps   | 385000    |
| Running Forward KL  | 14.2      |
| Running Reverse KL  | 11.1      |
| Running Update Time | 77        |
-----------------------------------
--2024-08-11 10:48:13.918285 UTC---
| Itration            | 78        |
| PAGAR Loss          | -138      |
| Real Det Return     | 2.2e+03   |
| Real Sto Return     | 2.17e+03  |
| Reward Loss         | -8.07e+05 |
| Running Env Steps   | 390000    |
| Running Forward KL  | 12.9      |
| Running Reverse KL  | 38.6      |
| Running Update Time | 78        |
-----------------------------------
--2024-08-11 10:51:55.941658 UTC---
| Itration            | 79        |
| PAGAR Loss          | -21.4     |
| Real Det Return     | 2.17e+03  |
| Real Sto Return     | 2.26e+03  |
| Reward Loss         | -6.04e+05 |
| Running Env Steps   | 395000    |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 27.5      |
| Running Update Time | 79        |
-----------------------------------
--2024-08-11 10:55:36.249869 UTC---
| Itration            | 80        |
| PAGAR Loss          | 49        |
| Real Det Return     | 2.18e+03  |
| Real Sto Return     | 2.29e+03  |
| Reward Loss         | -8.62e+05 |
| Running Env Steps   | 400000    |
| Running Forward KL  | 13        |
| Running Reverse KL  | 10.7      |
| Running Update Time | 80        |
-----------------------------------
--2024-08-11 10:59:18.017405 UTC---
| Itration            | 81        |
| PAGAR Loss          | -445      |
| Real Det Return     | 2.22e+03  |
| Real Sto Return     | 2.31e+03  |
| Reward Loss         | -6.82e+05 |
| Running Env Steps   | 405000    |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 81        |
-----------------------------------
--2024-08-11 11:02:58.945019 UTC---
| Itration            | 82        |
| PAGAR Loss          | 5.93      |
| Real Det Return     | 2.42e+03  |
| Real Sto Return     | 2.43e+03  |
| Reward Loss         | -5.96e+05 |
| Running Env Steps   | 410000    |
| Running Forward KL  | 11.3      |
| Running Reverse KL  | 9.69      |
| Running Update Time | 82        |
-----------------------------------
--2024-08-11 11:06:40.124363 UTC---
| Itration            | 83        |
| PAGAR Loss          | 678       |
| Real Det Return     | 2.36e+03  |
| Real Sto Return     | 2.47e+03  |
| Reward Loss         | -7.39e+05 |
| Running Env Steps   | 415000    |
| Running Forward KL  | 12.4      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 83        |
-----------------------------------
--2024-08-11 11:10:18.351717 UTC---
| Itration            | 84        |
| PAGAR Loss          | -804      |
| Real Det Return     | 2.55e+03  |
| Real Sto Return     | 2.26e+03  |
| Reward Loss         | -4.01e+05 |
| Running Env Steps   | 420000    |
| Running Forward KL  | 11.4      |
| Running Reverse KL  | 33.2      |
| Running Update Time | 84        |
-----------------------------------
--2024-08-11 11:13:56.533478 UTC---
| Itration            | 85        |
| PAGAR Loss          | 3.75      |
| Real Det Return     | 2.63e+03  |
| Real Sto Return     | 2.39e+03  |
| Reward Loss         | -6.45e+05 |
| Running Env Steps   | 425000    |
| Running Forward KL  | 11.7      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 85        |
-----------------------------------
--2024-08-11 11:17:34.677506 UTC---
| Itration            | 86        |
| PAGAR Loss          | 18.3      |
| Real Det Return     | 2.33e+03  |
| Real Sto Return     | 2.29e+03  |
| Reward Loss         | -6.92e+05 |
| Running Env Steps   | 430000    |
| Running Forward KL  | 12.1      |
| Running Reverse KL  | 29.3      |
| Running Update Time | 86        |
-----------------------------------
--2024-08-11 11:21:13.416307 UTC---
| Itration            | 87        |
| PAGAR Loss          | 544       |
| Real Det Return     | 2.44e+03  |
| Real Sto Return     | 2.36e+03  |
| Reward Loss         | -6.48e+05 |
| Running Env Steps   | 435000    |
| Running Forward KL  | 11.4      |
| Running Reverse KL  | 9.83      |
| Running Update Time | 87        |
-----------------------------------
--2024-08-11 11:24:52.959735 UTC---
| Itration            | 88        |
| PAGAR Loss          | 272       |
| Real Det Return     | 2.46e+03  |
| Real Sto Return     | 2.42e+03  |
| Reward Loss         | -7.06e+05 |
| Running Env Steps   | 440000    |
| Running Forward KL  | 11.2      |
| Running Reverse KL  | 9.9       |
| Running Update Time | 88        |
-----------------------------------
--2024-08-11 11:28:31.224462 UTC---
| Itration            | 89        |
| PAGAR Loss          | 189       |
| Real Det Return     | 2.42e+03  |
| Real Sto Return     | 2.23e+03  |
| Reward Loss         | -6.26e+05 |
| Running Env Steps   | 445000    |
| Running Forward KL  | 11.3      |
| Running Reverse KL  | 10.1      |
| Running Update Time | 89        |
-----------------------------------
--2024-08-11 11:32:12.760385 UTC---
| Itration            | 90        |
| PAGAR Loss          | 315       |
| Real Det Return     | 2.6e+03   |
| Real Sto Return     | 2.44e+03  |
| Reward Loss         | -6.52e+05 |
| Running Env Steps   | 450000    |
| Running Forward KL  | 11.5      |
| Running Reverse KL  | 11        |
| Running Update Time | 90        |
-----------------------------------
--2024-08-11 11:35:54.793793 UTC---
| Itration            | 91        |
| PAGAR Loss          | -85.6     |
| Real Det Return     | 2.48e+03  |
| Real Sto Return     | 2.56e+03  |
| Reward Loss         | -6.41e+05 |
| Running Env Steps   | 455000    |
| Running Forward KL  | 11        |
| Running Reverse KL  | 10.3      |
| Running Update Time | 91        |
-----------------------------------
--2024-08-11 11:39:35.563161 UTC---
| Itration            | 92        |
| PAGAR Loss          | 377       |
| Real Det Return     | 2.37e+03  |
| Real Sto Return     | 2.35e+03  |
| Reward Loss         | -7.07e+05 |
| Running Env Steps   | 460000    |
| Running Forward KL  | 11.4      |
| Running Reverse KL  | 10.4      |
| Running Update Time | 92        |
-----------------------------------
--2024-08-11 11:43:17.967938 UTC---
| Itration            | 93        |
| PAGAR Loss          | 8.79      |
| Real Det Return     | 2.47e+03  |
| Real Sto Return     | 2.64e+03  |
| Reward Loss         | -5.49e+05 |
| Running Env Steps   | 465000    |
| Running Forward KL  | 11.1      |
| Running Reverse KL  | 10.6      |
| Running Update Time | 93        |
-----------------------------------
--2024-08-11 11:46:58.290715 UTC---
| Itration            | 94        |
| PAGAR Loss          | -1.04e+03 |
| Real Det Return     | 2.53e+03  |
| Real Sto Return     | 2.51e+03  |
| Reward Loss         | -5.24e+05 |
| Running Env Steps   | 470000    |
| Running Forward KL  | 11.4      |
| Running Reverse KL  | 37.9      |
| Running Update Time | 94        |
-----------------------------------
--2024-08-11 11:50:37.284928 UTC---
| Itration            | 95        |
| PAGAR Loss          | 2.18e+03  |
| Real Det Return     | 2.68e+03  |
| Real Sto Return     | 2.47e+03  |
| Reward Loss         | -4.21e+05 |
| Running Env Steps   | 475000    |
| Running Forward KL  | 10.4      |
| Running Reverse KL  | 28.8      |
| Running Update Time | 95        |
-----------------------------------
--2024-08-11 11:54:18.777635 UTC--
| Itration            | 96       |
| PAGAR Loss          | 1.41e+03 |
| Real Det Return     | 2.47e+03 |
| Real Sto Return     | 2.68e+03 |
| Reward Loss         | -5.5e+05 |
| Running Env Steps   | 480000   |
| Running Forward KL  | 10.8     |
| Running Reverse KL  | 12.5     |
| Running Update Time | 96       |
----------------------------------
--2024-08-11 11:57:55.492184 UTC---
| Itration            | 97        |
| PAGAR Loss          | 2.79e+03  |
| Real Det Return     | 2.55e+03  |
| Real Sto Return     | 2.22e+03  |
| Reward Loss         | -4.49e+05 |
| Running Env Steps   | 485000    |
| Running Forward KL  | 11.4      |
| Running Reverse KL  | 32.9      |
| Running Update Time | 97        |
-----------------------------------
--2024-08-11 12:01:35.340540 UTC---
| Itration            | 98        |
| PAGAR Loss          | 2.37e+03  |
| Real Det Return     | 2.6e+03   |
| Real Sto Return     | 2.64e+03  |
| Reward Loss         | -5.92e+05 |
| Running Env Steps   | 490000    |
| Running Forward KL  | 11.5      |
| Running Reverse KL  | 10.8      |
| Running Update Time | 98        |
-----------------------------------
--2024-08-11 12:05:12.824429 UTC---
| Itration            | 99        |
| PAGAR Loss          | -645      |
| Real Det Return     | 2.55e+03  |
| Real Sto Return     | 2.46e+03  |
| Reward Loss         | -6.11e+05 |
| Running Env Steps   | 495000    |
| Running Forward KL  | 11.5      |
| Running Reverse KL  | 19.9      |
| Running Update Time | 99        |
-----------------------------------
--2024-08-11 12:08:53.078282 UTC---
| Itration            | 100       |
| PAGAR Loss          | -1.45e+03 |
| Real Det Return     | 2.76e+03  |
| Real Sto Return     | 2.58e+03  |
| Reward Loss         | -4.66e+05 |
| Running Env Steps   | 500000    |
| Running Forward KL  | 10.7      |
| Running Reverse KL  | 27.5      |
| Running Update Time | 100       |
-----------------------------------
--2024-08-11 12:12:30.671936 UTC---
| Itration            | 101       |
| PAGAR Loss          | 1.38e+03  |
| Real Det Return     | 2.97e+03  |
| Real Sto Return     | 2.92e+03  |
| Reward Loss         | -4.39e+05 |
| Running Env Steps   | 505000    |
| Running Forward KL  | 8.84      |
| Running Reverse KL  | 8.54      |
| Running Update Time | 101       |
-----------------------------------
--2024-08-11 12:16:10.115151 UTC---
| Itration            | 102       |
| PAGAR Loss          | nan       |
| Real Det Return     | 2.75e+03  |
| Real Sto Return     | 2.62e+03  |
| Reward Loss         | -2.05e+05 |
| Running Env Steps   | 510000    |
| Running Forward KL  | 10.4      |
| Running Reverse KL  | 31.7      |
| Running Update Time | 102       |
-----------------------------------
--2024-08-11 12:19:46.419421 UTC---
| Itration            | 103       |
| PAGAR Loss          | 749       |
| Real Det Return     | 2.83e+03  |
| Real Sto Return     | 2.65e+03  |
| Reward Loss         | -3.54e+05 |
| Running Env Steps   | 515000    |
| Running Forward KL  | 9.3       |
| Running Reverse KL  | 9.23      |
| Running Update Time | 103       |
-----------------------------------
--2024-08-11 12:23:23.366739 UTC---
| Itration            | 104       |
| PAGAR Loss          | -1.01e+03 |
| Real Det Return     | 2.75e+03  |
| Real Sto Return     | 2.48e+03  |
| Reward Loss         | -4.19e+05 |
| Running Env Steps   | 520000    |
| Running Forward KL  | 9.6       |
| Running Reverse KL  | 39.9      |
| Running Update Time | 104       |
-----------------------------------
--2024-08-11 12:26:55.702960 UTC--
| Itration            | 105      |
| PAGAR Loss          | -4.2e+03 |
| Real Det Return     | 2.88e+03 |
| Real Sto Return     | 2.47e+03 |
| Reward Loss         | -1e+05   |
| Running Env Steps   | 525000   |
| Running Forward KL  | 9.4      |
| Running Reverse KL  | 70.6     |
| Running Update Time | 105      |
----------------------------------
--2024-08-11 12:30:31.383642 UTC---
| Itration            | 106       |
| PAGAR Loss          | -3.57e+04 |
| Real Det Return     | 2.82e+03  |
| Real Sto Return     | 2.51e+03  |
| Reward Loss         | -5.73e+05 |
| Running Env Steps   | 530000    |
| Running Forward KL  | 9.25      |
| Running Reverse KL  | 97.1      |
| Running Update Time | 106       |
-----------------------------------
--2024-08-11 12:33:59.393326 UTC---
| Itration            | 107       |
| PAGAR Loss          | 6.97e+03  |
| Real Det Return     | 2.66e+03  |
| Real Sto Return     | 1.86e+03  |
| Reward Loss         | -1.44e+05 |
| Running Env Steps   | 535000    |
| Running Forward KL  | 10.4      |
| Running Reverse KL  | 68.7      |
| Running Update Time | 107       |
-----------------------------------
--2024-08-11 12:37:33.930168 UTC---
| Itration            | 108       |
| PAGAR Loss          | 1.77e+03  |
| Real Det Return     | 2.97e+03  |
| Real Sto Return     | 2.5e+03   |
| Reward Loss         | -7.25e+04 |
| Running Env Steps   | 540000    |
| Running Forward KL  | 8.6       |
| Running Reverse KL  | 49        |
| Running Update Time | 108       |
-----------------------------------
--2024-08-11 12:41:13.394446 UTC---
| Itration            | 109       |
| PAGAR Loss          | 1.16e+03  |
| Real Det Return     | 2.82e+03  |
| Real Sto Return     | 2.8e+03   |
| Reward Loss         | -4.07e+05 |
| Running Env Steps   | 545000    |
| Running Forward KL  | 9.39      |
| Running Reverse KL  | 9.69      |
| Running Update Time | 109       |
-----------------------------------
--2024-08-11 12:44:50.202466 UTC---
| Itration            | 110       |
| PAGAR Loss          | 7e+03     |
| Real Det Return     | 3e+03     |
| Real Sto Return     | 2.74e+03  |
| Reward Loss         | -2.77e+05 |
| Running Env Steps   | 550000    |
| Running Forward KL  | 8.43      |
| Running Reverse KL  | 8.85      |
| Running Update Time | 110       |
-----------------------------------
--2024-08-11 12:48:30.483052 UTC---
| Itration            | 111       |
| PAGAR Loss          | 4.09e+03  |
| Real Det Return     | 2.87e+03  |
| Real Sto Return     | 2.64e+03  |
| Reward Loss         | -3.02e+05 |
| Running Env Steps   | 555000    |
| Running Forward KL  | 8.89      |
| Running Reverse KL  | 9.2       |
| Running Update Time | 111       |
-----------------------------------
--2024-08-11 12:52:09.177447 UTC---
| Itration            | 112       |
| PAGAR Loss          | 1.35e+03  |
| Real Det Return     | 2.82e+03  |
| Real Sto Return     | 2.84e+03  |
| Reward Loss         | -4.91e+05 |
| Running Env Steps   | 560000    |
| Running Forward KL  | 9.46      |
| Running Reverse KL  | 50.4      |
| Running Update Time | 112       |
-----------------------------------
--2024-08-11 12:55:31.229595 UTC---
| Itration            | 113       |
| PAGAR Loss          | -4.49e+03 |
| Real Det Return     | 2.96e+03  |
| Real Sto Return     | 1.24e+03  |
| Reward Loss         | -3.08e+05 |
| Running Env Steps   | 565000    |
| Running Forward KL  | 11.2      |
| Running Reverse KL  | 175       |
| Running Update Time | 113       |
-----------------------------------
--2024-08-11 12:59:07.381727 UTC---
| Itration            | 114       |
| PAGAR Loss          | -1.53e+03 |
| Real Det Return     | 2.87e+03  |
| Real Sto Return     | 2.69e+03  |
| Reward Loss         | -3.87e+05 |
| Running Env Steps   | 570000    |
| Running Forward KL  | 9.26      |
| Running Reverse KL  | 29.5      |
| Running Update Time | 114       |
-----------------------------------
--2024-08-11 13:02:47.119717 UTC---
| Itration            | 115       |
| PAGAR Loss          | -1.42e+03 |
| Real Det Return     | 3.01e+03  |
| Real Sto Return     | 2.72e+03  |
| Reward Loss         | -2.14e+05 |
| Running Env Steps   | 575000    |
| Running Forward KL  | 8.93      |
| Running Reverse KL  | 23.5      |
| Running Update Time | 115       |
-----------------------------------
--2024-08-11 13:06:19.688408 UTC---
| Itration            | 116       |
| PAGAR Loss          | -6.58e+05 |
| Real Det Return     | 2.97e+03  |
| Real Sto Return     | 2.39e+03  |
| Reward Loss         | -3.68e+05 |
| Running Env Steps   | 580000    |
| Running Forward KL  | 9.94      |
| Running Reverse KL  | 129       |
| Running Update Time | 116       |
-----------------------------------
--2024-08-11 13:09:56.983042 UTC---
| Itration            | 117       |
| PAGAR Loss          | -2.96e+03 |
| Real Det Return     | 3.03e+03  |
| Real Sto Return     | 2.85e+03  |
| Reward Loss         | -2.16e+05 |
| Running Env Steps   | 585000    |
| Running Forward KL  | 7.96      |
| Running Reverse KL  | 22.9      |
| Running Update Time | 117       |
-----------------------------------
--2024-08-11 13:13:34.320503 UTC---
| Itration            | 118       |
| PAGAR Loss          | 960       |
| Real Det Return     | 3.02e+03  |
| Real Sto Return     | 2.87e+03  |
| Reward Loss         | -2.62e+05 |
| Running Env Steps   | 590000    |
| Running Forward KL  | 8.43      |
| Running Reverse KL  | 8.86      |
| Running Update Time | 118       |
-----------------------------------
--2024-08-11 13:17:07.305705 UTC---
| Itration            | 119       |
| PAGAR Loss          | -3.17e+03 |
| Real Det Return     | 2.91e+03  |
| Real Sto Return     | 2.34e+03  |
| Reward Loss         | 3.08e+04  |
| Running Env Steps   | 595000    |
| Running Forward KL  | 8.64      |
| Running Reverse KL  | 52.4      |
| Running Update Time | 119       |
-----------------------------------
--2024-08-11 13:20:45.750731 UTC---
| Itration            | 120       |
| PAGAR Loss          | 3.51e+03  |
| Real Det Return     | 2.95e+03  |
| Real Sto Return     | 2.84e+03  |
| Reward Loss         | -4.07e+05 |
| Running Env Steps   | 600000    |
| Running Forward KL  | 7.84      |
| Running Reverse KL  | 8.07      |
| Running Update Time | 120       |
-----------------------------------
--2024-08-11 13:24:25.053072 UTC---
| Itration            | 121       |
| PAGAR Loss          | -170      |
| Real Det Return     | 2.95e+03  |
| Real Sto Return     | 2.89e+03  |
| Reward Loss         | -4.58e+05 |
| Running Env Steps   | 605000    |
| Running Forward KL  | 8.26      |
| Running Reverse KL  | 8.5       |
| Running Update Time | 121       |
-----------------------------------
--2024-08-11 13:27:59.742312 UTC---
| Itration            | 122       |
| PAGAR Loss          | 2.95e+03  |
| Real Det Return     | 2.95e+03  |
| Real Sto Return     | 2.94e+03  |
| Reward Loss         | -4.26e+05 |
| Running Env Steps   | 610000    |
| Running Forward KL  | 8.94      |
| Running Reverse KL  | 9.77      |
| Running Update Time | 122       |
-----------------------------------
--2024-08-11 13:30:53.788395 UTC---
| Itration            | 123       |
| PAGAR Loss          | -636      |
| Real Det Return     | 3e+03     |
| Real Sto Return     | 2.68e+03  |
| Reward Loss         | -2.01e+05 |
| Running Env Steps   | 615000    |
| Running Forward KL  | 7.82      |
| Running Reverse KL  | 48.3      |
| Running Update Time | 123       |
-----------------------------------
--2024-08-11 13:33:53.104696 UTC---
| Itration            | 124       |
| PAGAR Loss          | 1.09e+03  |
| Real Det Return     | 2.99e+03  |
| Real Sto Return     | 2.99e+03  |
| Reward Loss         | -3.93e+05 |
| Running Env Steps   | 620000    |
| Running Forward KL  | 7.84      |
| Running Reverse KL  | 30.3      |
| Running Update Time | 124       |
-----------------------------------
--2024-08-11 13:36:53.971758 UTC---
| Itration            | 125       |
| PAGAR Loss          | 1.23e+03  |
| Real Det Return     | 2.99e+03  |
| Real Sto Return     | 2.97e+03  |
| Reward Loss         | -3.56e+05 |
| Running Env Steps   | 625000    |
| Running Forward KL  | 8.16      |
| Running Reverse KL  | 8.79      |
| Running Update Time | 125       |
-----------------------------------
--2024-08-11 13:39:52.732482 UTC---
| Itration            | 126       |
| PAGAR Loss          | -624      |
| Real Det Return     | 3.04e+03  |
| Real Sto Return     | 3.03e+03  |
| Reward Loss         | -2.68e+05 |
| Running Env Steps   | 630000    |
| Running Forward KL  | 6.57      |
| Running Reverse KL  | 7.6       |
| Running Update Time | 126       |
-----------------------------------
--2024-08-11 13:42:51.505338 UTC---
| Itration            | 127       |
| PAGAR Loss          | 0.821     |
| Real Det Return     | 2.96e+03  |
| Real Sto Return     | 2.67e+03  |
| Reward Loss         | -4.48e+05 |
| Running Env Steps   | 635000    |
| Running Forward KL  | 7.31      |
| Running Reverse KL  | 7.88      |
| Running Update Time | 127       |
-----------------------------------
--2024-08-11 13:45:46.334463 UTC--
| Itration            | 128      |
| PAGAR Loss          | -315     |
| Real Det Return     | 2.99e+03 |
| Real Sto Return     | 2.84e+03 |
| Reward Loss         | -3.6e+05 |
| Running Env Steps   | 640000   |
| Running Forward KL  | 7.53     |
| Running Reverse KL  | 8.07     |
| Running Update Time | 128      |
----------------------------------
--2024-08-11 13:48:46.125197 UTC---
| Itration            | 129       |
| PAGAR Loss          | 1.11e+03  |
| Real Det Return     | 2.98e+03  |
| Real Sto Return     | 2.93e+03  |
| Reward Loss         | -4.59e+05 |
| Running Env Steps   | 645000    |
| Running Forward KL  | 8.41      |
| Running Reverse KL  | 9.15      |
| Running Update Time | 129       |
-----------------------------------
--2024-08-11 13:51:43.513398 UTC---
| Itration            | 130       |
| PAGAR Loss          | -192      |
| Real Det Return     | 3.08e+03  |
| Real Sto Return     | 3.06e+03  |
| Reward Loss         | -3.11e+05 |
| Running Env Steps   | 650000    |
| Running Forward KL  | 6.51      |
| Running Reverse KL  | 12.5      |
| Running Update Time | 130       |
-----------------------------------
--2024-08-11 13:54:42.536653 UTC---
| Itration            | 131       |
| PAGAR Loss          | -1.93e+03 |
| Real Det Return     | 2.99e+03  |
| Real Sto Return     | 2.98e+03  |
| Reward Loss         | -2.99e+05 |
| Running Env Steps   | 655000    |
| Running Forward KL  | 7.49      |
| Running Reverse KL  | 11.9      |
| Running Update Time | 131       |
-----------------------------------
--2024-08-11 13:57:40.640858 UTC---
| Itration            | 132       |
| PAGAR Loss          | -908      |
| Real Det Return     | 3.03e+03  |
| Real Sto Return     | 3.01e+03  |
| Reward Loss         | -3.86e+05 |
| Running Env Steps   | 660000    |
| Running Forward KL  | 7.43      |
| Running Reverse KL  | 8.04      |
| Running Update Time | 132       |
-----------------------------------
--2024-08-11 14:00:38.339307 UTC---
| Itration            | 133       |
| PAGAR Loss          | 698       |
| Real Det Return     | 3.04e+03  |
| Real Sto Return     | 3.06e+03  |
| Reward Loss         | -2.99e+05 |
| Running Env Steps   | 665000    |
| Running Forward KL  | 7.04      |
| Running Reverse KL  | 7.74      |
| Running Update Time | 133       |
-----------------------------------
--2024-08-11 14:03:35.483184 UTC---
| Itration            | 134       |
| PAGAR Loss          | -792      |
| Real Det Return     | 3.1e+03   |
| Real Sto Return     | 3.06e+03  |
| Reward Loss         | -3.77e+05 |
| Running Env Steps   | 670000    |
| Running Forward KL  | 5.83      |
| Running Reverse KL  | 6.63      |
| Running Update Time | 134       |
-----------------------------------
--2024-08-11 14:06:26.072368 UTC---
| Itration            | 135       |
| PAGAR Loss          | -1.67e+04 |
| Real Det Return     | 3.08e+03  |
| Real Sto Return     | 2.34e+03  |
| Reward Loss         | -2.66e+05 |
| Running Env Steps   | 675000    |
| Running Forward KL  | 7.27      |
| Running Reverse KL  | 109       |
| Running Update Time | 135       |
-----------------------------------
--2024-08-11 14:09:15.146109 UTC---
| Itration            | 136       |
| PAGAR Loss          | -2.23e+04 |
| Real Det Return     | 3.04e+03  |
| Real Sto Return     | 1.67e+03  |
| Reward Loss         | 3.24e+04  |
| Running Env Steps   | 680000    |
| Running Forward KL  | 6.1       |
| Running Reverse KL  | 91.6      |
| Running Update Time | 136       |
-----------------------------------
--2024-08-11 14:12:14.194505 UTC--
| Itration            | 137      |
| PAGAR Loss          | 433      |
| Real Det Return     | 3.11e+03 |
| Real Sto Return     | 3.05e+03 |
| Reward Loss         | -3.3e+05 |
| Running Env Steps   | 685000   |
| Running Forward KL  | 5.49     |
| Running Reverse KL  | 6.56     |
| Running Update Time | 137      |
----------------------------------
--2024-08-11 14:15:11.468043 UTC---
| Itration            | 138       |
| PAGAR Loss          | 90.4      |
| Real Det Return     | 3.01e+03  |
| Real Sto Return     | 3e+03     |
| Reward Loss         | -4.28e+05 |
| Running Env Steps   | 690000    |
| Running Forward KL  | 5.58      |
| Running Reverse KL  | 6.15      |
| Running Update Time | 138       |
-----------------------------------
--2024-08-11 14:18:10.134741 UTC---
| Itration            | 139       |
| PAGAR Loss          | -889      |
| Real Det Return     | 3.07e+03  |
| Real Sto Return     | 3.03e+03  |
| Reward Loss         | -3.49e+05 |
| Running Env Steps   | 695000    |
| Running Forward KL  | 7.15      |
| Running Reverse KL  | 7.36      |
| Running Update Time | 139       |
-----------------------------------
--2024-08-11 14:21:05.796229 UTC---
| Itration            | 140       |
| PAGAR Loss          | -1.09e+03 |
| Real Det Return     | 3.07e+03  |
| Real Sto Return     | 2.98e+03  |
| Reward Loss         | -3.44e+05 |
| Running Env Steps   | 700000    |
| Running Forward KL  | 7.23      |
| Running Reverse KL  | 7.76      |
| Running Update Time | 140       |
-----------------------------------
--2024-08-11 14:24:04.274680 UTC---
| Itration            | 141       |
| PAGAR Loss          | 1.6e+03   |
| Real Det Return     | 3.06e+03  |
| Real Sto Return     | 2.94e+03  |
| Reward Loss         | -4.28e+05 |
| Running Env Steps   | 705000    |
| Running Forward KL  | 6.2       |
| Running Reverse KL  | 7.05      |
| Running Update Time | 141       |
-----------------------------------
--2024-08-11 14:26:56.866992 UTC---
| Itration            | 142       |
| PAGAR Loss          | -8.13e+04 |
| Real Det Return     | 3.06e+03  |
| Real Sto Return     | 2.61e+03  |
| Reward Loss         | -1.76e+05 |
| Running Env Steps   | 710000    |
| Running Forward KL  | 6.65      |
| Running Reverse KL  | 35.6      |
| Running Update Time | 142       |
-----------------------------------
--2024-08-11 14:29:56.422980 UTC---
| Itration            | 143       |
| PAGAR Loss          | 674       |
| Real Det Return     | 3.08e+03  |
| Real Sto Return     | 3.05e+03  |
| Reward Loss         | -4.19e+05 |
| Running Env Steps   | 715000    |
| Running Forward KL  | 6.22      |
| Running Reverse KL  | 6.79      |
| Running Update Time | 143       |
-----------------------------------
--2024-08-11 14:32:53.525815 UTC---
| Itration            | 144       |
| PAGAR Loss          | 2.1e+03   |
| Real Det Return     | 3.09e+03  |
| Real Sto Return     | 3.05e+03  |
| Reward Loss         | -4.13e+05 |
| Running Env Steps   | 720000    |
| Running Forward KL  | 6.36      |
| Running Reverse KL  | 6.89      |
| Running Update Time | 144       |
-----------------------------------
--2024-08-11 14:35:49.132094 UTC---
| Itration            | 145       |
| PAGAR Loss          | 1.09e+03  |
| Real Det Return     | 3.01e+03  |
| Real Sto Return     | 2.87e+03  |
| Reward Loss         | -3.62e+05 |
| Running Env Steps   | 725000    |
| Running Forward KL  | 6.52      |
| Running Reverse KL  | 20.3      |
| Running Update Time | 145       |
-----------------------------------
--2024-08-11 14:38:44.864904 UTC---
| Itration            | 146       |
| PAGAR Loss          | -435      |
| Real Det Return     | 3.07e+03  |
| Real Sto Return     | 2.86e+03  |
| Reward Loss         | -3.99e+05 |
| Running Env Steps   | 730000    |
| Running Forward KL  | 6.13      |
| Running Reverse KL  | 9.24      |
| Running Update Time | 146       |
-----------------------------------
--2024-08-11 14:41:42.800280 UTC---
| Itration            | 147       |
| PAGAR Loss          | -281      |
| Real Det Return     | 3.09e+03  |
| Real Sto Return     | 3.05e+03  |
| Reward Loss         | -4.56e+05 |
| Running Env Steps   | 735000    |
| Running Forward KL  | 6.07      |
| Running Reverse KL  | 6.47      |
| Running Update Time | 147       |
-----------------------------------
--2024-08-11 14:44:39.446122 UTC---
| Itration            | 148       |
| PAGAR Loss          | 7.2e+03   |
| Real Det Return     | 3.13e+03  |
| Real Sto Return     | 3.12e+03  |
| Reward Loss         | -3.49e+05 |
| Running Env Steps   | 740000    |
| Running Forward KL  | 4.51      |
| Running Reverse KL  | 5.2       |
| Running Update Time | 148       |
-----------------------------------
--2024-08-11 14:47:37.245142 UTC---
| Itration            | 149       |
| PAGAR Loss          | -9.21e+04 |
| Real Det Return     | 3.11e+03  |
| Real Sto Return     | 3.08e+03  |
| Reward Loss         | -4.55e+05 |
| Running Env Steps   | 745000    |
| Running Forward KL  | 6.3       |
| Running Reverse KL  | 19.3      |
| Running Update Time | 149       |
-----------------------------------
--2024-08-11 14:50:35.210650 UTC---
| Itration            | 150       |
| PAGAR Loss          | -1.92e+04 |
| Real Det Return     | 2.83e+03  |
| Real Sto Return     | 2.99e+03  |
| Reward Loss         | -5.51e+05 |
| Running Env Steps   | 750000    |
| Running Forward KL  | 7.1       |
| Running Reverse KL  | 27.4      |
| Running Update Time | 150       |
-----------------------------------
--2024-08-11 14:53:31.791056 UTC---
| Itration            | 151       |
| PAGAR Loss          | -1.12e+03 |
| Real Det Return     | 3.16e+03  |
| Real Sto Return     | 3.03e+03  |
| Reward Loss         | -2.9e+05  |
| Running Env Steps   | 755000    |
| Running Forward KL  | 5.9       |
| Running Reverse KL  | 18.8      |
| Running Update Time | 151       |
-----------------------------------
--2024-08-11 14:56:32.223181 UTC---
| Itration            | 152       |
| PAGAR Loss          | -472      |
| Real Det Return     | 3.06e+03  |
| Real Sto Return     | 3.01e+03  |
| Reward Loss         | -4.52e+05 |
| Running Env Steps   | 760000    |
| Running Forward KL  | 6.09      |
| Running Reverse KL  | 6.89      |
| Running Update Time | 152       |
-----------------------------------
--2024-08-11 14:59:28.872542 UTC---
| Itration            | 153       |
| PAGAR Loss          | -1.09e+03 |
| Real Det Return     | 3.14e+03  |
| Real Sto Return     | 3.06e+03  |
| Reward Loss         | -3.79e+05 |
| Running Env Steps   | 765000    |
| Running Forward KL  | 5.64      |
| Running Reverse KL  | 6.22      |
| Running Update Time | 153       |
-----------------------------------
--2024-08-11 15:02:27.777182 UTC---
| Itration            | 154       |
| PAGAR Loss          | 508       |
| Real Det Return     | 3.09e+03  |
| Real Sto Return     | 3.07e+03  |
| Reward Loss         | -4.72e+05 |
| Running Env Steps   | 770000    |
| Running Forward KL  | 6.27      |
| Running Reverse KL  | 6.38      |
| Running Update Time | 154       |
-----------------------------------
--2024-08-11 15:05:24.579923 UTC---
| Itration            | 155       |
| PAGAR Loss          | 1.26e+03  |
| Real Det Return     | 3.09e+03  |
| Real Sto Return     | 3.01e+03  |
| Reward Loss         | -3.55e+05 |
| Running Env Steps   | 775000    |
| Running Forward KL  | 5.96      |
| Running Reverse KL  | 6.33      |
| Running Update Time | 155       |
-----------------------------------
--2024-08-11 15:08:21.820435 UTC---
| Itration            | 156       |
| PAGAR Loss          | -793      |
| Real Det Return     | 3.06e+03  |
| Real Sto Return     | 2.86e+03  |
| Reward Loss         | -4.22e+05 |
| Running Env Steps   | 780000    |
| Running Forward KL  | 6.42      |
| Running Reverse KL  | 9.56      |
| Running Update Time | 156       |
-----------------------------------
--2024-08-11 15:11:16.898307 UTC---
| Itration            | 157       |
| PAGAR Loss          | 546       |
| Real Det Return     | 3.01e+03  |
| Real Sto Return     | 2.88e+03  |
| Reward Loss         | -4.98e+05 |
| Running Env Steps   | 785000    |
| Running Forward KL  | 6.3       |
| Running Reverse KL  | 6.55      |
| Running Update Time | 157       |
-----------------------------------
--2024-08-11 15:14:15.611011 UTC---
| Itration            | 158       |
| PAGAR Loss          | -1.24e+03 |
| Real Det Return     | 3.07e+03  |
| Real Sto Return     | 3.07e+03  |
| Reward Loss         | -3.59e+05 |
| Running Env Steps   | 790000    |
| Running Forward KL  | 6.13      |
| Running Reverse KL  | 6.3       |
| Running Update Time | 158       |
-----------------------------------
--2024-08-11 15:17:09.712602 UTC---
| Itration            | 159       |
| PAGAR Loss          | -642      |
| Real Det Return     | 3.06e+03  |
| Real Sto Return     | 2.79e+03  |
| Reward Loss         | -4.46e+05 |
| Running Env Steps   | 795000    |
| Running Forward KL  | 5.97      |
| Running Reverse KL  | 17        |
| Running Update Time | 159       |
-----------------------------------
--2024-08-11 15:20:08.930070 UTC---
| Itration            | 160       |
| PAGAR Loss          | 1.86e+03  |
| Real Det Return     | 3.05e+03  |
| Real Sto Return     | 3.06e+03  |
| Reward Loss         | -5.05e+05 |
| Running Env Steps   | 800000    |
| Running Forward KL  | 6.64      |
| Running Reverse KL  | 6.9       |
| Running Update Time | 160       |
-----------------------------------
--2024-08-11 15:23:03.831472 UTC---
| Itration            | 161       |
| PAGAR Loss          | -2.31e+03 |
| Real Det Return     | 3.17e+03  |
| Real Sto Return     | 2.88e+03  |
| Reward Loss         | -3.75e+05 |
| Running Env Steps   | 805000    |
| Running Forward KL  | 5.52      |
| Running Reverse KL  | 51.3      |
| Running Update Time | 161       |
-----------------------------------
--2024-08-11 15:26:00.654010 UTC---
| Itration            | 162       |
| PAGAR Loss          | 791       |
| Real Det Return     | 3.12e+03  |
| Real Sto Return     | 3.12e+03  |
| Reward Loss         | -4.02e+05 |
| Running Env Steps   | 810000    |
| Running Forward KL  | 5.83      |
| Running Reverse KL  | 6.27      |
| Running Update Time | 162       |
-----------------------------------
--2024-08-11 15:28:55.488894 UTC---
| Itration            | 163       |
| PAGAR Loss          | -19.9     |
| Real Det Return     | 3.13e+03  |
| Real Sto Return     | 3.14e+03  |
| Reward Loss         | -3.64e+05 |
| Running Env Steps   | 815000    |
| Running Forward KL  | 5.31      |
| Running Reverse KL  | 6.09      |
| Running Update Time | 163       |
-----------------------------------
--2024-08-11 15:31:52.659133 UTC---
| Itration            | 164       |
| PAGAR Loss          | 133       |
| Real Det Return     | 3.13e+03  |
| Real Sto Return     | 3.03e+03  |
| Reward Loss         | -3.62e+05 |
| Running Env Steps   | 820000    |
| Running Forward KL  | 5.89      |
| Running Reverse KL  | 6.35      |
| Running Update Time | 164       |
-----------------------------------
--2024-08-11 15:34:50.912073 UTC---
| Itration            | 165       |
| PAGAR Loss          | 460       |
| Real Det Return     | 3.01e+03  |
| Real Sto Return     | 2.97e+03  |
| Reward Loss         | -4.98e+05 |
| Running Env Steps   | 825000    |
| Running Forward KL  | 5.88      |
| Running Reverse KL  | 6.38      |
| Running Update Time | 165       |
-----------------------------------
--2024-08-11 15:37:48.617918 UTC---
| Itration            | 166       |
| PAGAR Loss          | -145      |
| Real Det Return     | 3.11e+03  |
| Real Sto Return     | 3.1e+03   |
| Reward Loss         | -4.19e+05 |
| Running Env Steps   | 830000    |
| Running Forward KL  | 5.94      |
| Running Reverse KL  | 6.19      |
| Running Update Time | 166       |
-----------------------------------
--2024-08-11 15:40:46.545225 UTC---
| Itration            | 167       |
| PAGAR Loss          | 199       |
| Real Det Return     | 3.15e+03  |
| Real Sto Return     | 3.15e+03  |
| Reward Loss         | -3.35e+05 |
| Running Env Steps   | 835000    |
| Running Forward KL  | 5.11      |
| Running Reverse KL  | 5.42      |
| Running Update Time | 167       |
-----------------------------------
--2024-08-11 15:43:44.025486 UTC---
| Itration            | 168       |
| PAGAR Loss          | -516      |
| Real Det Return     | 3.11e+03  |
| Real Sto Return     | 3.14e+03  |
| Reward Loss         | -5.15e+05 |
| Running Env Steps   | 840000    |
| Running Forward KL  | 5.73      |
| Running Reverse KL  | 45.5      |
| Running Update Time | 168       |
-----------------------------------
--2024-08-11 15:46:43.081912 UTC--
| Itration            | 169      |
| PAGAR Loss          | -499     |
| Real Det Return     | 3.19e+03 |
| Real Sto Return     | 3.17e+03 |
| Reward Loss         | -3.5e+05 |
| Running Env Steps   | 845000   |
| Running Forward KL  | 5.77     |
| Running Reverse KL  | 6.31     |
| Running Update Time | 169      |
----------------------------------
--2024-08-11 15:49:40.621322 UTC---
| Itration            | 170       |
| PAGAR Loss          | -621      |
| Real Det Return     | 3.19e+03  |
| Real Sto Return     | 3.16e+03  |
| Reward Loss         | -3.89e+05 |
| Running Env Steps   | 850000    |
| Running Forward KL  | 5.8       |
| Running Reverse KL  | 6.27      |
| Running Update Time | 170       |
-----------------------------------
--2024-08-11 15:52:40.020713 UTC---
| Itration            | 171       |
| PAGAR Loss          | -133      |
| Real Det Return     | 3.18e+03  |
| Real Sto Return     | 3.14e+03  |
| Reward Loss         | -2.69e+05 |
| Running Env Steps   | 855000    |
| Running Forward KL  | 4.5       |
| Running Reverse KL  | 5.47      |
| Running Update Time | 171       |
-----------------------------------
--2024-08-11 15:55:37.780945 UTC---
| Itration            | 172       |
| PAGAR Loss          | -157      |
| Real Det Return     | 3.16e+03  |
| Real Sto Return     | 3.12e+03  |
| Reward Loss         | -3.83e+05 |
| Running Env Steps   | 860000    |
| Running Forward KL  | 5.64      |
| Running Reverse KL  | 6.13      |
| Running Update Time | 172       |
-----------------------------------
--2024-08-11 15:58:36.185078 UTC---
| Itration            | 173       |
| PAGAR Loss          | 1.8e+03   |
| Real Det Return     | 3.2e+03   |
| Real Sto Return     | 3.03e+03  |
| Reward Loss         | -4.22e+05 |
| Running Env Steps   | 865000    |
| Running Forward KL  | 6.08      |
| Running Reverse KL  | 6.87      |
| Running Update Time | 173       |
-----------------------------------
--2024-08-11 16:01:33.509098 UTC---
| Itration            | 174       |
| PAGAR Loss          | -175      |
| Real Det Return     | 3.2e+03   |
| Real Sto Return     | 3.15e+03  |
| Reward Loss         | -3.56e+05 |
| Running Env Steps   | 870000    |
| Running Forward KL  | 5.33      |
| Running Reverse KL  | 5.78      |
| Running Update Time | 174       |
-----------------------------------
--2024-08-11 16:04:33.536172 UTC---
| Itration            | 175       |
| PAGAR Loss          | -88.7     |
| Real Det Return     | 3.17e+03  |
| Real Sto Return     | 3.19e+03  |
| Reward Loss         | -3.25e+05 |
| Running Env Steps   | 875000    |
| Running Forward KL  | 4.83      |
| Running Reverse KL  | 5.51      |
| Running Update Time | 175       |
-----------------------------------
--2024-08-11 16:07:31.607557 UTC---
| Itration            | 176       |
| PAGAR Loss          | 258       |
| Real Det Return     | 3.15e+03  |
| Real Sto Return     | 3.15e+03  |
| Reward Loss         | -4.18e+05 |
| Running Env Steps   | 880000    |
| Running Forward KL  | 5.67      |
| Running Reverse KL  | 6.2       |
| Running Update Time | 176       |
-----------------------------------
--2024-08-11 16:10:29.662894 UTC---
| Itration            | 177       |
| PAGAR Loss          | 771       |
| Real Det Return     | 3.11e+03  |
| Real Sto Return     | 2.92e+03  |
| Reward Loss         | -4.87e+05 |
| Running Env Steps   | 885000    |
| Running Forward KL  | 6.22      |
| Running Reverse KL  | 6.37      |
| Running Update Time | 177       |
-----------------------------------
--2024-08-11 16:13:25.662505 UTC---
| Itration            | 178       |
| PAGAR Loss          | -1.01e+03 |
| Real Det Return     | 3.21e+03  |
| Real Sto Return     | 3.08e+03  |
| Reward Loss         | -3.64e+05 |
| Running Env Steps   | 890000    |
| Running Forward KL  | 5.18      |
| Running Reverse KL  | 13.9      |
| Running Update Time | 178       |
-----------------------------------
--2024-08-11 16:16:25.055514 UTC--
| Itration            | 179      |
| PAGAR Loss          | -949     |
| Real Det Return     | 3.15e+03 |
| Real Sto Return     | 3.15e+03 |
| Reward Loss         | -4e+05   |
| Running Env Steps   | 895000   |
| Running Forward KL  | 5.48     |
| Running Reverse KL  | 5.93     |
| Running Update Time | 179      |
----------------------------------
--2024-08-11 16:19:22.131590 UTC---
| Itration            | 180       |
| PAGAR Loss          | -8.47e+03 |
| Real Det Return     | 3.3e+03   |
| Real Sto Return     | 3.16e+03  |
| Reward Loss         | -2.08e+05 |
| Running Env Steps   | 900000    |
| Running Forward KL  | 4.84      |
| Running Reverse KL  | 5.7       |
| Running Update Time | 180       |
-----------------------------------
--2024-08-11 16:22:22.049658 UTC---
| Itration            | 181       |
| PAGAR Loss          | -798      |
| Real Det Return     | 3.26e+03  |
| Real Sto Return     | 3.22e+03  |
| Reward Loss         | -3.36e+05 |
| Running Env Steps   | 905000    |
| Running Forward KL  | 4.91      |
| Running Reverse KL  | 5.88      |
| Running Update Time | 181       |
-----------------------------------
--2024-08-11 16:25:19.668893 UTC---
| Itration            | 182       |
| PAGAR Loss          | -395      |
| Real Det Return     | 3.15e+03  |
| Real Sto Return     | 3.13e+03  |
| Reward Loss         | -4.53e+05 |
| Running Env Steps   | 910000    |
| Running Forward KL  | 5.94      |
| Running Reverse KL  | 6.11      |
| Running Update Time | 182       |
-----------------------------------
--2024-08-11 16:28:17.134336 UTC---
| Itration            | 183       |
| PAGAR Loss          | -818      |
| Real Det Return     | 3.23e+03  |
| Real Sto Return     | 3.19e+03  |
| Reward Loss         | -3.79e+05 |
| Running Env Steps   | 915000    |
| Running Forward KL  | 5.2       |
| Running Reverse KL  | 5.77      |
| Running Update Time | 183       |
-----------------------------------
--2024-08-11 16:31:16.802068 UTC---
| Itration            | 184       |
| PAGAR Loss          | -1.87e+03 |
| Real Det Return     | 3.19e+03  |
| Real Sto Return     | 3.17e+03  |
| Reward Loss         | -3.99e+05 |
| Running Env Steps   | 920000    |
| Running Forward KL  | 5.19      |
| Running Reverse KL  | 5.84      |
| Running Update Time | 184       |
-----------------------------------
--2024-08-11 16:34:14.424281 UTC---
| Itration            | 185       |
| PAGAR Loss          | 1.35e+03  |
| Real Det Return     | 3.15e+03  |
| Real Sto Return     | 3.15e+03  |
| Reward Loss         | -3.92e+05 |
| Running Env Steps   | 925000    |
| Running Forward KL  | 4.91      |
| Running Reverse KL  | 5.73      |
| Running Update Time | 185       |
-----------------------------------
--2024-08-11 16:37:10.902866 UTC---
| Itration            | 186       |
| PAGAR Loss          | -777      |
| Real Det Return     | 3.22e+03  |
| Real Sto Return     | 2.92e+03  |
| Reward Loss         | -3.97e+05 |
| Running Env Steps   | 930000    |
| Running Forward KL  | 5.22      |
| Running Reverse KL  | 7.62      |
| Running Update Time | 186       |
-----------------------------------
--2024-08-11 16:40:07.745818 UTC---
| Itration            | 187       |
| PAGAR Loss          | -499      |
| Real Det Return     | 3.18e+03  |
| Real Sto Return     | 3.13e+03  |
| Reward Loss         | -4.68e+05 |
| Running Env Steps   | 935000    |
| Running Forward KL  | 5.14      |
| Running Reverse KL  | 5.63      |
| Running Update Time | 187       |
-----------------------------------
--2024-08-11 16:43:04.635459 UTC--
| Itration            | 188      |
| PAGAR Loss          | -471     |
| Real Det Return     | 3.23e+03 |
| Real Sto Return     | 3.07e+03 |
| Reward Loss         | -4.9e+05 |
| Running Env Steps   | 940000   |
| Running Forward KL  | 4.78     |
| Running Reverse KL  | 28.4     |
| Running Update Time | 188      |
----------------------------------
--2024-08-11 16:46:01.428437 UTC---
| Itration            | 189       |
| PAGAR Loss          | 155       |
| Real Det Return     | 3.24e+03  |
| Real Sto Return     | 3.17e+03  |
| Reward Loss         | -3.64e+05 |
| Running Env Steps   | 945000    |
| Running Forward KL  | 5.23      |
| Running Reverse KL  | 5.78      |
| Running Update Time | 189       |
-----------------------------------
--2024-08-11 16:49:03.405456 UTC---
| Itration            | 190       |
| PAGAR Loss          | 377       |
| Real Det Return     | 3.22e+03  |
| Real Sto Return     | 3.2e+03   |
| Reward Loss         | -3.92e+05 |
| Running Env Steps   | 950000    |
| Running Forward KL  | 4.84      |
| Running Reverse KL  | 5.27      |
| Running Update Time | 190       |
-----------------------------------
--2024-08-11 16:52:02.106852 UTC---
| Itration            | 191       |
| PAGAR Loss          | -380      |
| Real Det Return     | 3.2e+03   |
| Real Sto Return     | 3.14e+03  |
| Reward Loss         | -4.97e+05 |
| Running Env Steps   | 955000    |
| Running Forward KL  | 5.41      |
| Running Reverse KL  | 5.69      |
| Running Update Time | 191       |
-----------------------------------
--2024-08-11 16:54:59.901274 UTC---
| Itration            | 192       |
| PAGAR Loss          | -276      |
| Real Det Return     | 3.16e+03  |
| Real Sto Return     | 3.16e+03  |
| Reward Loss         | -4.06e+05 |
| Running Env Steps   | 960000    |
| Running Forward KL  | 5.34      |
| Running Reverse KL  | 16.1      |
| Running Update Time | 192       |
-----------------------------------
--2024-08-11 16:57:58.638277 UTC---
| Itration            | 193       |
| PAGAR Loss          | 1.21e+03  |
| Real Det Return     | 3.24e+03  |
| Real Sto Return     | 3.21e+03  |
| Reward Loss         | -3.24e+05 |
| Running Env Steps   | 965000    |
| Running Forward KL  | 3.75      |
| Running Reverse KL  | 4.96      |
| Running Update Time | 193       |
-----------------------------------
--2024-08-11 17:00:55.787432 UTC---
| Itration            | 194       |
| PAGAR Loss          | -629      |
| Real Det Return     | 3.21e+03  |
| Real Sto Return     | 3.22e+03  |
| Reward Loss         | -3.37e+05 |
| Running Env Steps   | 970000    |
| Running Forward KL  | 4.39      |
| Running Reverse KL  | 5.18      |
| Running Update Time | 194       |
-----------------------------------
--2024-08-11 17:03:55.612169 UTC---
| Itration            | 195       |
| PAGAR Loss          | -367      |
| Real Det Return     | 3.28e+03  |
| Real Sto Return     | 3.26e+03  |
| Reward Loss         | -2.49e+05 |
| Running Env Steps   | 975000    |
| Running Forward KL  | 5.31      |
| Running Reverse KL  | 14.8      |
| Running Update Time | 195       |
-----------------------------------
--2024-08-11 17:06:53.967674 UTC---
| Itration            | 196       |
| PAGAR Loss          | 208       |
| Real Det Return     | 3.26e+03  |
| Real Sto Return     | 3.03e+03  |
| Reward Loss         | -3.94e+05 |
| Running Env Steps   | 980000    |
| Running Forward KL  | 4.78      |
| Running Reverse KL  | 5.55      |
| Running Update Time | 196       |
-----------------------------------
--2024-08-11 17:09:50.779480 UTC---
| Itration            | 197       |
| PAGAR Loss          | -2.91e+03 |
| Real Det Return     | 3.28e+03  |
| Real Sto Return     | 3.13e+03  |
| Reward Loss         | -3.59e+05 |
| Running Env Steps   | 985000    |
| Running Forward KL  | 5.01      |
| Running Reverse KL  | 5.52      |
| Running Update Time | 197       |
-----------------------------------
--2024-08-11 17:12:49.719076 UTC---
| Itration            | 198       |
| PAGAR Loss          | -513      |
| Real Det Return     | 3.24e+03  |
| Real Sto Return     | 3.19e+03  |
| Reward Loss         | -4.04e+05 |
| Running Env Steps   | 990000    |
| Running Forward KL  | 4.76      |
| Running Reverse KL  | 5.49      |
| Running Update Time | 198       |
-----------------------------------
--2024-08-11 17:15:46.702222 UTC---
| Itration            | 199       |
| PAGAR Loss          | 313       |
| Real Det Return     | 3.25e+03  |
| Real Sto Return     | 3.22e+03  |
| Reward Loss         | -3.52e+05 |
| Running Env Steps   | 995000    |
| Running Forward KL  | 4.87      |
| Running Reverse KL  | 14.5      |
| Running Update Time | 199       |
-----------------------------------
