Logging to logs/AntFH-v0/exp-16/fkl/2024_08_10_20_58_33
--2024-08-10 20:59:41.151389 UTC--
| Itration            | 0        |
| Real Det Return     | 887      |
| Real Sto Return     | -194     |
| Reward Loss         | 3.56e+04 |
| Running Env Steps   | 0        |
| Running Forward KL  | 153      |
| Running Reverse KL  | 2.58e+03 |
| Running Update Time | 0        |
----------------------------------
--2024-08-10 21:00:52.790662 UTC--
| Itration            | 1        |
| Real Det Return     | 710      |
| Real Sto Return     | -129     |
| Reward Loss         | 1.88e+05 |
| Running Env Steps   | 5000     |
| Running Forward KL  | 150      |
| Running Reverse KL  | 2.56e+03 |
| Running Update Time | 1        |
----------------------------------
--2024-08-10 21:02:05.473359 UTC--
| Itration            | 2        |
| Real Det Return     | 801      |
| Real Sto Return     | -119     |
| Reward Loss         | 1.84e+05 |
| Running Env Steps   | 10000    |
| Running Forward KL  | 146      |
| Running Reverse KL  | 1.57e+03 |
| Running Update Time | 2        |
----------------------------------
--2024-08-10 21:03:18.690001 UTC---
| Itration            | 3         |
| Real Det Return     | 738       |
| Real Sto Return     | -133      |
| Reward Loss         | -1.14e+05 |
| Running Env Steps   | 15000     |
| Running Forward KL  | 145       |
| Running Reverse KL  | 1.71e+03  |
| Running Update Time | 3         |
-----------------------------------
--2024-08-10 21:04:30.816596 UTC---
| Itration            | 4         |
| Real Det Return     | 704       |
| Real Sto Return     | -91.8     |
| Reward Loss         | -2.41e+05 |
| Running Env Steps   | 20000     |
| Running Forward KL  | 145       |
| Running Reverse KL  | 1.96e+03  |
| Running Update Time | 4         |
-----------------------------------
--2024-08-10 21:05:45.313776 UTC---
| Itration            | 5         |
| Real Det Return     | 619       |
| Real Sto Return     | -158      |
| Reward Loss         | -1.94e+05 |
| Running Env Steps   | 25000     |
| Running Forward KL  | 143       |
| Running Reverse KL  | 1.79e+03  |
| Running Update Time | 5         |
-----------------------------------
--2024-08-10 21:06:59.447604 UTC---
| Itration            | 6         |
| Real Det Return     | 817       |
| Real Sto Return     | -108      |
| Reward Loss         | -2.35e+05 |
| Running Env Steps   | 30000     |
| Running Forward KL  | 145       |
| Running Reverse KL  | 1.03e+03  |
| Running Update Time | 6         |
-----------------------------------
--2024-08-10 21:08:15.028068 UTC---
| Itration            | 7         |
| Real Det Return     | 675       |
| Real Sto Return     | -212      |
| Reward Loss         | -2.82e+05 |
| Running Env Steps   | 35000     |
| Running Forward KL  | 144       |
| Running Reverse KL  | 1.76e+03  |
| Running Update Time | 7         |
-----------------------------------
--2024-08-10 21:09:31.053677 UTC---
| Itration            | 8         |
| Real Det Return     | 707       |
| Real Sto Return     | -173      |
| Reward Loss         | -6.58e+05 |
| Running Env Steps   | 40000     |
| Running Forward KL  | 146       |
| Running Reverse KL  | 1.04e+03  |
| Running Update Time | 8         |
-----------------------------------
--2024-08-10 21:10:46.739506 UTC---
| Itration            | 9         |
| Real Det Return     | 633       |
| Real Sto Return     | -156      |
| Reward Loss         | -5.07e+05 |
| Running Env Steps   | 45000     |
| Running Forward KL  | 148       |
| Running Reverse KL  | 1.76e+03  |
| Running Update Time | 9         |
-----------------------------------
--2024-08-10 21:12:00.184730 UTC---
| Itration            | 10        |
| Real Det Return     | 646       |
| Real Sto Return     | -149      |
| Reward Loss         | -6.55e+05 |
| Running Env Steps   | 50000     |
| Running Forward KL  | 147       |
| Running Reverse KL  | 2.13e+03  |
| Running Update Time | 10        |
-----------------------------------
--2024-08-10 21:13:16.449008 UTC---
| Itration            | 11        |
| Real Det Return     | 587       |
| Real Sto Return     | -162      |
| Reward Loss         | -6.96e+05 |
| Running Env Steps   | 55000     |
| Running Forward KL  | 145       |
| Running Reverse KL  | 1.15e+03  |
| Running Update Time | 11        |
-----------------------------------
--2024-08-10 21:14:32.468176 UTC---
| Itration            | 12        |
| Real Det Return     | 764       |
| Real Sto Return     | -172      |
| Reward Loss         | -6.07e+05 |
| Running Env Steps   | 60000     |
| Running Forward KL  | 146       |
| Running Reverse KL  | 1.62e+03  |
| Running Update Time | 12        |
-----------------------------------
--2024-08-10 21:15:49.106751 UTC--
| Itration            | 13       |
| Real Det Return     | 650      |
| Real Sto Return     | -173     |
| Reward Loss         | -7.6e+05 |
| Running Env Steps   | 65000    |
| Running Forward KL  | 146      |
| Running Reverse KL  | 1.68e+03 |
| Running Update Time | 13       |
----------------------------------
--2024-08-10 21:17:11.574526 UTC---
| Itration            | 14        |
| Real Det Return     | 720       |
| Real Sto Return     | -214      |
| Reward Loss         | -6.86e+05 |
| Running Env Steps   | 70000     |
| Running Forward KL  | 143       |
| Running Reverse KL  | 1.26e+03  |
| Running Update Time | 14        |
-----------------------------------
--2024-08-10 21:18:37.199442 UTC---
| Itration            | 15        |
| Real Det Return     | 782       |
| Real Sto Return     | -185      |
| Reward Loss         | -9.24e+05 |
| Running Env Steps   | 75000     |
| Running Forward KL  | 140       |
| Running Reverse KL  | 990       |
| Running Update Time | 15        |
-----------------------------------
--2024-08-10 21:20:03.095014 UTC---
| Itration            | 16        |
| Real Det Return     | 714       |
| Real Sto Return     | -153      |
| Reward Loss         | -1.01e+06 |
| Running Env Steps   | 80000     |
| Running Forward KL  | 139       |
| Running Reverse KL  | 942       |
| Running Update Time | 16        |
-----------------------------------
--2024-08-10 21:21:28.492105 UTC---
| Itration            | 17        |
| Real Det Return     | 730       |
| Real Sto Return     | -157      |
| Reward Loss         | -9.22e+05 |
| Running Env Steps   | 85000     |
| Running Forward KL  | 145       |
| Running Reverse KL  | 1.4e+03   |
| Running Update Time | 17        |
-----------------------------------
--2024-08-10 21:22:54.553259 UTC---
| Itration            | 18        |
| Real Det Return     | 595       |
| Real Sto Return     | -165      |
| Reward Loss         | -1.01e+06 |
| Running Env Steps   | 90000     |
| Running Forward KL  | 139       |
| Running Reverse KL  | 1.09e+03  |
| Running Update Time | 18        |
-----------------------------------
--2024-08-10 21:24:21.747733 UTC---
| Itration            | 19        |
| Real Det Return     | 716       |
| Real Sto Return     | -162      |
| Reward Loss         | -1.15e+06 |
| Running Env Steps   | 95000     |
| Running Forward KL  | 143       |
| Running Reverse KL  | 792       |
| Running Update Time | 19        |
-----------------------------------
--2024-08-10 21:25:47.576059 UTC---
| Itration            | 20        |
| Real Det Return     | 785       |
| Real Sto Return     | -161      |
| Reward Loss         | -1.38e+06 |
| Running Env Steps   | 100000    |
| Running Forward KL  | 143       |
| Running Reverse KL  | 1.63e+03  |
| Running Update Time | 20        |
-----------------------------------
--2024-08-10 21:27:15.434730 UTC---
| Itration            | 21        |
| Real Det Return     | 691       |
| Real Sto Return     | -163      |
| Reward Loss         | -1.34e+06 |
| Running Env Steps   | 105000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 462       |
| Running Update Time | 21        |
-----------------------------------
--2024-08-10 21:28:43.805849 UTC---
| Itration            | 22        |
| Real Det Return     | 708       |
| Real Sto Return     | -182      |
| Reward Loss         | -1.64e+06 |
| Running Env Steps   | 110000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 1.04e+03  |
| Running Update Time | 22        |
-----------------------------------
--2024-08-10 21:30:12.611238 UTC---
| Itration            | 23        |
| Real Det Return     | 744       |
| Real Sto Return     | -172      |
| Reward Loss         | -1.62e+06 |
| Running Env Steps   | 115000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 545       |
| Running Update Time | 23        |
-----------------------------------
--2024-08-10 21:31:41.385817 UTC---
| Itration            | 24        |
| Real Det Return     | 692       |
| Real Sto Return     | -229      |
| Reward Loss         | -1.48e+06 |
| Running Env Steps   | 120000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 1.36e+03  |
| Running Update Time | 24        |
-----------------------------------
--2024-08-10 21:33:11.623636 UTC---
| Itration            | 25        |
| Real Det Return     | 707       |
| Real Sto Return     | -252      |
| Reward Loss         | -1.69e+06 |
| Running Env Steps   | 125000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 1.33e+03  |
| Running Update Time | 25        |
-----------------------------------
--2024-08-10 21:34:40.635157 UTC--
| Itration            | 26       |
| Real Det Return     | 751      |
| Real Sto Return     | -190     |
| Reward Loss         | -1.9e+06 |
| Running Env Steps   | 130000   |
| Running Forward KL  | 141      |
| Running Reverse KL  | 1.09e+03 |
| Running Update Time | 26       |
----------------------------------
--2024-08-10 21:36:09.912074 UTC---
| Itration            | 27        |
| Real Det Return     | 776       |
| Real Sto Return     | -195      |
| Reward Loss         | -1.97e+06 |
| Running Env Steps   | 135000    |
| Running Forward KL  | 141       |
| Running Reverse KL  | 1.02e+03  |
| Running Update Time | 27        |
-----------------------------------
--2024-08-10 21:37:40.060489 UTC---
| Itration            | 28        |
| Real Det Return     | 681       |
| Real Sto Return     | -221      |
| Reward Loss         | -1.91e+06 |
| Running Env Steps   | 140000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 843       |
| Running Update Time | 28        |
-----------------------------------
--2024-08-10 21:39:10.752415 UTC---
| Itration            | 29        |
| Real Det Return     | 742       |
| Real Sto Return     | -240      |
| Reward Loss         | -1.97e+06 |
| Running Env Steps   | 145000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 908       |
| Running Update Time | 29        |
-----------------------------------
--2024-08-10 21:40:40.150159 UTC---
| Itration            | 30        |
| Real Det Return     | 800       |
| Real Sto Return     | -206      |
| Reward Loss         | -2.06e+06 |
| Running Env Steps   | 150000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 1.09e+03  |
| Running Update Time | 30        |
-----------------------------------
--2024-08-10 21:42:09.485138 UTC---
| Itration            | 31        |
| Real Det Return     | 819       |
| Real Sto Return     | -196      |
| Reward Loss         | -2.37e+06 |
| Running Env Steps   | 155000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 742       |
| Running Update Time | 31        |
-----------------------------------
--2024-08-10 21:43:39.776897 UTC--
| Itration            | 32       |
| Real Det Return     | 679      |
| Real Sto Return     | -231     |
| Reward Loss         | -2.3e+06 |
| Running Env Steps   | 160000   |
| Running Forward KL  | 143      |
| Running Reverse KL  | 1.38e+03 |
| Running Update Time | 32       |
----------------------------------
--2024-08-10 21:45:09.924488 UTC---
| Itration            | 33        |
| Real Det Return     | 707       |
| Real Sto Return     | -182      |
| Reward Loss         | -2.24e+06 |
| Running Env Steps   | 165000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 882       |
| Running Update Time | 33        |
-----------------------------------
--2024-08-10 21:46:38.398896 UTC---
| Itration            | 34        |
| Real Det Return     | 829       |
| Real Sto Return     | -191      |
| Reward Loss         | -2.54e+06 |
| Running Env Steps   | 170000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 1.7e+03   |
| Running Update Time | 34        |
-----------------------------------
--2024-08-10 21:48:10.949781 UTC---
| Itration            | 35        |
| Real Det Return     | 764       |
| Real Sto Return     | -240      |
| Reward Loss         | -2.55e+06 |
| Running Env Steps   | 175000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 634       |
| Running Update Time | 35        |
-----------------------------------
--2024-08-10 21:49:42.089315 UTC---
| Itration            | 36        |
| Real Det Return     | 786       |
| Real Sto Return     | -187      |
| Reward Loss         | -2.75e+06 |
| Running Env Steps   | 180000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 1.17e+03  |
| Running Update Time | 36        |
-----------------------------------
--2024-08-10 21:51:15.235197 UTC--
| Itration            | 37       |
| Real Det Return     | 791      |
| Real Sto Return     | -253     |
| Reward Loss         | -2.6e+06 |
| Running Env Steps   | 185000   |
| Running Forward KL  | 142      |
| Running Reverse KL  | 594      |
| Running Update Time | 37       |
----------------------------------
--2024-08-10 21:52:46.920929 UTC---
| Itration            | 38        |
| Real Det Return     | 758       |
| Real Sto Return     | -204      |
| Reward Loss         | -2.57e+06 |
| Running Env Steps   | 190000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 845       |
| Running Update Time | 38        |
-----------------------------------
--2024-08-10 21:54:18.689498 UTC---
| Itration            | 39        |
| Real Det Return     | 705       |
| Real Sto Return     | -175      |
| Reward Loss         | -3.06e+06 |
| Running Env Steps   | 195000    |
| Running Forward KL  | 143       |
| Running Reverse KL  | 610       |
| Running Update Time | 39        |
-----------------------------------
--2024-08-10 21:55:51.304309 UTC---
| Itration            | 40        |
| Real Det Return     | 719       |
| Real Sto Return     | -242      |
| Reward Loss         | -2.78e+06 |
| Running Env Steps   | 200000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 715       |
| Running Update Time | 40        |
-----------------------------------
--2024-08-10 21:57:24.138551 UTC---
| Itration            | 41        |
| Real Det Return     | 753       |
| Real Sto Return     | -184      |
| Reward Loss         | -2.86e+06 |
| Running Env Steps   | 205000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 795       |
| Running Update Time | 41        |
-----------------------------------
--2024-08-10 21:58:58.071084 UTC---
| Itration            | 42        |
| Real Det Return     | 774       |
| Real Sto Return     | -246      |
| Reward Loss         | -3.04e+06 |
| Running Env Steps   | 210000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 402       |
| Running Update Time | 42        |
-----------------------------------
--2024-08-10 22:00:32.039683 UTC---
| Itration            | 43        |
| Real Det Return     | 763       |
| Real Sto Return     | -251      |
| Reward Loss         | -2.92e+06 |
| Running Env Steps   | 215000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 796       |
| Running Update Time | 43        |
-----------------------------------
--2024-08-10 22:02:05.682506 UTC---
| Itration            | 44        |
| Real Det Return     | 756       |
| Real Sto Return     | -217      |
| Reward Loss         | -3.22e+06 |
| Running Env Steps   | 220000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 523       |
| Running Update Time | 44        |
-----------------------------------
--2024-08-10 22:03:39.785259 UTC---
| Itration            | 45        |
| Real Det Return     | 687       |
| Real Sto Return     | -202      |
| Reward Loss         | -3.46e+06 |
| Running Env Steps   | 225000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 416       |
| Running Update Time | 45        |
-----------------------------------
--2024-08-10 22:05:12.362920 UTC---
| Itration            | 46        |
| Real Det Return     | 800       |
| Real Sto Return     | -184      |
| Reward Loss         | -3.41e+06 |
| Running Env Steps   | 230000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 907       |
| Running Update Time | 46        |
-----------------------------------
--2024-08-10 22:06:45.343547 UTC---
| Itration            | 47        |
| Real Det Return     | 840       |
| Real Sto Return     | -224      |
| Reward Loss         | -3.45e+06 |
| Running Env Steps   | 235000    |
| Running Forward KL  | 138       |
| Running Reverse KL  | 948       |
| Running Update Time | 47        |
-----------------------------------
--2024-08-10 22:08:20.900942 UTC---
| Itration            | 48        |
| Real Det Return     | 849       |
| Real Sto Return     | -278      |
| Reward Loss         | -3.57e+06 |
| Running Env Steps   | 240000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 282       |
| Running Update Time | 48        |
-----------------------------------
--2024-08-10 22:09:56.054746 UTC---
| Itration            | 49        |
| Real Det Return     | 764       |
| Real Sto Return     | -220      |
| Reward Loss         | -3.61e+06 |
| Running Env Steps   | 245000    |
| Running Forward KL  | 138       |
| Running Reverse KL  | 230       |
| Running Update Time | 49        |
-----------------------------------
--2024-08-10 22:11:30.620307 UTC---
| Itration            | 50        |
| Real Det Return     | 703       |
| Real Sto Return     | -206      |
| Reward Loss         | -3.71e+06 |
| Running Env Steps   | 250000    |
| Running Forward KL  | 141       |
| Running Reverse KL  | 440       |
| Running Update Time | 50        |
-----------------------------------
--2024-08-10 22:13:05.824506 UTC---
| Itration            | 51        |
| Real Det Return     | 762       |
| Real Sto Return     | -219      |
| Reward Loss         | -3.66e+06 |
| Running Env Steps   | 255000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 611       |
| Running Update Time | 51        |
-----------------------------------
--2024-08-10 22:14:40.707606 UTC---
| Itration            | 52        |
| Real Det Return     | 747       |
| Real Sto Return     | -182      |
| Reward Loss         | -4.04e+06 |
| Running Env Steps   | 260000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 324       |
| Running Update Time | 52        |
-----------------------------------
--2024-08-10 22:16:15.817137 UTC---
| Itration            | 53        |
| Real Det Return     | 779       |
| Real Sto Return     | -213      |
| Reward Loss         | -4.17e+06 |
| Running Env Steps   | 265000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 658       |
| Running Update Time | 53        |
-----------------------------------
--2024-08-10 22:17:52.578863 UTC---
| Itration            | 54        |
| Real Det Return     | 789       |
| Real Sto Return     | -245      |
| Reward Loss         | -4.14e+06 |
| Running Env Steps   | 270000    |
| Running Forward KL  | 136       |
| Running Reverse KL  | 473       |
| Running Update Time | 54        |
-----------------------------------
--2024-08-10 22:19:27.356278 UTC---
| Itration            | 55        |
| Real Det Return     | 815       |
| Real Sto Return     | -186      |
| Reward Loss         | -4.13e+06 |
| Running Env Steps   | 275000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 507       |
| Running Update Time | 55        |
-----------------------------------
--2024-08-10 22:21:04.970438 UTC---
| Itration            | 56        |
| Real Det Return     | 814       |
| Real Sto Return     | -242      |
| Reward Loss         | -4.25e+06 |
| Running Env Steps   | 280000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 270       |
| Running Update Time | 56        |
-----------------------------------
--2024-08-10 22:22:40.699530 UTC---
| Itration            | 57        |
| Real Det Return     | 738       |
| Real Sto Return     | -229      |
| Reward Loss         | -4.25e+06 |
| Running Env Steps   | 285000    |
| Running Forward KL  | 136       |
| Running Reverse KL  | 325       |
| Running Update Time | 57        |
-----------------------------------
--2024-08-10 22:24:17.256803 UTC---
| Itration            | 58        |
| Real Det Return     | 759       |
| Real Sto Return     | -256      |
| Reward Loss         | -4.51e+06 |
| Running Env Steps   | 290000    |
| Running Forward KL  | 142       |
| Running Reverse KL  | 509       |
| Running Update Time | 58        |
-----------------------------------
--2024-08-10 22:25:56.190683 UTC---
| Itration            | 59        |
| Real Det Return     | 652       |
| Real Sto Return     | -288      |
| Reward Loss         | -4.53e+06 |
| Running Env Steps   | 295000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 390       |
| Running Update Time | 59        |
-----------------------------------
--2024-08-10 22:27:34.702844 UTC---
| Itration            | 60        |
| Real Det Return     | 715       |
| Real Sto Return     | -248      |
| Reward Loss         | -4.68e+06 |
| Running Env Steps   | 300000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 166       |
| Running Update Time | 60        |
-----------------------------------
--2024-08-10 22:29:12.442558 UTC---
| Itration            | 61        |
| Real Det Return     | 733       |
| Real Sto Return     | -232      |
| Reward Loss         | -4.78e+06 |
| Running Env Steps   | 305000    |
| Running Forward KL  | 141       |
| Running Reverse KL  | 547       |
| Running Update Time | 61        |
-----------------------------------
--2024-08-10 22:30:50.109253 UTC--
| Itration            | 62       |
| Real Det Return     | 675      |
| Real Sto Return     | -260     |
| Reward Loss         | -5e+06   |
| Running Env Steps   | 310000   |
| Running Forward KL  | 142      |
| Running Reverse KL  | 562      |
| Running Update Time | 62       |
----------------------------------
--2024-08-10 22:32:27.500936 UTC---
| Itration            | 63        |
| Real Det Return     | 789       |
| Real Sto Return     | -255      |
| Reward Loss         | -4.95e+06 |
| Running Env Steps   | 315000    |
| Running Forward KL  | 138       |
| Running Reverse KL  | 408       |
| Running Update Time | 63        |
-----------------------------------
--2024-08-10 22:34:04.098683 UTC---
| Itration            | 64        |
| Real Det Return     | 639       |
| Real Sto Return     | -249      |
| Reward Loss         | -4.91e+06 |
| Running Env Steps   | 320000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 929       |
| Running Update Time | 64        |
-----------------------------------
--2024-08-10 22:35:41.742078 UTC---
| Itration            | 65        |
| Real Det Return     | 830       |
| Real Sto Return     | -235      |
| Reward Loss         | -4.89e+06 |
| Running Env Steps   | 325000    |
| Running Forward KL  | 135       |
| Running Reverse KL  | 299       |
| Running Update Time | 65        |
-----------------------------------
--2024-08-10 22:37:18.692331 UTC---
| Itration            | 66        |
| Real Det Return     | 783       |
| Real Sto Return     | -213      |
| Reward Loss         | -5.15e+06 |
| Running Env Steps   | 330000    |
| Running Forward KL  | 138       |
| Running Reverse KL  | 345       |
| Running Update Time | 66        |
-----------------------------------
--2024-08-10 22:38:55.771576 UTC---
| Itration            | 67        |
| Real Det Return     | 829       |
| Real Sto Return     | -200      |
| Reward Loss         | -5.18e+06 |
| Running Env Steps   | 335000    |
| Running Forward KL  | 135       |
| Running Reverse KL  | 642       |
| Running Update Time | 67        |
-----------------------------------
--2024-08-10 22:40:33.921216 UTC---
| Itration            | 68        |
| Real Det Return     | 678       |
| Real Sto Return     | -283      |
| Reward Loss         | -5.46e+06 |
| Running Env Steps   | 340000    |
| Running Forward KL  | 139       |
| Running Reverse KL  | 702       |
| Running Update Time | 68        |
-----------------------------------
--2024-08-10 22:42:12.279179 UTC---
| Itration            | 69        |
| Real Det Return     | 763       |
| Real Sto Return     | -247      |
| Reward Loss         | -5.37e+06 |
| Running Env Steps   | 345000    |
| Running Forward KL  | 141       |
| Running Reverse KL  | 510       |
| Running Update Time | 69        |
-----------------------------------
--2024-08-10 22:43:51.264395 UTC--
| Itration            | 70       |
| Real Det Return     | 807      |
| Real Sto Return     | -227     |
| Reward Loss         | -5.6e+06 |
| Running Env Steps   | 350000   |
| Running Forward KL  | 137      |
| Running Reverse KL  | 219      |
| Running Update Time | 70       |
----------------------------------
--2024-08-10 22:45:29.258106 UTC---
| Itration            | 71        |
| Real Det Return     | 713       |
| Real Sto Return     | -233      |
| Reward Loss         | -5.83e+06 |
| Running Env Steps   | 355000    |
| Running Forward KL  | 138       |
| Running Reverse KL  | 548       |
| Running Update Time | 71        |
-----------------------------------
--2024-08-10 22:47:07.370291 UTC---
| Itration            | 72        |
| Real Det Return     | 684       |
| Real Sto Return     | -219      |
| Reward Loss         | -5.74e+06 |
| Running Env Steps   | 360000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 569       |
| Running Update Time | 72        |
-----------------------------------
--2024-08-10 22:48:45.338727 UTC---
| Itration            | 73        |
| Real Det Return     | 620       |
| Real Sto Return     | -211      |
| Reward Loss         | -5.98e+06 |
| Running Env Steps   | 365000    |
| Running Forward KL  | 141       |
| Running Reverse KL  | 149       |
| Running Update Time | 73        |
-----------------------------------
--2024-08-10 22:50:23.127148 UTC---
| Itration            | 74        |
| Real Det Return     | 789       |
| Real Sto Return     | -237      |
| Reward Loss         | -5.77e+06 |
| Running Env Steps   | 370000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 1.05e+03  |
| Running Update Time | 74        |
-----------------------------------
--2024-08-10 22:52:01.616824 UTC---
| Itration            | 75        |
| Real Det Return     | 798       |
| Real Sto Return     | -217      |
| Reward Loss         | -6.11e+06 |
| Running Env Steps   | 375000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 537       |
| Running Update Time | 75        |
-----------------------------------
--2024-08-10 22:53:41.434254 UTC--
| Itration            | 76       |
| Real Det Return     | 770      |
| Real Sto Return     | -251     |
| Reward Loss         | -6.1e+06 |
| Running Env Steps   | 380000   |
| Running Forward KL  | 139      |
| Running Reverse KL  | 411      |
| Running Update Time | 76       |
----------------------------------
--2024-08-10 22:55:20.432773 UTC--
| Itration            | 77       |
| Real Det Return     | 632      |
| Real Sto Return     | -229     |
| Reward Loss         | -6.3e+06 |
| Running Env Steps   | 385000   |
| Running Forward KL  | 137      |
| Running Reverse KL  | 46.4     |
| Running Update Time | 77       |
----------------------------------
--2024-08-10 22:56:58.401256 UTC--
| Itration            | 78       |
| Real Det Return     | 760      |
| Real Sto Return     | -185     |
| Reward Loss         | -6.4e+06 |
| Running Env Steps   | 390000   |
| Running Forward KL  | 138      |
| Running Reverse KL  | 534      |
| Running Update Time | 78       |
----------------------------------
--2024-08-10 22:58:37.751416 UTC---
| Itration            | 79        |
| Real Det Return     | 739       |
| Real Sto Return     | -214      |
| Reward Loss         | -6.57e+06 |
| Running Env Steps   | 395000    |
| Running Forward KL  | 137       |
| Running Reverse KL  | 74.1      |
| Running Update Time | 79        |
-----------------------------------
--2024-08-10 23:00:16.511595 UTC---
| Itration            | 80        |
| Real Det Return     | 741       |
| Real Sto Return     | -224      |
| Reward Loss         | -6.72e+06 |
| Running Env Steps   | 400000    |
| Running Forward KL  | 141       |
| Running Reverse KL  | 928       |
| Running Update Time | 80        |
-----------------------------------
--2024-08-10 23:01:57.846310 UTC---
| Itration            | 81        |
| Real Det Return     | 801       |
| Real Sto Return     | -248      |
| Reward Loss         | -6.79e+06 |
| Running Env Steps   | 405000    |
| Running Forward KL  | 140       |
| Running Reverse KL  | 124       |
| Running Update Time | 81        |
-----------------------------------
