Running Update Time,Itration,Real Det Return,Real Sto Return,Running Reverse KL,Reward Loss,Running Forward KL,Running Env Steps
0,0,123.56,-325.25,12.3404,150.0644073486328,19.6368,0
1,1,74.45,-388.62,11.8837,150.54534912109375,19.3428,5000
2,2,137.91,-369.94,12.3983,171.8399658203125,19.6106,10000
3,3,236.82,-297.12,12.3888,111.47563171386719,19.7123,15000
4,4,-33.4,-352.17,12.8627,94.27352142333984,20.0067,20000
5,5,88.84,-314.8,12.6215,103.63972473144531,19.6319,25000
6,6,188.0,-258.47,12.8506,49.961448669433594,19.8734,30000
7,7,59.25,-282.63,12.9777,46.57402801513672,19.874,35000
8,8,122.25,-287.22,12.9879,28.991832733154297,19.935,40000
9,9,-157.02,-233.44,12.7952,21.59677505493164,19.8877,45000
10,10,284.43,-278.19,12.7646,-24.872272491455078,19.9365,50000
11,11,222.24,-226.69,12.6894,-27.68164825439453,19.8234,55000
12,12,631.96,-236.96,12.7543,-65.00944519042969,19.6316,60000
13,13,182.07,-250.64,12.5003,-76.0894546508789,19.6398,65000
14,14,436.88,-219.44,12.5211,-87.28340148925781,19.6866,70000
15,15,503.85,-204.47,12.3205,-109.43356323242188,19.3764,75000
16,16,537.07,-225.47,12.2629,-140.78646850585938,19.7331,80000
17,17,750.05,-204.12,12.3051,-156.7428436279297,19.6173,85000
18,18,707.77,-185.35,11.9745,-170.00607299804688,19.1749,90000
19,19,716.43,-213.45,12.453,-220.6397705078125,19.557,95000
20,20,844.41,-153.84,12.2491,-207.6145782470703,19.246,100000
21,21,645.78,-164.21,12.233,-232.67352294921875,19.3626,105000
