Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
37.705026388168335	0	0.10479357355305183	19.050881585284408	28.75317772757262	-27.52097297552973	0	0.0	0
662.4616124629974	800	342.56006262426934	1.0881140280324906	344.9531306666904	340.1395373871492	1600000	7.924863562871544	8.260889465720847
1284.9062676429749	1600	351.65593029140024	4.51219298260501	357.5896330225514	330.2727344176674	3200000	7.869003088393462	8.37528894303408
1907.457118988037	2400	342.27000210333523	1.6193023746588726	347.495070942794	339.2614562191593	4800000	7.897083953141737	8.37528894303408
2529.4618151187897	3200	341.8000487095199	1.461427953214803	345.9999601787131	337.12940391135635	6400000	7.833177100323225	7.638693100622875
3152.1538424491882	4000	342.7840705426879	42.94624500915777	357.8747548247338	37.677050588536076	8000000	7.934125038110108	7.787934785185086
3775.1503763198853	4800	342.6265384296419	1.9585519296720337	346.70250639227015	337.03786762827076	9600000	7.861790833369963	8.154878941402751
3940.954236268997	5000	341.61724278005903	1.5741634073298156	344.54852077178657	337.70588084543124	10000000	7.821659637154364	8.177973026888267
