Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
36.93788814544678	0	-0.3023757225339068	20.43353154720061	29.695772328414023	-28.46130512468517	0	0.0	0
657.7353870868683	800	352.6438364505196	1.4411889307683083	355.7925628396915	349.43146664553205	1600000	1.8563111249253923	0.5
1276.3118333816528	1600	351.1772695533619	3.672059907937177	358.03854354610667	342.7268456162419	3200000	1.8571040168901516	0.5
1895.8669157028198	2400	350.9189283350052	3.3724047704982447	356.93017601655447	341.1685632125591	4800000	1.856802835108109	0.5
2514.4355132579803	3200	353.8103990971416	2.4477580275151083	361.4824507670601	348.8270832215203	6400000	1.8573941506976792	0.5
3136.035579442978	4000	351.82681917639854	5.5644101836750295	361.662915003486	332.7913883002475	8000000	1.859359805565966	0.5
3756.829734802246	4800	355.51172368560145	2.1515692601505494	359.2450665843935	348.84697520779446	9600000	1.85772531919381	0.5
3922.6096534729004	5000	355.3005352800457	1.7771407514261104	358.4056938881986	350.016039806389	10000000	1.8591976632268479	0.5
