Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
37.28464102745056	0	0.10479357355305183	19.050881585284408	28.75317772757262	-27.52097297552973	0	0.0	0
662.0359437465668	800	351.6067670889708	1.2090586739265827	354.10772171104327	349.4610067207832	1600000	0.9987515605493134	0.25
1284.327487230301	1600	353.37806735338154	1.6221376364736562	356.2859251003392	346.8748064482934	3200000	0.999000999000999	0.25
1908.0763363838196	2400	354.6264623595726	1.4605209030807231	357.14173317470704	350.5463291464257	4800000	0.999000999000999	0.25
2529.6578176021576	3200	354.89048025908386	2.0850632880279383	359.38288602244575	343.9349208722124	6400000	0.999000999000999	0.25
3153.0376949310303	4000	357.6679746887837	1.9837715749222453	360.66285866557155	351.4582334297884	8000000	0.999000999000999	0.25
3774.191458463669	4800	354.1872189968864	1.2281247498679015	356.8184579675726	351.23395518556936	9600000	0.999000999000999	0.25
3939.254330396652	5000	358.487865137645	2.0100465280799975	361.14687380016403	350.214000556618	10000000	0.999000999000999	0.25
