Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
38.41207957267761	0	-1.4489036436221607	18.17712229330612	27.28727449197322	-27.71342122834176	0	0.0	0
658.8410382270813	800	352.72014172914163	1.9022548467636258	356.37196719700296	347.4248797299806	1600000	0.9987515605493134	0.25
1275.9959168434143	1600	352.52399320803323	2.4505914529921107	356.8802410986391	344.32618996715473	3200000	0.999000999000999	0.25
1892.4606747627258	2400	356.85093469283544	1.920660897590956	360.5904980318446	352.0451242935378	4800000	0.999000999000999	0.25
2507.743226289749	3200	357.32773627118513	1.9986200931570697	360.5781715810881	347.2915437493939	6400000	0.999000999000999	0.25
3123.6978163719177	4000	357.36240675014363	1.4340231134616332	360.36195564949776	353.50728368846467	8000000	0.999000999000999	0.25
3738.708917617798	4800	359.4023386796478	1.3068723714678356	361.64117964555044	356.9879038219806	9600000	0.999000999000999	0.25
3904.9712913036346	5000	356.06411552404205	1.2219533412269705	358.1832397840917	353.9489219210227	10000000	0.999000999000999	0.25
