Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
38.121564626693726	0	-0.3023757225339068	20.43353154720061	29.695772328414023	-28.46130512468517	0	0.0	0
666.0777907371521	800	355.83767911396325	2.6480424784840872	360.3186180590419	347.2382999457768	1600000	0.9987515605493134	0.8313389235496507
1290.508939743042	1600	354.260616911896	1.8249826235868558	358.4901904181752	350.16992054691946	3200000	0.999000999000999	0.8175755011293354
1922.2514567375183	2400	355.0306553449767	2.0887114636269177	359.50895933480933	349.02234121772926	4800000	0.999000999000999	0.8337903824682213
2548.3448991775513	3200	355.19041198004675	2.302882870980064	360.014756329736	349.58016361220507	6400000	0.999000999000999	0.8653676224052976
3174.324637889862	4000	355.9947442987327	2.3933509618146958	360.96146925073117	349.7334064368624	8000000	0.999000999000999	0.8212950366383993
3801.9393515586853	4800	355.41368669031965	1.605386357616544	359.798604642594	351.8376666284166	9600000	0.999000999000999	0.8029206065350465
3968.4616301059723	5000	356.72838422909774	2.282335364325667	361.1427277006442	350.19788436515955	10000000	0.999000999000999	0.7981390429669729
