Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
16.00397777557373	0	-0.3023757225339068	20.43353154720061	29.695772328414023	-28.46130512468517	0	0.0	0
73.10229301452637	10	114.56904789808046	4.460975710038755	122.86792038450949	102.56336882673531	1000000	0.9090909090909091	0.25
129.60883140563965	20	234.00672839633498	3.2048886014467133	240.7104010695184	226.99152465260704	2000000	0.9523809523809523	0.25
184.67628860473633	30	325.75530568287763	2.4187134916968502	331.64578011460253	318.8633198989555	3000000	0.967741935483871	0.25
238.73594975471497	40	349.5803399451607	2.58093455592772	355.2368696129415	343.28888922499027	4000000	0.975609756097561	0.25
296.6079170703888	50	353.4139758094327	2.3810627692337154	358.29563048214186	348.0420491398254	5000000	0.9803921568627451	0.25
353.19278597831726	60	354.2576322709937	2.256610352594443	359.65133570029866	349.28018662316026	6000000	0.9836065573770492	0.25
408.96921133995056	70	353.93980997580206	1.9677441648068386	357.80514266907085	348.04463399206725	7000000	0.9859154929577465	0.25
468.6166937351227	80	355.3567853354356	2.6671132325795526	360.0900418072124	345.56012676359387	8000000	0.9876543209876543	0.25
526.4596865177155	90	356.2216464155387	2.6575924315896686	361.36479266980314	349.83978775476135	9000000	0.989010989010989	0.25
584.352264881134	100	355.932156338817	2.63723869661457	360.8616644100148	348.4499301846372	10000000	0.9900990099009901	0.25
