Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
16.29118299484253	0	-0.3023757225339068	20.43353154720061	29.695772328414023	-28.46130512468517	0	0.0	0
88.24430823326111	10	158.60262172333518	3.806085647601576	165.07276586879743	149.3188817386981	1000000	0.9090909090909091	0.25
156.59124422073364	20	230.20146556087664	3.179990188959927	236.13949512373074	222.88989665964618	2000000	0.9523809523809523	0.25
225.2088930606842	30	282.9454953126162	2.8697362375225914	288.64009041065583	275.84549814589263	3000000	0.967741935483871	0.25
293.7207567691803	40	318.94970725429425	3.0955060042323645	325.2100664540776	311.384637803596	4000000	0.975609756097561	0.25
361.9173491001129	50	337.0961348976442	2.6315398216256463	344.40186855744105	331.6327412420651	5000000	0.9803921568627451	0.25
430.2803523540497	60	347.1183831990735	2.495281621355745	353.12368559348397	340.77692246466177	6000000	0.9836065573770492	0.25
497.415554523468	70	352.1633595225129	2.7710274266772497	356.86678081870195	343.8118753074232	7000000	0.9859154929577465	0.25
566.832704782486	80	353.6983714625843	1.9954607693100956	357.9933174746111	348.7735354367178	8000000	0.9876543209876543	0.25
632.7028684616089	90	353.28812568736146	1.8891144206063726	358.938791793189	349.42536983620084	9000000	0.989010989010989	0.25
699.3658838272095	100	355.9651613522732	2.2063609313819725	359.973568642803	350.27593198791146	10000000	0.9900990099009901	0.25
