Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
14.910943031311035	0	-1.4489036436221607	18.17712229330612	27.28727449197322	-27.71342122834176	0	0.0	0
72.65046000480652	10	123.05878977613821	5.128334624125557	133.17923238524236	110.81765821197769	1000000	0.9090909090909091	0.25
122.67949438095093	20	236.43075985187144	2.796983238684019	243.35935327026527	228.69617151207058	2000000	0.9523809523809523	0.25
176.25395131111145	30	326.5743425100217	2.4652227462061007	332.25135773129296	320.5254148046952	3000000	0.967741935483871	0.25
233.53301858901978	40	348.26451828486375	2.365478088741287	352.949277879874	342.7015419860836	4000000	0.975609756097561	0.25
288.1283779144287	50	353.59067921247697	2.4461242920129367	358.483806514545	347.77396126044914	5000000	0.9803921568627451	0.25
346.69389176368713	60	354.8351338863856	2.351509872490415	359.8949585244991	347.3526805911388	6000000	0.9836065573770492	0.25
402.116818189621	70	354.23571581641505	2.4997764069381767	360.1970862200251	347.57201298349537	7000000	0.9859154929577465	0.25
456.7372052669525	80	353.79590764190885	2.2061372558084495	357.7890797484433	346.6099925135495	8000000	0.9876543209876543	0.25
514.6878473758698	90	354.7830928464298	2.343740975728898	359.3410147822797	349.2069659666595	9000000	0.989010989010989	0.25
568.3056929111481	100	353.98371128865654	2.1291529847077584	358.87550392902267	347.231482720701	10000000	0.9900990099009901	0.25
