Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
14.64535665512085	0	-1.4489036436221607	18.17712229330612	27.28727449197322	-27.71342122834176	0	0.0	0
70.48167562484741	10	69.01593953285746	12.60474788639019	94.31486495313584	40.51907332323026	1000000	0.9090909090909091	0.25
126.61514043807983	20	206.25477008935792	3.982021639786878	214.04916452476755	194.4438265011413	2000000	0.9523809523809523	0.25
177.16090488433838	30	313.30375579559114	2.707820236267943	319.6398046298418	305.79509550120565	3000000	0.967741935483871	0.25
229.72395300865173	40	346.816726210181	2.352947368528754	351.2716394169838	341.36129233572865	4000000	0.975609756097561	0.25
283.11728715896606	50	351.96631873573375	2.4950119468669385	357.9584597493522	346.44350912130903	5000000	0.9803921568627451	0.25
339.10705971717834	60	354.17493279034636	2.372000942093457	358.65854661294725	348.1064298292622	6000000	0.9836065573770492	0.25
391.73437905311584	70	353.4624425911737	1.9183206615500779	358.17294066003524	348.75169626652496	7000000	0.9859154929577465	0.25
445.78993916511536	80	353.83932634387634	2.2536825024240503	358.48327384286677	348.7623431123793	8000000	0.9876543209876543	0.25
504.78996443748474	90	354.3345780159689	2.5116835617907975	359.00019623200933	345.76930460886797	9000000	0.989010989010989	0.25
561.7788343429565	100	353.9293857475793	2.108671608701867	358.75613883160986	346.5648549378384	10000000	0.9900990099009901	0.25
