Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
16.09101676940918	0	-0.813261981599876	19.297677396125206	29.944989854469895	-27.727565122768283	0	0.0	0
76.68282675743103	10	65.66792062294688	6.691674898606225	83.14631197290873	49.77648113817486	1000000	0.9090909090909091	0.25
133.33283495903015	20	175.4110268306506	4.561251239172583	183.90359177239952	166.19332991763076	2000000	0.9523809523809523	0.25
191.92092633247375	30	291.28084016637905	3.4873110626029056	297.95351122156717	282.9826069052797	3000000	0.967741935483871	0.25
247.84765148162842	40	339.8586325972668	2.1573629748538825	344.3563012572122	334.6689953475725	4000000	0.975609756097561	0.25
306.8979423046112	50	352.06419118906706	2.7555489687996215	357.97019403713057	345.6136634289287	5000000	0.9803921568627451	0.25
360.7993788719177	60	353.3367589565178	2.1849812247453113	358.1951971676899	347.77491465373896	6000000	0.9836065573770492	0.25
420.0377731323242	70	355.493471528966	2.324111735234033	359.99297741389455	347.78265924728476	7000000	0.9859154929577465	0.25
474.4819903373718	80	356.55107760494474	2.4747711554193774	361.03316690219344	348.7107915900415	8000000	0.9876543209876543	0.25
530.8775253295898	90	355.4477409942612	2.240040229039984	359.92208369105356	349.44229362136684	9000000	0.989010989010989	0.25
590.8475670814514	100	355.74159312802607	2.7276357507535764	361.30978295207024	348.252729311047	10000000	0.9900990099009901	0.25
