Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
14.702413082122803	0	-0.813261981599876	19.297677396125206	29.944989854469895	-27.727565122768283	0	0.0	0
74.76758766174316	10	30.646280044690293	10.746947424790728	48.36267713038251	6.807979497796623	1000000	0.9090909090909091	0.25
135.03845262527466	20	33.81654483520283	8.752492977379616	49.34231580980122	15.477585303284286	2000000	0.9523809523809523	0.25
194.9023880958557	30	35.992312650820885	6.223822830554156	46.25279922876507	22.51976159377955	3000000	0.967741935483871	0.25
253.48615074157715	40	38.00384198049391	6.211730098103456	49.240801722276956	23.346843839564826	4000000	0.975609756097561	0.25
316.9052257537842	50	39.28433258838664	4.769549132560072	47.371947842184454	28.727831420197617	5000000	0.9803921568627451	0.25
373.1349895000458	60	42.69589231972525	15.09038268013829	79.54920439164562	9.965785714746744	6000000	0.9836065573770492	0.25
429.4614248275757	70	98.91625900092244	7.428024416013978	114.14544049775782	79.36621552181896	7000000	0.9859154929577465	0.25
484.63449239730835	80	147.83958973811474	5.88413589608088	158.7531275436013	130.74022515979595	8000000	0.9876543209876543	0.25
540.0233867168427	90	211.3844518981671	4.98636232553291	220.62509991636034	198.28229227665724	9000000	0.989010989010989	0.25
596.7461500167847	100	287.99722650033226	3.488443332479055	294.7569696948631	281.9354249527096	10000000	0.9900990099009901	0.25
