Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
8.648420810699463	0	0.10479357355305183	19.050881585284408	28.75317772757262	-27.52097297552973	0	0.0	0
67.14652013778687	10	64.5931682540813	10.967058414010278	89.64380553818773	40.758660032006446	1000000	0.9090909090909091	0.25
121.17968845367432	20	200.07860624287423	4.317724438850601	208.0270108913537	186.98115552859963	2000000	0.9523809523809523	0.25
178.7774875164032	30	314.6891978615044	2.841273120214472	319.89131219661795	308.4788959020516	3000000	0.967741935483871	0.25
234.68499970436096	40	345.6516607071782	2.0245953988514436	350.7054160712287	340.6316803343361	4000000	0.975609756097561	0.25
293.2829430103302	50	353.4345295655125	2.380014878203368	358.052247675485	346.6391724962741	5000000	0.9803921568627451	0.25
350.7696440219879	60	354.08322874265565	3.0113736928426706	359.29391794349067	345.82856537995394	6000000	0.9836065573770492	0.25
408.78290581703186	70	355.7244841794363	2.8114282524558547	361.5077614861075	349.04065463133156	7000000	0.9859154929577465	0.25
464.7855041027069	80	354.38370029012515	2.5449049985273615	359.05359453089477	347.5323876195762	8000000	0.9876543209876543	0.25
521.5575668811798	90	355.8154042742327	2.242252188684986	360.2945503515366	348.78006663911947	9000000	0.989010989010989	0.25
580.2615706920624	100	355.9567043347285	2.674164349932506	360.9242240471067	348.567303067146	10000000	0.9900990099009901	0.25
