Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
15.800830125808716	0	-0.813261981599876	19.297677396125206	29.944989854469895	-27.727565122768283	0	0.0	0
86.08830142021179	10	27.65587338471364	13.065283238888542	48.777241428382695	0.1964836017905327	1000000	0.9090909090909091	0.25
157.38528895378113	20	26.121499171105114	15.056758088264875	48.20443117758259	-2.879611598993506	2000000	0.9523809523809523	0.25
226.62666034698486	30	22.332060494431445	17.86322042869826	48.1931734951213	-17.518442684062393	3000000	0.967741935483871	0.25
296.7919719219208	40	23.802346494171307	14.827829263984121	46.5595860555768	-2.0011121940915473	4000000	0.975609756097561	0.25
366.89525294303894	50	22.327786807061496	16.21794441858488	47.839391256682575	-6.908348544544424	5000000	0.9803921568627451	0.25
435.88890743255615	60	37.96596127346366	9.025237577835025	52.61437811358337	10.808509279841019	6000000	0.9836065573770492	0.25
505.45019364356995	70	51.08209494401737	14.967774721374449	66.67862094608427	25.363569874672976	7000000	0.9859154929577465	0.25
575.8370051383972	80	50.55731717944272	17.207899516278335	71.61593926543719	24.22569443415705	8000000	0.9876543209876543	0.25
644.7993137836456	90	54.633467412450756	15.89268233595198	71.13856080034748	27.96281374791579	9000000	0.989010989010989	0.25
714.4755954742432	100	54.387268782668045	18.48174131614085	74.49745173432166	27.144196414665203	10000000	0.9900990099009901	0.25
