Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
15.32203197479248	0	-0.3023757225339068	20.43353154720061	29.695772328414023	-28.46130512468517	0	0.0	0
84.86010575294495	10	162.32054669720256	3.592691985974256	168.65255002574122	155.1114368622366	1000000	0.9090909090909091	0.25
153.69709181785583	20	231.27917470018102	3.8356667922875296	237.637715889432	219.02560529691982	2000000	0.9523809523809523	0.25
220.59092330932617	30	286.29791568496034	3.0764132817907037	292.4125681509613	278.56603101460496	3000000	0.967741935483871	0.25
287.9353573322296	40	316.9141083267517	2.4210637834502373	321.21094116801396	310.36733236094005	4000000	0.975609756097561	0.25
354.6957788467407	50	338.16458156622446	1.956228270242129	342.5049184082309	331.75866872043116	5000000	0.9803921568627451	0.25
422.65435099601746	60	346.47183770702105	2.2526759229052913	351.113891349989	338.9131495889742	6000000	0.9836065573770492	0.25
490.05597162246704	70	351.1124247013049	2.419301001845058	355.6696846004925	344.2142248440068	7000000	0.9859154929577465	0.25
557.1071033477783	80	350.80684198818255	1.4695057102146427	353.85669885156676	347.40250882646615	8000000	0.9876543209876543	0.25
625.3738331794739	90	355.078823717832	1.931901553487751	358.8370199894998	350.0149814116303	9000000	0.989010989010989	0.25
693.6888520717621	100	353.6097772942365	2.0243185594628827	357.7507473062724	348.9321331360843	10000000	0.9900990099009901	0.25
