Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
16.918078422546387	0	-0.3023757225339068	20.43353154720061	29.695772328414023	-28.46130512468517	0	0.0	0
74.75297379493713	10	203.88398249472087	3.443725093095515	211.76642163476208	197.19444193469826	1000000	1.3709833807580827	0.5
131.9177496433258	20	345.192866627942	2.8549863549227648	350.7745386266615	339.21276010756264	2000000	1.4477886540993279	0.5
189.01949954032898	30	354.87465571962224	2.4072075380352724	359.7999223775405	347.99291589560744	3000000	1.3932872331801864	0.5
247.1921842098236	40	353.719185666751	1.9815603356615685	359.0572814459447	347.76500010694144	4000000	1.3284382464871642	0.5
305.1606607437134	50	353.5504481802934	2.3464240781204984	358.0392528526718	342.9896764694713	5000000	1.300483961854986	0.5
361.0414528846741	60	356.5463036124627	2.266476312169597	361.5658572897082	348.79007094094413	6000000	1.2749901267944	0.5
411.2876675128937	70	355.96741084395086	2.4365583555684127	360.6995516993775	349.52980426332215	7000000	1.2485503658689125	0.5
466.5525586605072	80	355.2368800480267	2.6705219306988335	359.9566183030256	346.89994088877575	8000000	1.2275029616659519	0.5
523.5546832084656	90	355.41433458470476	1.8622551838856694	359.6642039329163	350.83923802551	9000000	1.2142271735684682	0.5
580.8760945796967	100	357.19999167677713	2.704273901562255	361.4966163756326	348.44198315955873	10000000	1.203432180920605	0.5
