Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
15.1549551486969	0	0.10479357355305183	19.050881585284408	28.75317772757262	-27.52097297552973	0	0.0	0
83.45640635490417	10	206.1058407538753	3.7115861537039194	213.29949880081404	198.70739378897997	1000000	1.5529949565144967	0.5
148.79137134552002	20	306.9114977897209	3.022122761421725	314.5905374730937	301.3260256102076	2000000	1.6077766117815468	0.5
215.4414975643158	30	340.89466972936805	2.194817094805187	345.9755956514273	335.3573541471269	3000000	1.6371160345430493	0.5
284.7422661781311	40	350.5361635851493	2.599196694451905	355.7802394009195	343.09603108791634	4000000	1.6282023030490211	0.5
351.98387598991394	50	351.68269852223705	2.7260150340725655	357.1717943761032	337.6491318739354	5000000	1.5881679149069645	0.5
420.07373571395874	60	353.62517124573156	1.9958861058708586	357.5404417392565	349.588305825484	6000000	1.5732458788546968	0.5
487.81839871406555	70	352.6307287458947	2.0758977951679465	357.2128528824542	347.71573007758707	7000000	1.563263685871234	0.5
555.9864535331726	80	353.1750066906509	2.001673835065829	358.23972545866854	346.0277680455474	8000000	1.557939049295645	0.5
624.0219528675079	90	355.4769380764166	2.5301372651172285	359.68920182227157	348.0510399688137	9000000	1.5571883569598053	0.5
689.2773478031158	100	355.56036648892274	2.6286029509149778	360.71660326304846	348.70188870176935	10000000	1.5585306540550947	0.5
