Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
15.034139633178711	0	-1.4489036436221607	18.17712229330612	27.28727449197322	-27.71342122834176	0	0.0	0
88.61994647979736	10	34.972767807010044	5.991131977389786	51.28486264098319	24.792003863374703	1000000	0.9090909090909091	0.6440578098746864
156.99502801895142	20	207.75163330719064	3.132571150559743	217.35458198410925	201.43087909923634	2000000	0.9523809523809523	0.6440578098746864
224.30889415740967	30	293.954422271698	2.6872132730875746	300.0673830565065	288.67439218692016	3000000	0.967741935483871	0.6440578098746864
291.0690248012543	40	328.60774906950826	3.122999607129303	334.51090276799187	320.67643164479523	4000000	0.975609756097561	0.6440578098746864
357.39421677589417	50	341.1350840839233	2.5929074697583387	347.2190161764156	334.4043159972571	5000000	0.9803921568627451	0.6440578098746864
425.52202224731445	60	346.722682178596	2.944382200516428	351.992761195841	339.91604302372434	6000000	0.9836065573770492	0.6721218570674716
492.69145011901855	70	348.14879680733634	2.196527047652685	353.563605891075	342.33306526253	7000000	0.9859154929577465	0.6935415987676335
560.4733457565308	80	350.45894622681254	2.088706944494013	355.62233221903443	345.12977769610006	8000000	0.9876543209876543	0.7187664698429228
