Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
8.831820726394653	0	-2.034883604204183	19.01462660909523	29.63334692362696	-28.492423448711634	0	0.0	0
44.82718086242676	10	30.84581792362038	4.3391318274295765	38.51171204331331	23.089215182932094	1000000	1.2018258216627205	0.5
80.91844058036804	20	40.034160521133906	3.6809157608946204	46.40758249023929	31.73518735804828	2000000	1.1958438067996873	0.5
117.03966021537781	30	40.69373195545355	3.379862716281608	47.08217220660299	33.477410573512316	3000000	1.1696240241708045	0.5
153.29273128509521	40	42.63568285945221	2.2097002875804748	46.71327716065571	37.13393013784662	4000000	1.1707073327340718	0.5
190.20498657226562	50	42.56786155247748	2.5823247191006615	47.94683462381363	38.32419748138636	5000000	1.1755578639738837	0.5
227.1311981678009	60	43.87563756337746	2.232376307995687	48.12297434685752	38.241619595792145	6000000	1.1727955374463035	0.5
263.9375879764557	70	44.6578000265899	2.2322346251811305	48.70914239436388	38.402937579900026	7000000	1.162707528603949	0.5
301.0124146938324	80	45.1149839747489	1.9769010928725395	48.77981313201599	40.35165051696822	8000000	1.1619254088342015	0.5
338.7115602493286	90	45.417931536097534	1.8812391997930344	48.95271945884451	41.7894810740836	9000000	1.1568882381895293	0.5
376.36081767082214	100	46.02202661076612	1.5299177971347848	49.003434523008764	42.98528018826619	10000000	1.1577780368146573	0.5
