Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
37.57294201850891	0	-0.813261981599876	19.297677396125206	29.944989854469895	-27.727565122768283	0	0.0	0
651.3204395771027	800	355.9910096936022	1.5438096356470374	358.9994658160431	352.4527464725543	1600000	1.8503074838002926	0.5
1263.210376739502	1600	357.04228288340533	2.242406042173101	361.24723856447963	351.2980691001576	3200000	1.8568884959242442	0.5
1877.5302679538727	2400	355.1880970240834	2.1434972523636957	359.98797367257066	349.90187876299024	4800000	1.8598314428470444	0.5
2488.6152532100677	3200	354.21258341219504	1.952547196150823	358.14556549850386	349.6406298287329	6400000	1.8618070443199408	0.5
3100.1615800857544	4000	351.3891828735685	1.7017432424746908	356.3099944447167	347.5923855083529	8000000	1.8599505868943538	0.5
3713.6703255176544	4800	356.0170485234625	1.9503703432471733	360.1061291825026	351.6887520319433	9600000	1.861866724371636	0.5
3875.462793827057	5000	356.00966287159645	2.3694315488028685	360.3058323595542	349.8998918015277	10000000	1.8608363029261237	0.5
