Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
37.41475009918213	0	0.10479357355305183	19.050881585284408	28.75317772757262	-27.52097297552973	0	0.0	0
659.8711829185486	800	354.69781894979286	2.383745463528207	359.90261123271193	348.3067701503169	1600000	0.9987515605493134	0.7937241636825247
1280.2538650035858	1600	356.62292793618616	2.5483367232946055	360.81723634246737	347.81584729405586	3200000	0.999000999000999	0.7964806341798906
1913.0778007507324	2400	353.29806178608777	2.164513531158139	356.24480452318676	343.15976802818477	4800000	0.999000999000999	0.8737748315729913
2540.579967737198	3200	355.3753004398942	3.934637174277577	359.4465273054084	344.0723862750456	6400000	0.999000999000999	0.8276926782645935
3167.3389179706573	4000	355.47038804540745	3.253984693610262	360.20099029000266	344.18152410678886	8000000	0.999000999000999	0.8528708510165973
3791.048896074295	4800	353.72629405911493	3.293919889589813	357.67070695135044	334.49855541423676	9600000	0.999000999000999	0.8528708510165973
3958.9026300907135	5000	353.1070536583358	3.014904426320983	358.5079595866846	343.7082428103313	10000000	0.999000999000999	0.8864461018470594
