Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
37.25416803359985	0	-0.3023757225339068	20.43353154720061	29.695772328414023	-28.46130512468517	0	0.0	0
676.8769114017487	800	189.18634259752406	3.13474760837103	193.47327114618383	169.24769254948478	1600000	7.900820441932399	7.768367627135068
1309.7072834968567	1600	177.620717323927	12.111342954674887	192.10332747028588	132.30890862108208	3200000	7.841125802743797	8.322377554599612
1947.1204771995544	2400	157.2721257988344	17.053328150539425	176.280583771877	81.46397302788682	4800000	7.741298664906459	8.485854909253396
2581.802590608597	3200	167.45522004883708	17.545510237858508	181.5445885330555	101.49462165415753	6400000	7.85908273621829	8.485854909253396
3221.3757026195526	4000	114.86132867728597	44.55371208501285	177.66344971675426	0.576190605876036	8000000	7.892705217693989	7.8406216085883464
3864.690835237503	4800	104.88034277680357	47.639888265252466	184.76023450598586	17.384718687266286	9600000	7.833505126354058	7.8406216085883464
4034.7889533042908	5000	122.3895543099176	41.330105064254404	177.06354242004454	14.448585641104728	10000000	7.864841166276334	8.08510728319863
