Time	Iteration	AverageReward	StdRewards	MaxRewardRollout	MinRewardRollout	timesteps	gradnorms	maxnorms
37.02461647987366	0	0.10479357355305183	19.050881585284408	28.75317772757262	-27.52097297552973	0	0.0	0
653.8495807647705	800	353.1067275760166	2.0457319022538014	359.28484267638123	348.01965547626605	1600000	1.860575814709649	0.5
1272.5302560329437	1600	357.68429522728417	2.7639509266535334	361.7926226382551	348.8636630040128	3200000	1.8590245730506179	0.5
1888.4043157100677	2400	351.3156760647048	2.036123915808973	356.653391230735	344.00549206032883	4800000	1.8567509676696734	0.5
2504.5456886291504	3200	350.4331110110122	2.135780243083029	353.8849553328182	343.0099651593482	6400000	1.8579664484483025	0.5
3119.730628013611	4000	354.987596720981	3.6245872830566443	360.4189706351899	342.9918509388808	8000000	1.8600895510611724	0.5
3735.120491027832	4800	355.32106461226033	1.2831208566111905	358.911534932442	352.07642219844274	9600000	1.8609433748542956	0.5
3898.3553433418274	5000	357.185582803273	1.7241710614396843	360.28147866376094	351.52788994810544	10000000	1.8579031619568558	0.5
