training_iteration,time_total_s,custom_metrics/true_reward_mean,custom_metrics/proxy_reward_mean
1,2.934568166732788,0,0
2,4.562361240386963,0,0
3,6.194001197814941,0,0
4,7.70394492149353,0,0
5,26.626986026763916,0,0
6,28.205811023712158,0,0
7,29.724292278289795,0,0
8,31.478253841400146,0,0
9,32.99733519554138,0,0
10,150.19793319702148,0,0
11,151.7264437675476,0,0
12,153.2555491924286,0,0
13,154.77832436561584,0,0
14,156.5320451259613,0,0
15,171.95351028442383,0,0
