{"custom_metrics": {"true_reward_mean": 0.9040000000000005, "proxy_reward_mean": 0.9040000000000005, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0.9040000000000005, "current/proxy_reward_mean": 0.9040000000000005}, "training_iteration": 1, "time_total_s": 1.9428765773773193}
{"custom_metrics": {"true_reward_mean": 1.7220000000000006, "proxy_reward_mean": 1.9820000000000004, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 1.7220000000000006, "current/proxy_reward_mean": 1.9820000000000004}, "training_iteration": 2, "time_total_s": 3.3967204093933105}
{"custom_metrics": {"true_reward_mean": 1.7220000000000009, "proxy_reward_mean": 2.034000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 1.7220000000000009, "current/proxy_reward_mean": 2.034000000000001}, "training_iteration": 3, "time_total_s": 4.8798828125}
{"custom_metrics": {"true_reward_mean": 1.6960000000000008, "proxy_reward_mean": 1.6960000000000008, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 1.6960000000000008, "current/proxy_reward_mean": 1.6960000000000008}, "training_iteration": 4, "time_total_s": 6.404928922653198}
{"custom_metrics": {"true_reward_mean": 2.3540000000000014, "proxy_reward_mean": 2.3540000000000014, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 2.3540000000000014, "current/proxy_reward_mean": 2.3540000000000014}, "training_iteration": 5, "time_total_s": 7.881325721740723}
{"custom_metrics": {"true_reward_mean": 2.1380000000000012, "proxy_reward_mean": 2.1380000000000012, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 2.1380000000000012, "current/proxy_reward_mean": 2.1380000000000012}, "training_iteration": 6, "time_total_s": 9.37862491607666}
{"custom_metrics": {"true_reward_mean": 2.3720000000000017, "proxy_reward_mean": 3.3600000000000017, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 2.3720000000000017, "current/proxy_reward_mean": 3.3600000000000017}, "training_iteration": 7, "time_total_s": 10.920368194580078}
{"custom_metrics": {"true_reward_mean": 2.436000000000001, "proxy_reward_mean": 2.436000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 2.436000000000001, "current/proxy_reward_mean": 2.436000000000001}, "training_iteration": 8, "time_total_s": 12.492834329605103}
{"custom_metrics": {"true_reward_mean": 2.494000000000001, "proxy_reward_mean": 2.494000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 2.494000000000001, "current/proxy_reward_mean": 2.494000000000001}, "training_iteration": 9, "time_total_s": 14.052980422973633}
{"custom_metrics": {"true_reward_mean": 3.030000000000002, "proxy_reward_mean": 3.030000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.030000000000002, "current/proxy_reward_mean": 3.030000000000002}, "training_iteration": 10, "time_total_s": 15.747234106063843}
{"custom_metrics": {"true_reward_mean": 3.2240000000000015, "proxy_reward_mean": 3.2240000000000015, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.2240000000000015, "current/proxy_reward_mean": 3.2240000000000015}, "training_iteration": 11, "time_total_s": 17.241597414016724}
{"custom_metrics": {"true_reward_mean": 3.652000000000001, "proxy_reward_mean": 3.652000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.652000000000001, "current/proxy_reward_mean": 3.652000000000001}, "training_iteration": 12, "time_total_s": 18.719625234603882}
{"custom_metrics": {"true_reward_mean": 3.1720000000000015, "proxy_reward_mean": 3.1720000000000015, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.1720000000000015, "current/proxy_reward_mean": 3.1720000000000015}, "training_iteration": 13, "time_total_s": 20.25011658668518}
{"custom_metrics": {"true_reward_mean": 3.680000000000002, "proxy_reward_mean": 3.680000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.680000000000002, "current/proxy_reward_mean": 3.680000000000002}, "training_iteration": 14, "time_total_s": 21.775413751602173}
{"custom_metrics": {"true_reward_mean": 3.464, "proxy_reward_mean": 3.464, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.464, "current/proxy_reward_mean": 3.464}, "training_iteration": 15, "time_total_s": 23.342792510986328}
{"custom_metrics": {"true_reward_mean": 3.4340000000000024, "proxy_reward_mean": 3.4340000000000024, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.4340000000000024, "current/proxy_reward_mean": 3.4340000000000024}, "training_iteration": 16, "time_total_s": 24.788323640823364}
{"custom_metrics": {"true_reward_mean": 4.166000000000001, "proxy_reward_mean": 4.166000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.166000000000001, "current/proxy_reward_mean": 4.166000000000001}, "training_iteration": 17, "time_total_s": 26.231117963790894}
{"custom_metrics": {"true_reward_mean": 3.8820000000000006, "proxy_reward_mean": 3.8820000000000006, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.8820000000000006, "current/proxy_reward_mean": 3.8820000000000006}, "training_iteration": 18, "time_total_s": 27.741652011871338}
{"custom_metrics": {"true_reward_mean": 3.4000000000000012, "proxy_reward_mean": 5.22, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.4000000000000012, "current/proxy_reward_mean": 5.22}, "training_iteration": 19, "time_total_s": 29.178386688232422}
{"custom_metrics": {"true_reward_mean": 3.8180000000000023, "proxy_reward_mean": 3.8180000000000023, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.8180000000000023, "current/proxy_reward_mean": 3.8180000000000023}, "training_iteration": 20, "time_total_s": 30.589230060577393}
{"custom_metrics": {"true_reward_mean": 3.876000000000002, "proxy_reward_mean": 3.876000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.876000000000002, "current/proxy_reward_mean": 3.876000000000002}, "training_iteration": 21, "time_total_s": 32.06752610206604}
{"custom_metrics": {"true_reward_mean": 4.196000000000001, "proxy_reward_mean": 4.196000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.196000000000001, "current/proxy_reward_mean": 4.196000000000001}, "training_iteration": 22, "time_total_s": 33.391112327575684}
{"custom_metrics": {"true_reward_mean": 3.918, "proxy_reward_mean": 3.918, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.918, "current/proxy_reward_mean": 3.918}, "training_iteration": 23, "time_total_s": 34.58618140220642}
{"custom_metrics": {"true_reward_mean": 4.048000000000001, "proxy_reward_mean": 4.048000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.048000000000001, "current/proxy_reward_mean": 4.048000000000001}, "training_iteration": 24, "time_total_s": 35.81438422203064}
{"custom_metrics": {"true_reward_mean": 4.284000000000001, "proxy_reward_mean": 4.284000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.284000000000001, "current/proxy_reward_mean": 4.284000000000001}, "training_iteration": 25, "time_total_s": 37.2843279838562}
{"custom_metrics": {"true_reward_mean": 2.8000000000000016, "proxy_reward_mean": 2.8000000000000016, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 2.8000000000000016, "current/proxy_reward_mean": 2.8000000000000016}, "training_iteration": 26, "time_total_s": 38.87139272689819}
{"custom_metrics": {"true_reward_mean": 4.0040000000000004, "proxy_reward_mean": 4.0040000000000004, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.0040000000000004, "current/proxy_reward_mean": 4.0040000000000004}, "training_iteration": 27, "time_total_s": 40.538854122161865}
{"custom_metrics": {"true_reward_mean": 4.034000000000002, "proxy_reward_mean": 4.034000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.034000000000002, "current/proxy_reward_mean": 4.034000000000002}, "training_iteration": 28, "time_total_s": 42.12855505943298}
{"custom_metrics": {"true_reward_mean": 3.958000000000001, "proxy_reward_mean": 3.958000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.958000000000001, "current/proxy_reward_mean": 3.958000000000001}, "training_iteration": 29, "time_total_s": 43.57236289978027}
{"custom_metrics": {"true_reward_mean": 4.0440000000000005, "proxy_reward_mean": 4.0440000000000005, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.0440000000000005, "current/proxy_reward_mean": 4.0440000000000005}, "training_iteration": 30, "time_total_s": 45.04759120941162}
{"custom_metrics": {"true_reward_mean": 3.4800000000000018, "proxy_reward_mean": 3.4800000000000018, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.4800000000000018, "current/proxy_reward_mean": 3.4800000000000018}, "training_iteration": 31, "time_total_s": 46.33078956604004}
{"custom_metrics": {"true_reward_mean": 3.6540000000000012, "proxy_reward_mean": 3.6540000000000012, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.6540000000000012, "current/proxy_reward_mean": 3.6540000000000012}, "training_iteration": 32, "time_total_s": 47.7751350402832}
{"custom_metrics": {"true_reward_mean": 3.928000000000001, "proxy_reward_mean": 3.928000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.928000000000001, "current/proxy_reward_mean": 3.928000000000001}, "training_iteration": 33, "time_total_s": 49.30844330787659}
{"custom_metrics": {"true_reward_mean": 3.9860000000000015, "proxy_reward_mean": 4.090000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.9860000000000015, "current/proxy_reward_mean": 4.090000000000002}, "training_iteration": 34, "time_total_s": 50.79287242889404}
{"custom_metrics": {"true_reward_mean": 3.4240000000000017, "proxy_reward_mean": 3.4240000000000017, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.4240000000000017, "current/proxy_reward_mean": 3.4240000000000017}, "training_iteration": 35, "time_total_s": 52.26073908805847}
{"custom_metrics": {"true_reward_mean": 3.6540000000000012, "proxy_reward_mean": 3.6540000000000012, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.6540000000000012, "current/proxy_reward_mean": 3.6540000000000012}, "training_iteration": 36, "time_total_s": 53.697633028030396}
{"custom_metrics": {"true_reward_mean": 3.768000000000002, "proxy_reward_mean": 3.768000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.768000000000002, "current/proxy_reward_mean": 3.768000000000002}, "training_iteration": 37, "time_total_s": 55.107872009277344}
{"custom_metrics": {"true_reward_mean": 4.204, "proxy_reward_mean": 4.204, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.204, "current/proxy_reward_mean": 4.204}, "training_iteration": 38, "time_total_s": 56.508718490600586}
{"custom_metrics": {"true_reward_mean": 4.395999999999999, "proxy_reward_mean": 4.395999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.395999999999999, "current/proxy_reward_mean": 4.395999999999999}, "training_iteration": 39, "time_total_s": 57.989874601364136}
{"custom_metrics": {"true_reward_mean": 3.8520000000000016, "proxy_reward_mean": 3.8520000000000016, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.8520000000000016, "current/proxy_reward_mean": 3.8520000000000016}, "training_iteration": 40, "time_total_s": 59.452200174331665}
{"custom_metrics": {"true_reward_mean": 4.553999999999998, "proxy_reward_mean": 4.553999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.553999999999998, "current/proxy_reward_mean": 4.553999999999998}, "training_iteration": 41, "time_total_s": 61.00528359413147}
{"custom_metrics": {"true_reward_mean": 3.9040000000000012, "proxy_reward_mean": 3.9040000000000012, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.9040000000000012, "current/proxy_reward_mean": 3.9040000000000012}, "training_iteration": 42, "time_total_s": 62.659013509750366}
{"custom_metrics": {"true_reward_mean": 4.357999999999999, "proxy_reward_mean": 4.357999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.357999999999999, "current/proxy_reward_mean": 4.357999999999999}, "training_iteration": 43, "time_total_s": 64.3270480632782}
{"custom_metrics": {"true_reward_mean": 4.272, "proxy_reward_mean": 4.272, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.272, "current/proxy_reward_mean": 4.272}, "training_iteration": 44, "time_total_s": 65.81206226348877}
{"custom_metrics": {"true_reward_mean": 4.230000000000001, "proxy_reward_mean": 4.230000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.230000000000001, "current/proxy_reward_mean": 4.230000000000001}, "training_iteration": 45, "time_total_s": 67.28975439071655}
{"custom_metrics": {"true_reward_mean": 4.050000000000001, "proxy_reward_mean": 4.050000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.050000000000001, "current/proxy_reward_mean": 4.050000000000001}, "training_iteration": 46, "time_total_s": 68.99002504348755}
{"custom_metrics": {"true_reward_mean": 3.9740000000000015, "proxy_reward_mean": 3.9740000000000015, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.9740000000000015, "current/proxy_reward_mean": 3.9740000000000015}, "training_iteration": 47, "time_total_s": 70.52918076515198}
{"custom_metrics": {"true_reward_mean": 4.052000000000001, "proxy_reward_mean": 4.052000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.052000000000001, "current/proxy_reward_mean": 4.052000000000001}, "training_iteration": 48, "time_total_s": 72.01326537132263}
{"custom_metrics": {"true_reward_mean": 4.056, "proxy_reward_mean": 4.056, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.056, "current/proxy_reward_mean": 4.056}, "training_iteration": 49, "time_total_s": 73.44975900650024}
{"custom_metrics": {"true_reward_mean": 3.8420000000000014, "proxy_reward_mean": 3.8420000000000014, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.8420000000000014, "current/proxy_reward_mean": 3.8420000000000014}, "training_iteration": 50, "time_total_s": 74.9034857749939}
{"custom_metrics": {"true_reward_mean": 4.651999999999998, "proxy_reward_mean": 4.651999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.651999999999998, "current/proxy_reward_mean": 4.651999999999998}, "training_iteration": 51, "time_total_s": 76.3880672454834}
{"custom_metrics": {"true_reward_mean": 4.366, "proxy_reward_mean": 4.366, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.366, "current/proxy_reward_mean": 4.366}, "training_iteration": 52, "time_total_s": 77.89839625358582}
{"custom_metrics": {"true_reward_mean": 4.765999999999999, "proxy_reward_mean": 4.765999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.765999999999999, "current/proxy_reward_mean": 4.765999999999999}, "training_iteration": 53, "time_total_s": 79.38198947906494}
{"custom_metrics": {"true_reward_mean": 4.789999999999998, "proxy_reward_mean": 4.789999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.789999999999998, "current/proxy_reward_mean": 4.789999999999998}, "training_iteration": 54, "time_total_s": 80.88103175163269}
{"custom_metrics": {"true_reward_mean": 4.002000000000001, "proxy_reward_mean": 4.002000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.002000000000001, "current/proxy_reward_mean": 4.002000000000001}, "training_iteration": 55, "time_total_s": 82.34899115562439}
{"custom_metrics": {"true_reward_mean": 4.574, "proxy_reward_mean": 4.574, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.574, "current/proxy_reward_mean": 4.574}, "training_iteration": 56, "time_total_s": 83.85593056678772}
{"custom_metrics": {"true_reward_mean": 4.525999999999999, "proxy_reward_mean": 4.525999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.525999999999999, "current/proxy_reward_mean": 4.525999999999999}, "training_iteration": 57, "time_total_s": 85.33795285224915}
{"custom_metrics": {"true_reward_mean": 4.54, "proxy_reward_mean": 4.54, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.54, "current/proxy_reward_mean": 4.54}, "training_iteration": 58, "time_total_s": 86.76581358909607}
{"custom_metrics": {"true_reward_mean": 4.717999999999998, "proxy_reward_mean": 4.717999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.717999999999998, "current/proxy_reward_mean": 4.717999999999998}, "training_iteration": 59, "time_total_s": 88.30843353271484}
{"custom_metrics": {"true_reward_mean": 4.481999999999999, "proxy_reward_mean": 4.481999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.481999999999999, "current/proxy_reward_mean": 4.481999999999999}, "training_iteration": 60, "time_total_s": 89.71200156211853}
{"custom_metrics": {"true_reward_mean": 4.5859999999999985, "proxy_reward_mean": 4.5859999999999985, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.5859999999999985, "current/proxy_reward_mean": 4.5859999999999985}, "training_iteration": 61, "time_total_s": 91.38515734672546}
{"custom_metrics": {"true_reward_mean": 4.228, "proxy_reward_mean": 4.228, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.228, "current/proxy_reward_mean": 4.228}, "training_iteration": 62, "time_total_s": 92.78615522384644}
{"custom_metrics": {"true_reward_mean": 4.478, "proxy_reward_mean": 4.478, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.478, "current/proxy_reward_mean": 4.478}, "training_iteration": 63, "time_total_s": 94.31120371818542}
{"custom_metrics": {"true_reward_mean": 4.863999999999998, "proxy_reward_mean": 4.863999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.863999999999998, "current/proxy_reward_mean": 4.863999999999998}, "training_iteration": 64, "time_total_s": 95.79938769340515}
{"custom_metrics": {"true_reward_mean": 4.482, "proxy_reward_mean": 4.482, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.482, "current/proxy_reward_mean": 4.482}, "training_iteration": 65, "time_total_s": 97.21957278251648}
{"custom_metrics": {"true_reward_mean": 4.807999999999998, "proxy_reward_mean": 4.807999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.807999999999998, "current/proxy_reward_mean": 4.807999999999998}, "training_iteration": 66, "time_total_s": 98.70977354049683}
{"custom_metrics": {"true_reward_mean": 4.531999999999998, "proxy_reward_mean": 4.531999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.531999999999998, "current/proxy_reward_mean": 4.531999999999998}, "training_iteration": 67, "time_total_s": 100.32653307914734}
{"custom_metrics": {"true_reward_mean": 4.51, "proxy_reward_mean": 4.51, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.51, "current/proxy_reward_mean": 4.51}, "training_iteration": 68, "time_total_s": 101.74783825874329}
{"custom_metrics": {"true_reward_mean": 4.453999999999999, "proxy_reward_mean": 4.453999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.453999999999999, "current/proxy_reward_mean": 4.453999999999999}, "training_iteration": 69, "time_total_s": 103.24209475517273}
{"custom_metrics": {"true_reward_mean": 4.537999999999999, "proxy_reward_mean": 4.537999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.537999999999999, "current/proxy_reward_mean": 4.537999999999999}, "training_iteration": 70, "time_total_s": 104.73288774490356}
{"custom_metrics": {"true_reward_mean": 4.515999999999999, "proxy_reward_mean": 4.515999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.515999999999999, "current/proxy_reward_mean": 4.515999999999999}, "training_iteration": 71, "time_total_s": 106.38211965560913}
{"custom_metrics": {"true_reward_mean": 4.679999999999998, "proxy_reward_mean": 4.679999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.679999999999998, "current/proxy_reward_mean": 4.679999999999998}, "training_iteration": 72, "time_total_s": 107.85800909996033}
{"custom_metrics": {"true_reward_mean": 4.344000000000001, "proxy_reward_mean": 4.344000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.344000000000001, "current/proxy_reward_mean": 4.344000000000001}, "training_iteration": 73, "time_total_s": 109.31857490539551}
{"custom_metrics": {"true_reward_mean": 4.41, "proxy_reward_mean": 4.41, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.41, "current/proxy_reward_mean": 4.41}, "training_iteration": 74, "time_total_s": 110.81131315231323}
{"custom_metrics": {"true_reward_mean": 4.291999999999998, "proxy_reward_mean": 4.291999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.291999999999998, "current/proxy_reward_mean": 4.291999999999998}, "training_iteration": 75, "time_total_s": 112.354008436203}
{"custom_metrics": {"true_reward_mean": 3.9619999999999997, "proxy_reward_mean": 6.249999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.9619999999999997, "current/proxy_reward_mean": 6.249999999999998}, "training_iteration": 76, "time_total_s": 113.79596042633057}
{"custom_metrics": {"true_reward_mean": 4.3660000000000005, "proxy_reward_mean": 4.3660000000000005, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.3660000000000005, "current/proxy_reward_mean": 4.3660000000000005}, "training_iteration": 77, "time_total_s": 115.23747396469116}
{"custom_metrics": {"true_reward_mean": 4.4879999999999995, "proxy_reward_mean": 4.4879999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.4879999999999995, "current/proxy_reward_mean": 4.4879999999999995}, "training_iteration": 78, "time_total_s": 116.6482400894165}
{"custom_metrics": {"true_reward_mean": 4.318, "proxy_reward_mean": 4.318, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.318, "current/proxy_reward_mean": 4.318}, "training_iteration": 79, "time_total_s": 118.02487707138062}
{"custom_metrics": {"true_reward_mean": 4.332000000000001, "proxy_reward_mean": 4.332000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.332000000000001, "current/proxy_reward_mean": 4.332000000000001}, "training_iteration": 80, "time_total_s": 119.4546525478363}
{"custom_metrics": {"true_reward_mean": 4.098000000000001, "proxy_reward_mean": 4.098000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.098000000000001, "current/proxy_reward_mean": 4.098000000000001}, "training_iteration": 81, "time_total_s": 121.0726957321167}
{"custom_metrics": {"true_reward_mean": 4.136000000000001, "proxy_reward_mean": 4.136000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.136000000000001, "current/proxy_reward_mean": 4.136000000000001}, "training_iteration": 82, "time_total_s": 122.51043438911438}
{"custom_metrics": {"true_reward_mean": 4.050000000000002, "proxy_reward_mean": 4.050000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.050000000000002, "current/proxy_reward_mean": 4.050000000000002}, "training_iteration": 83, "time_total_s": 124.01633954048157}
{"custom_metrics": {"true_reward_mean": 4.613999999999998, "proxy_reward_mean": 4.613999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.613999999999998, "current/proxy_reward_mean": 4.613999999999998}, "training_iteration": 84, "time_total_s": 125.42879581451416}
{"custom_metrics": {"true_reward_mean": 4.210000000000001, "proxy_reward_mean": 4.210000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.210000000000001, "current/proxy_reward_mean": 4.210000000000001}, "training_iteration": 85, "time_total_s": 127.09393644332886}
{"custom_metrics": {"true_reward_mean": 4.529999999999999, "proxy_reward_mean": 4.529999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.529999999999999, "current/proxy_reward_mean": 4.529999999999999}, "training_iteration": 86, "time_total_s": 128.58716797828674}
{"custom_metrics": {"true_reward_mean": 4.589999999999998, "proxy_reward_mean": 4.589999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.589999999999998, "current/proxy_reward_mean": 4.589999999999998}, "training_iteration": 87, "time_total_s": 130.26208305358887}
{"custom_metrics": {"true_reward_mean": 4.287999999999999, "proxy_reward_mean": 4.287999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.287999999999999, "current/proxy_reward_mean": 4.287999999999999}, "training_iteration": 88, "time_total_s": 131.99815702438354}
{"custom_metrics": {"true_reward_mean": 4.386, "proxy_reward_mean": 4.386, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.386, "current/proxy_reward_mean": 4.386}, "training_iteration": 89, "time_total_s": 133.4732048511505}
{"custom_metrics": {"true_reward_mean": 4.667999999999998, "proxy_reward_mean": 4.667999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.667999999999998, "current/proxy_reward_mean": 4.667999999999998}, "training_iteration": 90, "time_total_s": 134.9764244556427}
{"custom_metrics": {"true_reward_mean": 4.397999999999999, "proxy_reward_mean": 4.397999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.397999999999999, "current/proxy_reward_mean": 4.397999999999999}, "training_iteration": 91, "time_total_s": 136.4688048362732}
{"custom_metrics": {"true_reward_mean": 3.926000000000001, "proxy_reward_mean": 3.926000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.926000000000001, "current/proxy_reward_mean": 3.926000000000001}, "training_iteration": 92, "time_total_s": 137.9027602672577}
{"custom_metrics": {"true_reward_mean": 4.541999999999999, "proxy_reward_mean": 4.541999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.541999999999999, "current/proxy_reward_mean": 4.541999999999999}, "training_iteration": 93, "time_total_s": 139.29951524734497}
{"custom_metrics": {"true_reward_mean": 4.2559999999999985, "proxy_reward_mean": 4.2559999999999985, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.2559999999999985, "current/proxy_reward_mean": 4.2559999999999985}, "training_iteration": 94, "time_total_s": 140.80801939964294}
{"custom_metrics": {"true_reward_mean": 3.934, "proxy_reward_mean": 3.934, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.934, "current/proxy_reward_mean": 3.934}, "training_iteration": 95, "time_total_s": 142.29001116752625}
{"custom_metrics": {"true_reward_mean": 4.256, "proxy_reward_mean": 4.256, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.256, "current/proxy_reward_mean": 4.256}, "training_iteration": 96, "time_total_s": 143.70955395698547}
{"custom_metrics": {"true_reward_mean": 4.563999999999999, "proxy_reward_mean": 4.563999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.563999999999999, "current/proxy_reward_mean": 4.563999999999999}, "training_iteration": 97, "time_total_s": 145.1979832649231}
{"custom_metrics": {"true_reward_mean": 4.216, "proxy_reward_mean": 4.216, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.216, "current/proxy_reward_mean": 4.216}, "training_iteration": 98, "time_total_s": 146.71902918815613}
{"custom_metrics": {"true_reward_mean": 4.528, "proxy_reward_mean": 4.528, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.528, "current/proxy_reward_mean": 4.528}, "training_iteration": 99, "time_total_s": 148.22978401184082}
{"custom_metrics": {"true_reward_mean": 4.629999999999999, "proxy_reward_mean": 4.629999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.629999999999999, "current/proxy_reward_mean": 4.629999999999999}, "training_iteration": 100, "time_total_s": 149.74615359306335}
{"custom_metrics": {"true_reward_mean": 4.707999999999998, "proxy_reward_mean": 4.707999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.707999999999998, "current/proxy_reward_mean": 4.707999999999998}, "training_iteration": 101, "time_total_s": 151.28124833106995}
{"custom_metrics": {"true_reward_mean": 4.276000000000001, "proxy_reward_mean": 4.276000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.276000000000001, "current/proxy_reward_mean": 4.276000000000001}, "training_iteration": 102, "time_total_s": 152.74130821228027}
{"custom_metrics": {"true_reward_mean": 4.292, "proxy_reward_mean": 4.292, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.292, "current/proxy_reward_mean": 4.292}, "training_iteration": 103, "time_total_s": 154.46181774139404}
{"custom_metrics": {"true_reward_mean": 4.4159999999999995, "proxy_reward_mean": 4.4159999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.4159999999999995, "current/proxy_reward_mean": 4.4159999999999995}, "training_iteration": 104, "time_total_s": 155.92290210723877}
{"custom_metrics": {"true_reward_mean": 4.649999999999999, "proxy_reward_mean": 4.649999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.649999999999999, "current/proxy_reward_mean": 4.649999999999999}, "training_iteration": 105, "time_total_s": 157.43668293952942}
{"custom_metrics": {"true_reward_mean": 4.060000000000001, "proxy_reward_mean": 4.060000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.060000000000001, "current/proxy_reward_mean": 4.060000000000001}, "training_iteration": 106, "time_total_s": 158.91929721832275}
{"custom_metrics": {"true_reward_mean": 4.409999999999999, "proxy_reward_mean": 4.409999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.409999999999999, "current/proxy_reward_mean": 4.409999999999999}, "training_iteration": 107, "time_total_s": 160.44319105148315}
{"custom_metrics": {"true_reward_mean": 4.298, "proxy_reward_mean": 4.298, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.298, "current/proxy_reward_mean": 4.298}, "training_iteration": 108, "time_total_s": 161.9180827140808}
{"custom_metrics": {"true_reward_mean": 4.4479999999999995, "proxy_reward_mean": 4.4479999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.4479999999999995, "current/proxy_reward_mean": 4.4479999999999995}, "training_iteration": 109, "time_total_s": 163.32211542129517}
{"custom_metrics": {"true_reward_mean": 4.039999999999999, "proxy_reward_mean": 4.039999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.039999999999999, "current/proxy_reward_mean": 4.039999999999999}, "training_iteration": 110, "time_total_s": 164.7871069908142}
{"custom_metrics": {"true_reward_mean": 4.497999999999999, "proxy_reward_mean": 4.497999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.497999999999999, "current/proxy_reward_mean": 4.497999999999999}, "training_iteration": 111, "time_total_s": 166.26819849014282}
{"custom_metrics": {"true_reward_mean": 4.537999999999999, "proxy_reward_mean": 4.537999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.537999999999999, "current/proxy_reward_mean": 4.537999999999999}, "training_iteration": 112, "time_total_s": 167.95740580558777}
{"custom_metrics": {"true_reward_mean": 4.050000000000002, "proxy_reward_mean": 4.050000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.050000000000002, "current/proxy_reward_mean": 4.050000000000002}, "training_iteration": 113, "time_total_s": 169.40973567962646}
{"custom_metrics": {"true_reward_mean": 4.5539999999999985, "proxy_reward_mean": 4.5539999999999985, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.5539999999999985, "current/proxy_reward_mean": 4.5539999999999985}, "training_iteration": 114, "time_total_s": 170.8557379245758}
{"custom_metrics": {"true_reward_mean": 4.771999999999998, "proxy_reward_mean": 4.771999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.771999999999998, "current/proxy_reward_mean": 4.771999999999998}, "training_iteration": 115, "time_total_s": 172.28078842163086}
{"custom_metrics": {"true_reward_mean": 4.528, "proxy_reward_mean": 4.528, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.528, "current/proxy_reward_mean": 4.528}, "training_iteration": 116, "time_total_s": 173.69017624855042}
{"custom_metrics": {"true_reward_mean": 4.42, "proxy_reward_mean": 4.42, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.42, "current/proxy_reward_mean": 4.42}, "training_iteration": 117, "time_total_s": 175.14045357704163}
{"custom_metrics": {"true_reward_mean": 4.369999999999999, "proxy_reward_mean": 4.369999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.369999999999999, "current/proxy_reward_mean": 4.369999999999999}, "training_iteration": 118, "time_total_s": 176.709618806839}
{"custom_metrics": {"true_reward_mean": 4.058000000000002, "proxy_reward_mean": 4.058000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.058000000000002, "current/proxy_reward_mean": 4.058000000000002}, "training_iteration": 119, "time_total_s": 178.1712076663971}
{"custom_metrics": {"true_reward_mean": 4.196000000000001, "proxy_reward_mean": 4.196000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.196000000000001, "current/proxy_reward_mean": 4.196000000000001}, "training_iteration": 120, "time_total_s": 179.8222529888153}
{"custom_metrics": {"true_reward_mean": 4.426, "proxy_reward_mean": 4.426, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.426, "current/proxy_reward_mean": 4.426}, "training_iteration": 121, "time_total_s": 181.33218359947205}
{"custom_metrics": {"true_reward_mean": 4.711999999999998, "proxy_reward_mean": 4.711999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.711999999999998, "current/proxy_reward_mean": 4.711999999999998}, "training_iteration": 122, "time_total_s": 182.9507966041565}
{"custom_metrics": {"true_reward_mean": 4.565999999999998, "proxy_reward_mean": 4.565999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.565999999999998, "current/proxy_reward_mean": 4.565999999999998}, "training_iteration": 123, "time_total_s": 184.35777688026428}
{"custom_metrics": {"true_reward_mean": 4.112, "proxy_reward_mean": 4.112, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.112, "current/proxy_reward_mean": 4.112}, "training_iteration": 124, "time_total_s": 186.06099653244019}
{"custom_metrics": {"true_reward_mean": 4.390000000000001, "proxy_reward_mean": 4.390000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.390000000000001, "current/proxy_reward_mean": 4.390000000000001}, "training_iteration": 125, "time_total_s": 187.54428124427795}
{"custom_metrics": {"true_reward_mean": 4.294, "proxy_reward_mean": 4.294, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.294, "current/proxy_reward_mean": 4.294}, "training_iteration": 126, "time_total_s": 188.9736611843109}
{"custom_metrics": {"true_reward_mean": 3.992000000000001, "proxy_reward_mean": 3.992000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.992000000000001, "current/proxy_reward_mean": 3.992000000000001}, "training_iteration": 127, "time_total_s": 190.3847393989563}
{"custom_metrics": {"true_reward_mean": 3.9500000000000006, "proxy_reward_mean": 3.9500000000000006, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.9500000000000006, "current/proxy_reward_mean": 3.9500000000000006}, "training_iteration": 128, "time_total_s": 191.90168142318726}
{"custom_metrics": {"true_reward_mean": 4.308000000000001, "proxy_reward_mean": 4.308000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.308000000000001, "current/proxy_reward_mean": 4.308000000000001}, "training_iteration": 129, "time_total_s": 193.37137842178345}
{"custom_metrics": {"true_reward_mean": 4.214000000000001, "proxy_reward_mean": 4.214000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.214000000000001, "current/proxy_reward_mean": 4.214000000000001}, "training_iteration": 130, "time_total_s": 194.76766228675842}
{"custom_metrics": {"true_reward_mean": 4.388, "proxy_reward_mean": 4.388, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.388, "current/proxy_reward_mean": 4.388}, "training_iteration": 131, "time_total_s": 196.20740747451782}
{"custom_metrics": {"true_reward_mean": 4.278, "proxy_reward_mean": 4.278, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.278, "current/proxy_reward_mean": 4.278}, "training_iteration": 132, "time_total_s": 197.63384795188904}
{"custom_metrics": {"true_reward_mean": 4.527999999999999, "proxy_reward_mean": 4.527999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.527999999999999, "current/proxy_reward_mean": 4.527999999999999}, "training_iteration": 133, "time_total_s": 199.08524346351624}
{"custom_metrics": {"true_reward_mean": 4.477999999999999, "proxy_reward_mean": 4.477999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.477999999999999, "current/proxy_reward_mean": 4.477999999999999}, "training_iteration": 134, "time_total_s": 200.5254783630371}
{"custom_metrics": {"true_reward_mean": 4.166, "proxy_reward_mean": 4.166, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.166, "current/proxy_reward_mean": 4.166}, "training_iteration": 135, "time_total_s": 201.9780147075653}
{"custom_metrics": {"true_reward_mean": 4.372000000000001, "proxy_reward_mean": 4.372000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.372000000000001, "current/proxy_reward_mean": 4.372000000000001}, "training_iteration": 136, "time_total_s": 203.38403582572937}
{"custom_metrics": {"true_reward_mean": 4.452, "proxy_reward_mean": 4.452, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.452, "current/proxy_reward_mean": 4.452}, "training_iteration": 137, "time_total_s": 204.80005741119385}
{"custom_metrics": {"true_reward_mean": 4.409999999999999, "proxy_reward_mean": 4.409999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.409999999999999, "current/proxy_reward_mean": 4.409999999999999}, "training_iteration": 138, "time_total_s": 206.18118739128113}
{"custom_metrics": {"true_reward_mean": 4.605999999999999, "proxy_reward_mean": 4.605999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.605999999999999, "current/proxy_reward_mean": 4.605999999999999}, "training_iteration": 139, "time_total_s": 207.60080122947693}
{"custom_metrics": {"true_reward_mean": 4.459999999999998, "proxy_reward_mean": 4.459999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.459999999999998, "current/proxy_reward_mean": 4.459999999999998}, "training_iteration": 140, "time_total_s": 209.0720076560974}
{"custom_metrics": {"true_reward_mean": 4.657999999999999, "proxy_reward_mean": 4.657999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.657999999999999, "current/proxy_reward_mean": 4.657999999999999}, "training_iteration": 141, "time_total_s": 210.45563101768494}
{"custom_metrics": {"true_reward_mean": 4.5040000000000004, "proxy_reward_mean": 4.5040000000000004, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.5040000000000004, "current/proxy_reward_mean": 4.5040000000000004}, "training_iteration": 142, "time_total_s": 212.00231671333313}
{"custom_metrics": {"true_reward_mean": 4.638, "proxy_reward_mean": 4.638, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.638, "current/proxy_reward_mean": 4.638}, "training_iteration": 143, "time_total_s": 213.43297958374023}
{"custom_metrics": {"true_reward_mean": 4.262, "proxy_reward_mean": 4.262, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.262, "current/proxy_reward_mean": 4.262}, "training_iteration": 144, "time_total_s": 214.86112928390503}
{"custom_metrics": {"true_reward_mean": 4.441999999999998, "proxy_reward_mean": 4.441999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.441999999999998, "current/proxy_reward_mean": 4.441999999999998}, "training_iteration": 145, "time_total_s": 216.30588698387146}
{"custom_metrics": {"true_reward_mean": 4.264, "proxy_reward_mean": 4.264, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.264, "current/proxy_reward_mean": 4.264}, "training_iteration": 146, "time_total_s": 217.7962303161621}
{"custom_metrics": {"true_reward_mean": 4.467999999999999, "proxy_reward_mean": 4.467999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.467999999999999, "current/proxy_reward_mean": 4.467999999999999}, "training_iteration": 147, "time_total_s": 219.603093624115}
{"custom_metrics": {"true_reward_mean": 4.5379999999999985, "proxy_reward_mean": 4.5379999999999985, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.5379999999999985, "current/proxy_reward_mean": 4.5379999999999985}, "training_iteration": 148, "time_total_s": 221.10132694244385}
{"custom_metrics": {"true_reward_mean": 4.709999999999998, "proxy_reward_mean": 4.709999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.709999999999998, "current/proxy_reward_mean": 4.709999999999998}, "training_iteration": 149, "time_total_s": 222.59072017669678}
{"custom_metrics": {"true_reward_mean": 3.9540000000000006, "proxy_reward_mean": 3.9540000000000006, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.9540000000000006, "current/proxy_reward_mean": 3.9540000000000006}, "training_iteration": 150, "time_total_s": 224.0648410320282}
{"custom_metrics": {"true_reward_mean": 4.244000000000001, "proxy_reward_mean": 4.244000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.244000000000001, "current/proxy_reward_mean": 4.244000000000001}, "training_iteration": 151, "time_total_s": 225.54905724525452}
{"custom_metrics": {"true_reward_mean": 4.010000000000002, "proxy_reward_mean": 4.010000000000002, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.010000000000002, "current/proxy_reward_mean": 4.010000000000002}, "training_iteration": 152, "time_total_s": 226.77756261825562}
{"custom_metrics": {"true_reward_mean": 4.589999999999999, "proxy_reward_mean": 4.589999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.589999999999999, "current/proxy_reward_mean": 4.589999999999999}, "training_iteration": 153, "time_total_s": 228.01273226737976}
{"custom_metrics": {"true_reward_mean": 4.583999999999999, "proxy_reward_mean": 4.583999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.583999999999999, "current/proxy_reward_mean": 4.583999999999999}, "training_iteration": 154, "time_total_s": 229.1970410346985}
{"custom_metrics": {"true_reward_mean": 4.324, "proxy_reward_mean": 4.324, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.324, "current/proxy_reward_mean": 4.324}, "training_iteration": 155, "time_total_s": 230.68938374519348}
{"custom_metrics": {"true_reward_mean": 3.843999999999999, "proxy_reward_mean": 3.843999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 3.843999999999999, "current/proxy_reward_mean": 3.843999999999999}, "training_iteration": 156, "time_total_s": 232.18706059455872}
{"custom_metrics": {"true_reward_mean": 4.351999999999999, "proxy_reward_mean": 4.351999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.351999999999999, "current/proxy_reward_mean": 4.351999999999999}, "training_iteration": 157, "time_total_s": 233.9184865951538}
{"custom_metrics": {"true_reward_mean": 4.339999999999999, "proxy_reward_mean": 4.339999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.339999999999999, "current/proxy_reward_mean": 4.339999999999999}, "training_iteration": 158, "time_total_s": 235.38316988945007}
{"custom_metrics": {"true_reward_mean": 4.4819999999999975, "proxy_reward_mean": 4.4819999999999975, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.4819999999999975, "current/proxy_reward_mean": 4.4819999999999975}, "training_iteration": 159, "time_total_s": 236.85300874710083}
{"custom_metrics": {"true_reward_mean": 4.358, "proxy_reward_mean": 4.358, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.358, "current/proxy_reward_mean": 4.358}, "training_iteration": 160, "time_total_s": 238.2972972393036}
{"custom_metrics": {"true_reward_mean": 4.542, "proxy_reward_mean": 4.542, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.542, "current/proxy_reward_mean": 4.542}, "training_iteration": 161, "time_total_s": 239.7711853981018}
{"custom_metrics": {"true_reward_mean": 4.747999999999997, "proxy_reward_mean": 4.747999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.747999999999997, "current/proxy_reward_mean": 4.747999999999997}, "training_iteration": 162, "time_total_s": 241.32481837272644}
{"custom_metrics": {"true_reward_mean": 4.605999999999998, "proxy_reward_mean": 4.605999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.605999999999998, "current/proxy_reward_mean": 4.605999999999998}, "training_iteration": 163, "time_total_s": 242.7498106956482}
{"custom_metrics": {"true_reward_mean": 4.773999999999999, "proxy_reward_mean": 4.773999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.773999999999999, "current/proxy_reward_mean": 4.773999999999999}, "training_iteration": 164, "time_total_s": 244.25657176971436}
{"custom_metrics": {"true_reward_mean": 4.294, "proxy_reward_mean": 4.294, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.294, "current/proxy_reward_mean": 4.294}, "training_iteration": 165, "time_total_s": 245.7274248600006}
{"custom_metrics": {"true_reward_mean": 4.923999999999998, "proxy_reward_mean": 4.923999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.923999999999998, "current/proxy_reward_mean": 4.923999999999998}, "training_iteration": 166, "time_total_s": 247.12733578681946}
{"custom_metrics": {"true_reward_mean": 4.979999999999998, "proxy_reward_mean": 4.979999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.979999999999998, "current/proxy_reward_mean": 4.979999999999998}, "training_iteration": 167, "time_total_s": 248.5795931816101}
{"custom_metrics": {"true_reward_mean": 4.6019999999999985, "proxy_reward_mean": 4.6019999999999985, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.6019999999999985, "current/proxy_reward_mean": 4.6019999999999985}, "training_iteration": 168, "time_total_s": 250.02935075759888}
{"custom_metrics": {"true_reward_mean": 4.503999999999998, "proxy_reward_mean": 4.503999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.503999999999998, "current/proxy_reward_mean": 4.503999999999998}, "training_iteration": 169, "time_total_s": 251.5115683078766}
{"custom_metrics": {"true_reward_mean": 4.791999999999998, "proxy_reward_mean": 4.791999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.791999999999998, "current/proxy_reward_mean": 4.791999999999998}, "training_iteration": 170, "time_total_s": 252.94793009757996}
{"custom_metrics": {"true_reward_mean": 4.965999999999997, "proxy_reward_mean": 4.965999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.965999999999997, "current/proxy_reward_mean": 4.965999999999997}, "training_iteration": 171, "time_total_s": 254.40823101997375}
{"custom_metrics": {"true_reward_mean": 4.4899999999999975, "proxy_reward_mean": 4.4899999999999975, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.4899999999999975, "current/proxy_reward_mean": 4.4899999999999975}, "training_iteration": 172, "time_total_s": 255.95884561538696}
{"custom_metrics": {"true_reward_mean": 4.693999999999998, "proxy_reward_mean": 4.693999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.693999999999998, "current/proxy_reward_mean": 4.693999999999998}, "training_iteration": 173, "time_total_s": 257.5164613723755}
{"custom_metrics": {"true_reward_mean": 4.903999999999996, "proxy_reward_mean": 4.903999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.903999999999996, "current/proxy_reward_mean": 4.903999999999996}, "training_iteration": 174, "time_total_s": 258.9913549423218}
{"custom_metrics": {"true_reward_mean": 4.977999999999997, "proxy_reward_mean": 4.977999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.977999999999997, "current/proxy_reward_mean": 4.977999999999997}, "training_iteration": 175, "time_total_s": 260.42048835754395}
{"custom_metrics": {"true_reward_mean": 5.115999999999995, "proxy_reward_mean": 5.115999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.115999999999995, "current/proxy_reward_mean": 5.115999999999995}, "training_iteration": 176, "time_total_s": 262.0108873844147}
{"custom_metrics": {"true_reward_mean": 5.1539999999999955, "proxy_reward_mean": 5.1539999999999955, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.1539999999999955, "current/proxy_reward_mean": 5.1539999999999955}, "training_iteration": 177, "time_total_s": 263.4621465206146}
{"custom_metrics": {"true_reward_mean": 4.959999999999997, "proxy_reward_mean": 4.959999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.959999999999997, "current/proxy_reward_mean": 4.959999999999997}, "training_iteration": 178, "time_total_s": 264.9945647716522}
{"custom_metrics": {"true_reward_mean": 5.005999999999998, "proxy_reward_mean": 5.005999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.005999999999998, "current/proxy_reward_mean": 5.005999999999998}, "training_iteration": 179, "time_total_s": 266.6631646156311}
{"custom_metrics": {"true_reward_mean": 5.207999999999996, "proxy_reward_mean": 5.207999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.207999999999996, "current/proxy_reward_mean": 5.207999999999996}, "training_iteration": 180, "time_total_s": 268.0877208709717}
{"custom_metrics": {"true_reward_mean": 5.165999999999995, "proxy_reward_mean": 5.165999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.165999999999995, "current/proxy_reward_mean": 5.165999999999995}, "training_iteration": 181, "time_total_s": 269.54873538017273}
{"custom_metrics": {"true_reward_mean": 4.975999999999997, "proxy_reward_mean": 4.975999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.975999999999997, "current/proxy_reward_mean": 4.975999999999997}, "training_iteration": 182, "time_total_s": 271.034699678421}
{"custom_metrics": {"true_reward_mean": 4.463999999999997, "proxy_reward_mean": 4.463999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.463999999999997, "current/proxy_reward_mean": 4.463999999999997}, "training_iteration": 183, "time_total_s": 272.5202124118805}
{"custom_metrics": {"true_reward_mean": 4.987999999999997, "proxy_reward_mean": 4.987999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.987999999999997, "current/proxy_reward_mean": 4.987999999999997}, "training_iteration": 184, "time_total_s": 274.0210464000702}
{"custom_metrics": {"true_reward_mean": 4.747999999999998, "proxy_reward_mean": 4.747999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.747999999999998, "current/proxy_reward_mean": 4.747999999999998}, "training_iteration": 185, "time_total_s": 275.5210597515106}
{"custom_metrics": {"true_reward_mean": 4.883999999999999, "proxy_reward_mean": 4.883999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.883999999999999, "current/proxy_reward_mean": 4.883999999999999}, "training_iteration": 186, "time_total_s": 277.17596650123596}
{"custom_metrics": {"true_reward_mean": 5.155999999999995, "proxy_reward_mean": 5.155999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.155999999999995, "current/proxy_reward_mean": 5.155999999999995}, "training_iteration": 187, "time_total_s": 278.70801281929016}
{"custom_metrics": {"true_reward_mean": 5.175999999999996, "proxy_reward_mean": 5.175999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.175999999999996, "current/proxy_reward_mean": 5.175999999999996}, "training_iteration": 188, "time_total_s": 280.21710324287415}
{"custom_metrics": {"true_reward_mean": 5.115999999999997, "proxy_reward_mean": 5.115999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.115999999999997, "current/proxy_reward_mean": 5.115999999999997}, "training_iteration": 189, "time_total_s": 281.6810677051544}
{"custom_metrics": {"true_reward_mean": 5.021999999999997, "proxy_reward_mean": 5.021999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.021999999999997, "current/proxy_reward_mean": 5.021999999999997}, "training_iteration": 190, "time_total_s": 283.2218849658966}
{"custom_metrics": {"true_reward_mean": 5.069999999999996, "proxy_reward_mean": 5.069999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.069999999999996, "current/proxy_reward_mean": 5.069999999999996}, "training_iteration": 191, "time_total_s": 284.68581557273865}
{"custom_metrics": {"true_reward_mean": 5.163999999999995, "proxy_reward_mean": 5.163999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.163999999999995, "current/proxy_reward_mean": 5.163999999999995}, "training_iteration": 192, "time_total_s": 286.10837292671204}
{"custom_metrics": {"true_reward_mean": 5.137999999999996, "proxy_reward_mean": 5.137999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.137999999999996, "current/proxy_reward_mean": 5.137999999999996}, "training_iteration": 193, "time_total_s": 287.6136124134064}
{"custom_metrics": {"true_reward_mean": 4.791999999999996, "proxy_reward_mean": 4.791999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.791999999999996, "current/proxy_reward_mean": 4.791999999999996}, "training_iteration": 194, "time_total_s": 289.0012848377228}
{"custom_metrics": {"true_reward_mean": 4.655999999999999, "proxy_reward_mean": 4.655999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.655999999999999, "current/proxy_reward_mean": 4.655999999999999}, "training_iteration": 195, "time_total_s": 290.6949350833893}
{"custom_metrics": {"true_reward_mean": 4.943999999999996, "proxy_reward_mean": 4.943999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.943999999999996, "current/proxy_reward_mean": 4.943999999999996}, "training_iteration": 196, "time_total_s": 292.39378023147583}
{"custom_metrics": {"true_reward_mean": 4.963999999999997, "proxy_reward_mean": 4.963999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.963999999999997, "current/proxy_reward_mean": 4.963999999999997}, "training_iteration": 197, "time_total_s": 294.0305688381195}
{"custom_metrics": {"true_reward_mean": 4.8519999999999985, "proxy_reward_mean": 4.8519999999999985, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.8519999999999985, "current/proxy_reward_mean": 4.8519999999999985}, "training_iteration": 198, "time_total_s": 295.47422671318054}
{"custom_metrics": {"true_reward_mean": 4.837999999999996, "proxy_reward_mean": 4.837999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.837999999999996, "current/proxy_reward_mean": 4.837999999999996}, "training_iteration": 199, "time_total_s": 296.9796931743622}
{"custom_metrics": {"true_reward_mean": 4.695999999999999, "proxy_reward_mean": 4.695999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.695999999999999, "current/proxy_reward_mean": 4.695999999999999}, "training_iteration": 200, "time_total_s": 298.4870402812958}
{"custom_metrics": {"true_reward_mean": 4.721999999999998, "proxy_reward_mean": 4.721999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.721999999999998, "current/proxy_reward_mean": 4.721999999999998}, "training_iteration": 201, "time_total_s": 300.0233769416809}
{"custom_metrics": {"true_reward_mean": 4.5859999999999985, "proxy_reward_mean": 4.5859999999999985, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.5859999999999985, "current/proxy_reward_mean": 4.5859999999999985}, "training_iteration": 202, "time_total_s": 301.54375290870667}
{"custom_metrics": {"true_reward_mean": 4.639999999999998, "proxy_reward_mean": 4.639999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.639999999999998, "current/proxy_reward_mean": 4.639999999999998}, "training_iteration": 203, "time_total_s": 302.9093780517578}
{"custom_metrics": {"true_reward_mean": 4.735999999999998, "proxy_reward_mean": 4.735999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.735999999999998, "current/proxy_reward_mean": 4.735999999999998}, "training_iteration": 204, "time_total_s": 304.61374258995056}
{"custom_metrics": {"true_reward_mean": 5.171999999999995, "proxy_reward_mean": 5.171999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.171999999999995, "current/proxy_reward_mean": 5.171999999999995}, "training_iteration": 205, "time_total_s": 306.14929819107056}
{"custom_metrics": {"true_reward_mean": 4.677999999999998, "proxy_reward_mean": 4.677999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.677999999999998, "current/proxy_reward_mean": 4.677999999999998}, "training_iteration": 206, "time_total_s": 307.568740606308}
{"custom_metrics": {"true_reward_mean": 4.953999999999997, "proxy_reward_mean": 4.953999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.953999999999997, "current/proxy_reward_mean": 4.953999999999997}, "training_iteration": 207, "time_total_s": 309.08393836021423}
{"custom_metrics": {"true_reward_mean": 4.8779999999999974, "proxy_reward_mean": 4.8779999999999974, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.8779999999999974, "current/proxy_reward_mean": 4.8779999999999974}, "training_iteration": 208, "time_total_s": 310.60102820396423}
{"custom_metrics": {"true_reward_mean": 4.883999999999998, "proxy_reward_mean": 4.883999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.883999999999998, "current/proxy_reward_mean": 4.883999999999998}, "training_iteration": 209, "time_total_s": 312.01944065093994}
{"custom_metrics": {"true_reward_mean": 4.997999999999996, "proxy_reward_mean": 4.997999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.997999999999996, "current/proxy_reward_mean": 4.997999999999996}, "training_iteration": 210, "time_total_s": 313.4368803501129}
{"custom_metrics": {"true_reward_mean": 4.797999999999997, "proxy_reward_mean": 4.797999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.797999999999997, "current/proxy_reward_mean": 4.797999999999997}, "training_iteration": 211, "time_total_s": 314.88213086128235}
{"custom_metrics": {"true_reward_mean": 4.617999999999999, "proxy_reward_mean": 4.617999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.617999999999999, "current/proxy_reward_mean": 4.617999999999999}, "training_iteration": 212, "time_total_s": 316.40397477149963}
{"custom_metrics": {"true_reward_mean": 4.769999999999998, "proxy_reward_mean": 4.769999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.769999999999998, "current/proxy_reward_mean": 4.769999999999998}, "training_iteration": 213, "time_total_s": 317.8551983833313}
{"custom_metrics": {"true_reward_mean": 5.083999999999996, "proxy_reward_mean": 5.083999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.083999999999996, "current/proxy_reward_mean": 5.083999999999996}, "training_iteration": 214, "time_total_s": 319.41494512557983}
{"custom_metrics": {"true_reward_mean": 5.045999999999996, "proxy_reward_mean": 5.045999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.045999999999996, "current/proxy_reward_mean": 5.045999999999996}, "training_iteration": 215, "time_total_s": 320.90446066856384}
{"custom_metrics": {"true_reward_mean": 4.6499999999999995, "proxy_reward_mean": 4.6499999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.6499999999999995, "current/proxy_reward_mean": 4.6499999999999995}, "training_iteration": 216, "time_total_s": 322.31486225128174}
{"custom_metrics": {"true_reward_mean": 4.813999999999997, "proxy_reward_mean": 4.813999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.813999999999997, "current/proxy_reward_mean": 4.813999999999997}, "training_iteration": 217, "time_total_s": 323.741827249527}
{"custom_metrics": {"true_reward_mean": 4.763999999999998, "proxy_reward_mean": 4.763999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.763999999999998, "current/proxy_reward_mean": 4.763999999999998}, "training_iteration": 218, "time_total_s": 325.2470462322235}
{"custom_metrics": {"true_reward_mean": 4.282000000000001, "proxy_reward_mean": 4.282000000000001, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.282000000000001, "current/proxy_reward_mean": 4.282000000000001}, "training_iteration": 219, "time_total_s": 326.541885137558}
{"custom_metrics": {"true_reward_mean": 4.475999999999999, "proxy_reward_mean": 4.475999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.475999999999999, "current/proxy_reward_mean": 4.475999999999999}, "training_iteration": 220, "time_total_s": 328.0538077354431}
{"custom_metrics": {"true_reward_mean": 5.0799999999999965, "proxy_reward_mean": 5.0799999999999965, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.0799999999999965, "current/proxy_reward_mean": 5.0799999999999965}, "training_iteration": 221, "time_total_s": 329.57217836380005}
{"custom_metrics": {"true_reward_mean": 5.037999999999997, "proxy_reward_mean": 5.037999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.037999999999997, "current/proxy_reward_mean": 5.037999999999997}, "training_iteration": 222, "time_total_s": 331.0324852466583}
{"custom_metrics": {"true_reward_mean": 4.713999999999997, "proxy_reward_mean": 4.713999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.713999999999997, "current/proxy_reward_mean": 4.713999999999997}, "training_iteration": 223, "time_total_s": 332.5691840648651}
{"custom_metrics": {"true_reward_mean": 4.933999999999996, "proxy_reward_mean": 4.933999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.933999999999996, "current/proxy_reward_mean": 4.933999999999996}, "training_iteration": 224, "time_total_s": 334.0585825443268}
{"custom_metrics": {"true_reward_mean": 5.083999999999995, "proxy_reward_mean": 5.083999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.083999999999995, "current/proxy_reward_mean": 5.083999999999995}, "training_iteration": 225, "time_total_s": 335.580096244812}
{"custom_metrics": {"true_reward_mean": 5.133999999999996, "proxy_reward_mean": 5.133999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.133999999999996, "current/proxy_reward_mean": 5.133999999999996}, "training_iteration": 226, "time_total_s": 337.16803002357483}
{"custom_metrics": {"true_reward_mean": 5.099999999999996, "proxy_reward_mean": 5.099999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.099999999999996, "current/proxy_reward_mean": 5.099999999999996}, "training_iteration": 227, "time_total_s": 338.6126289367676}
{"custom_metrics": {"true_reward_mean": 4.987999999999998, "proxy_reward_mean": 4.987999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.987999999999998, "current/proxy_reward_mean": 4.987999999999998}, "training_iteration": 228, "time_total_s": 340.30682826042175}
{"custom_metrics": {"true_reward_mean": 4.983999999999996, "proxy_reward_mean": 4.983999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.983999999999996, "current/proxy_reward_mean": 4.983999999999996}, "training_iteration": 229, "time_total_s": 341.8398554325104}
{"custom_metrics": {"true_reward_mean": 5.157999999999997, "proxy_reward_mean": 5.157999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.157999999999997, "current/proxy_reward_mean": 5.157999999999997}, "training_iteration": 230, "time_total_s": 343.33104133605957}
{"custom_metrics": {"true_reward_mean": 5.069999999999996, "proxy_reward_mean": 5.069999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.069999999999996, "current/proxy_reward_mean": 5.069999999999996}, "training_iteration": 231, "time_total_s": 344.81129693984985}
{"custom_metrics": {"true_reward_mean": 4.8619999999999965, "proxy_reward_mean": 4.8619999999999965, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.8619999999999965, "current/proxy_reward_mean": 4.8619999999999965}, "training_iteration": 232, "time_total_s": 346.0736629962921}
{"custom_metrics": {"true_reward_mean": 5.239999999999995, "proxy_reward_mean": 5.239999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.239999999999995, "current/proxy_reward_mean": 5.239999999999995}, "training_iteration": 233, "time_total_s": 347.6387219429016}
{"custom_metrics": {"true_reward_mean": 5.129999999999996, "proxy_reward_mean": 5.129999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.129999999999996, "current/proxy_reward_mean": 5.129999999999996}, "training_iteration": 234, "time_total_s": 349.1753478050232}
{"custom_metrics": {"true_reward_mean": 4.669999999999997, "proxy_reward_mean": 4.669999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.669999999999997, "current/proxy_reward_mean": 4.669999999999997}, "training_iteration": 235, "time_total_s": 350.7472584247589}
{"custom_metrics": {"true_reward_mean": 5.037999999999997, "proxy_reward_mean": 5.037999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.037999999999997, "current/proxy_reward_mean": 5.037999999999997}, "training_iteration": 236, "time_total_s": 352.23828959465027}
{"custom_metrics": {"true_reward_mean": 4.987999999999995, "proxy_reward_mean": 4.987999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.987999999999995, "current/proxy_reward_mean": 4.987999999999995}, "training_iteration": 237, "time_total_s": 354.0408504009247}
{"custom_metrics": {"true_reward_mean": 4.849999999999998, "proxy_reward_mean": 4.849999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.849999999999998, "current/proxy_reward_mean": 4.849999999999998}, "training_iteration": 238, "time_total_s": 355.6193118095398}
{"custom_metrics": {"true_reward_mean": 5.197999999999995, "proxy_reward_mean": 5.197999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.197999999999995, "current/proxy_reward_mean": 5.197999999999995}, "training_iteration": 239, "time_total_s": 357.12118673324585}
{"custom_metrics": {"true_reward_mean": 4.935999999999996, "proxy_reward_mean": 4.935999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.935999999999996, "current/proxy_reward_mean": 4.935999999999996}, "training_iteration": 240, "time_total_s": 358.5809690952301}
{"custom_metrics": {"true_reward_mean": 5.385999999999995, "proxy_reward_mean": 5.385999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.385999999999995, "current/proxy_reward_mean": 5.385999999999995}, "training_iteration": 241, "time_total_s": 360.0843358039856}
{"custom_metrics": {"true_reward_mean": 5.231999999999994, "proxy_reward_mean": 5.231999999999994, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.231999999999994, "current/proxy_reward_mean": 5.231999999999994}, "training_iteration": 242, "time_total_s": 361.66070437431335}
{"custom_metrics": {"true_reward_mean": 4.969999999999996, "proxy_reward_mean": 4.969999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.969999999999996, "current/proxy_reward_mean": 4.969999999999996}, "training_iteration": 243, "time_total_s": 363.15034890174866}
{"custom_metrics": {"true_reward_mean": 4.963999999999997, "proxy_reward_mean": 4.963999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.963999999999997, "current/proxy_reward_mean": 4.963999999999997}, "training_iteration": 244, "time_total_s": 364.450279712677}
{"custom_metrics": {"true_reward_mean": 5.011999999999997, "proxy_reward_mean": 5.011999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.011999999999997, "current/proxy_reward_mean": 5.011999999999997}, "training_iteration": 245, "time_total_s": 365.6470832824707}
{"custom_metrics": {"true_reward_mean": 5.103999999999995, "proxy_reward_mean": 5.103999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.103999999999995, "current/proxy_reward_mean": 5.103999999999995}, "training_iteration": 246, "time_total_s": 367.37054085731506}
{"custom_metrics": {"true_reward_mean": 5.0639999999999965, "proxy_reward_mean": 5.0639999999999965, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.0639999999999965, "current/proxy_reward_mean": 5.0639999999999965}, "training_iteration": 247, "time_total_s": 368.8328413963318}
{"custom_metrics": {"true_reward_mean": 4.7239999999999975, "proxy_reward_mean": 4.7239999999999975, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.7239999999999975, "current/proxy_reward_mean": 4.7239999999999975}, "training_iteration": 248, "time_total_s": 370.29838252067566}
{"custom_metrics": {"true_reward_mean": 4.987999999999997, "proxy_reward_mean": 4.987999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.987999999999997, "current/proxy_reward_mean": 4.987999999999997}, "training_iteration": 249, "time_total_s": 371.71610164642334}
{"custom_metrics": {"true_reward_mean": 5.115999999999995, "proxy_reward_mean": 5.115999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.115999999999995, "current/proxy_reward_mean": 5.115999999999995}, "training_iteration": 250, "time_total_s": 373.1797420978546}
{"custom_metrics": {"true_reward_mean": 5.021999999999997, "proxy_reward_mean": 5.021999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.021999999999997, "current/proxy_reward_mean": 5.021999999999997}, "training_iteration": 251, "time_total_s": 374.6400456428528}
{"custom_metrics": {"true_reward_mean": 4.819999999999998, "proxy_reward_mean": 4.819999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.819999999999998, "current/proxy_reward_mean": 4.819999999999998}, "training_iteration": 252, "time_total_s": 375.90929770469666}
{"custom_metrics": {"true_reward_mean": 4.9499999999999975, "proxy_reward_mean": 4.9499999999999975, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.9499999999999975, "current/proxy_reward_mean": 4.9499999999999975}, "training_iteration": 253, "time_total_s": 377.46471095085144}
{"custom_metrics": {"true_reward_mean": 4.565999999999999, "proxy_reward_mean": 4.565999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.565999999999999, "current/proxy_reward_mean": 4.565999999999999}, "training_iteration": 254, "time_total_s": 378.8803622722626}
{"custom_metrics": {"true_reward_mean": 4.891999999999998, "proxy_reward_mean": 4.891999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.891999999999998, "current/proxy_reward_mean": 4.891999999999998}, "training_iteration": 255, "time_total_s": 380.3409230709076}
{"custom_metrics": {"true_reward_mean": 4.717999999999998, "proxy_reward_mean": 4.717999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.717999999999998, "current/proxy_reward_mean": 4.717999999999998}, "training_iteration": 256, "time_total_s": 381.8029954433441}
{"custom_metrics": {"true_reward_mean": 4.595999999999998, "proxy_reward_mean": 4.595999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.595999999999998, "current/proxy_reward_mean": 4.595999999999998}, "training_iteration": 257, "time_total_s": 383.39169096946716}
{"custom_metrics": {"true_reward_mean": 4.951999999999996, "proxy_reward_mean": 4.951999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.951999999999996, "current/proxy_reward_mean": 4.951999999999996}, "training_iteration": 258, "time_total_s": 385.10124373435974}
{"custom_metrics": {"true_reward_mean": 4.8859999999999975, "proxy_reward_mean": 4.8859999999999975, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.8859999999999975, "current/proxy_reward_mean": 4.8859999999999975}, "training_iteration": 259, "time_total_s": 386.6886398792267}
{"custom_metrics": {"true_reward_mean": 5.073999999999996, "proxy_reward_mean": 5.073999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.073999999999996, "current/proxy_reward_mean": 5.073999999999996}, "training_iteration": 260, "time_total_s": 388.1983873844147}
{"custom_metrics": {"true_reward_mean": 4.647999999999998, "proxy_reward_mean": 4.647999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.647999999999998, "current/proxy_reward_mean": 4.647999999999998}, "training_iteration": 261, "time_total_s": 389.72005009651184}
{"custom_metrics": {"true_reward_mean": 4.817999999999996, "proxy_reward_mean": 4.817999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.817999999999996, "current/proxy_reward_mean": 4.817999999999996}, "training_iteration": 262, "time_total_s": 391.2691876888275}
{"custom_metrics": {"true_reward_mean": 4.3740000000000006, "proxy_reward_mean": 4.3740000000000006, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.3740000000000006, "current/proxy_reward_mean": 4.3740000000000006}, "training_iteration": 263, "time_total_s": 392.9396388530731}
{"custom_metrics": {"true_reward_mean": 5.227999999999995, "proxy_reward_mean": 5.227999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.227999999999995, "current/proxy_reward_mean": 5.227999999999995}, "training_iteration": 264, "time_total_s": 394.4488251209259}
{"custom_metrics": {"true_reward_mean": 5.135999999999996, "proxy_reward_mean": 5.135999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.135999999999996, "current/proxy_reward_mean": 5.135999999999996}, "training_iteration": 265, "time_total_s": 395.8867652416229}
{"custom_metrics": {"true_reward_mean": 4.603999999999997, "proxy_reward_mean": 4.603999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.603999999999997, "current/proxy_reward_mean": 4.603999999999997}, "training_iteration": 266, "time_total_s": 397.50988817214966}
{"custom_metrics": {"true_reward_mean": 4.8779999999999974, "proxy_reward_mean": 4.8779999999999974, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.8779999999999974, "current/proxy_reward_mean": 4.8779999999999974}, "training_iteration": 267, "time_total_s": 399.15281343460083}
{"custom_metrics": {"true_reward_mean": 5.075999999999997, "proxy_reward_mean": 5.075999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.075999999999997, "current/proxy_reward_mean": 5.075999999999997}, "training_iteration": 268, "time_total_s": 400.61357522010803}
{"custom_metrics": {"true_reward_mean": 4.799999999999996, "proxy_reward_mean": 4.799999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.799999999999996, "current/proxy_reward_mean": 4.799999999999996}, "training_iteration": 269, "time_total_s": 402.09038257598877}
{"custom_metrics": {"true_reward_mean": 4.441999999999998, "proxy_reward_mean": 4.441999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.441999999999998, "current/proxy_reward_mean": 4.441999999999998}, "training_iteration": 270, "time_total_s": 403.7159643173218}
{"custom_metrics": {"true_reward_mean": 4.769999999999998, "proxy_reward_mean": 4.769999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.769999999999998, "current/proxy_reward_mean": 4.769999999999998}, "training_iteration": 271, "time_total_s": 405.1874535083771}
{"custom_metrics": {"true_reward_mean": 4.977999999999996, "proxy_reward_mean": 4.977999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.977999999999996, "current/proxy_reward_mean": 4.977999999999996}, "training_iteration": 272, "time_total_s": 406.7008533477783}
{"custom_metrics": {"true_reward_mean": 5.371999999999994, "proxy_reward_mean": 5.371999999999994, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.371999999999994, "current/proxy_reward_mean": 5.371999999999994}, "training_iteration": 273, "time_total_s": 408.13190031051636}
{"custom_metrics": {"true_reward_mean": 4.941999999999997, "proxy_reward_mean": 4.941999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.941999999999997, "current/proxy_reward_mean": 4.941999999999997}, "training_iteration": 274, "time_total_s": 409.61894726753235}
{"custom_metrics": {"true_reward_mean": 5.197999999999996, "proxy_reward_mean": 5.197999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.197999999999996, "current/proxy_reward_mean": 5.197999999999996}, "training_iteration": 275, "time_total_s": 411.0954215526581}
{"custom_metrics": {"true_reward_mean": 4.097999999999999, "proxy_reward_mean": 4.097999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.097999999999999, "current/proxy_reward_mean": 4.097999999999999}, "training_iteration": 276, "time_total_s": 412.55870032310486}
{"custom_metrics": {"true_reward_mean": 5.133999999999995, "proxy_reward_mean": 5.133999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.133999999999995, "current/proxy_reward_mean": 5.133999999999995}, "training_iteration": 277, "time_total_s": 414.0266649723053}
{"custom_metrics": {"true_reward_mean": 4.955999999999997, "proxy_reward_mean": 4.955999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.955999999999997, "current/proxy_reward_mean": 4.955999999999997}, "training_iteration": 278, "time_total_s": 415.6248707771301}
{"custom_metrics": {"true_reward_mean": 4.689999999999999, "proxy_reward_mean": 4.689999999999999, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.689999999999999, "current/proxy_reward_mean": 4.689999999999999}, "training_iteration": 279, "time_total_s": 417.12695574760437}
{"custom_metrics": {"true_reward_mean": 4.801999999999998, "proxy_reward_mean": 4.801999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.801999999999998, "current/proxy_reward_mean": 4.801999999999998}, "training_iteration": 280, "time_total_s": 418.55920004844666}
{"custom_metrics": {"true_reward_mean": 4.681999999999997, "proxy_reward_mean": 4.681999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.681999999999997, "current/proxy_reward_mean": 4.681999999999997}, "training_iteration": 281, "time_total_s": 420.24122047424316}
{"custom_metrics": {"true_reward_mean": 5.043999999999995, "proxy_reward_mean": 5.043999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.043999999999995, "current/proxy_reward_mean": 5.043999999999995}, "training_iteration": 282, "time_total_s": 421.73649764060974}
{"custom_metrics": {"true_reward_mean": 4.969999999999997, "proxy_reward_mean": 4.969999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.969999999999997, "current/proxy_reward_mean": 4.969999999999997}, "training_iteration": 283, "time_total_s": 423.2890646457672}
{"custom_metrics": {"true_reward_mean": 4.987999999999997, "proxy_reward_mean": 4.987999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.987999999999997, "current/proxy_reward_mean": 4.987999999999997}, "training_iteration": 284, "time_total_s": 424.73430490493774}
{"custom_metrics": {"true_reward_mean": 5.0159999999999965, "proxy_reward_mean": 5.0159999999999965, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.0159999999999965, "current/proxy_reward_mean": 5.0159999999999965}, "training_iteration": 285, "time_total_s": 426.1968939304352}
{"custom_metrics": {"true_reward_mean": 5.0079999999999965, "proxy_reward_mean": 5.0079999999999965, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.0079999999999965, "current/proxy_reward_mean": 5.0079999999999965}, "training_iteration": 286, "time_total_s": 427.7008464336395}
{"custom_metrics": {"true_reward_mean": 5.075999999999996, "proxy_reward_mean": 5.075999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.075999999999996, "current/proxy_reward_mean": 5.075999999999996}, "training_iteration": 287, "time_total_s": 429.15886664390564}
{"custom_metrics": {"true_reward_mean": 4.723999999999997, "proxy_reward_mean": 4.723999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.723999999999997, "current/proxy_reward_mean": 4.723999999999997}, "training_iteration": 288, "time_total_s": 430.6918394565582}
{"custom_metrics": {"true_reward_mean": 4.29, "proxy_reward_mean": 4.29, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.29, "current/proxy_reward_mean": 4.29}, "training_iteration": 289, "time_total_s": 432.1185836791992}
{"custom_metrics": {"true_reward_mean": 5.229999999999995, "proxy_reward_mean": 5.229999999999995, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.229999999999995, "current/proxy_reward_mean": 5.229999999999995}, "training_iteration": 290, "time_total_s": 433.5835027694702}
{"custom_metrics": {"true_reward_mean": 4.939999999999996, "proxy_reward_mean": 4.939999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.939999999999996, "current/proxy_reward_mean": 4.939999999999996}, "training_iteration": 291, "time_total_s": 435.07293939590454}
{"custom_metrics": {"true_reward_mean": 5.079999999999996, "proxy_reward_mean": 5.079999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.079999999999996, "current/proxy_reward_mean": 5.079999999999996}, "training_iteration": 292, "time_total_s": 436.7639994621277}
{"custom_metrics": {"true_reward_mean": 4.777999999999997, "proxy_reward_mean": 4.777999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.777999999999997, "current/proxy_reward_mean": 4.777999999999997}, "training_iteration": 293, "time_total_s": 438.46663331985474}
{"custom_metrics": {"true_reward_mean": 4.883999999999997, "proxy_reward_mean": 4.883999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.883999999999997, "current/proxy_reward_mean": 4.883999999999997}, "training_iteration": 294, "time_total_s": 439.8815679550171}
{"custom_metrics": {"true_reward_mean": 5.1859999999999955, "proxy_reward_mean": 5.1859999999999955, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.1859999999999955, "current/proxy_reward_mean": 5.1859999999999955}, "training_iteration": 295, "time_total_s": 441.3840823173523}
{"custom_metrics": {"true_reward_mean": 5.073999999999996, "proxy_reward_mean": 5.073999999999996, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.073999999999996, "current/proxy_reward_mean": 5.073999999999996}, "training_iteration": 296, "time_total_s": 442.82279658317566}
{"custom_metrics": {"true_reward_mean": 4.815999999999998, "proxy_reward_mean": 4.815999999999998, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.815999999999998, "current/proxy_reward_mean": 4.815999999999998}, "training_iteration": 297, "time_total_s": 444.31278586387634}
{"custom_metrics": {"true_reward_mean": 5.127999999999997, "proxy_reward_mean": 5.127999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 5.127999999999997, "current/proxy_reward_mean": 5.127999999999997}, "training_iteration": 298, "time_total_s": 445.9108304977417}
{"custom_metrics": {"true_reward_mean": 4.9739999999999975, "proxy_reward_mean": 4.9739999999999975, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.9739999999999975, "current/proxy_reward_mean": 4.9739999999999975}, "training_iteration": 299, "time_total_s": 447.6564300060272}
{"custom_metrics": {"true_reward_mean": 4.867999999999997, "proxy_reward_mean": 4.867999999999997, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 4.867999999999997, "current/proxy_reward_mean": 4.867999999999997}, "training_iteration": 300, "time_total_s": 449.11888551712036}
