{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 1, "time_total_s": 2.6015751361846924}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 2, "time_total_s": 4.350100755691528}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 3, "time_total_s": 6.290434122085571}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 4, "time_total_s": 7.97137975692749}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 5, "time_total_s": 61.84723353385925}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 6, "time_total_s": 63.625237226486206}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 7, "time_total_s": 65.55274105072021}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 8, "time_total_s": 67.33792424201965}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 9, "time_total_s": 69.25418734550476}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 10, "time_total_s": 79.51118016242981}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 11, "time_total_s": 81.41731834411621}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 12, "time_total_s": 83.18284940719604}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 13, "time_total_s": 85.09089303016663}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 14, "time_total_s": 86.84414458274841}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 15, "time_total_s": 134.49101734161377}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 16, "time_total_s": 136.44376420974731}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 17, "time_total_s": 138.33969044685364}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 18, "time_total_s": 140.05762887001038}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 19, "time_total_s": 141.95477938652039}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 20, "time_total_s": 195.72491812705994}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 21, "time_total_s": 197.3179862499237}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 22, "time_total_s": 199.21703171730042}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 23, "time_total_s": 200.94579315185547}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 24, "time_total_s": 202.88710856437683}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 25, "time_total_s": 253.68934631347656}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 26, "time_total_s": 255.47687292099}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 27, "time_total_s": 257.1203351020813}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 28, "time_total_s": 258.58265352249146}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 29, "time_total_s": 260.10059118270874}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 30, "time_total_s": 288.96818232536316}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 31, "time_total_s": 290.965695142746}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 32, "time_total_s": 292.80226945877075}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 33, "time_total_s": 294.8736593723297}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 34, "time_total_s": 296.90886521339417}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 35, "time_total_s": 349.87428426742554}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 36, "time_total_s": 351.74733424186707}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 37, "time_total_s": 353.838928937912}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 38, "time_total_s": 355.7137050628662}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 39, "time_total_s": 357.79985451698303}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 40, "time_total_s": 415.99378490448}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 41, "time_total_s": 418.03838777542114}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 42, "time_total_s": 419.72463274002075}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 43, "time_total_s": 421.5288076400757}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 44, "time_total_s": 423.23159098625183}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 45, "time_total_s": 474.0350556373596}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 46, "time_total_s": 475.9274504184723}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 47, "time_total_s": 477.975332736969}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 48, "time_total_s": 479.8500483036041}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 49, "time_total_s": 481.9454896450043}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 50, "time_total_s": 490.5041768550873}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 51, "time_total_s": 492.35913252830505}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 52, "time_total_s": 494.1795198917389}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 53, "time_total_s": 495.7732763290405}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 54, "time_total_s": 497.5679099559784}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 55, "time_total_s": 552.1885612010956}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 56, "time_total_s": 553.6824374198914}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 57, "time_total_s": 555.1753809452057}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 58, "time_total_s": 556.8419761657715}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 59, "time_total_s": 558.3248310089111}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 60, "time_total_s": 564.0437631607056}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 61, "time_total_s": 565.826812505722}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 62, "time_total_s": 567.6121702194214}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 63, "time_total_s": 569.2167325019836}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 64, "time_total_s": 571.3295545578003}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 65, "time_total_s": 593.1171987056732}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 66, "time_total_s": 594.9920761585236}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 67, "time_total_s": 596.6752350330353}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 68, "time_total_s": 598.5047187805176}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 69, "time_total_s": 600.0997042655945}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 70, "time_total_s": 656.865550994873}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 71, "time_total_s": 658.2821636199951}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 72, "time_total_s": 660.0918564796448}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 73, "time_total_s": 661.7786989212036}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 74, "time_total_s": 663.8593235015869}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 75, "time_total_s": 697.8121745586395}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 76, "time_total_s": 699.8522250652313}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 77, "time_total_s": 701.7248530387878}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 78, "time_total_s": 703.606308221817}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 79, "time_total_s": 705.1498625278473}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 80, "time_total_s": 711.3384816646576}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 81, "time_total_s": 712.9426941871643}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 82, "time_total_s": 714.5837786197662}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 83, "time_total_s": 716.1834464073181}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 84, "time_total_s": 718.0015523433685}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 85, "time_total_s": 762.8347384929657}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 86, "time_total_s": 764.5117800235748}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 87, "time_total_s": 766.5681631565094}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 88, "time_total_s": 768.4090342521667}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 89, "time_total_s": 770.143137216568}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 90, "time_total_s": 787.5994002819061}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 91, "time_total_s": 789.4035837650299}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 92, "time_total_s": 791.1810922622681}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 93, "time_total_s": 793.2417869567871}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 94, "time_total_s": 795.089982509613}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 95, "time_total_s": 827.3929808139801}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 96, "time_total_s": 829.4680705070496}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 97, "time_total_s": 831.3746244907379}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 98, "time_total_s": 833.2159197330475}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 99, "time_total_s": 835.2809700965881}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 100, "time_total_s": 838.9576926231384}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 101, "time_total_s": 841.0015776157379}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 102, "time_total_s": 842.7288780212402}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 103, "time_total_s": 844.5502502918243}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 104, "time_total_s": 846.2017350196838}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 105, "time_total_s": 883.0377697944641}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 106, "time_total_s": 884.9255647659302}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 107, "time_total_s": 886.6678068637848}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 108, "time_total_s": 888.4799513816833}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 109, "time_total_s": 890.099280834198}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 110, "time_total_s": 920.6690895557404}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 111, "time_total_s": 922.5331091880798}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 112, "time_total_s": 924.362547159195}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 113, "time_total_s": 925.9667825698853}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 114, "time_total_s": 927.9042601585388}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 115, "time_total_s": 961.7888612747192}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 116, "time_total_s": 963.6144552230835}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 117, "time_total_s": 965.6697273254395}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 118, "time_total_s": 967.5137937068939}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 119, "time_total_s": 969.5823316574097}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 120, "time_total_s": 999.6203219890594}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 121, "time_total_s": 1001.3886516094208}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 122, "time_total_s": 1002.9683017730713}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 123, "time_total_s": 1004.7322099208832}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 124, "time_total_s": 1006.3437738418579}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 125, "time_total_s": 1030.8890810012817}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 126, "time_total_s": 1032.6755738258362}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 127, "time_total_s": 1034.2834408283234}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 128, "time_total_s": 1035.8868699073792}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 129, "time_total_s": 1037.7115170955658}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 130, "time_total_s": 1069.6643207073212}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 131, "time_total_s": 1071.3099460601807}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 132, "time_total_s": 1072.75404214859}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 133, "time_total_s": 1074.2105560302734}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 134, "time_total_s": 1075.5193209648132}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 135, "time_total_s": 1079.174512386322}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 136, "time_total_s": 1080.9370746612549}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 137, "time_total_s": 1082.866408109665}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 138, "time_total_s": 1084.5006356239319}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 139, "time_total_s": 1086.2835717201233}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 140, "time_total_s": 1122.6371092796326}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 141, "time_total_s": 1124.4836587905884}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 142, "time_total_s": 1126.5024161338806}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 143, "time_total_s": 1128.096224308014}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 144, "time_total_s": 1129.882891178131}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 145, "time_total_s": 1161.5058100223541}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 146, "time_total_s": 1162.995003938675}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 147, "time_total_s": 1164.6272356510162}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 148, "time_total_s": 1166.1126205921173}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 149, "time_total_s": 1167.791501045227}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 150, "time_total_s": 1199.2624974250793}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 151, "time_total_s": 1201.072255373001}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 152, "time_total_s": 1202.6648292541504}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 153, "time_total_s": 1204.4704184532166}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 154, "time_total_s": 1206.0679726600647}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 155, "time_total_s": 1240.2029030323029}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 156, "time_total_s": 1242.178350687027}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 157, "time_total_s": 1243.8456346988678}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 158, "time_total_s": 1245.5193390846252}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 159, "time_total_s": 1247.0439410209656}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 160, "time_total_s": 1279.7047061920166}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 161, "time_total_s": 1281.5859508514404}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 162, "time_total_s": 1283.441657781601}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 163, "time_total_s": 1285.079859495163}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 164, "time_total_s": 1286.8571379184723}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 165, "time_total_s": 1291.097666501999}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 166, "time_total_s": 1292.7103161811829}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 167, "time_total_s": 1294.2992706298828}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 168, "time_total_s": 1296.0792200565338}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 169, "time_total_s": 1297.8739576339722}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 170, "time_total_s": 1331.9135904312134}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 171, "time_total_s": 1333.569685459137}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 172, "time_total_s": 1335.0698130130768}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 173, "time_total_s": 1336.7049593925476}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 174, "time_total_s": 1338.19469332695}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 175, "time_total_s": 1347.3414540290833}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 176, "time_total_s": 1349.111406803131}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 177, "time_total_s": 1350.6998221874237}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 178, "time_total_s": 1352.2954909801483}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 179, "time_total_s": 1354.1261672973633}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 180, "time_total_s": 1390.7376277446747}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 181, "time_total_s": 1392.3458533287048}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 182, "time_total_s": 1394.1634187698364}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 183, "time_total_s": 1395.765786409378}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 184, "time_total_s": 1397.5577125549316}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 185, "time_total_s": 1429.1781272888184}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 186, "time_total_s": 1430.7800495624542}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 187, "time_total_s": 1432.591915845871}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 188, "time_total_s": 1434.2416551113129}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 189, "time_total_s": 1435.838523864746}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 190, "time_total_s": 1472.1083035469055}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 191, "time_total_s": 1473.7335770130157}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 192, "time_total_s": 1475.3352348804474}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 193, "time_total_s": 1477.1031324863434}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 194, "time_total_s": 1478.7017705440521}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 195, "time_total_s": 1483.882947921753}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 196, "time_total_s": 1485.4891526699066}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 197, "time_total_s": 1487.3022816181183}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 198, "time_total_s": 1489.1074068546295}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 199, "time_total_s": 1490.7036538124084}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 200, "time_total_s": 1521.7252378463745}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 201, "time_total_s": 1523.2626190185547}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 202, "time_total_s": 1525.0572328567505}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 203, "time_total_s": 1526.6650669574738}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 204, "time_total_s": 1528.266146183014}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 205, "time_total_s": 1568.6428880691528}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 206, "time_total_s": 1570.0942120552063}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 207, "time_total_s": 1571.73135972023}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 208, "time_total_s": 1573.3134384155273}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 209, "time_total_s": 1575.108862876892}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 210, "time_total_s": 1580.4955394268036}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 211, "time_total_s": 1582.0783686637878}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 212, "time_total_s": 1583.8384339809418}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 213, "time_total_s": 1585.4306948184967}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 214, "time_total_s": 1587.2088611125946}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 215, "time_total_s": 1594.7594769001007}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 216, "time_total_s": 1596.4033036231995}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 217, "time_total_s": 1597.8932194709778}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 218, "time_total_s": 1599.4643247127533}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 219, "time_total_s": 1601.019812822342}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 220, "time_total_s": 1604.9840939044952}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 221, "time_total_s": 1606.8415987491608}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 222, "time_total_s": 1608.452525138855}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 223, "time_total_s": 1610.2543380260468}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 224, "time_total_s": 1611.8605906963348}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 225, "time_total_s": 1616.3341748714447}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 226, "time_total_s": 1617.938360452652}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 227, "time_total_s": 1619.69140291214}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 228, "time_total_s": 1621.3177375793457}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 229, "time_total_s": 1623.1021301746368}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 230, "time_total_s": 1627.352300643921}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 231, "time_total_s": 1629.1317520141602}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 232, "time_total_s": 1630.7389969825745}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 233, "time_total_s": 1632.5233068466187}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 234, "time_total_s": 1634.1787712574005}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 235, "time_total_s": 1666.2237701416016}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 236, "time_total_s": 1668.0151438713074}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 237, "time_total_s": 1669.661898612976}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 238, "time_total_s": 1671.444964170456}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 239, "time_total_s": 1673.0218460559845}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 240, "time_total_s": 1705.6393332481384}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 241, "time_total_s": 1707.4323644638062}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 242, "time_total_s": 1709.2347567081451}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 243, "time_total_s": 1710.8337137699127}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 244, "time_total_s": 1712.6339588165283}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 245, "time_total_s": 1717.1543855667114}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 246, "time_total_s": 1718.9853100776672}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 247, "time_total_s": 1720.707260131836}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 248, "time_total_s": 1722.7828772068024}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 249, "time_total_s": 1724.6223084926605}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 250, "time_total_s": 1760.8223989009857}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 251, "time_total_s": 1762.3301932811737}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 252, "time_total_s": 1763.9228613376617}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 253, "time_total_s": 1765.8693897724152}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 254, "time_total_s": 1767.633599281311}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 255, "time_total_s": 1772.3384418487549}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 256, "time_total_s": 1773.8104910850525}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 257, "time_total_s": 1775.4589290618896}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 258, "time_total_s": 1776.82985496521}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 259, "time_total_s": 1778.2674622535706}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 260, "time_total_s": 1810.9587244987488}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 261, "time_total_s": 1812.5501952171326}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 262, "time_total_s": 1814.3444871902466}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 263, "time_total_s": 1815.9616227149963}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 264, "time_total_s": 1817.8377010822296}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 265, "time_total_s": 1825.241453886032}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 266, "time_total_s": 1827.0669658184052}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 267, "time_total_s": 1828.6626737117767}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 268, "time_total_s": 1830.4595758914948}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 269, "time_total_s": 1832.0586559772491}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 270, "time_total_s": 1836.5119986534119}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 271, "time_total_s": 1838.112431049347}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 272, "time_total_s": 1839.8911662101746}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 273, "time_total_s": 1841.512241601944}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 274, "time_total_s": 1843.2820115089417}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 275, "time_total_s": 1889.343605041504}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 276, "time_total_s": 1890.8087146282196}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 277, "time_total_s": 1892.6002128124237}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 278, "time_total_s": 1894.223519563675}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 279, "time_total_s": 1895.8447382450104}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 280, "time_total_s": 1931.493672132492}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 281, "time_total_s": 1933.5683720111847}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 282, "time_total_s": 1935.3427817821503}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 283, "time_total_s": 1937.0211679935455}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 284, "time_total_s": 1938.5180203914642}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 285, "time_total_s": 1981.703851699829}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 286, "time_total_s": 1983.4901149272919}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 287, "time_total_s": 1985.1057946681976}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 288, "time_total_s": 1986.9083807468414}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 289, "time_total_s": 1988.544755935669}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 290, "time_total_s": 2033.3188166618347}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 291, "time_total_s": 2034.9881224632263}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 292, "time_total_s": 2036.4879581928253}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 293, "time_total_s": 2038.1868615150452}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 294, "time_total_s": 2039.8662934303284}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 295, "time_total_s": 2074.3034830093384}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 296, "time_total_s": 2075.9127752780914}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 297, "time_total_s": 2077.7489836215973}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 298, "time_total_s": 2079.3532469272614}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 299, "time_total_s": 2081.2107615470886}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 300, "time_total_s": 2086.185361146927}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 301, "time_total_s": 2087.9546530246735}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 302, "time_total_s": 2089.5548465251923}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 303, "time_total_s": 2091.329392194748}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 304, "time_total_s": 2092.920177936554}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 305, "time_total_s": 2143.581246614456}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 306, "time_total_s": 2145.16148519516}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 307, "time_total_s": 2146.9961788654327}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 308, "time_total_s": 2148.6065332889557}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 309, "time_total_s": 2150.392321586609}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 310, "time_total_s": 2187.97061753273}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 311, "time_total_s": 2189.777753353119}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 312, "time_total_s": 2191.425833940506}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 313, "time_total_s": 2193.2106940746307}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 314, "time_total_s": 2194.81938624382}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 315, "time_total_s": 2226.8483216762543}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 316, "time_total_s": 2228.497475385666}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 317, "time_total_s": 2230.296005964279}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 318, "time_total_s": 2231.9099295139313}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 319, "time_total_s": 2233.7042281627655}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 320, "time_total_s": 2238.5873658657074}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 321, "time_total_s": 2240.435797214508}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 322, "time_total_s": 2242.0376884937286}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 323, "time_total_s": 2243.8501329421997}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 324, "time_total_s": 2245.434656381607}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 325, "time_total_s": 2279.1123864650726}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 326, "time_total_s": 2280.7153816223145}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 327, "time_total_s": 2282.2793157100677}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 328, "time_total_s": 2284.01624417305}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 329, "time_total_s": 2285.6857566833496}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 330, "time_total_s": 2316.6769773960114}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 331, "time_total_s": 2318.4537031650543}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 332, "time_total_s": 2320.0534222126007}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 333, "time_total_s": 2321.8655281066895}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 334, "time_total_s": 2323.4767405986786}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 335, "time_total_s": 2369.5893738269806}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 336, "time_total_s": 2371.146435022354}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 337, "time_total_s": 2372.6357946395874}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 338, "time_total_s": 2374.2841567993164}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 339, "time_total_s": 2375.775158405304}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 340, "time_total_s": 2426.9716897010803}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 341, "time_total_s": 2428.5951442718506}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 342, "time_total_s": 2430.324458837509}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 343, "time_total_s": 2431.879002571106}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 344, "time_total_s": 2433.673410654068}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 345, "time_total_s": 2486.337379217148}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 346, "time_total_s": 2487.7968533039093}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 347, "time_total_s": 2489.362460374832}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 348, "time_total_s": 2491.1514146327972}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 349, "time_total_s": 2492.815684080124}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 350, "time_total_s": 2526.926885843277}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 351, "time_total_s": 2528.534946203232}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 352, "time_total_s": 2530.361525297165}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 353, "time_total_s": 2531.9344618320465}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 354, "time_total_s": 2533.6848781108856}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 355, "time_total_s": 2548.191451072693}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 356, "time_total_s": 2549.8986542224884}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 357, "time_total_s": 2551.4073185920715}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 358, "time_total_s": 2553.072433948517}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 359, "time_total_s": 2554.5690054893494}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 360, "time_total_s": 2600.872658967972}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 361, "time_total_s": 2602.5523512363434}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 362, "time_total_s": 2604.232581615448}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 363, "time_total_s": 2605.9098002910614}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 364, "time_total_s": 2607.5194907188416}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 365, "time_total_s": 2659.6525962352753}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 366, "time_total_s": 2661.4777257442474}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 367, "time_total_s": 2663.1097745895386}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 368, "time_total_s": 2664.8771045207977}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 369, "time_total_s": 2666.476109981537}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 370, "time_total_s": 2724.03844499588}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 371, "time_total_s": 2725.645580768585}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 372, "time_total_s": 2727.4522857666016}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 373, "time_total_s": 2729.0914928913116}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 374, "time_total_s": 2730.788426399231}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 375, "time_total_s": 2735.654379606247}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 376, "time_total_s": 2737.098706960678}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 377, "time_total_s": 2738.6956930160522}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 378, "time_total_s": 2740.183688402176}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 379, "time_total_s": 2741.8458199501038}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 380, "time_total_s": 2796.7218928337097}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 381, "time_total_s": 2798.3382601737976}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 382, "time_total_s": 2800.163470029831}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 383, "time_total_s": 2801.776383161545}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 384, "time_total_s": 2803.5719327926636}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 385, "time_total_s": 2807.820071697235}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 386, "time_total_s": 2809.6126792430878}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 387, "time_total_s": 2811.2344994544983}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 388, "time_total_s": 2813.0615701675415}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 389, "time_total_s": 2814.660580396652}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 390, "time_total_s": 2830.512013912201}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 391, "time_total_s": 2832.135976076126}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 392, "time_total_s": 2833.6204059123993}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 393, "time_total_s": 2835.3302376270294}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 394, "time_total_s": 2836.938625574112}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 395, "time_total_s": 2842.452184200287}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 396, "time_total_s": 2844.252852678299}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 397, "time_total_s": 2845.865818500519}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 398, "time_total_s": 2847.695962905884}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 399, "time_total_s": 2849.304060935974}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 400, "time_total_s": 2859.140154838562}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 401, "time_total_s": 2860.744319677353}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 402, "time_total_s": 2862.5680990219116}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 403, "time_total_s": 2864.1629767417908}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 404, "time_total_s": 2865.951523542404}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 405, "time_total_s": 2874.4840474128723}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 406, "time_total_s": 2876.067412376404}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 407, "time_total_s": 2877.728134393692}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 408, "time_total_s": 2879.1881880760193}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 409, "time_total_s": 2880.8155126571655}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 410, "time_total_s": 2900.8504083156586}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 411, "time_total_s": 2902.453226327896}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 412, "time_total_s": 2904.2565791606903}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 413, "time_total_s": 2905.8100411891937}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 414, "time_total_s": 2907.4454979896545}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 415, "time_total_s": 2929.4832775592804}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 416, "time_total_s": 2931.082480430603}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 417, "time_total_s": 2932.8814129829407}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 418, "time_total_s": 2934.5238206386566}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 419, "time_total_s": 2936.321056127548}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 420, "time_total_s": 2990.1072590351105}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 421, "time_total_s": 2991.5125098228455}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 422, "time_total_s": 2993.326666355133}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 423, "time_total_s": 2994.856696844101}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 424, "time_total_s": 2996.4867129325867}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 425, "time_total_s": 3002.496912717819}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 426, "time_total_s": 3004.1455965042114}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 427, "time_total_s": 3005.644790172577}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 428, "time_total_s": 3007.3079376220703}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 429, "time_total_s": 3008.8022768497467}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 430, "time_total_s": 3027.030070066452}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 431, "time_total_s": 3028.8310437202454}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 432, "time_total_s": 3030.440430164337}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 433, "time_total_s": 3032.255528688431}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 434, "time_total_s": 3033.835689306259}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 435, "time_total_s": 3051.053122282028}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 436, "time_total_s": 3052.718214035034}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 437, "time_total_s": 3054.5074470043182}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 438, "time_total_s": 3056.106496810913}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 439, "time_total_s": 3057.7241790294647}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 440, "time_total_s": 3089.5687839984894}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 441, "time_total_s": 3091.05166387558}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 442, "time_total_s": 3092.607964038849}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 443, "time_total_s": 3094.076054573059}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 444, "time_total_s": 3095.7893345355988}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 445, "time_total_s": 3144.6868426799774}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 446, "time_total_s": 3146.317949771881}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 447, "time_total_s": 3147.9343485832214}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 448, "time_total_s": 3149.7788207530975}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 449, "time_total_s": 3151.417707681656}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 450, "time_total_s": 3158.4031686782837}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 451, "time_total_s": 3160.0128812789917}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 452, "time_total_s": 3161.8223791122437}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 453, "time_total_s": 3163.4305119514465}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 454, "time_total_s": 3165.2804238796234}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 455, "time_total_s": 3169.955178260803}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 456, "time_total_s": 3171.422919511795}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 457, "time_total_s": 3173.0837228298187}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 458, "time_total_s": 3174.7178604602814}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 459, "time_total_s": 3176.641524076462}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 460, "time_total_s": 3207.771894931793}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 461, "time_total_s": 3209.4367344379425}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 462, "time_total_s": 3211.193402528763}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 463, "time_total_s": 3212.810099840164}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 464, "time_total_s": 3214.61683511734}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 465, "time_total_s": 3220.412833929062}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 466, "time_total_s": 3222.2192091941833}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 467, "time_total_s": 3224.0402958393097}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 468, "time_total_s": 3225.633976459503}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 469, "time_total_s": 3227.390126466751}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 470, "time_total_s": 3266.956664800644}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 471, "time_total_s": 3268.627513885498}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 472, "time_total_s": 3270.1254498958588}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 473, "time_total_s": 3271.8166210651398}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 474, "time_total_s": 3273.4030079841614}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 475, "time_total_s": 3321.216817378998}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 476, "time_total_s": 3323.1046805381775}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 477, "time_total_s": 3324.716850757599}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 478, "time_total_s": 3326.509212255478}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 479, "time_total_s": 3328.14213848114}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 480, "time_total_s": 3349.218802213669}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 481, "time_total_s": 3350.586594581604}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 482, "time_total_s": 3352.060922384262}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 483, "time_total_s": 3353.3663964271545}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 484, "time_total_s": 3355.011961221695}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 485, "time_total_s": 3358.791317462921}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 486, "time_total_s": 3360.5887775421143}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 487, "time_total_s": 3362.2196323871613}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 488, "time_total_s": 3364.0177495479584}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 489, "time_total_s": 3365.625498533249}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 490, "time_total_s": 3399.572345495224}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 491, "time_total_s": 3401.190710544586}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 492, "time_total_s": 3402.986806154251}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 493, "time_total_s": 3404.597109079361}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 494, "time_total_s": 3406.4127616882324}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 495, "time_total_s": 3417.9767010211945}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 496, "time_total_s": 3419.6924209594727}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 497, "time_total_s": 3421.3242807388306}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 498, "time_total_s": 3423.1568179130554}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 499, "time_total_s": 3424.783043861389}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 500, "time_total_s": 3469.323652267456}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 501, "time_total_s": 3470.9338586330414}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 502, "time_total_s": 3472.616828918457}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 503, "time_total_s": 3474.4804797172546}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 504, "time_total_s": 3476.1133904457092}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 505, "time_total_s": 3509.772591114044}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 506, "time_total_s": 3511.380747318268}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 507, "time_total_s": 3513.1937885284424}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 508, "time_total_s": 3514.8029844760895}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 509, "time_total_s": 3516.5551855564117}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 510, "time_total_s": 3522.173038005829}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 511, "time_total_s": 3523.6447536945343}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 512, "time_total_s": 3525.2741775512695}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 513, "time_total_s": 3526.7675285339355}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 514, "time_total_s": 3528.41708612442}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 515, "time_total_s": 3564.243387937546}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 516, "time_total_s": 3566.0196595191956}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 517, "time_total_s": 3567.652146100998}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 518, "time_total_s": 3569.447489500046}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 519, "time_total_s": 3571.0590114593506}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 520, "time_total_s": 3608.7045187950134}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 521, "time_total_s": 3610.157340526581}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 522, "time_total_s": 3611.500124692917}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 523, "time_total_s": 3613.1538846492767}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 524, "time_total_s": 3614.6513299942017}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 525, "time_total_s": 3622.6069378852844}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 526, "time_total_s": 3624.223439693451}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 527, "time_total_s": 3626.044097185135}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 528, "time_total_s": 3627.674194097519}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 529, "time_total_s": 3629.5043120384216}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 530, "time_total_s": 3685.883093357086}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 531, "time_total_s": 3687.304031610489}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 532, "time_total_s": 3689.1101548671722}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 533, "time_total_s": 3690.716867685318}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 534, "time_total_s": 3692.56947016716}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 535, "time_total_s": 3725.0438673496246}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 536, "time_total_s": 3726.6887085437775}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 537, "time_total_s": 3728.5046815872192}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 538, "time_total_s": 3730.132272720337}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 539, "time_total_s": 3731.761674642563}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 540, "time_total_s": 3736.5379707813263}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 541, "time_total_s": 3738.23503780365}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 542, "time_total_s": 3740.1092672348022}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 543, "time_total_s": 3741.7934062480927}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 544, "time_total_s": 3743.689788579941}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 545, "time_total_s": 3786.7098639011383}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 546, "time_total_s": 3788.1977891921997}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 547, "time_total_s": 3789.686578989029}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 548, "time_total_s": 3791.350730895996}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 549, "time_total_s": 3792.847069501877}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 550, "time_total_s": 3827.55721449852}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 551, "time_total_s": 3829.248283147812}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 552, "time_total_s": 3830.856888771057}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 553, "time_total_s": 3832.656749725342}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 554, "time_total_s": 3834.267602443695}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 555, "time_total_s": 3838.704986333847}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 556, "time_total_s": 3840.370759487152}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 557, "time_total_s": 3842.173727989197}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 558, "time_total_s": 3843.77396440506}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 559, "time_total_s": 3845.5635175704956}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 560, "time_total_s": 3858.8187386989594}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 561, "time_total_s": 3860.4612345695496}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 562, "time_total_s": 3862.221526622772}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 563, "time_total_s": 3863.8259103298187}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 564, "time_total_s": 3865.62212061882}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 565, "time_total_s": 3906.460027694702}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 566, "time_total_s": 3908.149962902069}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 567, "time_total_s": 3909.7478868961334}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 568, "time_total_s": 3911.5299339294434}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 569, "time_total_s": 3913.1341943740845}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 570, "time_total_s": 3942.669439315796}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 571, "time_total_s": 3944.262636899948}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 572, "time_total_s": 3946.0579142570496}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 573, "time_total_s": 3947.687397480011}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 574, "time_total_s": 3949.4858236312866}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 575, "time_total_s": 3958.1955285072327}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 576, "time_total_s": 3959.8861544132233}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 577, "time_total_s": 3961.3936026096344}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 578, "time_total_s": 3963.0438225269318}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 579, "time_total_s": 3964.5358426570892}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 580, "time_total_s": 3970.673581123352}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 581, "time_total_s": 3972.179761171341}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 582, "time_total_s": 3973.9755289554596}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 583, "time_total_s": 3975.6048080921173}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 584, "time_total_s": 3977.397812128067}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 585, "time_total_s": 3983.562824010849}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 586, "time_total_s": 3985.1761260032654}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 587, "time_total_s": 3986.9669694900513}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 588, "time_total_s": 3988.5830030441284}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 589, "time_total_s": 3990.345412015915}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 590, "time_total_s": 4024.1898839473724}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 591, "time_total_s": 4026.0108046531677}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 592, "time_total_s": 4027.6539766788483}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 593, "time_total_s": 4029.453963279724}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 594, "time_total_s": 4031.0442118644714}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 595, "time_total_s": 4070.8808488845825}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 596, "time_total_s": 4072.4923827648163}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 597, "time_total_s": 4074.2712399959564}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 598, "time_total_s": 4075.894066095352}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 599, "time_total_s": 4077.4838156700134}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 600, "time_total_s": 4112.762883901596}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 601, "time_total_s": 4114.571510076523}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 602, "time_total_s": 4116.166666269302}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 603, "time_total_s": 4117.968132972717}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 604, "time_total_s": 4119.615265369415}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 605, "time_total_s": 4134.467578887939}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 606, "time_total_s": 4136.12399148941}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 607, "time_total_s": 4137.618961572647}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 608, "time_total_s": 4139.2927458286285}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 609, "time_total_s": 4140.782360076904}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 610, "time_total_s": 4146.302460193634}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 611, "time_total_s": 4147.96683382988}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 612, "time_total_s": 4149.775719404221}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 613, "time_total_s": 4151.433578968048}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 614, "time_total_s": 4153.238345861435}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 615, "time_total_s": 4193.086220979691}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 616, "time_total_s": 4194.693280696869}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 617, "time_total_s": 4196.3078191280365}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 618, "time_total_s": 4198.134113311768}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 619, "time_total_s": 4199.750955581665}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 620, "time_total_s": 4237.679353713989}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 621, "time_total_s": 4239.339161872864}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 622, "time_total_s": 4240.81786942482}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 623, "time_total_s": 4242.483551979065}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 624, "time_total_s": 4243.970136880875}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 625, "time_total_s": 4260.37869644165}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 626, "time_total_s": 4262.0267379283905}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 627, "time_total_s": 4263.514153242111}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 628, "time_total_s": 4265.16015958786}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 629, "time_total_s": 4266.645838260651}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 630, "time_total_s": 4295.579249620438}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 631, "time_total_s": 4297.068109512329}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 632, "time_total_s": 4298.556778669357}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 633, "time_total_s": 4300.217488050461}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 634, "time_total_s": 4301.713407754898}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 635, "time_total_s": 4309.147033691406}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 636, "time_total_s": 4310.795300960541}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 637, "time_total_s": 4312.28351187706}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 638, "time_total_s": 4313.91298532486}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 639, "time_total_s": 4315.217261552811}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 640, "time_total_s": 4320.07372879982}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 641, "time_total_s": 4321.593140363693}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 642, "time_total_s": 4323.272782325745}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 643, "time_total_s": 4324.80614233017}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 644, "time_total_s": 4326.470084667206}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 645, "time_total_s": 4330.612509965897}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 646, "time_total_s": 4332.235897064209}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 647, "time_total_s": 4333.709496974945}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 648, "time_total_s": 4335.34801530838}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 649, "time_total_s": 4336.825916528702}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 650, "time_total_s": 4350.926972389221}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 651, "time_total_s": 4352.584049701691}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 652, "time_total_s": 4354.074728488922}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 653, "time_total_s": 4355.725825071335}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 654, "time_total_s": 4357.223393917084}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 655, "time_total_s": 4389.240649700165}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 656, "time_total_s": 4390.740206241608}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 657, "time_total_s": 4392.4101803302765}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 658, "time_total_s": 4393.909028768539}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 659, "time_total_s": 4395.577741622925}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 660, "time_total_s": 4399.25407576561}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 661, "time_total_s": 4400.892787218094}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 662, "time_total_s": 4402.556962251663}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 663, "time_total_s": 4404.043248653412}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 664, "time_total_s": 4405.697691679001}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 665, "time_total_s": 4429.6703424453735}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 666, "time_total_s": 4431.130189657211}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 667, "time_total_s": 4432.7671909332275}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 668, "time_total_s": 4434.233606338501}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 669, "time_total_s": 4435.879750490189}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 670, "time_total_s": 4479.124110221863}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 671, "time_total_s": 4480.623209476471}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 672, "time_total_s": 4482.285836696625}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 673, "time_total_s": 4483.781918287277}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 674, "time_total_s": 4485.446003675461}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 675, "time_total_s": 4498.536733388901}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 676, "time_total_s": 4500.196800470352}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 677, "time_total_s": 4501.703470945358}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 678, "time_total_s": 4503.368289232254}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 679, "time_total_s": 4504.86025094986}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 680, "time_total_s": 4510.282486200333}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 681, "time_total_s": 4511.79198384285}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 682, "time_total_s": 4513.434131860733}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 683, "time_total_s": 4514.923111200333}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 684, "time_total_s": 4516.583599090576}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 685, "time_total_s": 4560.98527598381}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 686, "time_total_s": 4562.325829029083}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 687, "time_total_s": 4564.008731842041}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 688, "time_total_s": 4565.516050815582}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 689, "time_total_s": 4567.201758861542}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 690, "time_total_s": 4583.240135908127}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 691, "time_total_s": 4584.760988950729}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 692, "time_total_s": 4586.442813158035}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 693, "time_total_s": 4587.9530618190765}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 694, "time_total_s": 4589.776240348816}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 695, "time_total_s": 4618.876163482666}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 696, "time_total_s": 4620.608251094818}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 697, "time_total_s": 4622.223216295242}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 698, "time_total_s": 4623.5259256362915}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 699, "time_total_s": 4625.008768796921}
{"custom_metrics": {"true_reward_mean": 0, "proxy_reward_mean": 0, "safe_policy0/true_reward_mean": 0, "safe_policy0/proxy_reward_mean": 0, "current/true_reward_mean": 0, "current/proxy_reward_mean": 0}, "training_iteration": 700, "time_total_s": 4674.199942111969}
