{"critic/kl_coeff":0.001,"critic/returns/max":1.000997543334961,"response_length/clip_ratio":0.009765625,"prompt_length/max":205,"critic/advantages/max":4.437600135803223,"actor/pg_clipfrac":0,"timing_per_token_ms/update_critic":1.036981470143297,"prompt_length/min":55,"critic/rewards/min":-0.0013466797536239028,"prompt_length/mean":94.912109375,"actor/grad_norm":3.6943893432617188,"critic/vpred_mean":-0.3150615692138672,"timing_s/step":502.2593907676637,"timing_per_token_ms/adv":0.0009690614000888242,"global_seqlen/min":36979,"critic/rewards/mean":0.2577992379665375,"critic/vf_loss":9.409236118197441,"critic/kl":5.264367428026162e-05,"critic/score/max":1,"critic/vf_explained_var":-95.92330932617188,"timing_per_token_ms/ref":0.5326451068626404,"response_length/max":512,"actor/ppo_kl":0,"global_seqlen/minmax_diff":1898,"timing_s/update_critic":156.15281770005822,"_timestamp":1.7416042948223212e+09,"val/test_score//data/yangjunxiao/Reasoning_Hallucination/verl/data/gsm8k":0.22820318423047764,"actor/pg_loss":0.028935165573784616,"critic/returns/min":-0.0014190673828125,"prompt_length/clip_ratio":0,"critic/lr":1e-05,"critic/values/max":16.75,"critic/advantages/min":-4.0720624923706055,"timing_per_token_ms/update_actor":1.076082588170332,"global_seqlen/balanced_max":37646,"critic/rewards/max":1.000999927520752,"critic/grad_norm":357.8920028209686,"_wandb":{"runtime":579},"critic/advantages/mean":6.695141774315516e-09,"timing_per_token_ms/gen":0.3079553074973088,"global_seqlen/mean":37646,"_step":1,"critic/values/min":-19,"response_length/mean":199.197265625,"critic/vf_clipfrac":0,"actor/entropy_loss":0.2933495866600424,"critic/values/mean":-0.4609375,"timing_s/gen":31.40805385634303,"timing_s/old_log_prob":37.7607082799077,"critic/score/min":0,"_runtime":579.219664582,"mfu/actor":0.03434972208538802,"timing_s/update_actor":162.04082045704126,"timing_s/ref":80.20783077180386,"critic/score/mean":0.2578125,"actor/lr":1e-06,"global_seqlen/balanced_min":37646,"mfu/critic":0.03563965067679328,"timing_per_token_ms/values":0.22928159998028524,"response_length/min":36,"timing_s/adv":0.1459251418709755,"critic/returns/mean":0.25462377071380615,"timing_s/values":34.526140451431274,"global_seqlen/max":38877}