{"actor/entropy_loss":0.5575061440467834,"critic/advantages/mean":-9.085940888553523e-09,"mfu/actor":0.060415403975540884,"prompt_length/max":140,"actor/grad_norm":4.129310131072998,"timing_s/gen":16.87186661362648,"critic/score/max":1,"response_length/clip_ratio":0,"critic/score/mean":0.6875,"critic/values/min":-4.34375,"global_seqlen/balanced_min":1057,"response_length/max":301,"critic/rewards/max":1.0006868839263916,"timing_s/update_actor":2.735100708901882,"critic/lr":1e-05,"timing_per_token_ms/update_actor":0.6391915655297692,"actor/pg_clipfrac":0,"global_seqlen/mean":1069.75,"timing_s/adv":0.1736220233142376,"critic/returns/mean":0.6273689270019531,"critic/vf_clipfrac":0,"timing_per_token_ms/values":0.16480911506562346,"val/test_score//data/yangjunxiao/Reasoning_Hallucination/verl/data/gsm8k":0.620166793025019,"response_length/min":90,"timing_s/testing":54.44338746741414,"critic/score/min":0,"timing_s/old_log_prob":0.8059326633810997,"critic/vpred_mean":0.75390625,"critic/values/mean":0.62890625,"critic/kl_coeff":0.001,"critic/advantages/max":5.01462459564209,"critic/values/max":6.625,"_wandb":{"runtime":924},"_runtime":924.459846472,"global_seqlen/min":983,"timing_per_token_ms/gen":6.182435549148582,"global_seqlen/max":1188,"timing_per_token_ms/update_critic":0.5874186144807846,"critic/grad_norm":28.81000328063965,"critic/advantages/min":-5.27506685256958,"critic/vf_explained_var":-3.873084545135498,"prompt_length/mean":96.875,"critic/returns/min":-0.0032869577407836914,"critic/kl":0.013541202060878277,"timing_per_token_ms/ref":0.3000658008087793,"timing_s/update_critic":2.5135642513632774,"critic/vf_loss":0.7157586216926575,"critic/rewards/mean":0.6855171322822571,"response_length/mean":170.5625,"prompt_length/min":76,"global_seqlen/minmax_diff":205,"actor/ppo_kl":0,"critic/rewards/min":-0.002342517953366041,"_step":17,"timing_s/values":0.7052182033658028,"timing_per_token_ms/adv":0.04057537352517822,"actor/pg_loss":-0.01635473035275936,"timing_s/step":25.09376285225153,"timing_s/ref":1.2839815616607666,"mfu/critic":0.06573879732341868,"actor/lr":1e-06,"global_seqlen/balanced_max":1087,"critic/returns/max":1.0011742115020752,"_timestamp":1.741602440733002e+09,"prompt_length/clip_ratio":0}