{"critic/vf_clipfrac":0,"critic/rewards/max":1.0008182525634766,"timing_s/old_log_prob":20.67348812520504,"critic/grad_norm":1411.5733795166016,"prompt_length/min":55,"timing_per_token_ms/gen":0.27267541784761834,"response_length/max":512,"global_seqlen/mean":37646,"timing_s/step":251.02215195819736,"prompt_length/mean":94.912109375,"actor/entropy_loss":0.296780358068645,"global_seqlen/balanced_min":37646,"critic/values/min":-22.875,"val/test_score//data/yangjunxiao/Reasoning_Hallucination/verl/data/gsm8k":0.22820318423047764,"critic/vf_loss":22.88480195403099,"_wandb":{"runtime":313},"timing_s/update_actor":80.52025756239891,"actor/grad_norm":1.7722890377044678,"actor/ppo_kl":0,"timing_per_token_ms/adv":0.0009557967452869806,"actor/pg_clipfrac":0,"critic/score/mean":0.2578125,"critic/advantages/max":3.662727117538452,"global_seqlen/minmax_diff":1898,"critic/advantages/mean":-6.717583289628237e-08,"mfu/actor":0.06919638025087056,"critic/rewards/mean":0.25779658555984497,"_timestamp":1.7416030741553898e+09,"timing_s/values":17.75004155933857,"critic/returns/max":1.0009918212890625,"timing_s/update_critic":78.31496548652649,"response_length/clip_ratio":0.009765625,"prompt_length/max":205,"timing_per_token_ms/ref":0.17129379390766958,"critic/vpred_mean":-4.13037109375,"global_seqlen/min":36979,"timing_per_token_ms/update_actor":0.5347198743717719,"critic/vf_explained_var":-133.41665649414062,"_runtime":313.114315827,"critic/score/min":0,"response_length/mean":199.197265625,"critic/score/max":1,"_step":1,"actor/pg_loss":-0.007217089121695608,"critic/values/max":20.5,"critic/kl_coeff":0.001,"prompt_length/clip_ratio":0,"critic/lr":1e-05,"critic/returns/mean":0.25462761521339417,"global_seqlen/max":38877,"critic/kl":9.884543396765366e-05,"global_seqlen/balanced_max":37646,"timing_s/adv":0.1439276970922947,"timing_per_token_ms/values":0.11787468495549706,"actor/lr":1e-06,"critic/rewards/min":-0.0011221696622669697,"critic/values/mean":-4.125,"mfu/critic":0.07113016813928216,"response_length/min":36,"critic/returns/min":-0.0012149810791015625,"timing_s/ref":25.794104661792517,"critic/advantages/min":-4.924524784088135,"timing_s/gen":27.80989319086075,"timing_per_token_ms/update_critic":0.5200749447917873}