{"prompt_length/min":55,"actor/entropy_loss":0.296780358068645,"actor/grad_norm":2.480738401412964,"critic/vf_explained_var":-73.47998046875,"global_seqlen/mean":37646,"mfu/actor":0.06859184004195336,"response_length/min":36,"critic/values/mean":-0.56640625,"critic/rewards/min":-0.0011221696622669697,"mfu/critic":0.07111381840991922,"prompt_length/mean":94.912109375,"critic/values/max":18.25,"critic/kl_coeff":0.001,"prompt_length/max":205,"timing_s/update_actor":81.21378511562943,"global_seqlen/max":38877,"response_length/clip_ratio":0.009765625,"timing_s/gen":32.12061185389757,"critic/score/min":0,"global_seqlen/minmax_diff":1898,"timing_s/update_critic":78.3222778737545,"critic/lr":1e-05,"critic/returns/mean":0.25463560223579407,"prompt_length/clip_ratio":0,"critic/advantages/min":-5.07263708114624,"timing_per_token_ms/gen":0.3149419236770394,"critic/score/max":1,"val/test_score//data/yangjunxiao/Reasoning_Hallucination/verl/data/gsm8k":0.22820318423047764,"critic/score/mean":0.2578125,"response_length/max":512,"critic/grad_norm":317.0416259765625,"timing_per_token_ms/update_actor":0.539325460312048,"timing_per_token_ms/ref":0.31794410380807697,"timing_s/ref":47.877294927835464,"_runtime":347.813764475,"global_seqlen/balanced_min":37646,"actor/ppo_kl":0,"timing_s/adv":0.14250285550951958,"timing_s/old_log_prob":21.252147432416677,"actor/pg_loss":-0.019357236858922988,"timing_per_token_ms/adv":0.0009463346405296683,"critic/vpred_mean":-0.5658798217773438,"actor/lr":1e-06,"critic/advantages/max":4.369076251983643,"critic/kl":2.2170159354573116e-05,"critic/values/min":-17.25,"actor/pg_clipfrac":0,"critic/returns/min":-0.0014123916625976562,"critic/rewards/mean":0.25781214237213135,"global_seqlen/min":36979,"_step":1,"timing_s/values":17.775053821504116,"_wandb":{"runtime":347},"critic/rewards/max":1.0008913278579712,"response_length/mean":199.197265625,"_timestamp":1.7416035683324103e+09,"global_seqlen/balanced_max":37646,"critic/advantages/mean":-6.956963005677608e-09,"critic/vf_loss":7.5925567746162415,"timing_per_token_ms/values":0.11804078668055117,"critic/returns/max":1.0009918212890625,"timing_per_token_ms/update_critic":0.5201235049789785,"timing_s/step":278.71863774210215,"critic/vf_clipfrac":0}