{"_wandb":{"runtime":3426},"timing_per_token_ms/update_actor":0.1837160007572549,"critic/values/max":1.4765625,"critic/score/min":0,"actor/entropy_loss":0.33207105100154877,"critic/vf_clipfrac":0,"actor/pg_loss":0.15664433501660824,"timing_s/old_log_prob":1.6844921633601189,"response_length/clip_ratio":0,"_runtime":3426.350984362,"actor/lr":1e-06,"response_length/max":937,"val/test_score//data/yangjunxiao/Reasoning_Hallucination/verl/data/gsm8k":0.8802122820318423,"timing_s/update_critic":10.4736947119236,"actor/pg_clipfrac":0.0019443589262664318,"mfu/critic":0.21926407397349487,"prompt_length/max":169,"critic/values/mean":1.21875,"timing_s/ref":1.8504881262779236,"critic/score/mean":0.96484375,"critic/advantages/min":-4.77902889251709,"_timestamp":1.741609337819651e+09,"mfu/actor":0.20381328910496838,"response_length/min":44,"global_seqlen/minmax_diff":2261,"critic/rewards/max":1.0012538433074951,"response_length/mean":146.4296875,"prompt_length/mean":93.13671875,"global_seqlen/balanced_min":15227,"_step":39,"critic/returns/min":-0.024699807167053223,"critic/vf_loss":0.09861765010282397,"critic/lr":1e-05,"critic/vf_explained_var":0.26503485441207886,"prompt_length/clip_ratio":0,"critic/rewards/mean":0.9570273160934448,"timing_s/gen":35.65751262009144,"global_seqlen/max":16711,"critic/values/min":0.2353515625,"critic/rewards/min":-0.024504786357283592,"actor/ppo_kl":0.0009815123685257277,"timing_s/adv":0.26985783129930496,"global_seqlen/min":14450,"timing_s/values":1.5318645276129246,"critic/grad_norm":41.302485942840576,"actor/grad_norm":2.252298355102539,"timing_s/step":62.76604575663805,"timing_per_token_ms/values":0.024977816817703283,"timing_per_token_ms/update_critic":0.17077882750287138,"timing_s/testing":139.8311174288392,"global_seqlen/balanced_max":15622,"timing_per_token_ms/adv":0.0044001668264492325,"global_seqlen/mean":15332.25,"timing_per_token_ms/ref":0.030173133856379913,"critic/score/max":1,"critic/advantages/mean":-2.6458446100718902e-08,"critic/vpred_mean":1.1142578125,"timing_per_token_ms/gen":0.9512221261295267,"critic/advantages/max":1.6198848485946655,"critic/kl_coeff":0.001,"timing_s/update_actor":11.267118610441685,"prompt_length/min":55,"critic/returns/mean":0.9186133742332458,"critic/returns/max":1.0026763677597046,"critic/kl":0.06267351657152176}