{"timing_per_token_ms/gen":5.722131193538237,"timing_s/testing":156.45450795069337,"critic/values/mean":0.72265625,"critic/vf_explained_var":-0.7251237630844116,"response_length/mean":207.1875,"global_seqlen/max":1535,"critic/rewards/mean":0.3083066940307617,"global_seqlen/minmax_diff":575,"mfu/critic":0.07614339362704096,"critic/advantages/max":4.065160751342773,"critic/rewards/max":1.0001968145370483,"timing_s/old_log_prob":0.8675131686031818,"critic/vf_loss":0.20822706818580627,"prompt_length/mean":96.625,"critic/advantages/min":-4.6284260749816895,"timing_per_token_ms/values":0.14059838674682032,"critic/vf_clipfrac":0,"actor/ppo_kl":0,"timing_s/ref":1.3012159168720245,"_runtime":1370.089048838,"_timestamp":1.7416057856931043e+09,"timing_s/gen":18.968864906579256,"timing_s/update_critic":2.473425604403019,"global_seqlen/min":960,"response_length/clip_ratio":0,"timing_per_token_ms/update_critic":0.5088306118911785,"critic/score/min":0,"timing_per_token_ms/adv":0.032913148237633326,"timing_per_token_ms/ref":0.26768482140959154,"global_seqlen/balanced_min":1200,"critic/lr":1e-05,"critic/score/max":1,"critic/rewards/min":-0.010663645341992378,"mfu/actor":0.06795547929035155,"_wandb":{"runtime":1370},"prompt_length/clip_ratio":0,"critic/kl":0.025636550039052963,"critic/grad_norm":77.50334167480469,"critic/vpred_mean":0.671875,"response_length/max":396,"val/test_score//data/yangjunxiao/Reasoning_Hallucination/verl/data/gsm8k":0.2357846853677028,"critic/advantages/mean":-1.9274866147611647e-08,"critic/values/min":-1.53125,"critic/score/mean":0.3125,"critic/returns/max":1.0001966953277588,"global_seqlen/balanced_max":1223,"timing_s/step":27.224377743899822,"timing_s/update_actor":2.7657583616673946,"timing_s/values":0.6834487579762936,"prompt_length/min":68,"critic/returns/min":-0.010960936546325684,"actor/lr":1e-06,"prompt_length/max":173,"timing_per_token_ms/update_actor":0.5689690108346831,"actor/pg_clipfrac":0,"critic/kl_coeff":0.001,"critic/values/max":3.296875,"_step":24,"actor/entropy_loss":0.2617957592010498,"timing_s/adv":0.1599908135831356,"global_seqlen/mean":1215.25,"critic/returns/mean":0.3808746337890625,"actor/grad_norm":3.1871933937072754,"response_length/min":78,"actor/pg_loss":-0.482360303401947}