
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.05
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9998/1000000 [04:10<9:44:38, 28.22it/s]global step 10000, trans_decision ep_re 283.4015517276243

{"global_step": 10000, "eval_re": [264.8956653927503, 302.31396053530204, 
328.4943173987176, 233.97641956138085, 284.2321612742609, 294.44260333064483, 
246.28146346217943, 308.9533403061834, 311.2196519833926, 259.2059340314312], 
"eval_len": [50, 57, 62, 45, 53, 55, 47, 58, 58, 49]}

  2%|▏         | 19997/1000000 [12:00<9:26:44, 28.82it/s]global step 20000, trans_decision ep_re 399.0391423213736

{"global_step": 20000, "eval_re": [158.02997424691708, 442.58147932686336, 
422.9917184508378, 309.4734932698113, 367.0712522033595, 405.29328373252105, 
520.268423790088, 486.1152671346009, 346.2344219215687, 532.3321091371685], 
"eval_len": [30, 81, 76, 58, 66, 73, 95, 88, 63, 103]}

  3%|▎         | 29997/1000000 [19:35<9:30:16, 28.35it/s]global step 30000, trans_decision ep_re 379.3537600151538

{"global_step": 30000, "eval_re": [445.2573367660697, 311.1346805960127, 
326.1932060153353, 391.15357041303264, 460.8760649532334, 371.2450246676672, 
333.6316857358334, 344.4507879829417, 388.47247185559684, 421.122771165815], 
"eval_len": [81, 57, 60, 74, 84, 67, 61, 64, 73, 78]}

  4%|▍         | 39997/1000000 [27:25<9:33:00, 27.92it/s]global step 40000, trans_decision ep_re 365.2646719922722

{"global_step": 40000, "eval_re": [378.20758094273157, 348.7851982063763, 
392.2266600425699, 312.46975904115664, 345.7656999865511, 440.2342669877661, 
369.4423219665732, 289.34465838807967, 428.6510545854904, 347.51951977542734], 
"eval_len": [68, 66, 73, 59, 64, 81, 69, 55, 80, 64]}

  5%|▍         | 49997/1000000 [35:15<9:25:11, 28.01it/s]global step 50000, trans_decision ep_re 381.55983075712663

{"global_step": 50000, "eval_re": [388.13500376427936, 401.1900688958328, 
332.4320900358104, 386.02882645382846, 411.28156056943783, 549.5518709706988, 
358.9146283138387, 437.5352435197642, 152.89828611637515, 397.63072893140065], 
"eval_len": [73, 73, 62, 75, 79, 108, 72, 82, 29, 73]}

  6%|▌         | 59998/1000000 [43:01<9:00:07, 29.01it/s]global step 60000, trans_decision ep_re 358.4854343800905

{"global_step": 60000, "eval_re": [475.9841040246391, 422.10284696141946, 
385.9439420474191, 315.32793413033227, 275.1700245415127, 451.9689083100659, 
349.80213155690893, 263.211284775434, 392.8645180718076, 252.47864938136593], 
"eval_len": [86, 76, 69, 58, 52, 81, 64, 51, 71, 48]}

  7%|▋         | 69997/1000000 [51:00<9:01:49, 28.61it/s]global step 70000, trans_decision ep_re 391.8726819078337

{"global_step": 70000, "eval_re": [366.9258952179747, 373.055642564267, 
368.55802561546244, 400.02356404826486, 318.19039408405837, 439.54368338279977, 
368.1965300678057, 412.74336401955867, 401.7720549396704, 469.7176651384742], 
"eval_len": [67, 67, 69, 72, 61, 80, 68, 75, 73, 85]}

  8%|▊         | 79997/1000000 [58:33<9:09:10, 27.92it/s]global step 80000, trans_decision ep_re 347.92578841829834

{"global_step": 80000, "eval_re": [293.35162188489755, 380.8881262637708, 
307.49897514837335, 607.3945767868122, 444.7039217091142, 374.42367318419537, 
197.67678737760392, 131.1774421627204, 383.55282426717616, 358.58993539831897], 
"eval_len": [55, 68, 57, 113, 82, 69, 37, 25, 69, 65]}

  9%|▉         | 89999/1000000 [1:06:30<8:48:24, 28.70it/s]global step 90000, trans_decision ep_re 379.766959188736

{"global_step": 90000, "eval_re": [516.1440509267013, 321.7340625508832, 
391.70900095519454, 399.2468956317905, 332.46153143123786, 339.45300151131846, 
358.86741408905704, 347.75883035638054, 349.9854282334699, 440.3093762013265], 
"eval_len": [93, 59, 72, 73, 62, 62, 66, 64, 63, 81]}

 10%|▉         | 99998/1000000 [1:14:06<8:52:08, 28.19it/s]global step 100000, trans_decision ep_re 386.1308668738942

{"global_step": 100000, "eval_re": [415.74973180543594, 398.90396353687237, 
486.48450687789534, 474.4202073065438, 317.80047255586567, 379.2747810643877, 
388.7717154518462, 214.13208511312033, 315.1474113347859, 470.62379369218854], 
"eval_len": [75, 71, 89, 85, 60, 68, 70, 42, 57, 85]}

 11%|█         | 109997/1000000 [1:21:52<8:49:57, 27.99it/s]global step 110000, trans_decision ep_re 397.5520815671965

{"global_step": 110000, "eval_re": [350.7210203882764, 574.6413215156487, 
456.2414822352523, 293.15455339669, 479.33759284591247, 409.4839111061358, 
246.0135497273658, 336.57831697835667, 353.3929204314951, 475.95614704683214], 
"eval_len": [64, 102, 82, 56, 87, 76, 50, 62, 64, 85]}

 12%|█▏        | 119999/1000000 [1:29:50<8:45:40, 27.90it/s]global step 120000, trans_decision ep_re 397.712698613462

{"global_step": 120000, "eval_re": [560.0234255624712, 375.3600767172997, 
425.19679076886183, 367.3327708732195, 340.7513571480278, 346.21571200228385, 
360.5237372636996, 374.0241973410182, 466.37396528255846, 361.3249531751803], 
"eval_len": [100, 69, 76, 69, 66, 65, 66, 68, 86, 70]}

 13%|█▎        | 129998/1000000 [1:37:40<8:40:55, 27.84it/s]global step 130000, trans_decision ep_re 422.64611252385174

{"global_step": 130000, "eval_re": [373.321734117745, 279.02303647892893, 
358.3305723442149, 376.42886169420024, 393.8891257828536, 533.7816880632168, 
368.1647510277422, 658.807726590362, 468.6821683812887, 416.0314607579657], 
"eval_len": [70, 53, 69, 69, 71, 98, 69, 122, 89, 82]}

 14%|█▍        | 139999/1000000 [1:45:20<8:40:09, 27.56it/s]global step 140000, trans_decision ep_re 424.11103713153807

{"global_step": 140000, "eval_re": [591.0826204384734, 325.63796679614956, 
474.5448091344956, 249.26603732991592, 412.0052982115897, 291.5704836536754, 
586.7873229657883, 474.1163850450747, 432.7313996893977, 403.3680480508209], 
"eval_len": [119, 64, 88, 48, 73, 54, 121, 86, 79, 74]}

 15%|█▍        | 149998/1000000 [1:53:11<8:15:01, 28.62it/s]global step 150000, trans_decision ep_re 308.14481591859476

{"global_step": 150000, "eval_re": [318.6727426594764, 228.44167957774312, 
411.34858818753315, 363.4235962893011, 308.368520543674, 401.2174716663449, 
376.4401820506859, 247.03067172324012, 279.2913734660593, 147.21333302188924], 
"eval_len": [58, 45, 77, 67, 57, 74, 68, 48, 52, 28]}

 16%|█▌        | 159997/1000000 [2:01:01<8:24:06, 27.77it/s]global step 160000, trans_decision ep_re 465.84544653537284

{"global_step": 160000, "eval_re": [258.2708382664191, 289.64793838768634, 
384.9058521383409, 380.82282029853496, 418.6207875689561, 787.4445535584758, 
239.29770007267206, 1084.4580377671004, 290.05175583398596, 524.9341814615568], 
"eval_len": [53, 57, 71, 69, 81, 161, 46, 205, 55, 93]}

 17%|█▋        | 169999/1000000 [2:08:52<8:13:45, 28.02it/s]global step 170000, trans_decision ep_re 347.05571150631255

{"global_step": 170000, "eval_re": [219.13407039912136, 351.8493267598344, 
316.62402533596827, 338.9296871587291, 306.48958358024043, 452.637954514155, 
418.56044497931555, 410.4394159052817, 313.810868406041, 342.0817380244386], 
"eval_len": [41, 67, 63, 63, 58, 81, 75, 74, 58, 63]}

 18%|█▊        | 179998/1000000 [2:16:43<7:59:05, 28.53it/s]global step 180000, trans_decision ep_re 430.0821229831955

{"global_step": 180000, "eval_re": [385.9921057663204, 705.9885407454096, 
326.1141398201049, 523.6201474146601, 444.104855475908, 292.0243685426268, 
464.83468846959414, 425.25757220899663, 359.49974437150524, 373.38506701682934],
"eval_len": [68, 140, 61, 102, 79, 54, 82, 76, 67, 69]}

 19%|█▉        | 189998/1000000 [2:24:34<7:54:58, 28.42it/s]global step 190000, trans_decision ep_re 281.18883495089244

{"global_step": 190000, "eval_re": [340.93667038640604, 340.290821571236, 
346.93102656406137, 305.91676567799914, 141.4040518382336, 246.06556688256848, 
263.35493947454387, 240.78089524786733, 299.8184495935489, 286.38916227245954], 
"eval_len": [63, 62, 64, 58, 27, 48, 50, 47, 56, 56]}

 20%|█▉        | 199997/1000000 [2:32:23<7:53:57, 28.13it/s]global step 200000, trans_decision ep_re 340.6002707664028

{"global_step": 200000, "eval_re": [531.9526448570234, 517.7423476719274, 
169.6221281081537, 282.5039896406968, 246.60485163796923, 373.3801447179778, 
351.99814247205495, 360.5871763615633, 331.48656724792505, 240.12471494873637], 
"eval_len": [95, 93, 32, 54, 48, 68, 64, 67, 61, 47]}

 21%|██        | 209997/1000000 [2:40:12<7:54:54, 27.72it/s]global step 210000, trans_decision ep_re 369.1684318289514

{"global_step": 210000, "eval_re": [615.857699921354, 449.56750090767156, 
378.8577183158218, 241.90282469038775, 626.1348628868664, 233.25699473044762, 
311.01422948208966, 223.97986717335553, 326.9082765060354, 284.2043436754843], 
"eval_len": [109, 85, 70, 47, 119, 45, 61, 43, 60, 55]}

 22%|██▏       | 219997/1000000 [2:48:04<7:53:46, 27.44it/s]global step 220000, trans_decision ep_re 484.9473070792934

{"global_step": 220000, "eval_re": [753.1147958808799, 635.1562094736198, 
864.5247997136237, 553.3753224758028, 247.75556660564044, 308.859163029831, 
275.6055696553827, 525.1515971732808, 249.4993516937804, 436.4306950910934], 
"eval_len": [140, 125, 161, 98, 48, 58, 52, 100, 48, 77]}

 23%|██▎       | 229997/1000000 [2:56:10<7:56:59, 26.91it/s]global step 230000, trans_decision ep_re 351.0521497745482

{"global_step": 230000, "eval_re": [270.19847477989475, 315.76347068271616, 
337.8783994038941, 323.97374928142204, 294.75379089561744, 419.6339480865506, 
531.3315531402211, 402.7180071793695, 361.6040081274639, 252.66609616833188], 
"eval_len": [55, 58, 62, 63, 55, 77, 92, 71, 64, 48]}

 24%|██▍       | 239999/1000000 [3:04:00<7:33:05, 27.96it/s]global step 240000, trans_decision ep_re 369.86436066078005

{"global_step": 240000, "eval_re": [351.8941548769197, 424.6161765564834, 
257.5909882848732, 426.29774919295636, 361.2819297443818, 391.70947190478034, 
416.23112331754703, 251.05940899443038, 504.9004421479252, 313.0621615875031], 
"eval_len": [64, 75, 49, 75, 65, 71, 74, 48, 90, 57]}

 25%|██▍       | 249999/1000000 [3:11:50<7:31:13, 27.70it/s]global step 250000, trans_decision ep_re 272.6325030523466

{"global_step": 250000, "eval_re": [242.08804155664043, 206.87485094241134, 
244.84144093520825, 289.5625205984817, 296.23478732109817, 382.84089546316153, 
228.20115623057785, 247.52691530310005, 365.78675061069754, 222.36767156208916],
"eval_len": [47, 41, 47, 55, 57, 72, 44, 48, 69, 43]}

 26%|██▌       | 259999/1000000 [3:19:40<7:24:24, 27.75it/s]global step 260000, trans_decision ep_re 509.31897463224607

{"global_step": 260000, "eval_re": [569.9526002141537, 674.3112740905257, 
297.8074145121753, 755.2426513946252, 457.1634302505752, 385.77982109304844, 
163.24900561871468, 444.5992202061092, 1060.1104075473218, 284.9739213952116], 
"eval_len": [102, 120, 55, 134, 81, 68, 31, 78, 200, 54]}

 27%|██▋       | 269998/1000000 [3:27:20<7:19:52, 27.66it/s]global step 270000, trans_decision ep_re 410.8440691524719

{"global_step": 270000, "eval_re": [400.4449777871483, 437.4345688546301, 
643.4688692853789, 397.8601095869338, 323.26591134179074, 401.340939221626, 
321.28352784727383, 396.11782487618103, 371.83434979804366, 415.38961292571315],
"eval_len": [71, 76, 114, 71, 59, 73, 59, 74, 67, 74]}

 28%|██▊       | 279997/1000000 [3:35:30<7:03:57, 28.31it/s]global step 280000, trans_decision ep_re 355.1582175601849

{"global_step": 280000, "eval_re": [275.67908488837656, 527.6102884144425, 
368.8834834922975, 390.4027070958421, 510.46902714227923, 243.08973905480437, 
381.1030308255999, 250.56295958789315, 302.0316918997599, 301.75016320055346], 
"eval_len": [52, 95, 69, 70, 98, 46, 72, 48, 57, 56]}

 29%|██▉       | 289999/1000000 [3:43:13<7:23:03, 26.71it/s]global step 290000, trans_decision ep_re 456.80464594848826

{"global_step": 290000, "eval_re": [595.3806840471331, 400.36893429257464, 
559.6498969120929, 389.7009414687325, 623.2928081916081, 462.15999267510665, 
431.83444590447505, 258.12476287861244, 401.9092961112111, 445.62469700333594], 
"eval_len": [108, 72, 106, 74, 123, 83, 78, 49, 74, 77]}

 30%|██▉       | 299997/1000000 [3:51:20<6:53:37, 28.21it/s]global step 300000, trans_decision ep_re 422.3374789068698

{"global_step": 300000, "eval_re": [517.5481209695498, 252.55083192443453, 
575.7498314139243, 446.13398448914444, 158.62902072635148, 341.66469133616016, 
368.5175711097128, 499.72556864083464, 756.9932435900099, 305.8619248685756], 
"eval_len": [97, 48, 111, 80, 30, 63, 65, 89, 134, 57]}

 31%|███       | 309997/1000000 [3:59:03<7:02:19, 27.23it/s]global step 310000, trans_decision ep_re 431.94600265401397

{"global_step": 310000, "eval_re": [322.496464019714, 432.3554020215871, 
455.6283661528265, 427.06796021740865, 151.74002044657243, 449.4674098567294, 
775.7420057129441, 433.5729228225696, 423.940313142531, 447.4491621472566], 
"eval_len": [61, 76, 88, 73, 29, 79, 138, 77, 75, 81]}

 32%|███▏      | 319998/1000000 [4:07:10<6:43:10, 28.11it/s]global step 320000, trans_decision ep_re 378.24338800974664

{"global_step": 320000, "eval_re": [223.30793782471244, 267.64297533925946, 
408.13736576012974, 439.7437506157576, 306.6066620263995, 310.56712358409067, 
550.2939831343261, 733.1011734039113, 262.20138334287986, 280.8315250659997], 
"eval_len": [44, 50, 76, 85, 56, 58, 99, 131, 49, 53]}

 33%|███▎      | 329997/1000000 [4:15:10<6:48:23, 27.34it/s]global step 330000, trans_decision ep_re 495.6451602322956

{"global_step": 330000, "eval_re": [322.43818760745086, 934.7048282070323, 
674.7902332604511, 669.7652134656678, 381.2544501584541, 237.3315858442325, 
381.68734385106404, 497.18478701428245, 439.08080579870295, 418.2141671156182], 
"eval_len": [60, 176, 124, 125, 68, 44, 67, 86, 81, 72]}

 34%|███▍      | 339998/1000000 [4:22:55<6:37:59, 27.64it/s]global step 340000, trans_decision ep_re 312.763024753035

{"global_step": 340000, "eval_re": [661.388787131156, 346.8783923055815, 
218.94487963062016, 532.311548094596, 362.26923811356073, 221.03755095763563, 
130.71254992193667, 198.3847079585235, 157.93202890949036, 297.77056450724973], 
"eval_len": [125, 63, 43, 97, 65, 43, 25, 40, 30, 56]}

 35%|███▍      | 349998/1000000 [4:31:00<6:24:06, 28.20it/s]global step 350000, trans_decision ep_re 440.22254096675067

{"global_step": 350000, "eval_re": [468.03732708411843, 200.3264283176856, 
509.7104709403006, 420.37144791375334, 431.2785536589281, 673.9874845892061, 
346.66036982258737, 390.9042844939286, 478.05157863611385, 482.8974642108852], 
"eval_len": [81, 40, 87, 78, 74, 118, 63, 72, 86, 85]}

 36%|███▌      | 359997/1000000 [4:38:44<6:30:45, 27.30it/s]global step 360000, trans_decision ep_re 357.2035612871902

{"global_step": 360000, "eval_re": [684.1352789743482, 244.95160907116025, 
325.08167869109246, 278.8287011830843, 368.8692838908094, 296.56673836641437, 
382.7818135897556, 414.4269983473932, 265.3728302784391, 311.02068047940503], 
"eval_len": [119, 46, 60, 52, 64, 55, 71, 74, 50, 58]}

 37%|███▋      | 369999/1000000 [4:46:50<6:18:20, 27.75it/s]global step 370000, trans_decision ep_re 398.9769690803656

{"global_step": 370000, "eval_re": [356.7705872716623, 487.778253656688, 
353.8380365751576, 533.9967118605729, 365.467660861595, 232.56167215097528, 
408.7144263726831, 188.72979632824698, 727.840939505364, 334.0716062207112], 
"eval_len": [65, 91, 64, 92, 63, 45, 75, 38, 128, 61]}

 38%|███▊      | 379997/1000000 [4:54:40<6:15:16, 27.54it/s]global step 380000, trans_decision ep_re 370.46601659028397

{"global_step": 380000, "eval_re": [476.9347200070837, 249.83516979713087, 
273.7373285300038, 366.80813692328064, 378.1216610910341, 515.1762048283226, 
353.5487588148277, 446.91575249849814, 401.7708450118809, 241.8115884007771], 
"eval_len": [84, 48, 51, 66, 69, 91, 63, 80, 71, 47]}

 39%|███▉      | 389999/1000000 [5:02:26<6:12:39, 27.28it/s]global step 390000, trans_decision ep_re 399.05290811439505

{"global_step": 390000, "eval_re": [211.07486865570803, 345.0371426425965, 
463.1184561147294, 598.5999726494528, 260.5321025521346, 504.4378249591515, 
513.6800586522783, 339.6606518560027, 317.2382077217615, 437.1497953401353], 
"eval_len": [42, 63, 82, 115, 50, 88, 88, 63, 58, 80]}

 40%|███▉      | 399999/1000000 [5:10:21<5:52:11, 28.39it/s]global step 400000, trans_decision ep_re 419.98918376790715

{"global_step": 400000, "eval_re": [378.74789533076984, 613.4179710960993, 
544.8189846571142, 241.76928381027776, 525.0443869232483, 499.72487573286327, 
275.8169264023761, 405.077981924965, 473.0770224787619, 242.39650932259576], 
"eval_len": [71, 104, 90, 46, 87, 85, 52, 74, 84, 47]}

 41%|████      | 409997/1000000 [5:18:10<5:49:12, 28.16it/s]global step 410000, trans_decision ep_re 415.1306223540606

{"global_step": 410000, "eval_re": [737.2076306753387, 399.8248278804131, 
527.2288328275458, 251.8661364673373, 460.77812976408933, 335.6081319726794, 
317.151807941441, 473.3047904038838, 501.39095161217074, 146.9449839957064], 
"eval_len": [126, 70, 97, 47, 78, 61, 58, 82, 92, 28]}

 42%|████▏     | 419999/1000000 [5:26:01<5:45:27, 27.98it/s]global step 420000, trans_decision ep_re 403.42674858897334

{"global_step": 420000, "eval_re": [294.81033401524144, 608.6235301523853, 
494.93705642971383, 353.4268017305326, 670.5987485121038, 391.15742039155583, 
334.8730539056246, 425.46315755417606, 229.85752905670805, 230.51985414169164], 
"eval_len": [55, 104, 84, 64, 116, 70, 62, 73, 45, 44]}

 43%|████▎     | 429999/1000000 [5:33:50<5:42:12, 27.76it/s]global step 430000, trans_decision ep_re 474.50182364949444

{"global_step": 430000, "eval_re": [338.206248410479, 606.924474642956, 
462.98077674592474, 440.07400395243434, 535.6399938286904, 469.2870941793371, 
436.1266997986779, 428.98035280198417, 596.3966606352628, 430.4019314991975], 
"eval_len": [64, 108, 83, 77, 96, 85, 76, 76, 109, 79]}

 44%|████▍     | 439997/1000000 [5:41:41<5:35:07, 27.85it/s]global step 440000, trans_decision ep_re 443.10098498435434

{"global_step": 440000, "eval_re": [261.3923562749301, 206.4151439985955, 
468.7477497439174, 633.1335443432879, 561.3688467392382, 271.73871485245957, 
308.33304901797163, 611.2822604600999, 380.3253246418491, 728.272859771194], 
"eval_len": [49, 41, 84, 119, 106, 51, 56, 115, 68, 125]}

 45%|████▍     | 449998/1000000 [5:49:31<5:24:52, 28.22it/s]global step 450000, trans_decision ep_re 473.395764745144

{"global_step": 450000, "eval_re": [441.86926517916453, 510.98723183709495, 
434.7461393974721, 269.3025167121003, 494.0452580127452, 440.19732670909093, 
846.6079487326334, 482.72162856742574, 567.0282300618123, 246.45210224190026], 
"eval_len": [77, 86, 75, 50, 83, 75, 143, 81, 99, 47]}

 46%|████▌     | 459997/1000000 [5:57:30<5:16:53, 28.40it/s]global step 460000, trans_decision ep_re 455.94956950068945

{"global_step": 460000, "eval_re": [331.60452829340466, 555.2374760244028, 
598.9950877481668, 960.0884272508059, 246.03801287375032, 202.04919372795104, 
428.5570073243188, 644.7521990031256, 203.11341149611687, 389.06035126485205], 
"eval_len": [61, 102, 101, 168, 47, 40, 76, 124, 40, 69]}

 47%|████▋     | 469997/1000000 [6:05:20<5:18:34, 27.73it/s]global step 470000, trans_decision ep_re 571.2248885103835

{"global_step": 470000, "eval_re": [606.5407549858994, 487.1091657199934, 
565.5854432721633, 533.307900456966, 152.28215628635922, 967.2445927453176, 
708.6323077176592, 366.5980260697838, 717.8424680786532, 607.10606977104], 
"eval_len": [109, 85, 109, 87, 29, 179, 126, 66, 123, 113]}

 48%|████▊     | 479999/1000000 [6:13:10<5:07:11, 28.21it/s]global step 480000, trans_decision ep_re 531.8183838481065

{"global_step": 480000, "eval_re": [410.2120449699207, 481.67445060668405, 
464.33412315186905, 640.2932744783634, 618.3750205946551, 185.50321490547108, 
1082.3349552046452, 627.0875759113223, 162.87549949266048, 645.4936791654727], 
"eval_len": [73, 91, 81, 121, 114, 35, 200, 108, 31, 123]}

 49%|████▉     | 489999/1000000 [6:21:00<5:09:15, 27.48it/s]global step 490000, trans_decision ep_re 532.23678192147

{"global_step": 490000, "eval_re": [504.01719119332387, 496.4293565949544, 
361.6312091616064, 554.403693831218, 541.352122392654, 555.3716442319921, 
725.7775192877255, 600.426036066981, 526.5026591145297, 456.45638733971424], 
"eval_len": [84, 86, 66, 96, 100, 100, 121, 108, 94, 79]}

 50%|████▉     | 499997/1000000 [6:28:50<4:50:07, 28.72it/s]global step 500000, trans_decision ep_re 476.3591516362777

{"global_step": 500000, "eval_re": [299.10903551275226, 204.79130425525483, 
744.7561699750413, 437.4804072768791, 336.5207041706381, 477.05933750533825, 
916.1726823519367, 497.52773097842345, 302.06094060117726, 548.1132037353352], 
"eval_len": [54, 40, 143, 77, 62, 83, 162, 84, 55, 98]}

 51%|█████     | 509997/1000000 [6:36:40<4:53:44, 27.80it/s]global step 510000, trans_decision ep_re 595.9790902038005

{"global_step": 510000, "eval_re": [241.4514569547505, 382.90178254543184, 
681.4881694893159, 762.3800444206297, 466.88711550437546, 734.1814380349273, 
959.3365478837244, 700.3114696739806, 567.0691103758085, 463.78376715506243], 
"eval_len": [46, 69, 117, 143, 80, 128, 171, 121, 97, 81]}

 52%|█████▏    | 519999/1000000 [6:44:14<4:49:23, 27.64it/s]global step 520000, trans_decision ep_re 378.35240974024725

{"global_step": 520000, "eval_re": [244.90591000876003, 276.2442854140843, 
559.6175716909693, 545.2744566887088, 302.3367953010133, 189.48418539362882, 
640.3137577970899, 249.1174667567521, 416.642044193638, 359.5876241578277], 
"eval_len": [47, 52, 94, 100, 56, 38, 117, 48, 75, 66]}

 53%|█████▎    | 529999/1000000 [6:52:02<4:41:37, 27.81it/s]global step 530000, trans_decision ep_re 374.5855854701849

{"global_step": 530000, "eval_re": [211.64536491012242, 277.01293991212225, 
568.8204358059088, 158.94327933109324, 749.4450770357627, 476.65652861783286, 
261.14343666063223, 396.270048653826, 231.7842007289695, 414.13454304557916], 
"eval_len": [41, 52, 99, 30, 130, 81, 49, 72, 45, 75]}

 54%|█████▍    | 539999/1000000 [6:59:51<4:29:46, 28.42it/s]global step 540000, trans_decision ep_re 629.90779127921

{"global_step": 540000, "eval_re": [731.269592407674, 456.1434776041002, 
480.61866717847124, 530.8046620521922, 814.060225007095, 600.9108033730541, 
740.719644394213, 844.3874469992892, 603.2513376395582, 496.91205613645263], 
"eval_len": [120, 83, 85, 92, 142, 104, 130, 142, 106, 96]}

 55%|█████▍    | 549999/1000000 [7:07:50<4:23:40, 28.44it/s]global step 550000, trans_decision ep_re 464.29365072614564

{"global_step": 550000, "eval_re": [563.8665831821004, 416.7407995629477, 
531.8281289406505, 416.0384347672122, 522.8221799325678, 495.5978514965604, 
202.92943654563086, 292.50470436168183, 809.3377709153657, 391.2706175567388], 
"eval_len": [98, 72, 90, 74, 88, 84, 40, 53, 143, 67]}

 56%|█████▌    | 559999/1000000 [7:15:40<4:21:22, 28.06it/s]global step 560000, trans_decision ep_re 496.2125260171876

{"global_step": 560000, "eval_re": [601.1396115718925, 244.79361873665, 
287.23199251319494, 723.9867767358562, 260.54596121033427, 246.4991053870401, 
711.9688153306616, 859.5709283859029, 263.81293162497644, 762.5755186753678], 
"eval_len": [101, 47, 53, 134, 49, 47, 117, 160, 50, 138]}

 57%|█████▋    | 569999/1000000 [7:23:30<4:22:07, 27.34it/s]global step 570000, trans_decision ep_re 584.3555069990161

{"global_step": 570000, "eval_re": [245.5575178530017, 832.6408481421223, 
239.16536087831412, 214.94080724425513, 174.7536662745556, 567.2154767095024, 
1055.9739720975597, 1531.0569878445424, 536.6903548906888, 445.56007805562], 
"eval_len": [46, 157, 46, 42, 33, 102, 189, 281, 94, 79]}

 58%|█████▊    | 579998/1000000 [7:31:10<4:07:43, 28.26it/s]global step 580000, trans_decision ep_re 607.0237499110006

{"global_step": 580000, "eval_re": [247.4281245381362, 245.96844815947946, 
892.1165091302869, 1186.2398329805976, 440.9504998238695, 566.041415302529, 
727.4117867378769, 530.5236118425249, 641.1879374473801, 592.3693331473247], 
"eval_len": [47, 47, 159, 220, 77, 101, 132, 92, 112, 101]}

 59%|█████▉    | 589999/1000000 [7:39:01<4:04:01, 28.00it/s]global step 590000, trans_decision ep_re 550.843695395555

{"global_step": 590000, "eval_re": [724.0826389720164, 392.71915493980055, 
329.53960857610593, 443.9794255852063, 778.630972047387, 699.2046082323495, 
527.1875657195882, 884.3298109231357, 357.34724893176985, 371.41592002818993], 
"eval_len": [131, 69, 60, 79, 135, 129, 88, 154, 64, 65]}

 60%|█████▉    | 599997/1000000 [7:46:52<3:59:44, 27.81it/s]global step 600000, trans_decision ep_re 615.4888814900728

{"global_step": 600000, "eval_re": [628.5392224836659, 884.5596712398788, 
279.5084796779901, 429.2024501895583, 277.7254656843543, 1431.5424518954592, 
774.8761317598222, 250.41943547745421, 424.5601432802422, 773.9553632123024], 
"eval_len": [109, 154, 51, 73, 51, 261, 126, 47, 72, 127]}

 61%|██████    | 609999/1000000 [7:54:43<3:55:26, 27.61it/s]global step 610000, trans_decision ep_re 623.0496067304988

{"global_step": 610000, "eval_re": [607.274113767462, 244.10546243410022, 
366.14865638107364, 628.2184372723968, 1404.3577592644972, 769.3152838265031, 
695.4495257050863, 230.4036952811343, 703.6517011229653, 581.5714322497693], 
"eval_len": [105, 46, 65, 124, 232, 130, 116, 44, 121, 104]}

 62%|██████▏   | 619999/1000000 [8:02:34<3:47:23, 27.85it/s]global step 620000, trans_decision ep_re 537.133345316079

{"global_step": 620000, "eval_re": [242.76554657493656, 487.42023721841787, 
396.3374291343905, 649.5514525477297, 542.0070292820218, 344.64705644185915, 
504.5203921363001, 266.8079426949379, 1092.3338752998407, 844.9424918303564], 
"eval_len": [46, 94, 71, 126, 92, 61, 91, 50, 197, 151]}

 63%|██████▎   | 629997/1000000 [8:10:40<3:42:06, 27.76it/s]global step 630000, trans_decision ep_re 544.6211294500204

{"global_step": 630000, "eval_re": [791.8736070814687, 582.3897601434213, 
396.00880713288757, 354.77619165139396, 872.0155115861653, 730.7266201914351, 
611.0736935310313, 147.37198011743345, 408.9199849439689, 551.0551381209976], 
"eval_len": [148, 115, 69, 62, 148, 129, 119, 28, 70, 99]}

 64%|██████▍   | 639999/1000000 [8:18:30<3:29:18, 28.67it/s]global step 640000, trans_decision ep_re 694.2548958629199

{"global_step": 640000, "eval_re": [1123.452993101071, 119.63858453851402, 
575.3927899105714, 488.28083384243456, 261.05718044391887, 706.1734875348175, 
363.0073457344589, 961.7382140510019, 1156.3157364227661, 1187.4917930496442], 
"eval_len": [201, 23, 99, 89, 48, 136, 65, 171, 205, 203]}

 65%|██████▍   | 649997/1000000 [8:26:20<3:31:51, 27.54it/s]global step 650000, trans_decision ep_re 596.0881769483697

{"global_step": 650000, "eval_re": [460.0192292405432, 489.37605996778615, 
455.2474035399984, 749.0279865912164, 539.5740527299853, 996.6682481486279, 
265.3239878943391, 801.6783206224927, 401.2960728556822, 802.6704078930247], 
"eval_len": [77, 80, 76, 125, 92, 173, 49, 144, 68, 145]}

 66%|██████▌   | 659998/1000000 [8:34:01<3:21:09, 28.17it/s]global step 660000, trans_decision ep_re 557.9497915919146

{"global_step": 660000, "eval_re": [931.8804335548998, 564.3228919226966, 
175.10050442717676, 498.7656054760032, 542.47795832136, 701.2920538671738, 
718.910717987847, 439.4419441265075, 865.3941321268762, 141.9116741086057], 
"eval_len": [157, 91, 33, 86, 95, 122, 130, 79, 161, 27]}

 67%|██████▋   | 669999/1000000 [8:41:52<3:18:40, 27.68it/s]global step 670000, trans_decision ep_re 588.2229259674641

{"global_step": 670000, "eval_re": [1225.7608795841968, 301.1744963232637, 
334.12583832743604, 798.2275510436682, 483.5031016148836, 468.1974343538607, 
517.8999581037847, 326.22242815284613, 602.2040607580906, 824.913511412611], 
"eval_len": [218, 56, 60, 143, 84, 84, 86, 60, 111, 163]}

 68%|██████▊   | 679998/1000000 [8:50:00<3:10:53, 27.94it/s]global step 680000, trans_decision ep_re 774.5782063834557

{"global_step": 680000, "eval_re": [710.1776879059615, 582.3123576311037, 
422.5041181815004, 2082.519680945562, 849.9943273019214, 451.04085958800016, 
997.8700897890219, 735.3561947192302, 793.9318369703644, 120.07491080189156], 
"eval_len": [122, 97, 73, 368, 152, 75, 171, 121, 135, 23]}

 69%|██████▉   | 689999/1000000 [8:57:50<3:03:08, 28.21it/s]global step 690000, trans_decision ep_re 614.6890993403815

{"global_step": 690000, "eval_re": [415.5026474260079, 1428.7459940806432, 
755.093321431014, 730.3371719894561, 742.832876156683, 576.7183396984377, 
335.5259498007034, 283.09748257681633, 573.403563684562, 305.6336465594915], 
"eval_len": [73, 267, 128, 136, 121, 96, 60, 53, 113, 58]}

 70%|██████▉   | 699997/1000000 [9:05:31<2:55:49, 28.44it/s]global step 700000, trans_decision ep_re 718.3630975242429

{"global_step": 700000, "eval_re": [1226.6649300130161, 429.00605274123103, 
694.7391523980798, 787.0744324867909, 848.1887038457476, 479.3242278311635, 
637.2113439255794, 1134.84498225871, 377.86539322063174, 568.7117565214784], 
"eval_len": [240, 73, 128, 135, 159, 82, 112, 194, 66, 100]}

 71%|███████   | 709999/1000000 [9:13:40<2:53:29, 27.86it/s]global step 710000, trans_decision ep_re 636.953406025533

{"global_step": 710000, "eval_re": [509.234607011131, 324.28339742928733, 
993.0932663449937, 896.7313174764432, 255.65639040494867, 663.8191300187639, 
1063.7257422560735, 374.4022718862805, 798.719509663854, 489.86842776355456], 
"eval_len": [87, 61, 184, 152, 48, 121, 209, 65, 140, 90]}

 72%|███████▏  | 719997/1000000 [9:21:30<2:48:01, 27.77it/s]global step 720000, trans_decision ep_re 648.8373814296582

{"global_step": 720000, "eval_re": [831.853974432553, 460.6149190206292, 
487.589431597981, 806.6190788977, 518.3137012944809, 652.2319481778306, 
498.9441337386668, 694.9083764317526, 1389.5297279799984, 147.76852272499028], 
"eval_len": [154, 87, 87, 157, 98, 113, 95, 121, 261, 28]}

 73%|███████▎  | 729997/1000000 [9:29:13<2:42:45, 27.65it/s]global step 730000, trans_decision ep_re 729.0573483834036

{"global_step": 730000, "eval_re": [230.61184360207758, 789.3987834906319, 
1189.5288178982273, 674.7252383538089, 918.2650774712315, 873.3839425324124, 
509.67524401829974, 667.301520553069, 860.5240694501166, 577.158946464161], 
"eval_len": [44, 137, 196, 111, 157, 152, 87, 113, 150, 106]}

 74%|███████▍  | 739999/1000000 [9:37:20<2:34:15, 28.09it/s]global step 740000, trans_decision ep_re 665.8306158619141

{"global_step": 740000, "eval_re": [489.64861481382343, 435.33244610314125, 
526.2592819877566, 464.07834078841387, 893.4757149271987, 681.8201608678833, 
1273.6102638273235, 759.3239746646356, 731.2196543514424, 403.53770628752267], 
"eval_len": [88, 76, 97, 84, 156, 117, 224, 129, 127, 78]}

 75%|███████▍  | 749998/1000000 [9:45:00<2:30:18, 27.72it/s]global step 750000, trans_decision ep_re 529.2743694254661

{"global_step": 750000, "eval_re": [794.7919499180612, 411.75239583070135, 
424.8419046404234, 250.5196523543877, 639.0938030823869, 394.024441282295, 
569.5580997265835, 631.1532121057115, 531.1980199598561, 645.8102153542562], 
"eval_len": [141, 81, 76, 48, 120, 70, 110, 117, 96, 120]}

 76%|███████▌  | 759998/1000000 [9:52:53<2:19:32, 28.66it/s]global step 760000, trans_decision ep_re 621.8521157950893

{"global_step": 760000, "eval_re": [433.4885063868732, 634.7362196711853, 
738.1587560846237, 849.2101968331275, 291.58944697960305, 776.4304238241599, 
805.4655536064906, 565.4926418652043, 883.4681001486505, 240.48131255097454], 
"eval_len": [83, 113, 135, 153, 54, 138, 156, 93, 151, 46]}

 77%|███████▋  | 769997/1000000 [10:01:00<2:15:27, 28.30it/s]global step 770000, trans_decision ep_re 675.5348126727351

{"global_step": 770000, "eval_re": [594.6355000215754, 983.3964595125184, 
256.32421628771107, 886.7410936935162, 472.9143562322133, 759.6626965878727, 
776.6046653355503, 739.6443650079772, 642.1196249495453, 643.305149098871], 
"eval_len": [102, 163, 48, 164, 85, 131, 145, 131, 110, 114]}

 78%|███████▊  | 779997/1000000 [10:08:50<2:11:46, 27.82it/s]global step 780000, trans_decision ep_re 619.6176942972795

{"global_step": 780000, "eval_re": [716.6982800157163, 628.6279340067887, 
643.4995607143309, 405.8462189369394, 519.3053810477247, 442.07236819995444, 
913.2539215081624, 884.4320598311223, 649.6111052846672, 392.8301134273889], 
"eval_len": [131, 118, 126, 74, 93, 81, 168, 155, 116, 71]}

 79%|███████▉  | 789999/1000000 [10:16:33<2:08:59, 27.13it/s]global step 790000, trans_decision ep_re 668.5570309327751

{"global_step": 790000, "eval_re": [661.9038662457115, 1115.148522813672, 
602.7506544409191, 784.7545314073496, 554.5190753566878, 622.2039541781379, 
1122.312347161393, 533.3371360372205, 180.2849404610879, 508.3552812255713], 
"eval_len": [112, 208, 108, 133, 101, 113, 202, 101, 34, 94]}

 80%|███████▉  | 799997/1000000 [10:24:40<1:58:55, 28.03it/s]global step 800000, trans_decision ep_re 909.1061393099513

{"global_step": 800000, "eval_re": [908.6858134907775, 1363.197839546265, 
719.6579615066966, 957.1171593344537, 406.48937600957873, 571.2086971466348, 
1014.7821623176401, 581.1869021342103, 1947.1655180960618, 621.5699635171954], 
"eval_len": [178, 244, 126, 173, 75, 99, 175, 106, 368, 108]}

 81%|████████  | 809998/1000000 [10:32:30<1:53:01, 28.02it/s]global step 810000, trans_decision ep_re 653.2021464197107

{"global_step": 810000, "eval_re": [619.7938744581099, 517.0684741773786, 
644.2653170437246, 809.6918741458436, 796.5667431696938, 559.5943559910845, 
365.2574455297589, 671.9518751758137, 1060.0128387333227, 487.8186657723767], 
"eval_len": [111, 90, 112, 143, 142, 98, 65, 120, 180, 87]}

 82%|████████▏ | 819998/1000000 [10:40:12<1:45:54, 28.33it/s]global step 820000, trans_decision ep_re 626.6949063580569

{"global_step": 820000, "eval_re": [444.9432243356229, 343.8941120400867, 
608.2474124436206, 785.5427074151859, 246.22049920682213, 462.38447298008913, 
1000.0709654555308, 699.6665612885957, 893.1940768024018, 782.7850316126131], 
"eval_len": [77, 61, 117, 144, 47, 82, 187, 136, 158, 148]}

 83%|████████▎ | 829998/1000000 [10:48:20<1:41:18, 27.97it/s]global step 830000, trans_decision ep_re 728.3954060987659

{"global_step": 830000, "eval_re": [907.0605761158789, 1516.6066567295695, 
946.7004470409964, 658.9470000964066, 247.4375723871571, 1497.1723864357268, 
300.22047222602146, 405.6197442839574, 439.52203790748666, 364.66716776445827], 
"eval_len": [159, 274, 161, 108, 47, 275, 54, 72, 76, 64]}

 84%|████████▍ | 839998/1000000 [10:56:10<1:34:08, 28.33it/s]global step 840000, trans_decision ep_re 761.1720192413968

{"global_step": 840000, "eval_re": [493.95827399801533, 685.2412423051218, 
806.0276743372209, 203.13413632930758, 1287.0327085070728, 429.4807488602363, 
667.2551450510342, 202.47204103283033, 1868.1124816166036, 969.0057403765252], 
"eval_len": [93, 135, 160, 38, 233, 78, 123, 38, 345, 181]}

 85%|████████▍ | 849999/1000000 [11:03:53<1:28:15, 28.32it/s]global step 850000, trans_decision ep_re 597.4383412623849

{"global_step": 850000, "eval_re": [159.20054734122056, 843.9353373507988, 
563.2103271343342, 478.7981505251614, 400.3572685292972, 669.0802340404831, 
569.9814261791806, 1005.445446291934, 764.7767585792673, 519.5979166521726], 
"eval_len": [30, 152, 109, 82, 70, 115, 99, 185, 153, 96]}

 86%|████████▌ | 859997/1000000 [11:12:00<1:22:01, 28.44it/s]global step 860000, trans_decision ep_re 943.4483224733483

{"global_step": 860000, "eval_re": [749.7846550685479, 1050.1664856752159, 
990.6291300067348, 866.441076223494, 837.8268784646842, 1004.584563168761, 
821.3482323031149, 1263.5004748591386, 1137.7346086685793, 712.4671202952123], 
"eval_len": [127, 213, 171, 158, 170, 192, 144, 215, 220, 120]}

 87%|████████▋ | 869999/1000000 [11:19:43<1:18:19, 27.66it/s]global step 870000, trans_decision ep_re 673.1094933545658

{"global_step": 870000, "eval_re": [650.2489381693268, 752.0690799373756, 
652.5230311606714, 1092.2140056973049, 738.6386815620094, 1036.316116478916, 
629.5643674275912, 175.2090350561614, 608.5376295250644, 395.77404853123727], 
"eval_len": [113, 138, 112, 193, 129, 176, 107, 33, 98, 68]}

 88%|████████▊ | 879998/1000000 [11:27:50<1:09:49, 28.64it/s]global step 880000, trans_decision ep_re 853.0006061436443

{"global_step": 880000, "eval_re": [180.0761666274862, 1026.2932587635619, 
519.82773049551, 1052.4721799077618, 1084.2188256817403, 793.7757104168066, 
299.7836555132669, 564.4256159458639, 2228.2875634038596, 780.8453546805872], 
"eval_len": [34, 177, 89, 181, 196, 133, 55, 100, 416, 152]}

 89%|████████▉ | 889999/1000000 [11:35:40<1:06:09, 27.71it/s]global step 890000, trans_decision ep_re 602.4188520620303

{"global_step": 890000, "eval_re": [162.87958611346187, 514.2752264768936, 
671.9075700052159, 280.97816638300606, 770.2298266861422, 454.2430586498054, 
625.2566282926964, 501.55697006806145, 954.125639678757, 1088.7358482662623], 
"eval_len": [31, 89, 118, 52, 143, 78, 116, 84, 169, 191]}

 90%|████████▉ | 899998/1000000 [11:43:24<59:04, 28.21it/s]  global step 900000, trans_decision ep_re 740.3038750561427

{"global_step": 900000, "eval_re": [773.26223504359, 1585.8509277473404, 
673.3232817973594, 682.0135150963323, 663.0024681726923, 387.0892894283043, 
865.0247645384944, 153.08851920506737, 844.8610789449441, 775.5226705873021], 
"eval_len": [122, 253, 119, 128, 129, 68, 142, 29, 136, 136]}

 91%|█████████ | 909999/1000000 [11:51:30<53:28, 28.05it/s]global step 910000, trans_decision ep_re 803.8409386420396

{"global_step": 910000, "eval_re": [1384.9572311900654, 535.9079512501984, 
830.9070529295901, 650.7317795383848, 584.9058552247651, 576.4854214490515, 
549.20674984174, 379.0743801295732, 1735.1327331221212, 811.1002317449066], 
"eval_len": [228, 99, 144, 103, 98, 98, 94, 69, 314, 143]}

 92%|█████████▏| 919998/1000000 [11:59:20<47:16, 28.21it/s]global step 920000, trans_decision ep_re 739.737920540096

{"global_step": 920000, "eval_re": [526.7302000712015, 958.0057831190679, 
616.1421579314726, 496.77873926042435, 912.3605985169996, 754.1304267294199, 
1066.3453616767401, 1157.2582099141177, 394.6598665981465, 514.9678615833692], 
"eval_len": [92, 170, 105, 86, 156, 132, 180, 203, 69, 86]}

 93%|█████████▎| 929999/1000000 [12:07:03<41:18, 28.24it/s]global step 930000, trans_decision ep_re 663.2740829375314

{"global_step": 930000, "eval_re": [707.3962646895568, 522.8535245037489, 
563.8716934146729, 532.381291919548, 463.77338409581455, 387.61368051091193, 
1109.1124358499706, 1046.768049731645, 645.6353748186707, 653.3351298407737], 
"eval_len": [120, 85, 104, 91, 77, 69, 184, 176, 112, 111]}

 94%|█████████▍| 939999/1000000 [12:15:10<36:38, 27.29it/s]global step 940000, trans_decision ep_re 656.5975301997685

{"global_step": 940000, "eval_re": [638.0181437862668, 1110.12667260493, 
312.770569795388, 883.2312064844775, 469.04372235416497, 570.5322834331807, 
519.3086005784213, 905.6733484700558, 467.4169235493042, 689.8538309414955], 
"eval_len": [112, 214, 57, 155, 84, 96, 88, 155, 87, 138]}

 95%|█████████▍| 949999/1000000 [12:22:50<30:17, 27.51it/s]global step 950000, trans_decision ep_re 600.7540906753787

{"global_step": 950000, "eval_re": [404.0881007086418, 359.261123349901, 
645.2703318157601, 521.7230524556192, 770.3133667564067, 1103.1413200220165, 
609.7391269609748, 646.3611629030845, 500.8988844477323, 446.7444373336499], 
"eval_len": [70, 65, 119, 101, 144, 211, 114, 118, 89, 79]}

 96%|█████████▌| 959997/1000000 [12:30:41<23:38, 28.19it/s]global step 960000, trans_decision ep_re 630.0816830108197

{"global_step": 960000, "eval_re": [729.3725053973831, 375.1109433237015, 
813.6228918312643, 390.4532945875436, 841.7767728083645, 945.6283262931039, 
793.4775752327391, 394.0668055994304, 347.45209790399645, 669.8556171306697], 
"eval_len": [127, 66, 135, 68, 141, 179, 131, 69, 62, 130]}

 97%|█████████▋| 969999/1000000 [12:38:32<18:06, 27.60it/s]global step 970000, trans_decision ep_re 798.8871615901744

{"global_step": 970000, "eval_re": [1041.970405838239, 1099.2333360960943, 
501.94224280390546, 628.0607578490611, 1105.5433305334993, 750.7509287162547, 
766.7068115898902, 695.5681281215153, 562.9775034277678, 836.1181709255159], 
"eval_len": [184, 201, 90, 122, 185, 133, 144, 116, 99, 147]}

 98%|█████████▊| 979998/1000000 [12:46:40<12:14, 27.23it/s]global step 980000, trans_decision ep_re 841.2484555644603

{"global_step": 980000, "eval_re": [485.46025168139425, 666.878204455901, 
576.334719776999, 623.0474927028721, 741.1012359111141, 1062.4411871176387, 
456.82631074456225, 1775.66257496203, 795.393788300278, 1229.338789991814], 
"eval_len": [92, 131, 98, 110, 129, 209, 84, 320, 157, 220]}

 99%|█████████▉| 989997/1000000 [12:54:23<05:54, 28.23it/s]global step 990000, trans_decision ep_re 589.2556417497696

{"global_step": 990000, "eval_re": [459.32250241524906, 807.8231051643257, 
655.9103309585903, 653.0728087039696, 568.4123663922355, 457.80775846579286, 
660.5339833022306, 746.2787540900358, 574.3127607976008, 309.0820472076651], 
"eval_len": [78, 150, 136, 115, 101, 83, 124, 137, 108, 56]}

100%|█████████▉| 999998/1000000 [13:02:30<00:00, 28.58it/s]global step 1000000, trans_decision ep_re 856.964777446934

{"global_step": 1000000, "eval_re": [754.8312315782337, 926.3823749806692, 
869.6350116596731, 1331.1502527162397, 452.7750908631018, 1024.9792679701156, 
672.4255316679573, 689.2150062932322, 1215.7997849764517, 632.4542217636653], 
"eval_len": [144, 173, 152, 264, 85, 177, 134, 120, 207, 115]}

100%|██████████| 1000000/1000000 [13:02:33<00:00, 21.30it/s]
