
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.05
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:08<7:53:54, 34.82it/s]global step 10000, trans_decision ep_re 176.1768240348025

{"global_step": 10000, "eval_re": [211.4840971412546, 215.12827524806056, 
159.096188136335, 168.6540810474103, 169.8424950929124, 147.4605839412075, 
233.21408627795896, 141.9983823764363, 95.43107688409357, 219.45897420235593], 
"eval_len": [105, 110, 91, 92, 96, 85, 126, 88, 71, 114]}

  2%|▏         | 19998/1000000 [09:22<7:50:39, 34.70it/s]global step 20000, trans_decision ep_re 46.88733522388238

{"global_step": 20000, "eval_re": [46.39445625684027, 46.27735910856023, 
47.88802020315781, 53.2001835079753, 52.25447172199471, 44.073754773294375, 
46.365282299225434, 37.549025237740814, 49.00480973744417, 45.865989392590734], 
"eval_len": [40, 40, 41, 44, 42, 40, 39, 38, 41, 40]}

  3%|▎         | 29998/1000000 [15:36<7:47:31, 34.58it/s]global step 30000, trans_decision ep_re 142.97123681020216

{"global_step": 30000, "eval_re": [349.4323946386582, 88.7106612824354, 
95.38701072660892, 89.72200021720988, 318.5043440638231, 95.29437549784821, 
91.77074750037211, 97.95063771715394, 91.06351652896149, 111.87667992895058], 
"eval_len": [186, 63, 63, 60, 186, 65, 65, 67, 65, 65]}

  4%|▍         | 39997/1000000 [21:51<7:40:23, 34.75it/s]global step 40000, trans_decision ep_re 259.36089717136673

{"global_step": 40000, "eval_re": [258.9024954075648, 231.32637513347507, 
92.70920745391199, 354.0747346053259, 217.2966148400622, 421.85653628430543, 
175.79631682053179, 156.14537420052403, 118.70300610238493, 566.7983108655808], 
"eval_len": [184, 175, 69, 258, 161, 252, 135, 116, 82, 374]}

  5%|▍         | 49996/1000000 [28:20<7:37:22, 34.62it/s]global step 50000, trans_decision ep_re 293.3778134223885

{"global_step": 50000, "eval_re": [346.96229258168336, 341.40719435401917, 
116.56722414968021, 337.46073918501236, 343.4268183804929, 340.15041023790235, 
340.9804952224108, 79.31355415716324, 343.6840079817016, 343.8253979738193], 
"eval_len": [153, 153, 74, 152, 148, 151, 148, 54, 150, 151]}

  6%|▌         | 59999/1000000 [34:25<7:31:29, 34.70it/s]global step 60000, trans_decision ep_re 261.32935531761643

{"global_step": 60000, "eval_re": [169.20377808986774, 101.630678793525, 
301.52597144709745, 223.43134170377547, 412.6408397785818, 264.1443102488173, 
406.9186875032796, 239.55700597022476, 103.23023503670689, 391.01070460428843], 
"eval_len": [91, 68, 140, 115, 163, 129, 157, 120, 68, 160]}

  7%|▋         | 69998/1000000 [40:42<7:26:49, 34.69it/s]global step 70000, trans_decision ep_re 205.47275250339072

{"global_step": 70000, "eval_re": [287.91096797524415, 228.4840184684803, 
23.434923159100464, 219.57618414272966, 296.82303948331685, 277.96359500653284, 
292.7861988392787, 247.2084283617745, 28.923403720201083, 151.61676587724887], 
"eval_len": [161, 128, 25, 115, 162, 137, 147, 137, 30, 90]}

  8%|▊         | 79997/1000000 [47:10<7:24:20, 34.51it/s]global step 80000, trans_decision ep_re 426.4188692590801

{"global_step": 80000, "eval_re": [451.7369408959298, 566.6994573470896, 
612.1391040280737, 351.0989991174813, 285.4861202000377, 610.703346332456, 
588.937904963371, 351.17216100896735, 127.13935411178647, 319.0753045856086], 
"eval_len": [174, 204, 218, 147, 135, 217, 211, 150, 84, 145]}

  9%|▉         | 89999/1000000 [53:17<7:18:56, 34.55it/s]global step 90000, trans_decision ep_re 221.6902065219943

{"global_step": 90000, "eval_re": [112.87477899135757, 196.65815231592353, 
37.37869809651848, 689.4049145059407, 45.35347396157109, 48.081444808997475, 
200.89249279111613, 668.2493876450234, 169.81282673097095, 48.195895372523346], 
"eval_len": [70, 102, 34, 265, 40, 44, 102, 260, 91, 42]}

 10%|▉         | 99998/1000000 [59:35<7:12:47, 34.66it/s]global step 100000, trans_decision ep_re 439.2554460980838

{"global_step": 100000, "eval_re": [305.3030920053592, 414.6172362084923, 
294.20333773284193, 303.6894462016354, 332.9048082488383, 654.3982925288902, 
194.02942926776586, 521.7059093429715, 191.79196505908303, 1179.91094438496], 
"eval_len": [143, 184, 140, 141, 155, 249, 109, 205, 99, 426]}

 11%|█         | 109997/1000000 [1:06:10<7:07:01, 34.74it/s]global step 110000, trans_decision ep_re 1181.9796348539833

{"global_step": 110000, "eval_re": [1522.5722062495488, 166.59428821628399, 
1781.675062230657, 1400.3312999958762, 743.8244465774684, 40.103971796043545, 
1870.3062748457335, 1477.1246869215556, 1482.881698805256, 1334.3824129014092], 
"eval_len": [514, 90, 583, 470, 282, 37, 620, 485, 491, 434]}

 12%|█▏        | 119999/1000000 [1:12:30<7:04:18, 34.57it/s]global step 120000, trans_decision ep_re 782.7739407963033

{"global_step": 120000, "eval_re": [758.0192198696234, 670.964793703221, 
840.6017044902696, 620.9832132975182, 933.3938449468044, 777.2173787557474, 
988.1466406961769, 503.07120794537434, 870.5776266449516, 864.7637776133461], 
"eval_len": [278, 260, 312, 249, 340, 300, 405, 203, 339, 311]}

 13%|█▎        | 129999/1000000 [1:18:50<7:03:12, 34.26it/s]global step 130000, trans_decision ep_re 761.9568815318746

{"global_step": 130000, "eval_re": [1186.310674772276, 502.2638176368329, 
1007.507019960092, 509.29163985682624, 1048.185814584903, 293.54617336382285, 
418.42913963214636, 832.6003267041591, 809.7186996194968, 1011.7155091881907], 
"eval_len": [386, 185, 314, 188, 338, 133, 173, 307, 299, 343]}

 14%|█▍        | 139999/1000000 [1:25:02<6:53:22, 34.67it/s]global step 140000, trans_decision ep_re 487.882537185733

{"global_step": 140000, "eval_re": [805.5755313342925, 65.55115331374047, 
396.3260165829225, 63.05874072600418, 69.45608347953868, 722.3153922483812, 
740.7469177157117, 434.8394972515747, 812.9351898072434, 768.0208493979211], 
"eval_len": [250, 49, 164, 47, 49, 243, 242, 173, 272, 257]}

 15%|█▍        | 149998/1000000 [1:31:21<6:50:15, 34.53it/s]global step 150000, trans_decision ep_re 398.56173301476696

{"global_step": 150000, "eval_re": [410.59647741623655, 381.8614366545183, 
393.7524501034004, 379.7320968674412, 194.7447080794421, 825.6778179691993, 
131.96439099445433, 694.3182101761702, 390.4306965348711, 182.53904535193635], 
"eval_len": [167, 156, 170, 155, 100, 266, 74, 255, 162, 99]}

 16%|█▌        | 159997/1000000 [1:37:41<6:42:09, 34.81it/s]global step 160000, trans_decision ep_re 474.61962091194835

{"global_step": 160000, "eval_re": [972.3208072617813, 630.4583362048509, 
359.07463786150174, 299.88786693234954, 820.5955183418104, 408.30896803137784, 
202.63745161965514, 715.5533376782714, 148.8743275897829, 188.48495759810274], 
"eval_len": [326, 234, 153, 137, 265, 166, 106, 244, 79, 94]}

 17%|█▋        | 169996/1000000 [1:44:01<6:40:34, 34.53it/s]global step 170000, trans_decision ep_re 509.90136554392654

{"global_step": 170000, "eval_re": [435.91999934750663, 370.2854889310829, 
131.45905911269145, 431.617956960579, 125.09133925709065, 961.0340668742232, 
274.37377714571585, 512.163053871906, 886.7239447253486, 970.344969213122], 
"eval_len": [175, 161, 84, 178, 86, 299, 135, 190, 283, 319]}

 18%|█▊        | 179999/1000000 [1:50:20<6:35:40, 34.54it/s]global step 180000, trans_decision ep_re 333.0965506153499

{"global_step": 180000, "eval_re": [179.92468720666955, 72.00028069473574, 
68.27801827814547, 474.52560862763534, 76.88873000467058, 20.613813110735265, 
760.2398156514965, 447.33678684311053, 482.2998423980892, 748.8579233382104], 
"eval_len": [100, 49, 51, 189, 56, 24, 273, 190, 206, 278]}

 19%|█▉        | 189998/1000000 [1:56:50<6:29:46, 34.64it/s]global step 190000, trans_decision ep_re 397.88461824704655

{"global_step": 190000, "eval_re": [150.2677460428263, 168.89552498107264, 
1114.6068489898944, 296.31784733694667, 116.93902650441835, 290.59210824311475, 
302.3969013162696, 855.6854381750373, 386.248908046663, 296.8958328342224], 
"eval_len": [92, 91, 399, 139, 75, 139, 142, 312, 165, 142]}

 20%|█▉        | 199999/1000000 [2:03:10<6:23:45, 34.74it/s]global step 200000, trans_decision ep_re 377.56961214753125

{"global_step": 200000, "eval_re": [209.12232766510937, 948.840493657364, 
877.1912144897942, 283.2037190264134, 271.76264884789776, 287.9973465851117, 
120.73825481774253, 270.6251046936751, 263.80952359782225, 242.40548809438206], 
"eval_len": [112, 322, 330, 130, 131, 138, 74, 135, 127, 120]}

 21%|██        | 209999/1000000 [2:09:30<6:18:28, 34.79it/s]global step 210000, trans_decision ep_re 798.7976649865446

{"global_step": 210000, "eval_re": [1016.7016906389658, 962.3267257165082, 
530.1848814994011, 273.36995700987677, 1191.3944779213257, 1139.094961581097, 
1174.0099858681365, 655.8153633941188, 95.13628771966187, 949.942318516355], 
"eval_len": [321, 312, 211, 126, 406, 368, 367, 235, 63, 333]}

 22%|██▏       | 219999/1000000 [2:15:50<6:16:41, 34.51it/s]global step 220000, trans_decision ep_re 412.745973522742

{"global_step": 220000, "eval_re": [113.91327234931117, 105.89497170220871, 
366.93245653929915, 696.7016989667881, 538.7429817078786, 121.86383963012747, 
147.64113638257274, 116.44560800239968, 597.3001589979958, 1322.023610948839], 
"eval_len": [76, 69, 157, 254, 202, 78, 92, 77, 232, 465]}

 23%|██▎       | 229999/1000000 [2:22:10<6:10:33, 34.63it/s]global step 230000, trans_decision ep_re 724.3136938867328

{"global_step": 230000, "eval_re": [608.734276727196, 1043.0242583867775, 
872.4169428483497, 877.3381037376053, 842.1780504382399, 318.12529294942755, 
1127.448513112145, 646.5485307904927, 314.42702477184355, 592.8959451052513], 
"eval_len": [217, 325, 297, 293, 287, 137, 367, 230, 140, 217]}

 24%|██▍       | 239999/1000000 [2:28:21<6:06:58, 34.52it/s]global step 240000, trans_decision ep_re 396.134522307169

{"global_step": 240000, "eval_re": [110.48242133109186, 436.1506817447956, 
162.32696294767132, 207.45871845140726, 67.19338048422031, 654.85597750898, 
555.9855710454613, 1163.2913578771186, 165.35258347799981, 438.2475682029434], 
"eval_len": [76, 188, 88, 103, 45, 240, 223, 413, 88, 179]}

 25%|██▍       | 249998/1000000 [2:34:40<6:01:18, 34.60it/s]global step 250000, trans_decision ep_re 423.1745924200953

{"global_step": 250000, "eval_re": [58.202210326942236, 1005.3047843443671, 
176.98659996145489, 103.11007587841175, 427.3979419448522, 755.2182135027356, 
157.6207678765029, 590.6691931147718, 855.1422382366682, 102.09389901424542], 
"eval_len": [49, 328, 99, 71, 173, 273, 90, 222, 289, 73]}

 26%|██▌       | 259997/1000000 [2:41:00<5:56:54, 34.56it/s]global step 260000, trans_decision ep_re 249.75125049669447

{"global_step": 260000, "eval_re": [438.1705831301618, 143.46916360320458, 
296.6327843879477, 90.37196876002231, 562.509787871565, 103.14236113604711, 
364.8885930906336, 324.2717277769514, 69.488732786547, 104.56680242386405], 
"eval_len": [180, 94, 140, 66, 216, 69, 155, 143, 52, 69]}

 27%|██▋       | 269996/1000000 [2:47:30<5:52:14, 34.54it/s]global step 270000, trans_decision ep_re 454.986309536221

{"global_step": 270000, "eval_re": [552.8775944029987, 397.617689177248, 
669.7416633347686, 394.56938878201146, 409.07008488053737, 393.432911716998, 
445.7939204626594, 746.4836802505505, 66.3029347323506, 473.9732276220867], 
"eval_len": [227, 169, 257, 167, 169, 164, 191, 279, 44, 193]}

 28%|██▊       | 279999/1000000 [2:53:50<5:47:24, 34.54it/s]global step 280000, trans_decision ep_re 527.7867074079331

{"global_step": 280000, "eval_re": [1267.0517287971388, 488.06049052934424, 
272.8380789749378, 278.8626011068314, 285.7722916947313, 869.493574128018, 
137.22374488825884, 456.2622788339092, 1134.528020621415, 87.77426450474731], 
"eval_len": [410, 194, 131, 129, 129, 313, 84, 179, 391, 66]}

 29%|██▉       | 289999/1000000 [3:00:10<5:39:22, 34.87it/s]global step 290000, trans_decision ep_re 567.2303641853675

{"global_step": 290000, "eval_re": [177.248504768096, 314.06371366511314, 
1129.2249392970746, 411.78908209612797, 1074.21760068086, 66.77071571882458, 
901.4444854767397, 606.9024532962471, 919.7219139151066, 70.92023293948634], 
"eval_len": [92, 135, 364, 176, 331, 47, 283, 219, 290, 49]}

 30%|██▉       | 299999/1000000 [3:06:21<5:37:15, 34.59it/s]global step 300000, trans_decision ep_re 412.5539696152852

{"global_step": 300000, "eval_re": [624.7974273844618, 172.31328173542985, 
647.4959973213182, 439.4325499567178, 931.7794106426499, 160.5003676808249, 
349.4445861244111, 184.38298772572415, 437.9993763244515, 177.39371125686242], 
"eval_len": [235, 97, 235, 177, 299, 87, 156, 98, 178, 101]}

 31%|███       | 309998/1000000 [3:12:41<5:31:47, 34.66it/s]global step 310000, trans_decision ep_re 611.7691943493602

{"global_step": 310000, "eval_re": [1041.9474794127775, 1084.08452995645, 
65.36878928027384, 328.6480789755711, 65.09654935222629, 972.3925612294504, 
354.4807833766255, 241.2546525368488, 974.4820636698598, 989.936455703519], 
"eval_len": [324, 342, 45, 137, 45, 301, 157, 114, 305, 309]}

 32%|███▏      | 319997/1000000 [3:19:01<5:25:34, 34.81it/s]global step 320000, trans_decision ep_re 743.3099158986649

{"global_step": 320000, "eval_re": [1283.0121926723803, 520.8586541332841, 
277.0409824318209, 397.2974621065126, 1918.3992138894523, 437.847729437594, 
810.3458862569765, 535.5199918211079, 820.5737895618803, 432.2032566756398], 
"eval_len": [429, 203, 134, 167, 586, 171, 281, 205, 292, 171]}

 33%|███▎      | 329996/1000000 [3:25:23<5:20:52, 34.80it/s]global step 330000, trans_decision ep_re 538.4490668998236

{"global_step": 330000, "eval_re": [394.06194709980315, 225.33747652432837, 
176.57061871353037, 736.8967493756774, 586.0743910229553, 582.0995353009906, 
567.5795797016218, 667.8377600439386, 711.8267305792878, 736.2058806361031], 
"eval_len": [152, 109, 95, 235, 192, 198, 194, 215, 230, 239]}

 34%|███▍      | 339999/1000000 [3:31:43<5:19:01, 34.48it/s]global step 340000, trans_decision ep_re 289.68505382777886

{"global_step": 340000, "eval_re": [107.27536003527857, 638.4225175515533, 
156.85453146635044, 97.84362512712438, 571.4189670017327, 341.6711958407179, 
124.19554870904727, 203.9470813176501, 126.82297581075206, 528.3987354175817], 
"eval_len": [76, 233, 93, 71, 234, 153, 77, 107, 80, 207]}

 35%|███▍      | 349998/1000000 [3:38:02<5:11:35, 34.77it/s]global step 350000, trans_decision ep_re 676.3811417038341

{"global_step": 350000, "eval_re": [66.68481691140823, 807.8591304256398, 
66.38212030975967, 860.9641118325942, 538.7010853366762, 846.8995807141987, 
1412.1666039512713, 72.31168697047208, 1723.7604561199644, 368.081824466357], 
"eval_len": [56, 264, 59, 271, 210, 286, 452, 60, 546, 152]}

 36%|███▌      | 359997/1000000 [3:44:40<5:08:48, 34.54it/s]global step 360000, trans_decision ep_re 841.6177153870374

{"global_step": 360000, "eval_re": [89.75856446895438, 468.97477567229515, 
876.2399240213986, 1310.0763966207971, 871.5397767586906, 60.313661137103274, 
1236.337927461326, 75.6997817469129, 954.3659085354118, 2472.8704374474837], 
"eval_len": [71, 180, 302, 397, 289, 54, 373, 59, 321, 767]}

 37%|███▋      | 369999/1000000 [3:51:00<5:04:51, 34.44it/s]global step 370000, trans_decision ep_re 488.7283700104489

{"global_step": 370000, "eval_re": [117.59667032569378, 141.34347282852926, 
218.36859410812704, 1141.3564106224092, 788.0165589578514, 399.15689179594, 
707.8359958171204, 840.5408021015947, 462.72914697444634, 70.3391565727776], 
"eval_len": [84, 82, 105, 374, 276, 159, 251, 284, 182, 55]}

 38%|███▊      | 379999/1000000 [3:57:20<4:58:39, 34.60it/s]global step 380000, trans_decision ep_re 373.90556305307837

{"global_step": 380000, "eval_re": [954.8469492406704, 261.3048169317203, 
174.51336490495433, 367.11851344028213, 256.1453287965232, 622.2760282724629, 
589.7038038749581, 254.13018122505326, 133.0952854774713, 125.921358366688], 
"eval_len": [323, 121, 89, 157, 114, 226, 225, 115, 82, 79]}

 39%|███▉      | 389999/1000000 [4:03:40<4:54:32, 34.52it/s]global step 390000, trans_decision ep_re 709.9454202445645

{"global_step": 390000, "eval_re": [669.6406196572071, 355.58725697062835, 
471.25817184054114, 352.52703857337497, 506.21123256255834, 951.7185527675753, 
356.62995915832937, 210.63124723355745, 2668.793425250329, 556.4566984315433], 
"eval_len": [241, 145, 179, 147, 183, 320, 147, 107, 805, 204]}

 40%|███▉      | 399999/1000000 [4:10:10<4:51:25, 34.31it/s]global step 400000, trans_decision ep_re 1100.327657145522

{"global_step": 400000, "eval_re": [207.43240319772494, 1789.5350418425476, 
1089.4521724013289, 1396.3887785957681, 215.27306969181504, 1139.5971536642933, 
2484.80866968175, 1448.5351008155958, 763.734244587007, 468.5199369773891], 
"eval_len": [104, 557, 357, 444, 108, 378, 756, 459, 260, 177]}

 41%|████      | 409999/1000000 [4:16:30<4:44:42, 34.54it/s]global step 410000, trans_decision ep_re 1027.1085233941265

{"global_step": 410000, "eval_re": [3335.406395503748, 287.2921166355493, 
785.8181331498246, 350.35756302074196, 1136.616628171935, 2062.263998748358, 
253.7477925297747, 287.26469083796917, 960.566802817163, 811.7511125262023], 
"eval_len": [1000, 125, 273, 148, 379, 651, 125, 126, 317, 284]}

 42%|████▏     | 419999/1000000 [4:23:00<4:41:23, 34.35it/s]global step 420000, trans_decision ep_re 707.8865300952594

{"global_step": 420000, "eval_re": [650.9568826208796, 318.319885450638, 
353.207725777932, 775.2345289857303, 1386.6886536527795, 456.65666946308824, 
2007.4866662049546, 165.81007601760078, 365.8097940994897, 598.694418679501], 
"eval_len": [232, 128, 146, 267, 433, 176, 591, 90, 145, 218]}

 43%|████▎     | 429999/1000000 [4:29:20<4:34:41, 34.58it/s]global step 430000, trans_decision ep_re 888.7283173078958

{"global_step": 430000, "eval_re": [2068.7647778186792, 36.840470088737874, 
2194.7974307390587, 27.6382718799223, 36.86043862157469, 41.64174188441371, 
753.4110056087235, 252.10412385515352, 2199.3328795499765, 1275.8920330327182], 
"eval_len": [616, 34, 643, 29, 36, 42, 256, 116, 640, 387]}

 44%|████▍     | 439999/1000000 [4:35:33<4:30:04, 34.56it/s]global step 440000, trans_decision ep_re 757.3677060109288

{"global_step": 440000, "eval_re": [1776.2264313413677, 361.96927154170436, 
451.20338872589497, 1075.572018214325, 600.6623009520507, 293.5737275765255, 
609.3706740896093, 858.8583183791646, 1283.286529805167, 262.95439948347826], 
"eval_len": [546, 154, 174, 335, 216, 127, 220, 289, 364, 119]}

 45%|████▍     | 449999/1000000 [4:42:10<4:27:09, 34.31it/s]global step 450000, trans_decision ep_re 583.881188125006

{"global_step": 450000, "eval_re": [568.7923334323009, 669.1666365379768, 
714.4618583005341, 612.8606044203609, 822.4644948625887, 272.33548872871734, 
341.5392789083602, 781.0209015875439, 556.2515759596404, 499.9187085120356], 
"eval_len": [198, 221, 226, 211, 261, 120, 135, 234, 183, 177]}

 46%|████▌     | 459999/1000000 [4:48:30<4:21:15, 34.45it/s]global step 460000, trans_decision ep_re 354.6204232301527

{"global_step": 460000, "eval_re": [56.52389602502146, 199.0042787127396, 
70.25309429019218, 205.4837762619358, 841.1426623420512, 67.29607783807285, 
833.4083193809523, 1152.7151375062647, 59.939295532475334, 60.43769441182224], 
"eval_len": [55, 104, 60, 103, 289, 61, 280, 367, 56, 56]}

 47%|████▋     | 469999/1000000 [4:54:50<4:14:00, 34.78it/s]global step 470000, trans_decision ep_re 219.0949043487836

{"global_step": 470000, "eval_re": [39.032471631618755, 64.59725186910997, 
292.88719363242393, 60.985118855397026, 72.52193105376854, 603.9878693644227, 
603.7495564063578, 75.6360511242128, 121.5527914388968, 255.99880811162765], 
"eval_len": [39, 57, 126, 56, 59, 214, 209, 62, 75, 119]}

 48%|████▊     | 479999/1000000 [5:01:10<4:09:52, 34.68it/s]global step 480000, trans_decision ep_re 284.0339194934812

{"global_step": 480000, "eval_re": [58.59612724327422, 53.590838491013336, 
50.485070451498444, 47.18013363883414, 2220.895945716945, 54.080286221398, 
54.33665475837663, 179.0581799857071, 70.49941847193271, 51.61653995583276], 
"eval_len": [51, 50, 48, 46, 657, 53, 48, 90, 59, 47]}

 49%|████▉     | 489999/1000000 [5:07:30<4:04:42, 34.74it/s]global step 490000, trans_decision ep_re 519.3589879968191

{"global_step": 490000, "eval_re": [1081.970561755612, 223.70475207180536, 
590.2596411367672, 214.47065694368754, 469.88830232938335, 783.9517157451337, 
216.25750453251942, 464.65618720748677, 36.94962562690042, 1111.4809326188947], 
"eval_len": [355, 113, 210, 102, 183, 270, 97, 177, 39, 356]}

 50%|████▉     | 499999/1000000 [5:13:35<4:00:14, 34.69it/s]global step 500000, trans_decision ep_re 306.62113462928363

{"global_step": 500000, "eval_re": [31.20260902064044, 420.9296100093182, 
157.11347480914617, 151.12349597434405, 67.12726237266602, 39.22832784024769, 
1176.6805606307435, 568.4698970812835, 429.3800937281244, 24.956014826322384], 
"eval_len": [36, 167, 90, 90, 56, 42, 354, 204, 171, 28]}

 51%|█████     | 509998/1000000 [5:19:51<3:55:10, 34.72it/s]global step 510000, trans_decision ep_re 441.74193020323384

{"global_step": 510000, "eval_re": [815.7569793893985, 169.37476306148656, 
596.1134824846853, 520.9749980783406, 267.80322625343354, 46.882046353075694, 
166.50695236010935, 457.1831079894667, 709.6549205161941, 667.1688255461485], 
"eval_len": [250, 90, 197, 180, 115, 43, 88, 161, 228, 227]}

 52%|█████▏    | 519997/1000000 [5:26:07<3:49:01, 34.93it/s]global step 520000, trans_decision ep_re 130.18389382835616

{"global_step": 520000, "eval_re": [49.84604398903378, 27.17477154939128, 
56.65421715350991, 42.57971201783017, 34.4444064752837, 163.42325804009315, 
53.817710869064534, 796.6815966767551, 41.614151860458286, 35.60306965214162], 
"eval_len": [45, 33, 50, 41, 36, 86, 45, 251, 39, 37]}

 53%|█████▎    | 529996/1000000 [5:32:22<3:47:34, 34.42it/s]global step 530000, trans_decision ep_re 219.84127054752366

{"global_step": 530000, "eval_re": [95.97286100883173, 70.69180369750065, 
184.36090476470338, 201.3094088544034, 982.8262589181004, 180.55163429755723, 
93.24491805034339, 177.65758673626652, 32.08437560997335, 179.71295353755647], 
"eval_len": [70, 62, 95, 97, 313, 95, 70, 93, 30, 97]}

 54%|█████▍    | 539999/1000000 [5:38:50<3:42:34, 34.45it/s]global step 540000, trans_decision ep_re 180.7165034432956

{"global_step": 540000, "eval_re": [273.3486485962789, 248.0153754500627, 
181.38941039444356, 187.89386825488126, 180.49737765295396, 23.416905116437807, 
259.1345230753708, 364.18740407175693, 52.719041964145376, 36.56247985662466], 
"eval_len": [117, 112, 90, 93, 93, 26, 122, 148, 48, 38]}

 55%|█████▍    | 549999/1000000 [5:44:56<3:36:11, 34.69it/s]global step 550000, trans_decision ep_re 67.27245167262144

{"global_step": 550000, "eval_re": [69.34252365122222, 72.69714143449701, 
79.64318795668348, 57.4686485559338, 78.27293746403966, 69.84951228587722, 
26.021885374479126, 72.53608101501432, 75.69177044584175, 71.2008285426259], 
"eval_len": [46, 48, 51, 45, 54, 50, 27, 49, 55, 50]}

 56%|█████▌    | 559999/1000000 [5:51:12<3:33:01, 34.42it/s]global step 560000, trans_decision ep_re 192.11771725695692

{"global_step": 560000, "eval_re": [79.73177519399778, 905.5295813834358, 
68.16764075823855, 18.537222897333756, 147.19455215187136, 25.611690775109437, 
19.311669573676784, 51.82992177902475, 310.12899923031756, 295.13411882656334], 
"eval_len": [65, 313, 61, 22, 82, 32, 25, 49, 141, 129]}

 57%|█████▋    | 569998/1000000 [5:57:27<3:26:32, 34.70it/s]global step 570000, trans_decision ep_re 250.30131923692042

{"global_step": 570000, "eval_re": [18.086275291468326, 48.125805725614185, 
398.77814404645164, 168.20403832172576, 26.984751507760617, 22.64751833654529, 
582.5590977794366, 22.957082758493197, 483.76075872620294, 730.9097198755056], 
"eval_len": [23, 50, 168, 97, 29, 25, 215, 25, 194, 258]}

 58%|█████▊    | 579997/1000000 [6:03:43<3:20:25, 34.93it/s]global step 580000, trans_decision ep_re 499.91354506426177

{"global_step": 580000, "eval_re": [1244.233369563643, 326.25183175496386, 
337.6484345692661, 239.09077851874582, 325.27225339561465, 28.038614330768727, 
755.189804543529, 368.3740512226961, 242.52035082126687, 1132.515961922124], 
"eval_len": [378, 137, 146, 109, 131, 28, 250, 153, 113, 365]}

 59%|█████▉    | 589996/1000000 [6:10:01<3:16:41, 34.74it/s]global step 590000, trans_decision ep_re 191.13545482485443

{"global_step": 590000, "eval_re": [319.96024845417185, 66.43521119885162, 
31.143537867620083, 66.4244415023934, 55.09680992820806, 442.8191910490829, 
393.86401673365185, 39.83655430737147, 429.65091584347334, 66.12362136371996], 
"eval_len": [140, 58, 34, 61, 50, 180, 164, 42, 168, 51]}

 60%|█████▉    | 599999/1000000 [6:16:16<3:12:15, 34.68it/s]global step 600000, trans_decision ep_re 75.60679313177222

{"global_step": 600000, "eval_re": [28.944509079759694, 64.28080617177639, 
51.02288644896376, 36.48900195446248, 325.75246877208116, 40.437176048799614, 
62.51796209079444, 49.8081730313385, 34.36174126488997, 62.453206454856335], 
"eval_len": [32, 54, 47, 38, 134, 41, 53, 46, 37, 52]}

 61%|██████    | 609999/1000000 [6:22:31<3:07:18, 34.70it/s]global step 610000, trans_decision ep_re 684.8035894717477

{"global_step": 610000, "eval_re": [585.4127201812182, 333.7834547226543, 
859.2964381940194, 185.33574010087244, 858.2671023787627, 1051.5261049916924, 
2213.6855867818467, 185.82221464673415, 188.19889294595305, 386.7076397737233], 
"eval_len": [207, 140, 286, 97, 295, 333, 675, 90, 98, 154]}

 62%|██████▏   | 619998/1000000 [6:29:00<3:02:17, 34.74it/s]global step 620000, trans_decision ep_re 636.3927021317046

{"global_step": 620000, "eval_re": [775.1523670948977, 731.119923396662, 
1068.7202071874497, 68.014473120372, 768.2766395761423, 442.4804855294741, 
175.55743303661112, 22.462612209695568, 1027.6326805868705, 1284.5101995788707],
"eval_len": [255, 243, 325, 58, 257, 164, 89, 26, 312, 373]}

 63%|██████▎   | 629999/1000000 [6:35:20<2:57:54, 34.66it/s]global step 630000, trans_decision ep_re 553.9380208517047

{"global_step": 630000, "eval_re": [419.62879442502236, 754.980076280312, 
227.25178999992679, 102.10846669026202, 77.12675292381941, 481.45860746299195, 
1011.3290337762664, 1118.0859754805997, 475.4505315394782, 871.9601799383671], 
"eval_len": [170, 248, 120, 72, 65, 181, 326, 363, 180, 294]}

 64%|██████▍   | 639999/1000000 [6:41:40<2:52:59, 34.68it/s]global step 640000, trans_decision ep_re 991.3549201061396

{"global_step": 640000, "eval_re": [962.2023128899102, 1312.3784583112974, 
971.876198696527, 189.49636667429243, 771.0594742369883, 192.52402381025922, 
2963.064765240299, 66.60446748393237, 2290.563137977918, 193.77999573997283], 
"eval_len": [306, 374, 277, 96, 256, 101, 832, 58, 671, 96]}

 65%|██████▍   | 649999/1000000 [6:47:49<2:48:32, 34.61it/s]global step 650000, trans_decision ep_re 24.31195237725893

{"global_step": 650000, "eval_re": [20.67618199288645, 30.017680573368004, 
25.832978955049708, 17.514071765047238, 31.194985574304997, 22.555829492081514, 
27.940076367345934, 19.889025523028515, 24.26084744153652, 23.23784608794042], 
"eval_len": [22, 29, 27, 22, 31, 27, 31, 24, 29, 27]}

 66%|██████▌   | 659999/1000000 [6:54:02<2:42:54, 34.78it/s]global step 660000, trans_decision ep_re 212.04268738870854

{"global_step": 660000, "eval_re": [200.82946710628227, 210.82273629474827, 
198.4236724731541, 410.4169781338021, 234.22939498133758, 182.36797673666555, 
242.62124349456369, 38.13923218963505, 196.7347124281602, 205.84146004873656], 
"eval_len": [109, 111, 107, 182, 122, 102, 132, 39, 106, 120]}

 67%|██████▋   | 669998/1000000 [7:00:30<2:38:45, 34.65it/s]global step 670000, trans_decision ep_re 589.8624935504519

{"global_step": 670000, "eval_re": [180.97831598089883, 915.1185107870242, 
245.01898083926747, 303.18688944580106, 214.0947156380559, 2121.94133197073, 
470.41263383138676, 266.2251697725708, 301.37753591831944, 880.2708513204648], 
"eval_len": [90, 307, 113, 133, 104, 645, 181, 121, 130, 300]}

 68%|██████▊   | 679999/1000000 [7:06:38<2:33:26, 34.76it/s]global step 680000, trans_decision ep_re 190.9528981430378

{"global_step": 680000, "eval_re": [93.9404555267495, 115.0075801121182, 
376.827312377706, 153.50831389491938, 97.97247987777479, 89.32032444926426, 
120.5483209920746, 23.172543371300293, 705.8941646296287, 133.33748619884233], 
"eval_len": [67, 74, 166, 82, 68, 64, 80, 25, 271, 72]}

 69%|██████▉   | 689999/1000000 [7:12:53<2:28:12, 34.86it/s]global step 690000, trans_decision ep_re 85.61196219702063

{"global_step": 690000, "eval_re": [86.9258274656124, 58.35169783885554, 
57.437163511947055, 85.22452410411726, 59.25853501168487, 52.66795261798769, 
56.90689361082342, 222.06440152217334, 117.77961202391668, 59.50301426308802], 
"eval_len": [67, 43, 41, 67, 42, 39, 43, 116, 97, 46]}

 70%|██████▉   | 699999/1000000 [7:19:07<2:24:17, 34.65it/s]global step 700000, trans_decision ep_re 119.91811944188586

{"global_step": 700000, "eval_re": [133.8841038312139, 119.97295962729247, 
123.25906418236355, 130.58453041050132, 124.57603040474585, 125.60879559397941, 
131.6098218083188, 55.08369653037249, 119.36389437288925, 135.23829765718156], 
"eval_len": [93, 89, 87, 87, 86, 87, 96, 39, 86, 90]}

 71%|███████   | 709998/1000000 [7:25:23<2:19:55, 34.54it/s]global step 710000, trans_decision ep_re 201.49188237226753

{"global_step": 710000, "eval_re": [369.0616560948136, 163.7127190048764, 
180.6703307574213, 199.5751974078955, 216.0737793358491, 193.7326913608577, 
182.23597216491592, 191.80739999854703, 168.76777254177344, 149.28130505572537],
"eval_len": [164, 108, 114, 134, 152, 133, 100, 127, 108, 91]}

 72%|███████▏  | 719997/1000000 [7:31:50<2:14:11, 34.78it/s]global step 720000, trans_decision ep_re 177.7891003888657

{"global_step": 720000, "eval_re": [298.0286799610283, 85.32663432340436, 
293.7314782556961, 67.6402064789107, 287.3884389801699, 76.5294909626015, 
85.7357047445548, 225.76450704004938, 82.47294195833999, 275.272921183902], 
"eval_len": [181, 79, 169, 80, 166, 83, 77, 182, 73, 179]}

 73%|███████▎  | 729999/1000000 [7:37:56<2:09:47, 34.67it/s]global step 730000, trans_decision ep_re 225.27544512603907

{"global_step": 730000, "eval_re": [93.05360215403225, 100.75371048132244, 
341.5224257845264, 89.5378923301342, 384.7270859853758, 94.46599860513003, 
135.32772871428264, 112.8359694642239, 460.6769709838593, 439.85306675750377], 
"eval_len": [77, 80, 144, 75, 188, 80, 81, 92, 292, 218]}

 74%|███████▍  | 739998/1000000 [7:44:15<2:04:23, 34.84it/s]global step 740000, trans_decision ep_re 433.6640243584285

{"global_step": 740000, "eval_re": [155.2971849322065, 306.64289654830645, 
581.6976150311039, 781.712864924488, 182.72676894059288, 569.8606323305729, 
940.314590570664, 118.15192113421098, 107.31451190009217, 592.9212572720469], 
"eval_len": [85, 130, 214, 278, 105, 213, 313, 72, 68, 215]}

 75%|███████▍  | 749997/1000000 [7:50:32<2:00:02, 34.71it/s]global step 750000, trans_decision ep_re 532.1308590841286

{"global_step": 750000, "eval_re": [747.3952379738062, 178.4810937238331, 
188.64264151218393, 603.0846944096558, 137.6974188525286, 862.6283274044251, 
559.7139769762094, 179.59071814134822, 817.6525409347676, 1046.4219409125283], 
"eval_len": [229, 93, 97, 205, 84, 264, 197, 92, 260, 316]}

 76%|███████▌  | 759996/1000000 [7:57:00<1:55:20, 34.68it/s]global step 760000, trans_decision ep_re 541.5000597878004

{"global_step": 760000, "eval_re": [662.8359238805006, 368.88546726308783, 
485.99506142144907, 2437.179288866734, 79.61166245061716, 694.5999020483721, 
23.906924541330905, 239.5776449519094, 209.4347064312024, 212.97401602280016], 
"eval_len": [227, 146, 182, 735, 66, 243, 30, 115, 107, 103]}

 77%|███████▋  | 769999/1000000 [8:03:20<1:50:13, 34.78it/s]global step 770000, trans_decision ep_re 780.5478708504132

{"global_step": 770000, "eval_re": [1300.4435617941535, 258.53764290140526, 
1859.2816214042612, 705.5448594886489, 778.9432345474513, 890.4671905086936, 
28.511076090390496, 530.6091346940107, 338.5569016068287, 1114.5834854682896], 
"eval_len": [398, 119, 518, 238, 243, 280, 34, 199, 141, 319]}

 78%|███████▊  | 779999/1000000 [8:09:40<1:45:54, 34.62it/s]global step 780000, trans_decision ep_re 561.5956245773139

{"global_step": 780000, "eval_re": [966.9638221627181, 631.143772396499, 
315.2441109330042, 894.4083978822409, 637.4506346487036, 23.23118560330374, 
995.882006649304, 590.0597764575032, 28.856150031919984, 532.7163890079414], 
"eval_len": [282, 219, 129, 263, 219, 26, 325, 205, 31, 189]}

 79%|███████▉  | 789999/1000000 [8:16:00<1:41:32, 34.47it/s]global step 790000, trans_decision ep_re 427.65240965504324

{"global_step": 790000, "eval_re": [244.79858016536684, 538.5716107793386, 
86.03618942596609, 322.73632060278027, 270.6090857048409, 823.1784868777689, 
136.83325781851508, 805.7852237027399, 851.5059287980343, 196.46941267508146], 
"eval_len": [112, 190, 72, 136, 122, 267, 77, 254, 267, 97]}

 80%|███████▉  | 799999/1000000 [8:22:20<1:36:15, 34.63it/s]global step 800000, trans_decision ep_re 1017.4479149931809

{"global_step": 800000, "eval_re": [1189.0955105610067, 1045.1797046845854, 
154.3894893954117, 1283.7753018231801, 752.974495567604, 3432.567577875426, 
1872.2270562470735, 52.097979264401104, 158.22004466685718, 233.95198984626438],
"eval_len": [367, 342, 87, 406, 252, 1000, 574, 49, 87, 111]}

 81%|████████  | 809999/1000000 [8:28:31<1:31:20, 34.67it/s]global step 810000, trans_decision ep_re 748.9749435151554

{"global_step": 810000, "eval_re": [27.247476125095638, 1267.2215804940965, 
1335.2742618831599, 747.6634287103982, 25.99149028905369, 1465.6363001940872, 
952.0658128147807, 202.84614628052074, 1328.356932496376, 137.44600586398607], 
"eval_len": [33, 356, 370, 253, 29, 424, 271, 95, 388, 81]}

 82%|████████▏ | 819998/1000000 [8:34:50<1:25:41, 35.01it/s]global step 820000, trans_decision ep_re 437.6968334049502

{"global_step": 820000, "eval_re": [570.1145768988694, 273.16906480518713, 
605.6712132248065, 943.6826610276131, 349.29607671998315, 332.17510252647054, 
579.0750791667209, 90.794262707967, 316.2139185265693, 316.7763784453156], 
"eval_len": [197, 116, 209, 306, 141, 138, 197, 69, 129, 130]}

 83%|████████▎ | 829997/1000000 [8:41:20<1:21:30, 34.76it/s]global step 830000, trans_decision ep_re 497.0359954453794

{"global_step": 830000, "eval_re": [981.9176694736387, 596.1164916332751, 
313.14958612728753, 208.63526921631058, 329.95503158956586, 193.29851676960416, 
290.8359922484257, 379.5946191932889, 1587.6295221745743, 89.22725602782243], 
"eval_len": [325, 208, 133, 106, 129, 100, 125, 152, 446, 72]}

 84%|████████▍ | 839999/1000000 [8:47:24<1:16:57, 34.65it/s]global step 840000, trans_decision ep_re 488.55064862196633

{"global_step": 840000, "eval_re": [1046.1646350480314, 1008.3288274231526, 
104.73228096316416, 157.30990506931454, 132.04699462833383, 116.67832405661966, 
28.018409705825754, 1014.2553264897275, 138.5521085674517, 1139.4196742680424], 
"eval_len": [310, 300, 65, 86, 76, 73, 31, 305, 76, 331]}

 85%|████████▍ | 849998/1000000 [8:53:41<1:11:52, 34.78it/s]global step 850000, trans_decision ep_re 1069.9263471319384

{"global_step": 850000, "eval_re": [1241.7844072588694, 1484.0105238935114, 
1478.6613507026002, 2225.003619824978, 303.75432868409695, 576.8481159776794, 
675.3621519469777, 1043.2572577899416, 503.70059748379504, 1166.8811177569328], 
"eval_len": [385, 401, 454, 674, 136, 203, 225, 332, 187, 382]}

 86%|████████▌ | 859996/1000000 [9:00:05<1:07:02, 34.80it/s]global step 860000, trans_decision ep_re 556.4983263692244

{"global_step": 860000, "eval_re": [240.6192501100492, 255.64388647999067, 
913.2142466021353, 660.234720337476, 549.568117322799, 1104.185649588954, 
230.65960371665344, 832.6455833076549, 225.62876224305938, 552.583443983473], 
"eval_len": [108, 118, 305, 234, 204, 346, 104, 272, 105, 202]}

 87%|████████▋ | 869999/1000000 [9:06:24<1:02:39, 34.58it/s]global step 870000, trans_decision ep_re 616.8781622051984

{"global_step": 870000, "eval_re": [286.428131834163, 147.91912689549875, 
23.761784814016202, 536.9476822795499, 532.4659690115742, 1140.977790077827, 
1679.1625587818853, 809.3938026471125, 476.7417058794657, 534.983069830891], 
"eval_len": [127, 77, 30, 191, 192, 332, 511, 259, 179, 192]}

 88%|████████▊ | 879998/1000000 [9:12:45<57:41, 34.67it/s]global step 880000, trans_decision ep_re 728.2178434800243

{"global_step": 880000, "eval_re": [442.6357374541231, 443.6671355538999, 
313.117056945869, 1206.2516310199476, 1091.1594132671871, 1198.6343547174679, 
1008.6834796522967, 91.70449082911415, 1041.90906628585, 444.41606907448846], 
"eval_len": [172, 166, 136, 339, 315, 341, 321, 67, 310, 169]}

 89%|████████▉ | 889996/1000000 [9:19:20<52:36, 34.85it/s]global step 890000, trans_decision ep_re 967.8009753153696

{"global_step": 890000, "eval_re": [1736.7740221396782, 186.4361228053673, 
801.5050980102084, 806.6917315135034, 1099.5756031601422, 1245.5808223073795, 
2031.4379991784758, 742.9478129386445, 790.8091906586916, 236.25135044160342], 
"eval_len": [525, 87, 264, 267, 338, 386, 561, 248, 259, 110]}

 90%|████████▉ | 899999/1000000 [9:25:40<48:01, 34.71it/s]global step 900000, trans_decision ep_re 461.3909280974286

{"global_step": 900000, "eval_re": [29.91371981326281, 30.664631620521405, 
1170.1870231013938, 746.8077608910945, 711.9239908845748, 24.21165883339755, 
189.8600531601408, 193.6051963721948, 1049.4783599546358, 467.2568863430687], 
"eval_len": [32, 34, 329, 245, 238, 28, 102, 97, 302, 171]}

 91%|█████████ | 909999/1000000 [9:32:00<43:27, 34.52it/s]global step 910000, trans_decision ep_re 669.1509294958468

{"global_step": 910000, "eval_re": [473.27456886429763, 472.89707747298735, 
1176.1222467226034, 1073.6082145182868, 1087.7186966038962, 1088.584633709312, 
425.4217448349577, 35.386714727275475, 678.5876676113706, 179.90772989348102], 
"eval_len": [179, 174, 337, 315, 316, 307, 166, 36, 230, 95]}

 92%|█████████▏| 919999/1000000 [9:38:20<38:46, 34.38it/s]global step 920000, trans_decision ep_re 821.004223145212

{"global_step": 920000, "eval_re": [2545.908811075988, 41.79354330437455, 
24.30877516937852, 1233.8817841369448, 1363.4087434856413, 236.88043344175557, 
340.3016000680044, 1038.4060592781987, 1336.642225132591, 48.510256359242064], 
"eval_len": [763, 39, 30, 360, 421, 113, 148, 333, 407, 46]}

 93%|█████████▎| 929999/1000000 [9:44:40<33:39, 34.66it/s]global step 930000, trans_decision ep_re 624.7547184093653

{"global_step": 930000, "eval_re": [78.95412497739167, 1220.6392705819105, 
676.2780658968289, 1327.1193893113734, 477.53592000872305, 956.9445816524963, 
655.678209197799, 30.689841155685556, 743.301652176498, 80.4061291349449], 
"eval_len": [64, 334, 225, 358, 175, 303, 215, 35, 251, 62]}

 94%|█████████▍| 939999/1000000 [9:51:00<28:45, 34.78it/s]global step 940000, trans_decision ep_re 765.0985438566315

{"global_step": 940000, "eval_re": [40.43564799879553, 1565.0384249232568, 
937.4982843015002, 325.1542583646326, 967.1970767621793, 1159.834693376676, 
1275.2671588821645, 154.58369460505733, 984.0082840360434, 241.96791531600994], 
"eval_len": [37, 469, 291, 140, 314, 371, 399, 90, 313, 111]}

 95%|█████████▍| 949999/1000000 [9:57:20<23:56, 34.81it/s]global step 950000, trans_decision ep_re 718.0010354115786

{"global_step": 950000, "eval_re": [512.7225226841772, 401.1733182695566, 
568.3368413345765, 1462.1821385129645, 25.814643693760114, 388.7902878208264, 
1304.649613610254, 381.8668358541829, 632.8766042532752, 1501.5975480822121], 
"eval_len": [187, 160, 204, 438, 31, 156, 362, 150, 219, 458]}

 96%|█████████▌| 959999/1000000 [10:03:30<19:10, 34.75it/s]global step 960000, trans_decision ep_re 93.19197089984827

{"global_step": 960000, "eval_re": [99.79903858334505, 95.68508386762713, 
89.09916312620973, 85.94358585583403, 86.96677067559585, 97.03547022566453, 
95.34811014706146, 92.96101956933944, 94.21286796224163, 94.86859898556399], 
"eval_len": [65, 66, 62, 62, 61, 60, 63, 64, 64, 63]}

 97%|█████████▋| 969999/1000000 [10:10:00<14:27, 34.60it/s]global step 970000, trans_decision ep_re 929.8074789524813

{"global_step": 970000, "eval_re": [191.82895823451844, 501.53521200471283, 
1123.410406701095, 1162.9942888688975, 1289.7563412619063, 928.9747513383053, 
196.10597798103143, 1029.5066694268166, 646.1879540178427, 2227.7742296896868], 
"eval_len": [102, 185, 365, 375, 367, 291, 102, 319, 229, 672]}

 98%|█████████▊| 979999/1000000 [10:16:20<09:36, 34.67it/s]global step 980000, trans_decision ep_re 1016.5321634030427

{"global_step": 980000, "eval_re": [1020.1239967540945, 1021.8277894147026, 
341.1420789473841, 2128.857685152142, 1165.9518732056617, 447.5433178433401, 
1092.293849451345, 1193.2460136707482, 861.31969378756, 893.0153358034473], 
"eval_len": [303, 305, 140, 615, 329, 164, 321, 324, 265, 275]}

 99%|█████████▉| 989999/1000000 [10:22:50<04:47, 34.76it/s]global step 990000, trans_decision ep_re 1167.0194244854404

{"global_step": 990000, "eval_re": [479.50040268152196, 28.243763661571666, 
1428.9782357469128, 28.560449201335878, 3396.5943437475426, 754.95977102255, 
616.3046660448085, 2397.7637652557114, 2117.232222748724, 422.0566247437264], 
"eval_len": [172, 31, 447, 31, 1000, 259, 210, 699, 641, 158]}

100%|█████████▉| 999999/1000000 [10:28:55<00:00, 34.62it/s]global step 1000000, trans_decision ep_re 646.260514022499

{"global_step": 1000000, "eval_re": [642.302335931984, 120.98263308256335, 
948.0803303069176, 1034.0026735442646, 884.7462945247299, 671.4297498711205, 
822.0375007646454, 134.83801262973984, 539.6891865639973, 664.4964230050282], 
"eval_len": [220, 77, 317, 309, 290, 224, 268, 80, 193, 223]}

100%|██████████| 1000000/1000000 [10:29:10<00:00, 26.49it/s]
