
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.15
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:40<8:46:56, 31.31it/s]global step 10000, trans_decision ep_re 190.91700233552822

{"global_step": 10000, "eval_re": [335.39855906522166, 144.15651772384496, 
186.3651749106669, 128.98460784402093, 286.18053460204356, 79.99010732742519, 
43.66029179519963, 231.14644333309727, 86.39267171632135, 386.8951150374409], 
"eval_len": [1000, 1000, 1000, 612, 1000, 1000, 263, 1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [11:00<8:36:05, 31.65it/s]global step 20000, trans_decision ep_re 214.42031180402347

{"global_step": 20000, "eval_re": [406.89446491238806, 43.67036125155595, 
142.25823602437316, 136.2754435915494, 201.0877333076197, 81.53183146760728, 
412.67887815566536, 505.0165149567067, 213.171842437434, 1.6178119353348215], 
"eval_len": [1000, 58, 283, 241, 498, 190, 1000, 1000, 693, 25]}

  3%|▎         | 29997/1000000 [18:10<8:35:28, 31.36it/s]global step 30000, trans_decision ep_re 180.39801084301695

{"global_step": 30000, "eval_re": [29.06885426644721, 28.306355257582286, 
384.98752499671065, 34.39303369429104, 346.3265844725716, 55.55704936733304, 
335.1108983692085, 524.0459186923749, 28.547893461667098, 37.63599585198322], 
"eval_len": [123, 48, 1000, 98, 1000, 200, 665, 1000, 69, 53]}

  4%|▍         | 39997/1000000 [25:20<8:34:12, 31.12it/s]global step 40000, trans_decision ep_re 246.59053866433516

{"global_step": 40000, "eval_re": [268.3347861505349, 46.08040311377496, 
513.240259699348, 519.7187929107006, 198.79835216182406, 466.6848621508085, 
-2.088756199116718, 369.981540458312, 44.316437838218484, 40.838708358946384], 
"eval_len": [1000, 221, 1000, 1000, 406, 1000, 28, 1000, 56, 56]}

  5%|▍         | 49997/1000000 [32:30<8:24:35, 31.38it/s]global step 50000, trans_decision ep_re 349.4881314648783

{"global_step": 50000, "eval_re": [107.8109200748447, 437.81933956403293, 
342.36202017843743, 190.39968880625813, 50.85270272317832, 534.1891177826088, 
540.0996303126119, 471.01927387089955, 492.19598672454816, 328.1326346113629], 
"eval_len": [251, 1000, 564, 373, 107, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [39:40<8:20:52, 31.28it/s]global step 60000, trans_decision ep_re 425.9088864220066

{"global_step": 60000, "eval_re": [50.40013695855259, 520.0206096318894, 
673.240604179513, 347.55494956691734, 200.92044973354683, 473.1585184790837, 
303.9484010348367, 460.03601134308417, 666.6644902006375, 563.1446930920049], 
"eval_len": [41, 992, 1000, 1000, 407, 1000, 460, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [47:00<8:10:33, 31.60it/s]global step 70000, trans_decision ep_re 318.18593852254907

{"global_step": 70000, "eval_re": [672.912333903286, 146.89527752709418, 
614.8708138139114, 71.72903697907668, 30.389668162113686, 60.395941772408385, 
84.33057404463155, 108.9896342512014, 754.3547652111694, 636.9913395605979], 
"eval_len": [1000, 313, 1000, 104, 62, 86, 125, 125, 1000, 1000]}

  8%|▊         | 79997/1000000 [54:00<8:07:15, 31.47it/s]global step 80000, trans_decision ep_re 170.22157783203176

{"global_step": 80000, "eval_re": [126.99142353593426, 107.64919315955164, 
37.975783852090316, 23.984986437252108, 120.61617308231669, 166.13457464890024, 
214.10649549880634, 230.65514215453635, 30.058455883519162, 644.0435500674104], 
"eval_len": [227, 137, 63, 46, 144, 200, 247, 328, 44, 1000]}

  9%|▉         | 89997/1000000 [1:00:50<7:48:42, 32.36it/s]global step 90000, trans_decision ep_re 451.83552617930275

{"global_step": 90000, "eval_re": [256.97944641385124, 378.0473143500652, 
684.3728321326495, 764.7661795976152, 128.6613144935383, 538.6644850623195, 
635.2163615209295, 149.16389724785748, 703.2750738392045, 279.2083571349971], 
"eval_len": [347, 446, 1000, 953, 181, 1000, 677, 209, 920, 310]}

 10%|▉         | 99997/1000000 [1:08:00<7:46:41, 32.14it/s]global step 100000, trans_decision ep_re 376.5263765414712

{"global_step": 100000, "eval_re": [526.6934041857046, 893.5538823737616, 
533.2736851068196, 597.3746812367741, 509.34189487919707, 111.50201551431338, 
81.46968659297413, 77.47789630402242, 190.04097424292212, 244.53564497822353], 
"eval_len": [701, 1000, 1000, 756, 637, 102, 129, 80, 216, 289]}

 11%|█         | 109997/1000000 [1:14:50<7:37:05, 32.45it/s]global step 110000, trans_decision ep_re 314.89962599215306

{"global_step": 110000, "eval_re": [511.71336542248986, 604.1373670777486, 
5.824713593605306, 87.52688428033137, 208.2009047769116, 492.7588564092015, 
714.244804468713, 229.45202140942635, 168.96147625304627, 126.17586623005639], 
"eval_len": [1000, 828, 35, 82, 305, 1000, 994, 274, 207, 102]}

 12%|█▏        | 119997/1000000 [1:21:50<7:34:50, 32.25it/s]global step 120000, trans_decision ep_re 304.95353767310206

{"global_step": 120000, "eval_re": [117.28379968919026, 90.85927754347146, 
157.68721898084306, 182.63726684018286, 970.4029361992408, 626.3768784949551, 
489.19115459891407, 214.43022120324875, 192.0122459731218, 8.654377207852788], 
"eval_len": [121, 127, 196, 292, 1000, 642, 597, 203, 260, 38]}

 13%|█▎        | 129997/1000000 [1:28:40<7:34:06, 31.93it/s]global step 130000, trans_decision ep_re 459.7446503691214

{"global_step": 130000, "eval_re": [123.29843797833057, 298.96442090504377, 
369.5064698510954, 322.68726306847765, 703.1258640744162, 542.8484850788846, 
918.8996471027314, 739.076292219818, 307.7863128644069, 271.2533105480096], 
"eval_len": [152, 356, 415, 347, 932, 1000, 1000, 1000, 372, 234]}

 14%|█▍        | 139997/1000000 [1:35:40<7:27:00, 32.06it/s]global step 140000, trans_decision ep_re 418.7938767808517

{"global_step": 140000, "eval_re": [106.18357687623062, 431.14456828110576, 
779.5563847910546, 429.7270301075885, 263.6623488552703, 634.4011831780309, 
809.2967364434261, 101.86169601949635, 60.62964048878252, 571.4756027675313], 
"eval_len": [139, 456, 1000, 554, 352, 777, 1000, 125, 71, 629]}

 15%|█▍        | 149997/1000000 [1:42:40<7:23:36, 31.94it/s]global step 150000, trans_decision ep_re 617.0686466824861

{"global_step": 150000, "eval_re": [474.4635536111942, 626.3165385935401, 
841.3718832947636, 572.1339072076657, 843.0244160676257, 383.5408674389706, 
957.1171563831717, 445.70237181336483, 209.7291904758514, 817.2865819387139], 
"eval_len": [530, 1000, 1000, 717, 1000, 496, 1000, 664, 271, 1000]}

 16%|█▌        | 159997/1000000 [1:49:50<7:18:42, 31.91it/s]global step 160000, trans_decision ep_re 372.30845472474385

{"global_step": 160000, "eval_re": [964.9496831630946, 177.70570872640232, 
331.35351388325086, 36.123796866443264, 120.92585163137275, 119.4736807781818, 
213.19856305584534, 994.2615055582836, 641.8524311858431, 123.23981239872077], 
"eval_len": [1000, 224, 406, 50, 185, 131, 283, 1000, 715, 149]}

 17%|█▋        | 169997/1000000 [1:56:40<6:59:48, 32.95it/s]global step 170000, trans_decision ep_re 462.5372732895039

{"global_step": 170000, "eval_re": [465.2719845265201, 228.09945460708292, 
764.4573001354669, 426.2240871397086, 717.6329196515383, 522.9698223983756, 
63.40421088436003, 95.59032240342226, 722.7717774856087, 618.9508536629553], 
"eval_len": [634, 267, 1000, 466, 905, 617, 88, 121, 1000, 636]}

 18%|█▊        | 179997/1000000 [2:03:40<6:56:55, 32.78it/s]global step 180000, trans_decision ep_re 394.5878590309263

{"global_step": 180000, "eval_re": [146.5064565667541, 449.48967176689615, 
36.490924938680365, 425.02553631924894, 93.12241550310029, 23.29244637617985, 
884.506809007749, 622.0788952042761, 584.2736750035763, 681.0917596228022], 
"eval_len": [177, 510, 34, 450, 101, 31, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:10:30<6:52:41, 32.71it/s]global step 190000, trans_decision ep_re 520.1477793262534

{"global_step": 190000, "eval_re": [468.66250264883945, 949.8144927726129, 
528.2016561909336, 566.7430681710649, 28.964009413509842, 263.0237666113626, 
829.2255050726224, 194.91759546466542, 410.4239010965856, 961.5012958203375], 
"eval_len": [483, 1000, 597, 1000, 30, 310, 724, 212, 371, 1000]}

 20%|█▉        | 199997/1000000 [2:17:30<6:46:43, 32.78it/s]global step 200000, trans_decision ep_re 673.9886686314185

{"global_step": 200000, "eval_re": [213.48734342118217, 740.7093100286423, 
394.3115762140089, 605.2230398271965, 554.4058807688065, 1071.5839290937172, 
655.179867057762, 960.4274252955689, 540.9953364223162, 1003.5629781849838], 
"eval_len": [182, 704, 445, 1000, 1000, 1000, 1000, 914, 1000, 943]}

 21%|██        | 209997/1000000 [2:24:30<6:46:23, 32.40it/s]global step 210000, trans_decision ep_re 659.2503510781221

{"global_step": 210000, "eval_re": [1033.7260410555123, 681.6733662854825, 
149.8632197075502, 656.9625556986443, 734.6921307646899, 1004.9977446935482, 
715.7963891877475, 65.1873990706223, 660.6699133149, 888.9347510025245], 
"eval_len": [1000, 1000, 215, 647, 689, 1000, 673, 70, 737, 1000]}

 22%|██▏       | 219997/1000000 [2:31:30<6:44:27, 32.14it/s]global step 220000, trans_decision ep_re 518.037370615675

{"global_step": 220000, "eval_re": [809.2002586705332, 698.1609097911546, 
770.0656363425187, 43.922771631477744, 320.550574446301, 268.477739040212, 
697.014620197986, 703.4626764604857, 34.6601468952304, 834.858372680849], 
"eval_len": [1000, 1000, 1000, 49, 1000, 1000, 1000, 596, 66, 1000]}

 23%|██▎       | 229997/1000000 [2:38:30<6:29:35, 32.94it/s]global step 230000, trans_decision ep_re 646.6729217863025

{"global_step": 230000, "eval_re": [888.9743815558038, 794.2592586794046, 
799.1355681317954, 756.4968047630618, 222.98135138402978, 845.8216844077101, 
89.13135046855258, 292.26002325366215, 788.3643222713673, 989.3044729476377], 
"eval_len": [1000, 1000, 1000, 1000, 314, 1000, 119, 305, 1000, 920]}

 24%|██▍       | 239997/1000000 [2:45:30<6:25:18, 32.87it/s]global step 240000, trans_decision ep_re 653.4999906067347

{"global_step": 240000, "eval_re": [997.0856088551939, 554.6289515739484, 
1127.5288901304193, 629.1218217203647, 767.6006630586423, 1152.6237575722964, 
968.4535445582962, 151.46797001113555, 113.76072528496779, 72.72797330208338], 
"eval_len": [1000, 1000, 1000, 662, 726, 1000, 1000, 130, 127, 118]}

 25%|██▍       | 249997/1000000 [2:52:30<6:25:14, 32.45it/s]global step 250000, trans_decision ep_re 413.3781278449903

{"global_step": 250000, "eval_re": [637.7949259677408, 205.3292287404972, 
56.28480930210831, 512.0869632777564, 65.21520293647627, 710.3495065545654, 
624.5775655068622, 597.1529416091881, 467.21787699241105, 257.77225756229694], 
"eval_len": [1000, 226, 79, 467, 76, 1000, 1000, 1000, 692, 275]}

 26%|██▌       | 259997/1000000 [2:59:20<6:16:10, 32.79it/s]global step 260000, trans_decision ep_re 445.8932344991589

{"global_step": 260000, "eval_re": [709.1296506664789, 94.23241779280607, 
215.81762988564583, 781.1442648264083, 365.8766517507306, 413.6817164866596, 
556.1443231880576, 268.9526985001989, 31.409634807833015, 1022.543357086771], 
"eval_len": [640, 102, 228, 695, 474, 405, 1000, 269, 63, 1000]}

 27%|██▋       | 269997/1000000 [3:06:10<6:10:01, 32.88it/s]global step 270000, trans_decision ep_re 310.0533477307653

{"global_step": 270000, "eval_re": [42.34804174562851, 33.323400028659485, 
448.62958919403025, 62.21654436482753, 47.997026500428646, 193.4472688494269, 
343.0670696587246, 866.2413909891087, 727.5042479284048, 335.75889804841364], 
"eval_len": [44, 44, 545, 184, 49, 202, 367, 1000, 1000, 420]}

 28%|██▊       | 279997/1000000 [3:13:00<6:04:14, 32.95it/s]global step 280000, trans_decision ep_re 368.7036260205557

{"global_step": 280000, "eval_re": [591.3736721597041, 395.2928769677718, 
135.06205346925555, 89.23168042134876, 700.7909668149973, 1028.5599712148808, 
556.4297339736559, 31.37696862301256, 38.13732824790394, 120.78100831302618], 
"eval_len": [573, 344, 122, 72, 737, 1000, 607, 25, 62, 157]}

 29%|██▉       | 289997/1000000 [3:19:40<5:59:17, 32.94it/s]global step 290000, trans_decision ep_re 392.3683501580807

{"global_step": 290000, "eval_re": [637.4534748635896, 753.1740879036737, 
555.9427511001791, 199.01964182398402, 665.7382768984849, 41.712581734335785, 
147.14288966000794, 768.4960925603327, 34.88388333248216, 120.1198217037379], 
"eval_len": [1000, 703, 1000, 309, 591, 46, 163, 1000, 57, 148]}

 30%|██▉       | 299997/1000000 [3:26:30<5:55:21, 32.83it/s]global step 300000, trans_decision ep_re 272.640164886505

{"global_step": 300000, "eval_re": [213.6655791755694, 543.4505613645601, 
135.5212283984747, 1037.9023204228708, 219.84920300180704, 284.8411163896798, 
59.052912245865485, 4.7742146815549225, 173.85867940023502, 53.485833784431925],
"eval_len": [241, 573, 153, 1000, 213, 517, 85, 27, 217, 89]}

 31%|███       | 309997/1000000 [3:33:20<5:52:28, 32.63it/s]global step 310000, trans_decision ep_re 331.85917382008444

{"global_step": 310000, "eval_re": [127.26131260096601, 43.58599823945066, 
136.74799411609865, 139.78931812972357, 26.638561323680115, 239.3339292320811, 
104.2534288852571, 933.7427717282637, 343.62113070300285, 1223.6172932423206], 
"eval_len": [125, 60, 91, 113, 28, 217, 116, 1000, 293, 1000]}

 32%|███▏      | 319997/1000000 [3:40:10<5:56:34, 31.78it/s]global step 320000, trans_decision ep_re 542.1635796715385

{"global_step": 320000, "eval_re": [915.3695170223355, 214.86844724290913, 
364.86615706125457, -2.769681998393425, 882.8652092168347, 1040.5247303288288, 
8.34812825213631, 887.142215523754, 532.0799528356649, 578.3411212300606], 
"eval_len": [966, 291, 375, 27, 1000, 1000, 24, 1000, 513, 509]}

 33%|███▎      | 329997/1000000 [3:47:00<5:40:12, 32.82it/s]global step 330000, trans_decision ep_re 414.86956273899324

{"global_step": 330000, "eval_re": [936.9069751979193, 304.67943567395827, 
408.8138055006046, 668.737566165485, 107.6977100495515, 86.17146699264663, 
334.0306284334312, 756.6141199698335, 192.69664660058712, 352.3472728059153], 
"eval_len": [1000, 399, 381, 605, 128, 111, 292, 1000, 195, 331]}

 34%|███▍      | 339998/1000000 [3:53:50<5:38:57, 32.45it/s]global step 340000, trans_decision ep_re 416.40226244608266

{"global_step": 340000, "eval_re": [853.8308626385939, 64.23926212988249, 
246.97029368169936, 275.98524325461653, 140.2136607662564, 106.9370682138181, 
436.46273029409576, 676.6582398283996, 654.289762455107, 708.4355011983581], 
"eval_len": [859, 67, 227, 262, 137, 164, 438, 617, 722, 937]}

 35%|███▍      | 349997/1000000 [4:00:40<5:29:43, 32.86it/s]global step 350000, trans_decision ep_re 349.164973967219

{"global_step": 350000, "eval_re": [520.6602047965065, 299.450748195585, 
91.30770968500843, 333.5334415804015, -1.5104946793982503, 25.069449042614014, 
275.8474775156848, 75.68848890645147, 722.4922710423108, 1149.1104435870259], 
"eval_len": [492, 318, 95, 384, 45, 40, 245, 132, 1000, 1000]}

 36%|███▌      | 359997/1000000 [4:07:30<5:26:30, 32.67it/s]global step 360000, trans_decision ep_re 596.4883134777713

{"global_step": 360000, "eval_re": [238.90169867221934, 937.0872540956199, 
853.919190796729, 1162.3996893419712, 37.195656848037686, 376.7257442598609, 
584.5849815881063, 776.9789075834155, 456.2181195234993, 540.8718920682536], 
"eval_len": [298, 1000, 1000, 1000, 43, 350, 1000, 783, 378, 622]}

 37%|███▋      | 369997/1000000 [4:14:20<5:20:18, 32.78it/s]global step 370000, trans_decision ep_re 541.4863262851084

{"global_step": 370000, "eval_re": [1191.1405954345746, 873.1887378312572, 
417.3334734377914, 476.54046613432223, 98.65166987230472, 14.37665256871514, 
112.58859271120642, 549.2527931859556, 892.181875285182, 789.6084063897732], 
"eval_len": [1000, 883, 315, 478, 68, 31, 115, 1000, 830, 1000]}

 38%|███▊      | 379997/1000000 [4:21:10<5:16:24, 32.66it/s]global step 380000, trans_decision ep_re 634.0416458504285

{"global_step": 380000, "eval_re": [1084.9238010322563, 765.4409807189147, 
130.17367247151017, 776.1048427189702, 814.0974858872659, 461.8265436802268, 
194.27122647457236, 918.1443216449277, 403.0254558550333, 792.4081280206071], 
"eval_len": [969, 698, 119, 1000, 643, 453, 270, 816, 397, 811]}

 39%|███▉      | 389997/1000000 [4:28:10<5:08:27, 32.96it/s]global step 390000, trans_decision ep_re 466.5107450320672

{"global_step": 390000, "eval_re": [352.4039938128884, 204.78792801314245, 
23.063701675662248, 1120.9452924771122, 509.70878303448677, 25.48441683616664, 
594.6889299662643, 939.411185855776, 343.6117653297289, 551.0014533194437], 
"eval_len": [324, 231, 45, 1000, 1000, 60, 595, 961, 1000, 537]}

 40%|███▉      | 399997/1000000 [4:35:00<5:06:52, 32.59it/s]global step 400000, trans_decision ep_re 603.8341577664518

{"global_step": 400000, "eval_re": [902.0228888902161, 271.5470756281884, 
466.2402283152091, 598.9347894206923, 828.864972300641, 746.87522082392, 
946.6440104171448, 38.05928902402256, 459.7014901155663, 779.4516127289178], 
"eval_len": [768, 228, 355, 556, 1000, 647, 1000, 51, 359, 647]}

 41%|████      | 409997/1000000 [4:41:41<4:58:10, 32.98it/s]global step 410000, trans_decision ep_re 231.9984393488706

{"global_step": 410000, "eval_re": [525.6891948539466, 16.574579507108393, 
21.577818846887197, 23.34781610935369, 395.04596667541585, 161.6580427922949, 
148.78628998921414, 634.9254376818386, 109.82119081555602, 282.5580562170901], 
"eval_len": [475, 19, 31, 37, 325, 204, 97, 551, 99, 324]}

 42%|████▏     | 419996/1000000 [4:48:40<4:58:33, 32.38it/s]global step 420000, trans_decision ep_re 487.9757966326621

{"global_step": 420000, "eval_re": [681.3494511285293, 12.967123006479625, 
651.5549194009299, 575.6764555479223, 136.68199961090363, 62.732999828300095, 
137.56428017523544, 1080.0653004729877, 481.4906423751042, 1059.6747947802287], 
"eval_len": [552, 48, 559, 1000, 103, 90, 120, 1000, 1000, 1000]}

 43%|████▎     | 429997/1000000 [4:55:30<4:49:37, 32.80it/s]global step 430000, trans_decision ep_re 320.838118762631

{"global_step": 430000, "eval_re": [419.00169388769626, 454.50503652556705, 
55.81989675089093, 509.2073722939393, 468.7298869900678, 157.42283113173258, 
211.4393642422875, 259.9605365139451, 61.78262462562119, 610.5119446645617], 
"eval_len": [342, 441, 59, 1000, 1000, 151, 161, 198, 127, 634]}

 44%|████▍     | 439997/1000000 [5:02:20<4:45:38, 32.67it/s]global step 440000, trans_decision ep_re 326.1363168038878

{"global_step": 440000, "eval_re": [323.3825515076761, 1080.0079526798274, 
636.2051905365616, 173.25409239573813, 219.49297399999926, 13.92366675341915, 
498.951205266301, 134.6504109056426, 43.61722902968736, 137.87789496402547], 
"eval_len": [280, 1000, 1000, 165, 198, 29, 1000, 118, 45, 149]}

 45%|████▍     | 449997/1000000 [5:09:10<4:37:51, 32.99it/s]global step 450000, trans_decision ep_re 586.2943930868926

{"global_step": 450000, "eval_re": [448.94721522991415, 377.24905025280844, 
700.3811427169235, 64.20779206877022, 1228.0203958810073, 553.2344091946271, 
847.0486783193496, 761.4541161990234, 137.88514714542933, 744.5159838610738], 
"eval_len": [1000, 298, 1000, 54, 1000, 1000, 857, 619, 129, 635]}

 46%|████▌     | 459997/1000000 [5:16:10<4:33:44, 32.88it/s]global step 460000, trans_decision ep_re 243.86248131198735

{"global_step": 460000, "eval_re": [77.292121128985, 78.00759122896054, 
36.85653867228582, 384.0453596173809, 84.48641229240775, 680.7464396292085, 
255.66815873912088, 458.5813945038334, 74.54218487159658, 308.3986124360945], 
"eval_len": [83, 79, 66, 1000, 78, 1000, 185, 1000, 68, 325]}

 47%|████▋     | 469997/1000000 [5:22:50<4:29:27, 32.78it/s]global step 470000, trans_decision ep_re 203.28275238971037

{"global_step": 470000, "eval_re": [116.2700349654868, 94.01766549375867, 
74.35678414683714, 20.950853074901747, 778.2718277130475, 205.47116529449818, 
11.357246670269479, 122.2572757317196, 531.196851533563, 78.67781927302167], 
"eval_len": [86, 94, 45, 39, 571, 185, 19, 92, 380, 126]}

 48%|████▊     | 479997/1000000 [5:29:30<4:23:43, 32.86it/s]global step 480000, trans_decision ep_re 341.0273205292525

{"global_step": 480000, "eval_re": [108.15815420797564, 856.0156863699574, 
17.377918848301437, 392.7631611725594, 14.252373460502715, 519.8237821559251, 
524.6998516565332, 162.39847233431024, 48.01513971490866, 766.7686653715514], 
"eval_len": [112, 618, 50, 359, 36, 636, 470, 167, 79, 1000]}

 49%|████▉     | 489997/1000000 [5:36:20<4:18:13, 32.92it/s]global step 490000, trans_decision ep_re 352.5696880689871

{"global_step": 490000, "eval_re": [878.7557837008138, 138.02614378155755, 
543.8001380729479, 650.8558424546322, 2.7531414913227223, 24.936852704200923, 
328.23554411431724, 581.9840849334646, 60.6910035179731, 315.65834591864035], 
"eval_len": [830, 144, 545, 1000, 23, 29, 268, 1000, 72, 205]}

 50%|████▉     | 499997/1000000 [5:43:10<4:14:45, 32.71it/s]global step 500000, trans_decision ep_re 326.36476682276646

{"global_step": 500000, "eval_re": [208.98298588902708, 698.3749354332726, 
304.2195557916367, 20.030527420815712, 377.40021059466403, 187.58022147654827, 
444.9534341701725, 766.4864136451804, 241.99336546086417, 13.62601834548327], 
"eval_len": [212, 1000, 291, 27, 340, 1000, 470, 702, 194, 23]}

 51%|█████     | 509997/1000000 [5:50:00<4:09:03, 32.79it/s]global step 510000, trans_decision ep_re 350.54793342317936

{"global_step": 510000, "eval_re": [1030.038912754507, 5.424214993381041, 
661.8516400844289, 228.18147161392486, 117.87830679104653, 119.38216823319755, 
348.1670008059868, 124.63749105547389, 394.1393163573515, 475.77881154249485], 
"eval_len": [970, 26, 607, 212, 128, 98, 316, 186, 318, 369]}

 52%|█████▏    | 519997/1000000 [5:56:40<4:03:40, 32.83it/s]global step 520000, trans_decision ep_re 554.005016441891

{"global_step": 520000, "eval_re": [163.1758664006761, 1388.6536339011866, 
374.7763979254657, 862.619600069003, 679.1809380605307, 168.61382863574502, 
386.4318236905013, 957.7416605076509, 358.4948702251283, 200.3615450030227], 
"eval_len": [165, 1000, 287, 599, 598, 191, 385, 733, 333, 244]}

 53%|█████▎    | 529997/1000000 [6:03:30<3:58:26, 32.85it/s]global step 530000, trans_decision ep_re 301.39925360512126

{"global_step": 530000, "eval_re": [45.36138409238429, 433.3406835997481, 
203.47144224140513, 389.1209737366062, 273.73225639855343, 492.74805665075695, 
130.68300783621785, 301.3940622399531, 240.37535714694752, 503.7653121086403], 
"eval_len": [80, 445, 230, 400, 359, 397, 224, 313, 217, 1000]}

 54%|█████▍    | 539997/1000000 [6:10:20<3:54:15, 32.73it/s]global step 540000, trans_decision ep_re 321.5635224201337

{"global_step": 540000, "eval_re": [930.9184743772398, 135.8856032719097, 
81.1456846451201, 271.9822447922182, 162.91922312767284, 72.69644472101946, 
599.03161268752, 56.513371927995216, 42.93171251480108, 861.6108521358408], 
"eval_len": [744, 154, 99, 251, 105, 56, 1000, 68, 59, 771]}

 55%|█████▍    | 549997/1000000 [6:17:10<3:47:40, 32.94it/s]global step 550000, trans_decision ep_re 533.5552075047364

{"global_step": 550000, "eval_re": [176.2156087574652, 23.651668607053015, 
374.30614773979136, 1078.451817147223, 21.16850117177311, 837.2196603625603, 
891.3156599774461, 1054.727643908238, 660.113636557883, 218.38173081792922], 
"eval_len": [185, 23, 324, 1000, 39, 798, 864, 1000, 572, 155]}

 56%|█████▌    | 559997/1000000 [6:24:00<3:43:23, 32.83it/s]global step 560000, trans_decision ep_re 368.3804190149778

{"global_step": 560000, "eval_re": [443.517542615694, 254.25464161031687, 
307.7934576043203, 990.2801400848208, 121.15891785860188, 468.73554842708546, 
56.56530275976188, 184.009476003793, 815.1383659926329, 42.350797192751315], 
"eval_len": [525, 272, 297, 1000, 101, 1000, 76, 156, 1000, 49]}

 57%|█████▋    | 569997/1000000 [6:30:50<3:37:49, 32.90it/s]global step 570000, trans_decision ep_re 277.0380980145092

{"global_step": 570000, "eval_re": [672.0813367498846, 20.457306596189415, 
277.2221241502259, -809.0965727457192, 139.58065619123582, 626.574928068375, 
112.92233665637058, 309.0232835297585, 703.9828690294729, 717.6327119192989], 
"eval_len": [1000, 58, 258, 1000, 122, 1000, 83, 337, 1000, 601]}

 58%|█████▊    | 579997/1000000 [6:37:40<3:32:26, 32.95it/s]global step 580000, trans_decision ep_re 530.5793435294875

{"global_step": 580000, "eval_re": [341.86455376353194, 713.8557287429768, 
220.38305777841043, 126.26543264335497, 535.6635314633609, 652.8617656118553, 
389.1030915178338, 15.118784837257929, 1083.0365097188505, 1227.640979217442], 
"eval_len": [361, 571, 205, 130, 466, 677, 353, 29, 1000, 1000]}

 59%|█████▉    | 589997/1000000 [6:44:30<3:27:30, 32.93it/s]global step 590000, trans_decision ep_re 666.2625019776217

{"global_step": 590000, "eval_re": [1196.5041560850918, 24.748061831048485, 
720.4712437971901, 421.10331365486854, 686.2085638209358, 525.8513040665222, 
733.4042458809698, 760.912542826435, 417.8929591328867, 1175.5286286802673], 
"eval_len": [1000, 40, 1000, 326, 541, 1000, 1000, 685, 399, 1000]}

 60%|█████▉    | 599997/1000000 [6:51:30<3:26:31, 32.28it/s]global step 600000, trans_decision ep_re 544.0027998592898

{"global_step": 600000, "eval_re": [35.51419058056898, 775.9155832193217, 
120.42498446686336, 475.4304862936858, 1116.079133933697, 982.8464491730616, 
591.7691622018464, 1061.5890241757807, 243.4080550213941, 37.05092952667754], 
"eval_len": [54, 668, 137, 423, 1000, 891, 547, 1000, 251, 34]}

 61%|██████    | 609997/1000000 [6:58:20<3:19:44, 32.54it/s]global step 610000, trans_decision ep_re 347.0373224377953

{"global_step": 610000, "eval_re": [484.4000162204709, 163.99356534047183, 
314.0025895037004, 149.21321430265434, 107.34135197348563, 632.0884605215123, 
924.6168301610807, 511.8560571183659, 125.79765175340482, 57.063487482806046], 
"eval_len": [510, 165, 447, 141, 115, 709, 856, 563, 144, 45]}

 62%|██████▏   | 619997/1000000 [7:05:00<3:15:15, 32.44it/s]global step 620000, trans_decision ep_re 519.3420185854891

{"global_step": 620000, "eval_re": [27.734696654395133, 560.6698554585714, 
292.8278352289512, 878.0342284914365, 1040.2937460442465, 18.82881158660969, 
103.26668521535059, 1063.8829299210229, 1059.4364594940612, 148.4449377602472], 
"eval_len": [43, 1000, 312, 910, 1000, 23, 98, 1000, 1000, 172]}

 63%|██████▎   | 629997/1000000 [7:12:00<3:07:45, 32.85it/s]global step 630000, trans_decision ep_re 467.12764945714497

{"global_step": 630000, "eval_re": [158.71983872114936, 663.795094441479, 
562.927496977289, 386.69006132232204, 1021.5095765759081, 382.56028396396044, 
112.71204332907769, 211.51913827533915, 585.9700304766641, 584.8729304882606], 
"eval_len": [177, 1000, 1000, 281, 1000, 313, 138, 221, 491, 1000]}

 64%|██████▍   | 639997/1000000 [7:18:50<3:06:07, 32.24it/s]global step 640000, trans_decision ep_re 301.94413348844887

{"global_step": 640000, "eval_re": [284.39825080308276, 188.2306183472861, 
314.9057395021083, 173.8077158353426, 404.6558196936124, 196.21250329566178, 
447.1893190645073, 93.19494405991978, 560.5298365958827, 356.3165876870847], 
"eval_len": [250, 148, 270, 185, 395, 180, 473, 75, 1000, 281]}

 65%|██████▍   | 649997/1000000 [7:25:40<2:58:02, 32.76it/s]global step 650000, trans_decision ep_re 552.6543976966313

{"global_step": 650000, "eval_re": [204.46538112765407, 194.0240259124353, 
154.882057865825, 628.0205056908648, 1098.8638855156428, 299.62074334688504, 
1206.613194589722, 1136.0906007376418, 301.89158692443266, 302.0719952552096], 
"eval_len": [197, 154, 125, 591, 1000, 201, 1000, 1000, 348, 313]}

 66%|██████▌   | 659997/1000000 [7:32:30<2:53:38, 32.64it/s]global step 660000, trans_decision ep_re 521.119143580465

{"global_step": 660000, "eval_re": [707.8931742038139, 1007.4168256884426, 
645.9951541564703, 320.77272621812733, 19.407409808019676, 1120.8357153850811, 
418.01065759682837, 299.52605797802056, 517.1358785005858, 154.19783626925994], 
"eval_len": [574, 1000, 503, 303, 34, 1000, 431, 374, 523, 151]}

 67%|██████▋   | 669997/1000000 [7:39:20<2:46:51, 32.96it/s]global step 670000, trans_decision ep_re 559.9685530082721

{"global_step": 670000, "eval_re": [539.3926934211771, 378.9883185250576, 
372.19661618221744, 625.4145350761503, 307.9803613680432, 1008.252126036163, 
1079.2824770645336, 794.7835048214044, 202.89302057997517, 290.5018770079988], 
"eval_len": [517, 398, 408, 571, 255, 1000, 1000, 800, 1000, 220]}

 68%|██████▊   | 679997/1000000 [7:46:20<2:44:03, 32.51it/s]global step 680000, trans_decision ep_re 425.98941032828577

{"global_step": 680000, "eval_re": [70.88773532256937, 1037.973293000491, 
21.079210138247095, -432.03978905134124, 226.56639352019317, 1104.8609283516719,
235.1553922572043, 877.0825206374097, 1119.6215781002513, -1.2931589938381989], 
"eval_len": [101, 1000, 26, 1000, 1000, 1000, 233, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [7:53:10<2:37:21, 32.83it/s]global step 690000, trans_decision ep_re 577.9851757669903

{"global_step": 690000, "eval_re": [46.071759404281195, 1147.2248017120135, 
1165.7778119635152, 1051.9865996057758, 247.8690515355046, 938.6938946688813, 
135.88355429970997, 153.37286893703921, 629.7564975095316, 263.2149180336507], 
"eval_len": [41, 977, 1000, 1000, 326, 888, 109, 115, 500, 198]}

 70%|██████▉   | 699997/1000000 [8:00:10<2:34:44, 32.31it/s]global step 700000, trans_decision ep_re 363.58382305800075

{"global_step": 700000, "eval_re": [413.1467556563536, 165.19261273478634, 
902.8374166502963, 269.7656036250291, 98.91800137243942, 1155.3469153181811, 
101.48522530410393, 276.1152668326201, 116.7754064472845, 136.2550266389124], 
"eval_len": [354, 186, 687, 337, 134, 1000, 160, 194, 115, 132]}

 71%|███████   | 709997/1000000 [8:07:00<2:26:38, 32.96it/s]global step 710000, trans_decision ep_re 510.8821786174649

{"global_step": 710000, "eval_re": [534.8492085603339, 511.8395352484328, 
224.5470023720478, 306.20328699299245, 844.1450512697174, 1052.3311320149726, 
864.1433981915764, 13.354724089215164, 282.19463400577513, 475.2138134295852], 
"eval_len": [588, 577, 238, 295, 1000, 1000, 1000, 55, 322, 1000]}

 72%|███████▏  | 719997/1000000 [8:13:50<2:21:54, 32.88it/s]global step 720000, trans_decision ep_re 532.4816753107895

{"global_step": 720000, "eval_re": [306.5643269919726, 1245.2924070738216, 
1155.6724425434709, 15.190065917976913, 591.1580526047456, 222.40564801268013, 
80.01491552084435, 10.470116350831338, 467.4463881067183, 1230.6023899848333], 
"eval_len": [1000, 1000, 1000, 41, 1000, 211, 97, 31, 382, 993]}

 73%|███████▎  | 729997/1000000 [8:20:40<2:17:12, 32.80it/s]global step 730000, trans_decision ep_re 165.38577893086105

{"global_step": 730000, "eval_re": [652.3617780944847, 9.063836656406435, 
-25.676074823060723, 108.71039609459048, 124.3659209945569, 617.464638946199, 
-9.3739376108877, -363.53017778253883, 307.47386329205716, 232.99754544680326], 
"eval_len": [549, 22, 504, 155, 86, 450, 853, 1000, 220, 206]}

 74%|███████▍  | 739997/1000000 [8:27:30<2:11:43, 32.90it/s]global step 740000, trans_decision ep_re 472.6758244122816

{"global_step": 740000, "eval_re": [178.50658322275135, 94.11135212991414, 
696.4127206154635, 1005.8489310881411, 113.79380057631279, 539.1082195872018, 
15.563068092805805, 1275.4068793404042, 357.80251336069557, 450.20417610912585],
"eval_len": [175, 80, 586, 769, 132, 426, 23, 1000, 340, 384]}

 75%|███████▍  | 749997/1000000 [8:34:20<2:08:36, 32.40it/s]global step 750000, trans_decision ep_re 345.0156101779497

{"global_step": 750000, "eval_re": [333.59045221147846, 6.702244590417905, 
192.3389772156171, 112.08379769612527, 229.38872130771827, 446.10664335234526, 
598.3677497522658, 23.101596518386607, 1072.1577216356447, 436.31819749949784], 
"eval_len": [385, 22, 147, 120, 211, 388, 1000, 41, 969, 425]}

 76%|███████▌  | 759997/1000000 [8:41:10<2:01:17, 32.98it/s]global step 760000, trans_decision ep_re 298.39202970534814

{"global_step": 760000, "eval_re": [1044.2070346738833, 1249.0525776948086, 
236.33911869176202, -470.72740110145014, 409.49787522457535, 11.674400945043791,
-286.0652083462412, 585.7609790629093, 35.09187276857975, 169.08904743961102], 
"eval_len": [876, 954, 357, 1000, 374, 22, 1000, 1000, 41, 126]}

 77%|███████▋  | 769997/1000000 [8:48:00<1:56:31, 32.90it/s]global step 770000, trans_decision ep_re 407.3267551491773

{"global_step": 770000, "eval_re": [126.0776103153767, 369.5078259686637, 
239.5925576711583, 651.6182844490846, 166.2266900211199, 42.99791457193182, 
1209.9032954907425, 643.0917311885071, 180.65679437125394, 443.5948474439347], 
"eval_len": [122, 338, 169, 673, 158, 82, 1000, 1000, 224, 500]}

 78%|███████▊  | 779997/1000000 [8:54:50<1:51:06, 33.00it/s]global step 780000, trans_decision ep_re 494.07463162708353

{"global_step": 780000, "eval_re": [795.9034386427207, 309.4129701880835, 
587.1390053259488, 507.305769576676, 144.2514028036778, 878.7317844667431, 
438.9672841928382, 250.39737186109326, 359.061485114112, 669.5758040989426], 
"eval_len": [1000, 236, 422, 431, 163, 787, 420, 247, 215, 584]}

 79%|███████▉  | 789997/1000000 [9:01:30<1:45:44, 33.10it/s]global step 790000, trans_decision ep_re 687.8575883559433

{"global_step": 790000, "eval_re": [24.8737862163138, 327.77301227568523, 
637.0143393861434, 1132.646941409713, 700.6332557786378, 466.8306762652879, 
977.1310000240978, 278.5342310327174, 1045.6408924389184, 1287.4977487319197], 
"eval_len": [39, 214, 543, 924, 577, 422, 720, 272, 887, 1000]}

 80%|███████▉  | 799997/1000000 [9:08:30<1:41:06, 32.97it/s]global step 800000, trans_decision ep_re 411.742919913645

{"global_step": 800000, "eval_re": [139.75162024596483, 814.441514410989, 
131.97144262750965, 538.7642965846231, 265.5807894017052, 439.3450558655528, 
445.67201805354324, 1130.5283554486373, 187.2358795770612, 24.13822692086375], 
"eval_len": [147, 1000, 117, 553, 361, 529, 396, 1000, 214, 28]}

 81%|████████  | 809997/1000000 [9:15:10<1:36:09, 32.93it/s]global step 810000, trans_decision ep_re 411.1609127085982

{"global_step": 810000, "eval_re": [290.98667652316067, 307.2425843985213, 
535.5431196674925, 746.3837486026275, 19.68163359054717, 68.76186200963288, 
519.4528187231444, 1106.7984934256633, 467.7295036368968, 49.028686508294605], 
"eval_len": [276, 290, 1000, 585, 27, 55, 1000, 1000, 473, 61]}

 82%|████████▏ | 819997/1000000 [9:22:00<1:30:57, 32.99it/s]global step 820000, trans_decision ep_re 509.20631618486567

{"global_step": 820000, "eval_re": [250.7252526129664, 238.23443253624322, 
690.8396109423536, 218.743981189059, 61.295050085570736, 922.8414894830684, 
651.4712133507005, 125.10043314940803, 936.6453495988113, 996.1663489004754], 
"eval_len": [241, 272, 578, 257, 64, 1000, 689, 166, 829, 985]}

 83%|████████▎ | 829997/1000000 [9:28:50<1:25:59, 32.95it/s]global step 830000, trans_decision ep_re 691.5673269738465

{"global_step": 830000, "eval_re": [431.18565883292524, 516.3050220944955, 
941.3996672644555, 1058.309463881587, 477.3284333322378, 422.42747294361817, 
1010.8832036838849, 688.0851130132787, 947.6891266704984, 422.0601080214837], 
"eval_len": [1000, 514, 1000, 1000, 508, 572, 1000, 1000, 1000, 451]}

 84%|████████▍ | 839997/1000000 [9:35:50<1:20:48, 33.00it/s]global step 840000, trans_decision ep_re 645.2206682193103

{"global_step": 840000, "eval_re": [989.5512027851112, 383.48291417537064, 
12.756295427062906, 879.6517503071778, 1175.1324884514163, 496.69936008982586, 
402.05381798607914, 285.991908875025, 847.0458699879252, 979.8410741081092], 
"eval_len": [1000, 1000, 35, 726, 1000, 1000, 319, 315, 724, 1000]}

 85%|████████▍ | 849997/1000000 [9:42:50<1:16:05, 32.85it/s]global step 850000, trans_decision ep_re 325.9657201617856

{"global_step": 850000, "eval_re": [135.07660668315648, 3.8705872597890254, 
502.74290767489964, 37.80655075206059, 59.12766381127631, 168.31837525876836, 
490.4672533422088, 413.8843721411934, 1135.5898827874278, 312.7730019070757], 
"eval_len": [151, 34, 1000, 77, 87, 121, 1000, 1000, 1000, 301]}

 86%|████████▌ | 859997/1000000 [9:49:40<1:11:07, 32.80it/s]global step 860000, trans_decision ep_re 232.01702394201442

{"global_step": 860000, "eval_re": [533.0488015242651, 362.4244909389022, 
27.272740866458008, 419.6803323205362, 105.00007557913452, 164.94837405205894, 
21.407956068378184, 51.82082544620764, 514.201255632332, 120.36538699187167], 
"eval_len": [475, 284, 31, 1000, 75, 1000, 35, 46, 428, 113]}

 87%|████████▋ | 869997/1000000 [9:56:20<1:05:34, 33.04it/s]global step 870000, trans_decision ep_re 518.4484205183271

{"global_step": 870000, "eval_re": [221.18514898674215, 261.98553779897304, 
625.6929170456141, 230.58536466872795, 419.2286060367296, 823.532868532577, 
1027.8740738987963, 742.621478089409, 322.0004103246517, 509.77779980105004], 
"eval_len": [234, 197, 535, 213, 443, 648, 1000, 1000, 326, 500]}

 88%|████████▊ | 879997/1000000 [10:03:10<1:00:54, 32.84it/s]global step 880000, trans_decision ep_re 709.8220165725548

{"global_step": 880000, "eval_re": [825.7021358069688, 665.907300574199, 
17.822836933081255, 918.4093016215804, 563.1750115451041, 860.4998796129643, 
784.5840714682828, 938.2545149919786, 718.6553584337769, 805.2097547376115], 
"eval_len": [1000, 697, 28, 1000, 1000, 1000, 1000, 1000, 1000, 985]}

 89%|████████▉ | 889997/1000000 [10:10:10<55:42, 32.91it/s]global step 890000, trans_decision ep_re 293.2378501901791

{"global_step": 890000, "eval_re": [85.86752383014412, 143.78640030719686, 
233.69436586686712, 542.6172679058276, 294.2771779498736, 587.0979108589084, 
30.046147687978912, 665.1372256569156, 304.4265998304008, 45.427882007678164], 
"eval_len": [67, 168, 169, 454, 252, 583, 40, 1000, 280, 60]}

 90%|████████▉ | 899997/1000000 [10:16:50<50:28, 33.02it/s]global step 900000, trans_decision ep_re 571.3467962790173

{"global_step": 900000, "eval_re": [486.5258218073591, 821.236078330731, 
728.043285254059, 695.5596280161246, 943.3682027013482, 468.8325028803487, 
295.68913151356776, 627.9864329409576, 548.114901479008, 98.11197786666902], 
"eval_len": [512, 1000, 1000, 1000, 1000, 432, 386, 1000, 641, 97]}

 91%|█████████ | 909998/1000000 [10:23:50<45:34, 32.91it/s]global step 910000, trans_decision ep_re 534.2547768541892

{"global_step": 910000, "eval_re": [901.6320624181201, 568.9999243465862, 
331.40409390556016, 582.5468734314277, 1038.8060826689687, 633.4321554323973, 
188.82331250051072, 197.86990704418594, 744.3818123034184, 154.65154449071943], 
"eval_len": [864, 1000, 320, 541, 1000, 1000, 184, 129, 560, 139]}

 92%|█████████▏| 919997/1000000 [10:30:40<40:31, 32.90it/s]global step 920000, trans_decision ep_re 437.7919925398005

{"global_step": 920000, "eval_re": [557.8574687386671, 962.8567129080133, 
476.0166194276792, 341.16504519238595, 595.2211233282206, 102.1852990585982, 
660.3326914289265, 190.86686878215227, 309.7340440273119, 181.68405250604943], 
"eval_len": [621, 1000, 437, 355, 685, 66, 652, 222, 277, 159]}

 93%|█████████▎| 929997/1000000 [10:37:30<35:20, 33.01it/s]global step 930000, trans_decision ep_re 460.7706758918213

{"global_step": 930000, "eval_re": [254.78717977374296, 354.6161774332997, 
629.3607794140611, -343.40264801842164, 765.549708230063, 129.30856534253053, 
1253.8350156693996, 458.99029166977454, 260.526463413599, 844.1352259901655], 
"eval_len": [295, 305, 508, 1000, 729, 114, 1000, 519, 259, 1000]}

 94%|█████████▍| 939997/1000000 [10:44:20<30:23, 32.90it/s]global step 940000, trans_decision ep_re 483.6570726641561

{"global_step": 940000, "eval_re": [761.717715003522, 140.18885817863801, 
171.96819991283903, 743.0604480011775, 760.0235060142396, 34.98164704742612, 
905.4376240871926, 396.2170860375513, 142.48350550089253, 780.4921368580826], 
"eval_len": [1000, 165, 180, 1000, 1000, 63, 1000, 543, 183, 1000]}

 95%|█████████▍| 949997/1000000 [10:51:10<25:17, 32.95it/s]global step 950000, trans_decision ep_re 560.6329089822508

{"global_step": 950000, "eval_re": [643.6829350174687, 126.47144717213645, 
903.8665188600881, 805.9443459309493, 1100.369645701356, 369.970170799626, 
462.5055585130713, 27.406300606568145, 175.30190714586072, 990.8102600753814], 
"eval_len": [694, 139, 1000, 1000, 1000, 346, 491, 39, 182, 1000]}

 96%|█████████▌| 959997/1000000 [10:58:10<20:18, 32.84it/s]global step 960000, trans_decision ep_re 363.70793400299533

{"global_step": 960000, "eval_re": [694.6055265309196, 1048.081000763948, 
1019.2337604077251, -340.8218930092206, 360.9064006153407, 310.5981753549772, 
56.75775419685595, 22.423123464020406, 441.9055619080372, 23.389929797349797], 
"eval_len": [611, 1000, 884, 1000, 338, 218, 57, 24, 346, 34]}

 97%|█████████▋| 969997/1000000 [11:04:50<15:13, 32.86it/s]global step 970000, trans_decision ep_re 593.2075560180124

{"global_step": 970000, "eval_re": [1228.1137045859884, 1116.1399770967519, 
299.5693305474562, 82.40144689119415, 802.9861511755851, -30.172185084265326, 
347.1802491889872, 302.24522067359226, 996.4978182938905, 787.1138468109435], 
"eval_len": [1000, 1000, 292, 90, 669, 47, 283, 295, 736, 1000]}

 98%|█████████▊| 979997/1000000 [11:11:31<10:06, 32.97it/s]global step 980000, trans_decision ep_re 294.4421169423862

{"global_step": 980000, "eval_re": [153.40936044414403, 124.52196416865011, 
286.19052816338984, 459.2386106218385, 256.419384743089, 582.8811735639384, 
214.20865372920068, -1.511407216078051, 233.58753367416602, 635.4753675315233], 
"eval_len": [126, 143, 260, 495, 189, 1000, 260, 24, 287, 472]}

 99%|█████████▉| 989996/1000000 [11:18:30<05:03, 32.91it/s]global step 990000, trans_decision ep_re 505.54043593699555

{"global_step": 990000, "eval_re": [152.9910552291885, 176.72481733225152, 
969.0917461937994, 42.58391680325255, 16.44830258153754, 755.9839401632983, 
855.7562598583187, 1388.1039442729814, 345.88870626223627, 351.831670673091], 
"eval_len": [109, 125, 807, 37, 35, 654, 635, 1000, 300, 320]}

100%|█████████▉| 999997/1000000 [11:25:20<00:00, 32.96it/s]global step 1000000, trans_decision ep_re 596.9119895170234

{"global_step": 1000000, "eval_re": [690.3289636233403, 362.49076589997827, 
1044.2765057930676, 976.0942215960183, 364.5918443128382, 605.53231084546, 
580.6163306087161, 526.2723022694296, 712.6296298280939, 106.28702039329055], 
"eval_len": [825, 341, 1000, 1000, 330, 500, 643, 1000, 1000, 85]}

100%|██████████| 1000000/1000000 [11:25:32<00:00, 24.31it/s]
