
{
    'exp_name': 'VDPO',
    'env': 'Walker2d-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9996/1000000 [03:10<6:20:25, 43.37it/s]global step 10000, trans_decision ep_re 241.16715377326167

{"global_step": 10000, "eval_re": [246.8555617379026, 258.23613695616757, 
224.41111921823855, 271.5787435441889, 349.1129042776968, 234.91898672059736, 
123.20777410338059, 216.10240059763075, 284.8595499911638, 202.38836058565013], 
"eval_len": [129, 147, 121, 146, 206, 139, 93, 113, 142, 103]}

  2%|▏         | 19999/1000000 [08:52<8:13:37, 33.09it/s]global step 20000, trans_decision ep_re 130.1232282451863

{"global_step": 20000, "eval_re": [245.59028881835744, 44.54549189239406, 
49.8180221535043, 238.19816309990966, 274.7106756757271, 42.74585979704643, 
46.29311507924193, 42.09284913521708, 277.37638134063906, 39.86143545982612], 
"eval_len": [145, 57, 48, 109, 161, 44, 47, 52, 178, 46]}

  3%|▎         | 29998/1000000 [14:42<9:05:09, 29.66it/s]global step 30000, trans_decision ep_re 204.62925646079856

{"global_step": 30000, "eval_re": [248.81172167271328, 213.98452339566603, 
128.66707938488463, 115.8746586555572, 221.02520375289456, 229.0106920777531, 
168.79104530500166, 278.88486840503407, 238.14421901181564, 203.0985529466654], 
"eval_len": [129, 114, 86, 88, 134, 122, 99, 155, 134, 121]}

  4%|▍         | 39999/1000000 [20:47<7:37:57, 34.94it/s]global step 40000, trans_decision ep_re 137.45337213841822

{"global_step": 40000, "eval_re": [92.1888184438672, 59.272058997292795, 
320.87725533197334, 210.08946884332528, 85.09208353444488, 19.722940736302995, 
81.37739623262227, 187.46170506087523, 233.6877684151514, 84.76422578832702], 
"eval_len": [83, 51, 168, 138, 85, 29, 88, 123, 129, 114]}

  5%|▍         | 49998/1000000 [26:28<6:18:04, 41.88it/s]global step 50000, trans_decision ep_re 311.35110645929257

{"global_step": 50000, "eval_re": [269.9476989949266, 231.56152620912252, 
372.9572145489417, 443.7183515650266, 325.1543046964503, 523.4780714020881, 
318.5369488130474, 469.5387823688129, 66.82855023924498, 91.78961575526506], 
"eval_len": [149, 145, 181, 213, 161, 234, 187, 265, 53, 97]}

  6%|▌         | 59999/1000000 [32:23<5:55:13, 44.10it/s]global step 60000, trans_decision ep_re 213.4917390105391

{"global_step": 60000, "eval_re": [37.387800215976846, 43.41140622263809, 
80.5793872367569, 37.4267290758293, 485.83040926625847, 36.41216322225445, 
41.97236539768451, 561.4648654628503, 425.76746368960374, 384.66480031553834], 
"eval_len": [42, 44, 78, 40, 233, 42, 43, 226, 271, 231]}

  7%|▋         | 69999/1000000 [38:06<6:09:52, 41.91it/s]global step 70000, trans_decision ep_re 161.01454494551677

{"global_step": 70000, "eval_re": [257.7183397307771, 70.69925218985469, 
306.29090257410206, 81.16075929095192, 358.0573563324067, 105.23092098365503, 
82.51046486971431, 47.75736618888383, 104.49042256065478, 196.22966473416736], 
"eval_len": [136, 84, 124, 76, 159, 93, 77, 72, 88, 99]}

  8%|▊         | 79999/1000000 [43:53<7:45:36, 32.93it/s]global step 80000, trans_decision ep_re 287.26113058073383

{"global_step": 80000, "eval_re": [253.39305604263822, 87.68100360208167, 
42.49549987547921, 1078.6616803687834, 101.50169014464782, 511.50183152475375, 
296.5507030002569, 183.0685245893261, 155.9682847485002, 161.7890319108712], 
"eval_len": [158, 84, 41, 421, 109, 228, 141, 102, 150, 113]}

  9%|▉         | 89999/1000000 [49:45<7:22:08, 34.30it/s]global step 90000, trans_decision ep_re 262.5335271238744

{"global_step": 90000, "eval_re": [183.57914413571817, 191.51181434417202, 
147.23038140754755, 547.6120194442387, 517.9477832095399, 125.07038688116724, 
150.79522210531962, 152.51386601920166, 238.1285693837936, 370.9460843080456], 
"eval_len": [104, 106, 92, 223, 196, 92, 113, 112, 154, 197]}

 10%|▉         | 99997/1000000 [55:42<5:40:03, 44.11it/s]global step 100000, trans_decision ep_re 377.3639467282493

{"global_step": 100000, "eval_re": [209.0463709437688, 350.2610143327894, 
913.1942533832176, 220.0935324833686, 489.7812255693358, 204.14771080947597, 
569.6315183776885, 153.43284414787794, 451.8450203314741, 212.2059769034964], 
"eval_len": [143, 171, 334, 135, 253, 136, 262, 110, 177, 142]}

 11%|█         | 109996/1000000 [1:01:17<7:18:42, 33.81it/s]global step 110000, trans_decision ep_re 205.32519744025353

{"global_step": 110000, "eval_re": [351.8420516377325, 116.09333369232542, 
523.5770649570487, 116.23438941349318, 127.19723219639162, 191.1931978697975, 
130.86196491796002, 140.89499515573226, 137.2012924364787, 218.15645212557536], 
"eval_len": [160, 87, 262, 97, 89, 131, 93, 127, 113, 116]}

 12%|█▏        | 119997/1000000 [1:07:32<6:09:00, 39.75it/s]global step 120000, trans_decision ep_re 507.4062567464195

{"global_step": 120000, "eval_re": [161.53352808480005, 599.3974802771058, 
192.43810382426744, 401.84348143489484, 214.39603144011485, 559.456127591692, 
554.1393971041562, 1139.772655335327, 324.4529232191182, 926.6328391527186], 
"eval_len": [104, 248, 119, 189, 122, 246, 213, 465, 162, 355]}

 13%|█▎        | 129998/1000000 [1:13:22<5:43:15, 42.24it/s]global step 130000, trans_decision ep_re 512.1485517525646

{"global_step": 130000, "eval_re": [342.36502039569064, 196.41436087472727, 
822.5458318342552, 499.7654710341816, 584.1318738126643, 171.4814503522399, 
805.0932487855691, 323.5058935917121, 131.86620607239155, 1244.3161607722138], 
"eval_len": [168, 119, 344, 274, 277, 111, 325, 189, 101, 459]}

 14%|█▍        | 139995/1000000 [1:19:08<5:43:27, 41.73it/s]global step 140000, trans_decision ep_re 183.131131542897

{"global_step": 140000, "eval_re": [81.35618458179599, 704.0327192730476, 
65.03857146945487, 40.13884950220653, 44.650721701500686, 32.553397048868234, 
708.5342170908408, 72.61505938200192, 48.07784563080123, 34.31374974845222], 
"eval_len": [78, 229, 66, 40, 46, 38, 248, 73, 42, 39]}

 15%|█▍        | 149999/1000000 [1:25:12<6:59:13, 33.79it/s]global step 150000, trans_decision ep_re 913.7929122116171

{"global_step": 150000, "eval_re": [1864.842566146292, 679.0190699079311, 
397.68555946786836, 784.776109521529, 690.317383775616, 1907.536530221652, 
605.4904134355676, 779.6094498409848, 553.2345844857381, 875.417455312993], 
"eval_len": [654, 286, 169, 337, 251, 563, 249, 359, 200, 270]}

 16%|█▌        | 159998/1000000 [1:30:47<7:07:21, 32.76it/s]global step 160000, trans_decision ep_re 265.2553907890203

{"global_step": 160000, "eval_re": [28.927293127216817, 22.890527637386153, 
27.559585129712385, 795.1939922808161, 750.8179569619631, 114.34607258946247, 
30.159694843003386, 491.8526647015843, 26.5106242006515, 364.2954964184067], 
"eval_len": [36, 35, 37, 372, 258, 84, 37, 214, 35, 244]}

 17%|█▋        | 169995/1000000 [1:36:26<5:30:57, 41.80it/s]global step 170000, trans_decision ep_re 443.4311644359456

{"global_step": 170000, "eval_re": [88.54948916987246, 460.0964692050856, 
541.0055783076407, 439.4764787519314, 58.67968014933634, 1202.4529433043306, 
592.6621423244297, 142.5972608808152, 514.4231829787032, 394.36841928731127], 
"eval_len": [75, 179, 202, 185, 57, 381, 220, 82, 185, 158]}

 18%|█▊        | 179995/1000000 [1:42:32<5:43:03, 39.84it/s]global step 180000, trans_decision ep_re 546.7611176209276

{"global_step": 180000, "eval_re": [89.76790858700771, 551.8937808064046, 
182.15158769430158, 242.51298453878152, 434.49593943147795, 635.3307813015967, 
433.44251606438473, 1929.308206814058, 370.22746397599786, 598.4800069952656], 
"eval_len": [85, 235, 123, 142, 216, 218, 176, 525, 225, 241]}

 19%|█▉        | 189997/1000000 [1:48:14<6:58:14, 32.28it/s]global step 190000, trans_decision ep_re 695.3847999536657

{"global_step": 190000, "eval_re": [767.8386004513478, 1153.267869829836, 
193.3262185500545, 1464.885069138278, 571.7625269843035, 649.0151357618356, 
53.36687096382408, 598.0791970032108, 471.2816506722717, 1031.024860181695], 
"eval_len": [338, 429, 113, 412, 207, 221, 54, 224, 194, 314]}

 20%|█▉        | 199995/1000000 [1:54:03<5:06:10, 43.55it/s]global step 200000, trans_decision ep_re 616.018520777118

{"global_step": 200000, "eval_re": [135.66564592357645, 720.8985528537626, 
1106.459665099412, 101.54348247821456, 86.78924620511849, 912.4136173912202, 
93.36587853306003, 1022.1226492591377, 995.5925086087844, 985.3339614188934], 
"eval_len": [98, 242, 310, 84, 75, 305, 77, 290, 301, 281]}

 21%|██        | 209999/1000000 [1:59:55<6:23:25, 34.34it/s]global step 210000, trans_decision ep_re 403.36274010017365

{"global_step": 210000, "eval_re": [304.2169220456425, 107.37203755273947, 
804.8382833312908, 416.47237223920644, 198.3526571137429, 143.62250559558873, 
111.17864820254512, 1107.9170411096482, 323.29856151389015, 516.3583722974422], 
"eval_len": [142, 84, 287, 182, 116, 95, 75, 325, 165, 210]}

 22%|██▏       | 219997/1000000 [2:06:02<5:27:33, 39.69it/s]global step 220000, trans_decision ep_re 585.5053012279278

{"global_step": 220000, "eval_re": [257.66001329568536, 1146.1451592631154, 
86.30151803364897, 462.377950943503, 102.31069593810457, 189.45064040920394, 
232.16099215965212, 940.0331878866066, 1791.450800799902, 647.1620535498558], 
"eval_len": [147, 376, 70, 197, 87, 116, 148, 299, 497, 225]}

 23%|██▎       | 229999/1000000 [2:11:42<4:56:55, 43.22it/s]global step 230000, trans_decision ep_re 293.90835075487115

{"global_step": 230000, "eval_re": [593.2464064288371, 274.67262898577536, 
92.91435953164684, 76.06310744482217, 316.89420501147976, 217.53462472068702, 
116.3733860107444, 84.73163150535684, 820.6878132030871, 345.9653447062752], 
"eval_len": [254, 131, 84, 77, 161, 149, 93, 77, 370, 211]}

 24%|██▍       | 239998/1000000 [2:17:16<5:13:57, 40.34it/s]global step 240000, trans_decision ep_re 449.14974597217326

{"global_step": 240000, "eval_re": [364.59931597037837, 876.1183691959748, 
208.46587793878757, 904.1766763988212, 1370.2956366032856, 133.65862564284578, 
241.15478468073627, 121.25234671275891, 123.80430610321021, 147.9715204749335], 
"eval_len": [140, 299, 107, 264, 408, 86, 122, 85, 82, 87]}

 25%|██▍       | 249997/1000000 [2:23:02<4:49:20, 43.20it/s]global step 250000, trans_decision ep_re 797.0989561557344

{"global_step": 250000, "eval_re": [1137.8384878227064, 808.3107838550392, 
442.3420790582713, 2408.6953760027013, 1709.2117524780167, 156.19922357138532, 
177.54803057806916, 316.4367400735143, 255.61960365003907, 558.7874844676011], 
"eval_len": [322, 247, 176, 570, 464, 102, 106, 141, 125, 192]}

 26%|██▌       | 259999/1000000 [2:28:33<4:42:01, 43.73it/s]global step 260000, trans_decision ep_re 923.1801655502729

{"global_step": 260000, "eval_re": [1433.3446251560283, 1516.263815709385, 
120.23918236939029, 445.93731947439926, 126.48962105862151, 628.842117979493, 
1926.1162743939994, 1787.5962599705044, 1127.3505970145227, 119.62184237638503],
"eval_len": [395, 401, 91, 197, 85, 236, 496, 508, 306, 84]}

 27%|██▋       | 269997/1000000 [2:34:07<5:06:51, 39.65it/s]global step 270000, trans_decision ep_re 304.5303516296402

{"global_step": 270000, "eval_re": [187.5330257798564, 797.0625939718636, 
179.9178616228977, 115.13193148657784, 221.62375640850203, 667.2511229056092, 
184.3618733061865, 428.8713756336622, 133.77083263640475, 129.77914254484213], 
"eval_len": [161, 291, 158, 120, 154, 273, 145, 203, 122, 133]}

 28%|██▊       | 279998/1000000 [2:39:53<4:43:05, 42.39it/s]global step 280000, trans_decision ep_re 588.4676572576174

{"global_step": 280000, "eval_re": [3657.8376827829834, 114.3527425356583, 
138.76936763473446, 97.27594806028884, 305.5156587651353, 849.2591301459388, 
238.87435657520803, 98.90879775436002, 205.39680071139918, 178.4860876104668], 
"eval_len": [850, 100, 79, 88, 185, 332, 154, 90, 110, 135]}

 29%|██▉       | 289997/1000000 [2:44:56<4:55:08, 40.09it/s]global step 290000, trans_decision ep_re 533.8186934538282

{"global_step": 290000, "eval_re": [506.26475598870337, 116.2068200768322, 
738.2189952981645, 3218.8976840853584, 106.2318812555625, 108.18155451319231, 
104.03719893997602, 94.64573674896357, 230.16819410895616, 115.33411352257357], 
"eval_len": [199, 88, 244, 881, 83, 85, 82, 75, 203, 87]}

 30%|██▉       | 299997/1000000 [2:50:15<4:25:40, 43.91it/s]global step 300000, trans_decision ep_re 601.3745553681168

{"global_step": 300000, "eval_re": [70.91232829721163, 295.63568271835936, 
655.0732506309231, 74.658167163652, 79.83571588506753, 2960.351903804403, 
725.404648032932, 702.6628419373104, 328.44221940264913, 120.76879580866053], 
"eval_len": [72, 168, 210, 71, 73, 745, 228, 219, 178, 91]}

 31%|███       | 309997/1000000 [2:55:34<4:22:27, 43.82it/s]global step 310000, trans_decision ep_re 194.38732174966765

{"global_step": 310000, "eval_re": [107.75349903186115, 137.95127856621437, 
107.34084992947481, 166.2663414479961, 147.32123363636632, 154.3586534759734, 
105.23933859109937, 120.90850032352832, 145.06937294388328, 751.6641495502793], 
"eval_len": [89, 113, 85, 95, 104, 94, 82, 85, 134, 281]}

 32%|███▏      | 319998/1000000 [3:01:14<5:29:25, 34.40it/s]global step 320000, trans_decision ep_re 979.3716137916048

{"global_step": 320000, "eval_re": [1044.9905863831682, 1043.155261437411, 
147.2653259416107, 1455.4755222138133, 985.5006402462502, 971.4795670245805, 
827.5172695070717, 2015.9743928780483, 594.7297482822422, 707.6278240018535], 
"eval_len": [322, 286, 102, 424, 292, 296, 260, 489, 236, 256]}

 33%|███▎      | 329997/1000000 [3:07:04<4:27:03, 41.81it/s]global step 330000, trans_decision ep_re 1112.853761436938

{"global_step": 330000, "eval_re": [444.8135486284568, 2298.294179544553, 
1406.5887038468554, 2414.618527705356, 349.2347774554784, 228.59414822624618, 
534.2888328167974, 566.3166126664034, 722.0326525787956, 2163.7556309004376], 
"eval_len": [190, 552, 386, 628, 169, 113, 186, 187, 236, 550]}

 34%|███▍      | 339999/1000000 [3:13:04<4:36:36, 39.77it/s]global step 340000, trans_decision ep_re 949.7822001128994

{"global_step": 340000, "eval_re": [357.4159917263015, 282.0222372442852, 
1789.604182087268, 129.0202535850639, 1129.99192711474, 620.0041189226986, 
273.9444214009828, 1629.463457000111, 2599.7724772363945, 686.5829348111499], 
"eval_len": [199, 172, 435, 85, 329, 299, 221, 450, 658, 313]}

 35%|███▍      | 349998/1000000 [3:18:44<5:28:53, 32.94it/s]global step 350000, trans_decision ep_re 1273.881577866118

{"global_step": 350000, "eval_re": [127.37328694568379, 1184.0497138640726, 
95.85229707529308, 560.2845477065922, 1989.9785521837634, 393.4168728721293, 
2841.239981124017, 374.3287510003106, 3081.609171388861, 2090.682604500459], 
"eval_len": [93, 308, 73, 190, 452, 171, 697, 176, 700, 489]}

 36%|███▌      | 359999/1000000 [3:23:56<4:02:43, 43.94it/s]global step 360000, trans_decision ep_re 731.243300279594

{"global_step": 360000, "eval_re": [436.22700520867454, 384.4894546554317, 
147.66275943591796, 174.95986920700426, 1564.050159168737, 1350.118233334652, 
1070.6871773981923, 256.50005742021335, 871.5551645598022, 1056.1831224073148], 
"eval_len": [175, 152, 94, 111, 385, 353, 306, 127, 263, 308]}

 37%|███▋      | 369998/1000000 [3:29:07<3:57:13, 44.26it/s]global step 370000, trans_decision ep_re 1132.825744966488

{"global_step": 370000, "eval_re": [233.63154244939167, 246.27583186866508, 
2217.9679678513107, 208.9267061068889, 1316.4156441286393, 2979.6565475132275, 
1575.5581261598134, 1607.9850066973768, 158.24311293979514, 783.5969639497681], 
"eval_len": [115, 126, 664, 111, 369, 728, 401, 439, 92, 240]}

 38%|███▊      | 379998/1000000 [3:34:20<3:52:52, 44.37it/s]global step 380000, trans_decision ep_re 700.8725966769457

{"global_step": 380000, "eval_re": [1501.5145844191104, 1062.1635034659826, 
1378.927406127267, 203.45161362039576, 130.7542251909778, 125.30762961350048, 
228.87960016778084, 191.5045713790809, 1248.4468649927887, 937.7759677925726], 
"eval_len": [395, 276, 346, 107, 82, 85, 114, 107, 313, 286]}

 39%|███▉      | 389998/1000000 [3:39:45<3:49:45, 44.25it/s]global step 390000, trans_decision ep_re 2144.2465927696535

{"global_step": 390000, "eval_re": [98.26337926619664, 1460.322688507602, 
1511.6805818345265, 309.9896857392169, 2222.3781599367985, 4917.733275296508, 
3815.078888458682, 4687.935236370143, 1461.4211183758425, 957.6629139110213], 
"eval_len": [74, 366, 373, 144, 540, 1000, 832, 1000, 350, 301]}

 40%|███▉      | 399995/1000000 [3:44:47<3:47:50, 43.89it/s]global step 400000, trans_decision ep_re 531.9835937627046

{"global_step": 400000, "eval_re": [32.16702298065123, 2262.514289037587, 
1043.0105588956303, 44.615948680513846, 34.1183188969459, 33.18872505207733, 
35.44726972984641, 32.960925635068904, 1612.4676913237138, 189.34518739501073], 
"eval_len": [41, 542, 357, 50, 42, 44, 45, 40, 387, 101]}

 41%|████      | 409998/1000000 [3:49:56<3:41:56, 44.31it/s]global step 410000, trans_decision ep_re 869.9497983027434

{"global_step": 410000, "eval_re": [996.7573739394255, 1616.3573333025956, 
277.20345139694507, 1267.4473987023198, 39.32006633596598, 1323.8599406760486, 
1658.185835198101, 8.839695783926437, 833.6612818162108, 677.8656058758937], 
"eval_len": [263, 393, 127, 333, 44, 337, 381, 22, 238, 225]}

 42%|████▏     | 419998/1000000 [3:55:05<3:36:55, 44.56it/s]global step 420000, trans_decision ep_re 849.5428076830678

{"global_step": 420000, "eval_re": [36.04656760490462, 42.83149930792637, 
1260.9381049619444, 472.5017417356901, 41.644419494361564, 755.9791771154523, 
41.702556497236834, 3990.4739046430172, 798.1230819166909, 1055.1870235534525], 
"eval_len": [47, 50, 384, 186, 50, 252, 50, 931, 253, 273]}

 43%|████▎     | 429998/1000000 [4:00:25<3:33:19, 44.53it/s]global step 430000, trans_decision ep_re 956.9950657670782

{"global_step": 430000, "eval_re": [47.87358474049905, 1366.6975883285772, 
2223.127014116654, 36.10171475831415, 46.36259059394552, 575.2677978687439, 
1144.8921017673838, 50.0692483748531, 4021.2223412217586, 58.33667590005239], 
"eval_len": [51, 380, 519, 45, 51, 209, 290, 53, 977, 59]}

 44%|████▍     | 439999/1000000 [4:05:25<3:33:01, 43.81it/s]global step 440000, trans_decision ep_re 849.2365741872757

{"global_step": 440000, "eval_re": [806.0830261938044, 36.22460325950152, 
2876.139939008427, 937.892902308, 55.81806533254389, 462.3978204275533, 
1853.713825357405, 43.510452641315936, 1350.8327108183773, 69.75239652582773], 
"eval_len": [234, 40, 687, 250, 60, 174, 482, 42, 361, 72]}

 45%|████▍     | 449999/1000000 [4:10:35<3:30:08, 43.62it/s]global step 450000, trans_decision ep_re 738.9861987431448

{"global_step": 450000, "eval_re": [102.39049826315288, 405.3846326383471, 
432.1578721829592, 100.83797184451934, 158.6264308496127, 187.73573161941698, 
1371.893515680891, 250.96095520655365, 3501.3480382536677, 878.5263408923262], 
"eval_len": [78, 176, 157, 70, 95, 100, 354, 140, 811, 262]}

 46%|████▌     | 459999/1000000 [4:15:45<3:25:29, 43.80it/s]global step 460000, trans_decision ep_re 805.4247205882689

{"global_step": 460000, "eval_re": [37.915314838416045, 2826.4103638092383, 
1365.6482842337782, 344.78411626054753, 561.9834685379909, 556.1846328568136, 
568.4153998974102, 490.35795201453277, 982.931518484898, 319.6161549490629], 
"eval_len": [43, 646, 381, 135, 210, 200, 196, 171, 293, 144]}

 47%|████▋     | 469996/1000000 [4:20:59<3:19:07, 44.36it/s]global step 470000, trans_decision ep_re 779.1926642432561

{"global_step": 470000, "eval_re": [1510.1808147729516, 211.0826886793427, 
507.93316054596596, 1165.5883183426638, 153.6256171147116, 138.4087127082161, 
1172.6153539037225, 722.6547216337584, 1020.048447718965, 1189.7888070122626], 
"eval_len": [386, 114, 215, 322, 104, 108, 352, 227, 280, 291]}

 48%|████▊     | 479996/1000000 [4:26:25<3:15:24, 44.35it/s]global step 480000, trans_decision ep_re 1135.9045241945494

{"global_step": 480000, "eval_re": [262.1904647856749, 555.552147608367, 
4236.825851789666, 71.6515041442543, 67.7990402397807, 2300.794611022315, 
154.7473343379342, 525.5038190681814, 1611.2496930309608, 1572.7307759183589], 
"eval_len": [119, 210, 849, 70, 67, 539, 93, 223, 363, 468]}

 49%|████▉     | 489998/1000000 [4:31:35<3:10:40, 44.58it/s]global step 490000, trans_decision ep_re 755.5835949656769

{"global_step": 490000, "eval_re": [842.6498002371297, 299.42151191007014, 
226.72608485059823, 681.9082390138176, 1461.565921188665, 836.1021531754623, 
220.42623997783423, 2157.1065368073832, 767.2412233986279, 62.6882390971804], 
"eval_len": [242, 127, 115, 221, 381, 228, 114, 552, 229, 70]}

 50%|████▉     | 499999/1000000 [4:36:45<3:08:53, 44.12it/s]global step 500000, trans_decision ep_re 1391.1089433296888

{"global_step": 500000, "eval_re": [1657.506359113818, 1121.9021724525198, 
632.2063498402061, 4547.545635438281, 3412.810790804301, 233.53159993177974, 
1285.45980283714, 233.0685299557643, 268.2939318971756, 518.764261025904], 
"eval_len": [442, 408, 215, 1000, 714, 119, 385, 121, 133, 193]}

 51%|█████     | 509995/1000000 [4:41:49<3:06:03, 43.89it/s]global step 510000, trans_decision ep_re 680.7156108270661

{"global_step": 510000, "eval_re": [466.3960465673645, 434.7459522766957, 
91.21953393793898, 345.2771600991116, 1259.4172768743972, 2116.371615088615, 
109.22295957734362, 343.6492163551796, 302.5602983719005, 1338.2960491221143], 
"eval_len": [190, 190, 78, 185, 318, 527, 103, 172, 174, 389]}

 52%|█████▏    | 519995/1000000 [4:47:15<3:01:50, 44.00it/s]global step 520000, trans_decision ep_re 1175.3870138069333

{"global_step": 520000, "eval_re": [221.15141845873336, 772.8656229607659, 
690.4693776155113, 1494.0827068092399, 275.2574588502228, 669.0883846140346, 
156.70556026844065, 559.9157751180355, 4663.76478859098, 2250.569044783368], 
"eval_len": [127, 250, 246, 399, 143, 232, 100, 209, 1000, 548]}

 53%|█████▎    | 529995/1000000 [4:52:13<2:57:23, 44.16it/s]global step 530000, trans_decision ep_re 89.35386507431365

{"global_step": 530000, "eval_re": [35.25358128542882, 274.41026765251445, 
12.643527575739888, 34.946725826477234, 128.7368096293624, 134.00783937481967, 
139.81961298156708, 40.37481612806891, 23.453128474232543, 69.89234181492556], 
"eval_len": [46, 121, 32, 44, 98, 94, 99, 47, 42, 61]}

 54%|█████▍    | 539995/1000000 [4:57:25<2:53:16, 44.25it/s]global step 540000, trans_decision ep_re 413.445590798808

{"global_step": 540000, "eval_re": [2184.541463457068, 52.61515491615516, 
67.1895454498909, 1254.1688291809728, 229.76363306899356, 12.202010534598749, 
57.66327777640189, 157.74961394474388, 69.12954785516604, 49.4328318040898], 
"eval_len": [439, 53, 64, 311, 119, 25, 58, 86, 58, 51]}

 55%|█████▍    | 549995/1000000 [5:02:19<2:49:34, 44.23it/s]global step 550000, trans_decision ep_re 1172.5084462565665

{"global_step": 550000, "eval_re": [1704.4968932313811, 861.8923276550925, 
206.11299118143165, 1435.9676853197886, 2398.227951657591, 132.22130972383144, 
136.77637976052253, 106.43339129848695, 483.7718673255376, 4259.183665412002], 
"eval_len": [409, 241, 105, 377, 540, 82, 84, 94, 212, 871]}

 56%|█████▌    | 559995/1000000 [5:07:28<2:46:04, 44.16it/s]global step 560000, trans_decision ep_re 1239.3720905184414

{"global_step": 560000, "eval_re": [676.3235009965715, 724.0386396079276, 
664.559162302441, 278.56498264456104, 2861.6969840868032, 247.1944467043237, 
1213.120781531297, 2462.025465388423, 1367.893042200471, 1898.3038997215956], 
"eval_len": [205, 205, 207, 131, 613, 124, 322, 544, 316, 461]}

 57%|█████▋    | 569995/1000000 [5:12:37<2:41:37, 44.34it/s]global step 570000, trans_decision ep_re 840.4621593600052

{"global_step": 570000, "eval_re": [1560.5573525460686, 398.7026763762095, 
654.7435695346512, 1146.9612007770454, 2290.462870422182, 355.311133504201, 
264.96316836715664, 1093.2940650664557, 50.21459963889184, 589.4109573671897], 
"eval_len": [367, 159, 210, 319, 484, 154, 125, 316, 53, 215]}

 58%|█████▊    | 579995/1000000 [5:17:56<2:37:18, 44.50it/s]global step 580000, trans_decision ep_re 1025.7981341746931

{"global_step": 580000, "eval_re": [1204.56018215122, 687.1235227543399, 
2492.544867955813, 1033.625729430453, 1008.6528802345567, 850.7716338309621, 
797.7390474949906, 797.9330924692242, 924.8916938702945, 460.1386915550755], 
"eval_len": [297, 213, 574, 275, 267, 251, 243, 239, 289, 171]}

 59%|█████▉    | 589995/1000000 [5:22:50<2:33:10, 44.61it/s]global step 590000, trans_decision ep_re 558.0474985828972

{"global_step": 590000, "eval_re": [38.69363356987652, 36.082973179483034, 
1419.9328937941275, 45.153117383698266, 45.29523528022301, 1595.3509299039038, 
34.9032958045566, 2058.9467685008426, 101.88034013407209, 204.23579827818878], 
"eval_len": [40, 41, 362, 45, 47, 406, 41, 507, 87, 126]}

 60%|█████▉    | 599995/1000000 [5:28:06<2:29:29, 44.59it/s]global step 600000, trans_decision ep_re 1111.2050299217622

{"global_step": 600000, "eval_re": [476.44457832064916, 1416.5447738248445, 
1483.0414163197445, 2324.8316367340162, 966.2164931981287, 536.1806248666844, 
416.23894268747574, 1433.8203023612014, 1747.4613940536046, 311.2701368512742], 
"eval_len": [204, 370, 372, 549, 280, 199, 185, 356, 433, 182]}

 61%|██████    | 609995/1000000 [5:32:57<2:26:01, 44.51it/s]global step 610000, trans_decision ep_re 974.986981477389

{"global_step": 610000, "eval_re": [2311.860819542431, 61.826799343809355, 
1225.107288526791, 775.1186220483147, 69.36672919931418, 501.72762356872556, 
2498.420745431274, 2235.9745346342784, 8.996267170002326, 61.47038530894904], 
"eval_len": [515, 64, 338, 233, 66, 164, 511, 489, 21, 60]}

 62%|██████▏   | 619995/1000000 [5:38:16<2:23:20, 44.18it/s]global step 620000, trans_decision ep_re 1814.9724959807027

{"global_step": 620000, "eval_re": [1177.059142432908, 602.5092316470381, 
348.0942273416489, 2259.405288569541, 1335.24451017616, 599.3534772335098, 
5098.239801457578, 987.4962823213289, 2198.9238832254046, 3543.399115401908], 
"eval_len": [302, 193, 144, 483, 350, 212, 998, 305, 475, 710]}

 63%|██████▎   | 629995/1000000 [5:43:09<2:17:41, 44.79it/s]global step 630000, trans_decision ep_re 944.5842599596954

{"global_step": 630000, "eval_re": [2567.5337045897786, 50.601878314845564, 
2143.2594606176594, 50.21453929817181, 578.3189871485132, 69.58024465651091, 
2416.7220119009235, 56.58257034598615, 45.53817422856359, 1467.491028495999], 
"eval_len": [519, 61, 456, 55, 196, 65, 499, 62, 58, 387]}

 64%|██████▍   | 639995/1000000 [5:48:12<2:13:07, 45.07it/s]global step 640000, trans_decision ep_re 823.5662832110218

{"global_step": 640000, "eval_re": [283.50205737193465, 476.1081768158857, 
199.94639403621238, 236.01876696896494, 1493.609747256169, 280.57204563334744, 
1228.8902346388, 438.0490457942322, 202.18989844377674, 3396.776465150894], 
"eval_len": [153, 186, 121, 131, 385, 140, 339, 191, 123, 706]}

 65%|██████▍   | 649995/1000000 [5:53:16<2:10:11, 44.80it/s]global step 650000, trans_decision ep_re 864.8389309776023

{"global_step": 650000, "eval_re": [357.71218679966114, 44.66246627672659, 
3747.7288633726844, 138.28852274202748, 81.4554946740389, 44.74103990491093, 
95.98298290423074, 67.42133875746423, 2325.0262349331556, 1745.3701794111219], 
"eval_len": [160, 51, 791, 106, 71, 51, 70, 66, 644, 489]}

 66%|██████▌   | 659995/1000000 [5:58:19<2:06:24, 44.83it/s]global step 660000, trans_decision ep_re 1093.8655605544309

{"global_step": 660000, "eval_re": [2609.933150341278, 654.5980456292726, 
1893.8660805741056, 2189.1682459296944, 303.3270605943724, 1166.5658287696817, 
532.7032161480491, 1451.964297885774, 81.86200583048084, 54.667673841601065], 
"eval_len": [608, 216, 464, 451, 140, 304, 189, 333, 70, 59]}

 67%|██████▋   | 669995/1000000 [6:03:36<2:03:23, 44.57it/s]global step 670000, trans_decision ep_re 705.2301453493623

{"global_step": 670000, "eval_re": [1236.5224327913666, 52.27506426201689, 
932.5015083248616, 1595.4888628968047, 328.1412175614465, 2181.7640541486435, 
117.9986099291334, 122.5656931327634, 341.79374264365464, 143.25026780293248], 
"eval_len": [309, 58, 240, 361, 140, 481, 94, 95, 145, 109]}

 68%|██████▊   | 679995/1000000 [6:08:36<1:58:54, 44.85it/s]global step 680000, trans_decision ep_re 1240.8051614750225

{"global_step": 680000, "eval_re": [1704.1201633862122, 679.0899571484752, 
1421.5382999904832, 644.0730258315913, 367.3744819383511, 2301.9427939806624, 
645.877740030682, 628.3669735420184, 1665.3439750903697, 2350.3242038113826], 
"eval_len": [457, 218, 377, 212, 147, 540, 221, 219, 395, 486]}

 69%|██████▉   | 689995/1000000 [6:13:29<1:55:03, 44.91it/s]global step 690000, trans_decision ep_re 1029.81534129881

{"global_step": 690000, "eval_re": [2789.1017868815084, 552.6044782683089, 
2220.8794935777855, 225.6298077156257, 260.0699990046448, 921.2734887822181, 
328.41522334023927, 446.5180556548424, 329.8939140754607, 2223.7671656874654], 
"eval_len": [590, 191, 479, 123, 128, 247, 136, 187, 149, 499]}

 70%|██████▉   | 699995/1000000 [6:18:32<1:51:25, 44.87it/s]global step 700000, trans_decision ep_re 647.5620908456707

{"global_step": 700000, "eval_re": [595.9442845282032, 65.32660708155566, 
249.4645853716783, 596.8568631097527, 817.6839108856257, 2225.0204550621766, 
621.7997634006628, 716.135561976071, 38.799052484021075, 548.5898245569603], 
"eval_len": [206, 62, 120, 200, 250, 518, 198, 219, 41, 183]}

 71%|███████   | 709995/1000000 [6:23:46<2:09:02, 37.46it/s]global step 710000, trans_decision ep_re 1440.857101219583

{"global_step": 710000, "eval_re": [1123.8413719749622, 1678.259458297167, 
477.8759696350218, 1006.7292728545298, 1505.0588955557366, 2392.9174906659864, 
1743.2053385059735, 2683.977040137189, 276.43717913910194, 1520.2689954301609], 
"eval_len": [294, 424, 176, 270, 376, 587, 402, 577, 141, 387]}

 72%|███████▏  | 719995/1000000 [6:28:41<1:44:23, 44.70it/s]global step 720000, trans_decision ep_re 182.99672940513307

{"global_step": 720000, "eval_re": [224.3085183147126, 44.89853298311467, 
170.1505007129807, 68.18884062143148, 174.01039036292454, 172.14618432478198, 
53.961198530968545, 11.790305610035205, 371.40945891880904, 539.1033636715721], 
"eval_len": [119, 52, 108, 69, 115, 107, 67, 26, 172, 189]}

 73%|███████▎  | 729995/1000000 [6:33:39<1:40:40, 44.70it/s]global step 730000, trans_decision ep_re 813.255241423113

{"global_step": 730000, "eval_re": [771.7974001265015, 1808.1975758730994, 
247.7361383271979, 472.9254898712292, 495.0741081315054, 1945.3487285855933, 
74.01906578747325, 1584.9286064388652, 65.69703529458017, 666.8282657950862], 
"eval_len": [229, 408, 123, 167, 196, 451, 73, 367, 65, 199]}

 74%|███████▍  | 739995/1000000 [6:38:41<1:36:41, 44.82it/s]global step 740000, trans_decision ep_re 591.3713054683747

{"global_step": 740000, "eval_re": [757.7618629298394, 85.96621121928939, 
98.86026512062224, 75.6455470462842, 866.5393364729863, 115.91282840660733, 
382.6160756621641, 198.0581527404489, 2568.2108665755145, 764.1419085099906], 
"eval_len": [251, 77, 85, 124, 270, 95, 161, 122, 596, 237]}

 75%|███████▍  | 749995/1000000 [6:43:56<1:32:56, 44.83it/s]global step 750000, trans_decision ep_re 1418.3291676864292

{"global_step": 750000, "eval_re": [1336.6594073936496, 488.122249189126, 
1540.690268389122, 991.9229325175621, 2138.8949873518036, 3092.735019713367, 
2312.0611144796044, 748.2597381509619, 792.5923599923229, 741.3535996867726], 
"eval_len": [349, 191, 377, 293, 470, 698, 507, 241, 240, 257]}

 76%|███████▌  | 759995/1000000 [6:48:47<1:29:35, 44.65it/s]global step 760000, trans_decision ep_re 1573.6054665758254

{"global_step": 760000, "eval_re": [1001.5946502419799, 1946.124636954795, 
1886.201475823389, 717.1802627861492, 47.92190763037589, 2394.077245534035, 
4659.4216956653445, 403.54880414043197, 2563.2779544013442, 116.70603258040776],
"eval_len": [278, 479, 421, 225, 54, 515, 1000, 161, 553, 99]}

 77%|███████▋  | 769995/1000000 [6:53:53<1:25:50, 44.65it/s]global step 770000, trans_decision ep_re 298.41272588287154

{"global_step": 770000, "eval_re": [67.08886686356227, 100.6734073090503, 
129.5575688840061, 129.03437952775786, 118.89699735486619, 140.0407279933498, 
40.11511228389367, 1259.7350241784402, 885.9804291374479, 113.00474529634141], 
"eval_len": [71, 86, 105, 102, 94, 109, 48, 404, 256, 95]}

 78%|███████▊  | 779995/1000000 [6:59:07<1:21:29, 44.99it/s]global step 780000, trans_decision ep_re 1287.8999373446309

{"global_step": 780000, "eval_re": [1211.1200178517875, 385.7604602859816, 
637.279421231436, 99.29350536962586, 1620.2854682035395, 415.3235592198732, 
1067.301499102181, 801.9828517491278, 4013.1221528741075, 2627.530437558648], 
"eval_len": [332, 154, 214, 88, 396, 176, 295, 257, 835, 559]}

 79%|███████▉  | 789995/1000000 [7:03:58<1:18:05, 44.82it/s]global step 790000, trans_decision ep_re 683.0120911260484

{"global_step": 790000, "eval_re": [1051.4910780068144, 38.070769437266314, 
41.154190126310446, 3217.515639663757, 972.3466357367361, 212.93167415128818, 
409.33927659123015, 202.63476705855376, 645.0812967836996, 39.555583704827576], 
"eval_len": [275, 43, 44, 706, 270, 116, 156, 110, 219, 44]}

 80%|███████▉  | 799995/1000000 [7:08:58<1:14:21, 44.83it/s]global step 800000, trans_decision ep_re 166.7154518841521

{"global_step": 800000, "eval_re": [562.1837359845952, 9.626848403536746, 
39.151539435846324, 34.64306871337706, 441.16871055361946, 60.1987843319766, 
171.49654300665398, 189.91817700966826, 58.10994686999766, 100.65716453224974], 
"eval_len": [213, 21, 43, 39, 209, 60, 124, 103, 58, 83]}

 81%|████████  | 809995/1000000 [7:13:57<1:10:46, 44.74it/s]global step 810000, trans_decision ep_re 683.5677088591989

{"global_step": 810000, "eval_re": [551.2659187403984, 88.57342733206754, 
179.74069890669364, 1544.3704239710573, 66.10811594931042, 84.34564838650448, 
1433.2499047017545, 1581.5213098898887, 952.5938592951544, 353.9077814191593], 
"eval_len": [195, 86, 113, 369, 66, 92, 374, 403, 270, 163]}

 82%|████████▏ | 819995/1000000 [7:19:00<1:06:57, 44.80it/s]global step 820000, trans_decision ep_re 387.1750327290307

{"global_step": 820000, "eval_re": [358.14177138364994, 346.8346200893011, 
1334.35606323458, 146.4180456722121, 286.9148633866034, 62.03857289691623, 
729.995706972376, 366.6949302173555, 56.0783260696007, 184.2774273677119], 
"eval_len": [143, 146, 327, 98, 128, 62, 249, 146, 62, 111]}

 83%|████████▎ | 829995/1000000 [7:24:01<1:03:33, 44.58it/s]global step 830000, trans_decision ep_re 349.2975569492784

{"global_step": 830000, "eval_re": [62.08983754737578, 57.170821223973455, 
421.7578224616729, 490.5447162553704, 1815.3190634853702, 49.26019041021704, 
48.02058057139574, 403.17243397142346, 44.20297010556927, 101.437133460415], 
"eval_len": [62, 63, 158, 172, 421, 57, 52, 144, 53, 84]}

 84%|████████▍ | 839995/1000000 [7:28:59<59:54, 44.52it/s]global step 840000, trans_decision ep_re 237.75915715404503

{"global_step": 840000, "eval_re": [244.06971956273622, 92.17443572645884, 
286.07857956267725, 112.99225672038756, 217.37004083756588, 332.0584654780355, 
88.12049590736397, 628.6217669423283, 46.137271391334636, 329.9685394115622], 
"eval_len": [116, 73, 135, 89, 106, 141, 79, 213, 55, 153]}

 85%|████████▍ | 849995/1000000 [7:33:57<55:54, 44.72it/s]global step 850000, trans_decision ep_re 1243.3199833376207

{"global_step": 850000, "eval_re": [49.88161014253464, 158.7359992848456, 
5156.73642763238, 2588.361076434108, 63.19680753026358, 1816.372314592666, 
56.7699865230492, 203.88053634759353, 1742.647366099119, 596.6177087896467], 
"eval_len": [53, 103, 1000, 532, 61, 451, 56, 113, 435, 217]}

 86%|████████▌ | 859995/1000000 [7:39:00<52:08, 44.75it/s]global step 860000, trans_decision ep_re 799.9643005695748

{"global_step": 860000, "eval_re": [664.7411078828468, 1090.4972599210794, 
362.86138142162423, 964.3012504088251, 1238.4516961951088, 1011.0893397989812, 
776.4163448267932, 951.3107650571972, 736.8311050957733, 203.1427550875184], 
"eval_len": [221, 993, 151, 272, 329, 297, 270, 257, 256, 216]}

 87%|████████▋ | 869995/1000000 [7:44:10<48:47, 44.40it/s]global step 870000, trans_decision ep_re 317.34069724091376

{"global_step": 870000, "eval_re": [732.2564201421764, 1011.92751363236, 
51.272473283758714, 84.2324899766448, 54.733611161386314, 755.9442258767668, 
206.11372670364534, 125.76831525475588, 52.48298361300788, 98.67521276463539], 
"eval_len": [233, 353, 57, 74, 60, 291, 106, 91, 57, 77]}

 88%|████████▊ | 879995/1000000 [7:49:19<45:01, 44.42it/s]global step 880000, trans_decision ep_re 325.24063209411975

{"global_step": 880000, "eval_re": [1029.0953245755145, 37.8307421654742, 
34.71906391701982, 701.3199007594528, 36.48298118391009, 269.4838068986992, 
31.87072107887082, 36.83319499064814, 35.57332896707775, 1039.19725640453], 
"eval_len": [303, 40, 39, 220, 42, 142, 39, 40, 40, 264]}

 89%|████████▉ | 889995/1000000 [7:54:22<41:19, 44.36it/s]global step 890000, trans_decision ep_re 738.5069498044898

{"global_step": 890000, "eval_re": [803.7937820782175, 1962.9278591103791, 
1315.664559613647, 230.32183732900066, 225.18188230601763, 261.0196382828958, 
1079.360022629186, 46.029328769562454, 467.9751611135065, 992.7954268124844], 
"eval_len": [232, 454, 345, 110, 122, 145, 282, 51, 170, 279]}

 90%|████████▉ | 899995/1000000 [7:59:31<37:24, 44.55it/s]global step 900000, trans_decision ep_re 803.6708875699375

{"global_step": 900000, "eval_re": [91.05307674071946, 32.716937901876285, 
1966.106006860562, 2971.15578716796, 307.0026985063929, 48.52087807614405, 
60.20862282966214, 611.0310872432187, 1895.9663536196679, 52.94742675317107], 
"eval_len": [73, 37, 486, 697, 144, 50, 58, 217, 440, 53]}

 91%|█████████ | 909995/1000000 [8:04:49<33:58, 44.15it/s]global step 910000, trans_decision ep_re 652.9409081654451

{"global_step": 910000, "eval_re": [140.98539173766432, 1645.019215104495, 
561.920658847784, 2168.2412454961654, 160.08563122623104, 586.254000270869, 
164.61817858018523, 538.0175716071616, 276.244127980062, 288.0230608038314], 
"eval_len": [85, 404, 192, 494, 109, 210, 98, 223, 142, 133]}

 92%|█████████▏| 919995/1000000 [8:09:59<30:02, 44.38it/s]global step 920000, trans_decision ep_re 1008.5851623349469

{"global_step": 920000, "eval_re": [856.7539306260846, 5232.964868656006, 
311.5436868353509, 135.01994010780058, 215.73679289729392, 1783.674838217184, 
604.4499912529675, 621.5991575818788, 98.13175762783425, 225.9766595470665], 
"eval_len": [234, 1000, 139, 96, 120, 433, 214, 277, 74, 114]}

 93%|█████████▎| 929997/1000000 [8:14:55<26:20, 44.29it/s]global step 930000, trans_decision ep_re 824.6167800386578

{"global_step": 930000, "eval_re": [231.02063180754595, 149.10277743148552, 
1933.5091270797122, 2818.853628271202, 169.06812001235193, 187.80553400543792, 
488.0646483585035, 1162.0888937922284, 144.99929444682414, 961.6551451812871], 
"eval_len": [118, 94, 416, 623, 94, 106, 167, 315, 86, 255]}

 94%|█████████▍| 939997/1000000 [8:20:02<22:38, 44.17it/s]global step 940000, trans_decision ep_re 933.6046841136507

{"global_step": 940000, "eval_re": [314.7569076504925, 426.65125087537484, 
926.6746268753149, 241.6179332673558, 1587.133820362926, 545.847855029487, 
129.52977950617637, 2957.5746275233264, 537.749965245203, 1668.5100748008497], 
"eval_len": [152, 158, 275, 127, 391, 194, 95, 685, 194, 450]}

 95%|█████████▍| 949997/1000000 [8:25:12<18:49, 44.28it/s]global step 950000, trans_decision ep_re 1184.1935602896206

{"global_step": 950000, "eval_re": [352.7490174093067, 2150.119470089776, 
1966.926994566921, 613.8626936507762, 75.81402334631736, 331.5579914580054, 
49.79628953322934, 2159.849785522109, 401.0820182808468, 3740.1773190389194], 
"eval_len": [142, 494, 438, 221, 80, 149, 60, 501, 201, 756]}

 96%|█████████▌| 959997/1000000 [8:30:21<14:59, 44.47it/s]global step 960000, trans_decision ep_re 334.92294142835965

{"global_step": 960000, "eval_re": [290.64066012392965, 94.78333052128694, 
71.63898087031808, 230.62845072086225, 35.92604236627216, 83.59445577506936, 
1276.0604515936466, 912.8735999340414, 239.19989427291662, 113.88354810525387], 
"eval_len": [140, 83, 72, 114, 37, 75, 348, 333, 124, 88]}

 97%|█████████▋| 969997/1000000 [8:35:24<11:17, 44.26it/s]global step 970000, trans_decision ep_re 168.5688059073498

{"global_step": 970000, "eval_re": [186.79724302216079, 380.5824730522225, 
132.24936075832423, 49.780770800242145, 128.09451106162533, 110.59611396748714, 
120.25924879776493, 156.1429998721652, 390.6694742284086, 30.515863513097283], 
"eval_len": [105, 189, 86, 53, 83, 79, 84, 95, 189, 39]}

 98%|█████████▊| 979997/1000000 [8:40:39<07:31, 44.32it/s]global step 980000, trans_decision ep_re 1089.6481967763764

{"global_step": 980000, "eval_re": [218.68048923446935, 556.2940726231993, 
1708.5811562786212, 651.6957284030458, 91.84142588482057, 231.89048538299375, 
4942.752421496082, 226.47995869032442, 75.5313059267788, 2192.73492384343], 
"eval_len": [129, 187, 392, 219, 80, 125, 1000, 123, 76, 491]}

 99%|█████████▉| 989995/1000000 [8:45:49<03:45, 44.28it/s]global step 990000, trans_decision ep_re 469.7370457103637

{"global_step": 990000, "eval_re": [386.78645335025277, 690.7260820092397, 
1519.802022186544, 44.36572375441277, 313.4131787600322, 1163.6986039104938, 
120.6030784284739, 187.03194282810568, 176.5909085644471, 94.35246331163513], 
"eval_len": [148, 239, 413, 49, 132, 282, 84, 110, 108, 74]}

100%|█████████▉| 999995/1000000 [8:50:59<00:00, 44.27it/s]global step 1000000, trans_decision ep_re 625.140499453386

{"global_step": 1000000, "eval_re": [3094.6133415994514, 185.8095700201649, 
225.59213681975152, 62.0736099817719, 916.7920988592065, 89.79778424479954, 
177.0761005734036, 199.57106047210016, 1139.087811542602, 160.9914804206091], 
"eval_len": [641, 111, 115, 61, 235, 71, 97, 113, 314, 90]}

100%|██████████| 1000000/1000000 [8:51:01<00:00, 31.39it/s]
