
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.2
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:50<8:49:55, 31.14it/s]global step 10000, trans_decision ep_re 2.883930676808923

{"global_step": 10000, "eval_re": [8.909410937827118, 10.868513421232699, 
-55.924249229526744, -17.589981186380108, -1.6620106622281101, 
19.087982271423964, 17.06533142113515, 48.246634965623876, -10.842915794220486, 
10.680590623201873], "eval_len": [26, 120, 181, 231, 230, 176, 17, 305, 1000, 
52]}

  2%|▏         | 19997/1000000 [10:40<8:57:26, 30.39it/s]global step 20000, trans_decision ep_re -24.92001446264978

{"global_step": 20000, "eval_re": [8.090785838868767, -44.005684626639834, 
11.057091876318928, -33.738161492508226, -32.62861638607193, 
-113.46741420080907, 9.828442119435106, -36.64359774521697, 5.0698441793485545, 
-22.762834189223135], "eval_len": [45, 1000, 28, 114, 258, 1000, 101, 100, 19, 
33]}

  3%|▎         | 29997/1000000 [17:50<8:40:20, 31.07it/s]global step 30000, trans_decision ep_re -20.14415380439315

{"global_step": 30000, "eval_re": [33.276195322443925, 15.283640197480034, 
-58.42709588117518, -56.89620661865678, -17.880139421083562, -134.8865533288918,
3.9329900504419624, 15.511772675501087, 2.555981231484558, -3.9121222714756962],
"eval_len": [121, 47, 75, 257, 133, 1000, 19, 67, 75, 21]}

  4%|▍         | 39999/1000000 [25:10<8:41:21, 30.69it/s]global step 40000, trans_decision ep_re 16.726500721042562

{"global_step": 40000, "eval_re": [12.981701137320421, 165.56102175346172, 
-0.5157043915189071, -38.14186153999803, 7.394623984466509, 89.77002105971228, 
-18.655427174993193, -45.18108510035234, 3.3244416212592087, 
-9.272724138932073], "eval_len": [37, 1000, 51, 141, 118, 1000, 148, 355, 49, 
78]}

  5%|▍         | 49997/1000000 [32:20<8:35:44, 30.70it/s]global step 50000, trans_decision ep_re -9.264624888088164

{"global_step": 50000, "eval_re": [-59.90590557094737, 37.53667444653902, 
14.300647262225173, -78.29121298041782, 31.04490834756361, -31.67782436156989, 
-1.1582349734124036, 10.746953722495197, -20.03396022706701, 4.791705453709872],
"eval_len": [421, 1000, 30, 1000, 65, 161, 33, 93, 69, 31]}

  6%|▌         | 59999/1000000 [39:30<8:31:53, 30.61it/s]global step 60000, trans_decision ep_re 41.128574971213915

{"global_step": 60000, "eval_re": [31.143233983023684, 61.97723131933313, 
41.41071573071378, 107.0760627727631, -23.771610963605674, 50.63363822388124, 
118.69970010338776, 0.7011664680646265, 15.712437216694713, 7.703174857882779], 
"eval_len": [189, 1000, 101, 1000, 232, 1000, 167, 130, 77, 16]}

  7%|▋         | 69997/1000000 [46:50<8:22:27, 30.85it/s]global step 70000, trans_decision ep_re 39.239966613825274

{"global_step": 70000, "eval_re": [153.3016536259135, 6.3000668248801635, 
101.99117388044546, 19.88022414571795, 12.698512420837911, -36.279528376149024, 
5.033847638348086, 11.306773718998143, 106.62680153980148, 11.540140719459016], 
"eval_len": [1000, 47, 443, 56, 34, 257, 14, 162, 1000, 1000]}

  8%|▊         | 79997/1000000 [54:00<8:21:16, 30.59it/s]global step 80000, trans_decision ep_re 54.49557211015841

{"global_step": 80000, "eval_re": [-2.4574216759171907, 89.10136428920516, 
12.94579968167213, 76.20534855850711, 39.105037534081276, 25.23908076620693, 
108.22897069046459, -1.5245990810675647, 107.40812394541673, 90.70401639301494],
"eval_len": [276, 256, 244, 246, 108, 90, 355, 57, 646, 351]}

  9%|▉         | 89997/1000000 [1:01:10<8:12:31, 30.79it/s]global step 90000, trans_decision ep_re 71.91491892329074

{"global_step": 90000, "eval_re": [255.3032717260499, 37.8236168680938, 
22.540141071540013, 25.002909052478476, 45.440691950990235, 104.13113848828179, 
63.65540351123129, 47.36025493772109, 32.74297751247064, 85.14878411405033], 
"eval_len": [853, 148, 35, 46, 1000, 252, 1000, 123, 54, 182]}

 10%|▉         | 99997/1000000 [1:08:20<8:04:13, 30.98it/s]global step 100000, trans_decision ep_re 84.84826780028418

{"global_step": 100000, "eval_re": [88.36887432174427, 106.12439041754912, 
115.28125438624045, 126.8197130910325, 165.7188611378122, 16.914713441465082, 
134.0484716568812, 21.65818379776833, -8.196427908369401, 81.74464366071803], 
"eval_len": [188, 1000, 179, 1000, 1000, 67, 925, 25, 40, 445]}

 11%|█         | 109997/1000000 [1:15:40<8:05:40, 30.54it/s]global step 110000, trans_decision ep_re 98.91096174513616

{"global_step": 110000, "eval_re": [154.65336607699885, 266.4297608440704, 
60.14143221748039, 18.44840882705784, 223.97529163986346, 121.47748627525866, 
10.297349382275751, 64.48585794647164, 17.74938654580316, 51.451277696081426], 
"eval_len": [298, 512, 151, 74, 1000, 1000, 26, 216, 65, 139]}

 12%|█▏        | 119997/1000000 [1:22:50<7:56:36, 30.77it/s]global step 120000, trans_decision ep_re 169.7023749842356

{"global_step": 120000, "eval_re": [134.131937349723, 277.31000324441635, 
463.4250228543021, 22.961963569998233, 123.04707935134479, 85.04997182724821, 
108.12409383762136, 251.34249047549605, 49.52067079078226, 182.11051654142358], 
"eval_len": [471, 1000, 1000, 76, 1000, 288, 453, 609, 67, 1000]}

 13%|█▎        | 129997/1000000 [1:30:10<7:51:10, 30.77it/s]global step 130000, trans_decision ep_re 118.64479018521483

{"global_step": 130000, "eval_re": [117.29432186095926, 19.144160103368108, 
9.860946352207137, 261.0539008817885, 169.95201175242616, 96.69472667734537, 
45.92946664740651, 468.79136315687373, -5.611591437533638, 3.33859585730714], 
"eval_len": [251, 26, 22, 1000, 532, 363, 167, 1000, 16, 23]}

 14%|█▍        | 139997/1000000 [1:37:20<7:52:40, 30.32it/s]global step 140000, trans_decision ep_re 113.0955068160384

{"global_step": 140000, "eval_re": [107.2660953540774, 220.70404797770067, 
130.9466325661201, 337.9781922944346, 94.17837219279451, 86.74678378156244, 
22.141220028244962, 81.60723648428844, 7.433193028935693, 41.953294452225286], 
"eval_len": [377, 630, 1000, 982, 1000, 264, 124, 1000, 64, 73]}

 15%|█▍        | 149997/1000000 [1:44:40<7:38:30, 30.90it/s]global step 150000, trans_decision ep_re 109.16165687230037

{"global_step": 150000, "eval_re": [78.41654289555125, 35.07466523140159, 
129.13523634915984, 124.63034446608901, 138.13152123523238, 36.06036697954702, 
72.19112892854807, 231.40315320268238, 136.27408544087365, 110.29952399391834], 
"eval_len": [90, 60, 251, 184, 1000, 1000, 135, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:52:10<7:38:04, 30.56it/s]global step 160000, trans_decision ep_re 114.79167882001859

{"global_step": 160000, "eval_re": [-268.2081412643249, 21.481452726822315, 
347.47601913351315, 186.3619396069025, 17.599729666579204, 121.82222245805508, 
-2.764876609317273, 242.43792902522605, 274.82845926508463, 206.88205419164527],
"eval_len": [1000, 42, 828, 541, 80, 288, 26, 663, 594, 1000]}

 17%|█▋        | 169997/1000000 [1:59:20<7:28:48, 30.82it/s]global step 170000, trans_decision ep_re 152.17654100256345

{"global_step": 170000, "eval_re": [177.61581574026198, 118.6575238902802, 
159.2640317124177, 235.6009803992779, 265.64757411581127, 90.80179646951851, 
9.752551248364693, 188.54440139825172, 16.8866486602936, 258.9940863911569], 
"eval_len": [1000, 287, 1000, 1000, 1000, 1000, 25, 350, 36, 1000]}

 18%|█▊        | 179997/1000000 [2:06:40<7:22:57, 30.85it/s]global step 180000, trans_decision ep_re 142.91280006535015

{"global_step": 180000, "eval_re": [36.81749581416681, 31.896620427343027, 
312.1265105080932, 145.63781445951435, 110.76729751040472, 41.73076965650339, 
180.88915344634074, 354.5776528782148, 160.33299650600898, 54.351689446911394], 
"eval_len": [77, 137, 1000, 357, 215, 76, 1000, 702, 296, 116]}

 19%|█▉        | 189999/1000000 [2:14:00<7:16:34, 30.92it/s]global step 190000, trans_decision ep_re 147.44792765670712

{"global_step": 190000, "eval_re": [262.22634515621434, 110.48986304496091, 
281.3808407511067, 30.393338524845348, 246.02592038317505, 54.696637534919034, 
155.16122733252942, 26.90722369316086, 166.01684704750784, 141.18103309865194], 
"eval_len": [1000, 280, 1000, 52, 592, 70, 718, 25, 578, 244]}

 20%|█▉        | 199997/1000000 [2:21:10<7:06:37, 31.25it/s]global step 200000, trans_decision ep_re 114.44103130412759

{"global_step": 200000, "eval_re": [26.500796274676947, 22.344464976777935, 
17.789786521002465, 229.08604153451165, 121.44495699583858, 48.71747696751264, 
6.854792282479425, 127.53013725718542, 312.0732744340118, 232.0685857972789], 
"eval_len": [74, 61, 78, 1000, 139, 93, 28, 297, 1000, 441]}

 21%|██        | 209999/1000000 [2:28:20<7:07:49, 30.78it/s]global step 210000, trans_decision ep_re 163.52088891943092

{"global_step": 210000, "eval_re": [280.90389328791105, 179.86080333835991, 
17.03318279105837, 279.2666118651378, 300.81791607705725, 5.219799974292197, 
58.13452885991798, 374.4450009448405, 108.68011123187766, 30.847040823856645], 
"eval_len": [430, 380, 45, 578, 684, 26, 132, 1000, 287, 118]}

 22%|██▏       | 219997/1000000 [2:35:40<7:07:44, 30.39it/s]global step 220000, trans_decision ep_re 180.4341497463595

{"global_step": 220000, "eval_re": [8.54150950359733, 52.77906734144572, 
330.0351640223885, 33.512918579429154, 211.66408439062747, 125.43769013817783, 
396.02369573930486, 205.24537953213834, 56.07648382690675, 385.0255043895791], 
"eval_len": [41, 115, 589, 1000, 372, 1000, 1000, 1000, 114, 1000]}

 23%|██▎       | 229997/1000000 [2:43:00<6:52:09, 31.14it/s]global step 230000, trans_decision ep_re 232.34187172501046

{"global_step": 230000, "eval_re": [185.41557066817842, 303.1676084132648, 
450.01791704098923, 189.17748030081424, 20.097478053860495, 409.60085212596147, 
72.90744469657268, 418.74868568767596, 0.7064907516674117, 273.5791895111202], 
"eval_len": [438, 610, 1000, 1000, 68, 1000, 249, 1000, 96, 1000]}

 24%|██▍       | 239997/1000000 [2:50:20<6:48:51, 30.98it/s]global step 240000, trans_decision ep_re 226.7438118699917

{"global_step": 240000, "eval_re": [157.6744266902237, 48.30216997159056, 
132.8266241078976, 385.3011098502552, 502.64708267957, 244.07108004430705, 
428.0332284803435, 21.52931566382796, 29.575997042405263, 317.4770841694961], 
"eval_len": [312, 96, 172, 676, 1000, 633, 1000, 142, 226, 1000]}

 25%|██▍       | 249997/1000000 [2:57:30<6:42:50, 31.03it/s]global step 250000, trans_decision ep_re 237.44417047259876

{"global_step": 250000, "eval_re": [311.74463207668504, 385.45790583525184, 
448.04670272135934, 277.97180912965933, 32.93617830621689, 415.92916879389156, 
2.7381279249923836, 112.47016698696292, 9.372083001229752, 377.7749299497382], 
"eval_len": [525, 1000, 1000, 492, 29, 1000, 18, 146, 28, 1000]}

 26%|██▌       | 259997/1000000 [3:04:50<6:30:56, 31.55it/s]global step 260000, trans_decision ep_re 253.12351237222225

{"global_step": 260000, "eval_re": [61.562877001650996, 485.76053783936305, 
177.09793719532897, 150.3738588393372, 344.78854595662364, 215.7093972423205, 
264.0209478730571, 374.7732327992019, 420.07970122562466, 37.06808774971419], 
"eval_len": [144, 973, 378, 527, 491, 1000, 724, 1000, 1000, 115]}

 27%|██▋       | 269999/1000000 [3:12:00<6:27:11, 31.42it/s]global step 270000, trans_decision ep_re 270.94437545725054

{"global_step": 270000, "eval_re": [106.51271202784014, 491.4393699696545, 
396.9297656295056, 516.3947192092759, 370.67984877904865, 251.32640341301683, 
4.926022434729608, 305.3404428677082, 80.83088506003757, 185.06358518168864], 
"eval_len": [576, 1000, 714, 1000, 1000, 1000, 66, 831, 181, 565]}

 28%|██▊       | 279997/1000000 [3:19:20<6:21:39, 31.44it/s]global step 280000, trans_decision ep_re 201.27396562132944

{"global_step": 280000, "eval_re": [428.8654168600958, 2.5948320534683407, 
253.38744336206202, 432.18661225029734, -10.256996156287528, 25.49373895560121, 
16.842216731940916, 398.3686028792384, 135.06563105432895, 330.19215822254915], 
"eval_len": [1000, 9, 512, 1000, 22, 116, 67, 797, 216, 836]}

 29%|██▉       | 289997/1000000 [3:26:20<6:16:19, 31.44it/s]global step 290000, trans_decision ep_re 264.43781140427785

{"global_step": 290000, "eval_re": [188.3881009840525, 544.0331760916672, 
356.2595548300071, 57.7288004752882, -6.340808556441486, 427.99709616780103, 
351.2551841078784, 68.04724981685396, 570.5022109603518, 86.50754916532028], 
"eval_len": [320, 1000, 813, 226, 28, 909, 704, 64, 1000, 1000]}

 30%|██▉       | 299997/1000000 [3:33:30<6:09:38, 31.56it/s]global step 300000, trans_decision ep_re 192.62323742785094

{"global_step": 300000, "eval_re": [176.08875456734143, 125.58191109124107, 
33.31509518179945, 486.27700784884655, 153.34865452828748, 96.36500766962615, 
146.8290377713092, 146.77138741315767, 157.79228430373843, 403.8632339031617], 
"eval_len": [364, 297, 79, 1000, 366, 260, 349, 351, 1000, 802]}

 31%|███       | 309997/1000000 [3:40:40<6:08:13, 31.23it/s]global step 310000, trans_decision ep_re 230.37200386081395

{"global_step": 310000, "eval_re": [475.3190258905569, 186.8284135811598, 
69.02936303897484, 8.704004416045668, 469.7416411196369, 205.5951883292666, 
344.2681735812249, 124.11469979353632, 296.06113128402865, 124.05839757370894], 
"eval_len": [1000, 594, 84, 19, 961, 422, 756, 267, 480, 301]}

 32%|███▏      | 319997/1000000 [3:47:50<6:01:37, 31.34it/s]global step 320000, trans_decision ep_re 189.53043554369043

{"global_step": 320000, "eval_re": [48.377298247884575, 138.2976551167812, 
55.39024023416099, 399.4772199257911, 215.27997786687405, 286.43152981319594, 
126.79578039421853, 347.6170554058154, 180.50636720690136, 97.13123122528151], 
"eval_len": [192, 320, 137, 1000, 591, 483, 388, 1000, 371, 191]}

 33%|███▎      | 329997/1000000 [3:54:50<5:53:31, 31.59it/s]global step 330000, trans_decision ep_re 225.2010776110097

{"global_step": 330000, "eval_re": [403.00649185889745, 212.98982306237593, 
307.43554850918565, 206.8739975661506, 160.37077268923383, 298.56104658288825, 
12.51878164539247, 43.554256845217324, 249.22563019357614, 357.4744271571795], 
"eval_len": [1000, 1000, 1000, 369, 212, 468, 20, 94, 1000, 1000]}

 34%|███▍      | 339997/1000000 [4:02:00<5:51:02, 31.33it/s]global step 340000, trans_decision ep_re 185.49270339100917

{"global_step": 340000, "eval_re": [315.2890428852396, 26.784314506497015, 
203.07139416692456, 146.11034673436083, 107.37399381448277, 281.694682459497, 
152.03936997163885, 411.4020555705623, 218.78334958603304, -7.621515785143999], 
"eval_len": [1000, 61, 568, 341, 425, 1000, 294, 1000, 545, 26]}

 35%|███▍      | 349997/1000000 [4:09:10<5:41:01, 31.77it/s]global step 350000, trans_decision ep_re 123.59311897139598

{"global_step": 350000, "eval_re": [105.40453224051966, 384.2690749827663, 
10.766270720768926, 8.51716982813892, 34.18307702571015, 14.537404217010772, 
30.39433238838011, 285.74369934943417, 171.61051845099516, 190.50511051023574], 
"eval_len": [145, 1000, 28, 18, 67, 27, 148, 442, 346, 365]}

 36%|███▌      | 359997/1000000 [4:16:10<5:41:08, 31.27it/s]global step 360000, trans_decision ep_re 164.38673650938978

{"global_step": 360000, "eval_re": [12.635823398834091, 18.214863396416536, 
388.31855919474987, 260.95456219542575, 169.76595462004096, 460.36218540295806, 
46.59386075121679, 159.6535082574607, 36.706976178927555, 90.66107169786703], 
"eval_len": [29, 67, 999, 527, 261, 994, 68, 170, 185, 131]}

 37%|███▋      | 369997/1000000 [4:23:10<5:31:13, 31.70it/s]global step 370000, trans_decision ep_re 201.48198707397822

{"global_step": 370000, "eval_re": [412.1312380792598, 76.83102548566428, 
26.352406659057525, 371.48548062778633, 438.2295813871908, 46.56207245781285, 
230.30819261933723, 213.36675753873806, 19.318229680518222, 180.23488620441688],
"eval_len": [1000, 99, 33, 507, 1000, 98, 1000, 1000, 94, 389]}

 38%|███▊      | 379997/1000000 [4:30:20<5:26:42, 31.63it/s]global step 380000, trans_decision ep_re 184.79516737465846

{"global_step": 380000, "eval_re": [517.825267362248, 190.34822642784363, 
222.4160896514894, 240.52937292289857, 224.93658425677262, 17.375469764110616, 
10.681820086360414, 367.03779016674224, 0.2632734245724637, 56.53777968354667], 
"eval_len": [1000, 455, 539, 411, 470, 19, 23, 656, 14, 75]}

 39%|███▉      | 389997/1000000 [4:37:30<5:23:52, 31.39it/s]global step 390000, trans_decision ep_re 146.5207389440603

{"global_step": 390000, "eval_re": [34.86591992120769, 127.88956873942517, 
15.317792022912435, 207.5426370393041, 90.95241596466329, 194.42153397306305, 
595.2785366581525, 91.1050071319229, 82.2297378879703, 25.604240101981794], 
"eval_len": [72, 221, 39, 1000, 144, 490, 1000, 1000, 193, 44]}

 40%|███▉      | 399997/1000000 [4:44:30<5:16:12, 31.62it/s]global step 400000, trans_decision ep_re 178.10696715998998

{"global_step": 400000, "eval_re": [107.35743646754716, 344.035744064842, 
119.02664515062216, 233.6339014080751, 352.10988447680126, 60.57646778247224, 
236.8715325525506, 55.58190607365406, 130.6752043356722, 141.200949287663], 
"eval_len": [182, 531, 240, 522, 799, 103, 330, 162, 221, 183]}

 41%|████      | 409997/1000000 [4:51:30<5:13:38, 31.35it/s]global step 410000, trans_decision ep_re 197.00383579104553

{"global_step": 410000, "eval_re": [282.848150279116, 327.4672779723095, 
427.1220445907103, 81.97402258620917, 113.11694607449905, 354.0851552158764, 
87.2176822120515, 11.747633108730339, 262.36413592530914, 22.095309945643578], 
"eval_len": [384, 555, 1000, 247, 190, 1000, 68, 33, 1000, 30]}

 42%|████▏     | 419997/1000000 [4:58:40<5:07:50, 31.40it/s]global step 420000, trans_decision ep_re 186.96541982071977

{"global_step": 420000, "eval_re": [65.7969853180562, 160.97985099747018, 
36.72523121689571, 163.7960721366674, 92.80646079318755, 400.55896505320584, 
265.40099608700274, 393.48598094744216, 224.6930563319456, 65.41059932532416], 
"eval_len": [113, 290, 64, 293, 166, 1000, 466, 1000, 278, 95]}

 43%|████▎     | 429997/1000000 [5:05:40<5:02:45, 31.38it/s]global step 430000, trans_decision ep_re 106.8526865599508

{"global_step": 430000, "eval_re": [111.95971159965666, 41.85744369558214, 
62.59698065086753, 24.38691665364083, 89.29085377714462, 139.61121788837252, 
220.81760813165465, 54.64508057665876, 262.42936554551176, 60.93168708041869], 
"eval_len": [202, 56, 100, 97, 163, 237, 1000, 98, 1000, 230]}

 44%|████▍     | 439997/1000000 [5:12:50<4:56:01, 31.53it/s]global step 440000, trans_decision ep_re 126.07244485554256

{"global_step": 440000, "eval_re": [199.97011556922934, 11.732955271558579, 
13.140466435648575, 289.6294004664112, 10.57456707379187, 11.017415393801935, 
194.77745764484905, 80.50787626244004, 75.7168897754724, 373.65730466222254], 
"eval_len": [1000, 29, 20, 1000, 79, 35, 230, 147, 218, 734]}

 45%|████▍     | 449997/1000000 [5:19:50<4:51:04, 31.49it/s]global step 450000, trans_decision ep_re 120.14257547907592

{"global_step": 450000, "eval_re": [-74.35732620138488, 143.3514222209382, 
217.79121942763194, 201.23174078666332, -2.017222289136092, 348.8419497132139, 
23.59584198280061, 278.1790735972517, 26.628541287209515, 38.18051426557076], 
"eval_len": [1000, 229, 367, 425, 23, 427, 28, 507, 28, 60]}

 46%|████▌     | 459997/1000000 [5:26:50<4:45:23, 31.54it/s]global step 460000, trans_decision ep_re 165.40654787498426

{"global_step": 460000, "eval_re": [112.07855131712745, 125.42910301901571, 
68.89920111510096, 59.42159254347229, 252.63441496076453, 276.06888803722205, 
52.35236538008743, 345.00985874356974, 285.43954080387664, 76.73196282960558], 
"eval_len": [155, 270, 132, 86, 1000, 685, 75, 1000, 1000, 74]}

 47%|████▋     | 469997/1000000 [5:34:00<4:41:45, 31.35it/s]global step 470000, trans_decision ep_re 167.48051324804072

{"global_step": 470000, "eval_re": [66.3150356462603, 45.68008286115608, 
212.20001161981472, 598.0502770422125, 29.04486177812891, 140.67295978861478, 
151.02230556102253, 386.6413331560526, 12.479104681833098, 32.69916034531156], 
"eval_len": [115, 138, 334, 957, 84, 258, 310, 1000, 70, 88]}

 48%|████▊     | 479997/1000000 [5:41:00<4:35:24, 31.47it/s]global step 480000, trans_decision ep_re 257.4855454878127

{"global_step": 480000, "eval_re": [83.4618142382086, 7.464841401237715, 
30.05658970052138, 135.59701368239934, 562.4393202468373, 519.3963125917738, 
401.1783928361438, 451.6386589556438, 294.87089046993486, 88.75162075542636], 
"eval_len": [189, 54, 94, 265, 1000, 868, 1000, 1000, 1000, 129]}

 49%|████▉     | 489997/1000000 [5:48:10<4:29:10, 31.58it/s]global step 490000, trans_decision ep_re 193.21711380850465

{"global_step": 490000, "eval_re": [254.90926953235618, 306.4237756754459, 
23.627639040459496, 96.68827189864929, 335.56787143633653, 285.46153824575094, 
231.32573980657625, 103.1703459982052, 225.9648040192399, 69.03188243202679], 
"eval_len": [1000, 1000, 37, 110, 1000, 515, 423, 219, 534, 220]}

 50%|████▉     | 499999/1000000 [5:55:20<4:26:46, 31.24it/s]global step 500000, trans_decision ep_re 160.3593028964341

{"global_step": 500000, "eval_re": [324.82983381841944, 133.52290256909404, 
51.34341515312656, 379.15979810455036, 29.41165600511848, 3.5127413347874668, 
6.0719940862534285, 332.43568981333726, 269.2587016169436, 74.0462964627102], 
"eval_len": [1000, 307, 187, 632, 97, 81, 29, 689, 1000, 131]}

 51%|█████     | 509997/1000000 [6:02:20<4:16:25, 31.85it/s]global step 510000, trans_decision ep_re 334.60706551846295

{"global_step": 510000, "eval_re": [451.66377383593976, 402.5333749498995, 
420.39603666813565, 445.87961983368393, 30.876850395801167, 286.83883014185085, 
481.57465225785364, 372.26138315322754, 218.9796380905525, 235.06649585768534], 
"eval_len": [1000, 1000, 931, 937, 51, 1000, 1000, 1000, 368, 428]}

 52%|█████▏    | 519997/1000000 [6:09:40<4:09:33, 32.06it/s]global step 520000, trans_decision ep_re 169.56565861721373

{"global_step": 520000, "eval_re": [96.72985282628734, -193.14749637062013, 
224.8643509269223, 221.49443315427033, 287.69545570787574, 271.8594732631259, 
464.38682096646596, 242.6314568478548, -0.045007050047727204, 
79.18724590000275], "eval_len": [190, 1000, 1000, 638, 500, 497, 1000, 1000, 27,
154]}

 53%|█████▎    | 529999/1000000 [6:16:50<4:09:37, 31.38it/s]global step 530000, trans_decision ep_re 175.69872345485734

{"global_step": 530000, "eval_re": [106.55578598031587, 41.9725042251781, 
206.93943183692232, 156.13725662383368, 189.8682116293309, 382.0917516655864, 
-9.009516440725246, 283.4955615578116, 221.88343389783645, 177.05281357248322], 
"eval_len": [228, 51, 376, 296, 343, 1000, 48, 466, 440, 334]}

 54%|█████▍    | 539997/1000000 [6:23:50<4:02:01, 31.68it/s]global step 540000, trans_decision ep_re 103.63235180629586

{"global_step": 540000, "eval_re": [45.74367278587827, 202.1275774184607, 
94.53196569612797, 28.745677905333423, 247.6984497537446, 41.41632313749962, 
161.70669248353022, 120.88889959945746, 73.03229768279043, 20.431961600136088], 
"eval_len": [74, 1000, 172, 40, 1000, 92, 245, 367, 192, 28]}

 55%|█████▍    | 549997/1000000 [6:31:00<4:00:02, 31.24it/s]global step 550000, trans_decision ep_re 222.91208262344844

{"global_step": 550000, "eval_re": [280.668526211375, 4.728081027248034, 
132.46117462817392, 73.2481387307095, 143.46187814484705, 334.5179550848643, 
200.09353372955482, 435.850754754422, 131.07408341896522, 493.01670050432443], 
"eval_len": [1000, 22, 297, 113, 192, 1000, 365, 1000, 342, 1000]}

 56%|█████▌    | 559997/1000000 [6:38:10<3:55:22, 31.16it/s]global step 560000, trans_decision ep_re 89.41503778636408

{"global_step": 560000, "eval_re": [350.91933884824374, -10.48526987966208, 
-515.7421864631895, 430.421100380336, 416.308665484081, 158.53414490563847, 
25.64252253351478, -231.3201815616568, 89.96819981134578, 179.90404380498947], 
"eval_len": [1000, 37, 1000, 1000, 548, 330, 48, 1000, 191, 376]}

 57%|█████▋    | 569997/1000000 [6:45:20<3:51:18, 30.98it/s]global step 570000, trans_decision ep_re 145.05288756860122

{"global_step": 570000, "eval_re": [102.12924090394483, -5.988540602110694, 
197.87361077935287, 249.80789931441595, 95.45863585497649, 111.40597871097614, 
31.32854365278691, 287.8985802612198, 234.26155700404206, 146.35336980640807], 
"eval_len": [621, 21, 1000, 561, 219, 682, 25, 1000, 1000, 273]}

 58%|█████▊    | 579997/1000000 [6:52:30<3:42:26, 31.47it/s]global step 580000, trans_decision ep_re 185.3806488362692

{"global_step": 580000, "eval_re": [202.23338000602743, -99.38236400385327, 
18.85635522228942, 617.8719352111808, 190.04781315635927, 173.66560618501563, 
266.56321385870905, 159.90185764841092, 26.432819060788944, 297.6158720177637], 
"eval_len": [510, 933, 24, 1000, 331, 365, 1000, 278, 30, 1000]}

 59%|█████▉    | 589997/1000000 [6:59:40<3:34:19, 31.88it/s]global step 590000, trans_decision ep_re 228.73485370594372

{"global_step": 590000, "eval_re": [30.017753698506954, 389.3769604652583, 
-0.9623264264485625, 107.69849449849168, 262.71176371113944, 432.1179456283544, 
432.2899272461409, 66.63041335714239, 268.29472524879725, 299.17287963205456], 
"eval_len": [104, 1000, 28, 291, 1000, 665, 1000, 141, 1000, 1000]}

 60%|█████▉    | 599997/1000000 [7:06:50<3:31:42, 31.49it/s]global step 600000, trans_decision ep_re 163.59826854712097

{"global_step": 600000, "eval_re": [261.1119754452928, 302.13783515182877, 
254.91688688070278, 13.186179520526279, 4.125798329121398, 35.106988314484695, 
108.46066703126347, 466.33127778764265, 115.29097525036522, 75.31410175998171], 
"eval_len": [385, 1000, 385, 41, 51, 67, 249, 840, 190, 191]}

 61%|██████    | 609997/1000000 [7:13:50<3:23:16, 31.98it/s]global step 610000, trans_decision ep_re 206.35864164356212

{"global_step": 610000, "eval_re": [370.95584467540675, 190.0668159556366, 
53.309960131703754, 309.88674387674973, 391.7939945345549, 140.3915347148627, 
311.6016029685432, 85.87920726780595, 186.82194383563862, 22.878768474718914], 
"eval_len": [1000, 435, 82, 1000, 1000, 219, 1000, 283, 312, 57]}

 62%|██████▏   | 619997/1000000 [7:21:00<3:22:28, 31.28it/s]global step 620000, trans_decision ep_re 217.90261281895647

{"global_step": 620000, "eval_re": [163.03154022749314, 53.37888901578246, 
310.862609448315, 338.53553421088395, 218.97106917806866, 127.63007509664914, 
439.6646527700342, 462.9687598169586, 51.24703620030792, 12.735962225071354], 
"eval_len": [436, 304, 1000, 1000, 712, 438, 1000, 1000, 114, 60]}

 63%|██████▎   | 629997/1000000 [7:28:20<3:15:42, 31.51it/s]global step 630000, trans_decision ep_re 180.06788599941078

{"global_step": 630000, "eval_re": [209.19974932373424, 315.0426324323114, 
235.68073786596256, 90.31091046755368, 274.229946442772, 6.840597316070184, 
124.07264110597048, 321.12765841710836, 13.0655070340004, 211.1084795886244], 
"eval_len": [591, 612, 435, 117, 546, 54, 252, 493, 50, 455]}

 64%|██████▍   | 639997/1000000 [7:35:20<3:08:14, 31.87it/s]global step 640000, trans_decision ep_re 148.20159320878966

{"global_step": 640000, "eval_re": [204.00339963167585, 137.35687147618322, 
289.9272379322541, 232.3666036749062, 15.974543878775542, 62.0015120133735, 
471.796632624895, 14.746719813646088, 46.08832012925214, 7.754090912934857], 
"eval_len": [1000, 258, 1000, 369, 26, 186, 1000, 189, 47, 28]}

 65%|██████▍   | 649997/1000000 [7:42:00<3:04:21, 31.64it/s]global step 650000, trans_decision ep_re 55.01141496316196

{"global_step": 650000, "eval_re": [15.050191466806126, 163.67563433241335, 
2.320412708821974, 47.276925872732285, 10.140928231642828, -4.854640810020048, 
110.28996950115776, -9.742917389848742, 150.06090584815018, 65.89673986976386], 
"eval_len": [43, 192, 60, 111, 24, 186, 179, 52, 1000, 238]}

 66%|██████▌   | 659999/1000000 [7:49:10<2:57:55, 31.85it/s]global step 660000, trans_decision ep_re 192.9665275388216

{"global_step": 660000, "eval_re": [27.656659008780437, 47.972614579532376, 
145.91098976526612, 30.841425296872107, 447.3301469155355, 474.9588060405696, 
28.21429869653779, 512.0333867808927, 18.72970924654306, 196.01723905768648], 
"eval_len": [24, 72, 194, 29, 771, 1000, 27, 1000, 27, 313]}

 67%|██████▋   | 669997/1000000 [7:56:10<2:53:42, 31.66it/s]global step 670000, trans_decision ep_re 245.80474421978715

{"global_step": 670000, "eval_re": [313.8124490661781, 196.91775772105441, 
331.53862835666035, 189.46491662758433, 12.142550997343614, 138.8743469101848, 
174.54090046497197, 403.2596137138484, 415.65544971028953, 281.8408286297561], 
"eval_len": [1000, 1000, 1000, 379, 22, 595, 355, 1000, 843, 537]}

 68%|██████▊   | 679997/1000000 [8:03:10<2:43:17, 32.66it/s]global step 680000, trans_decision ep_re 205.1554609881427

{"global_step": 680000, "eval_re": [96.99607688135525, 345.18884700373457, 
323.97356286409456, 66.66188705113193, 102.72118304167836, 374.7193536216859, 
300.3949697722191, 186.4517013658288, 125.76502018011911, 128.68200809957986], 
"eval_len": [168, 748, 770, 124, 157, 1000, 1000, 279, 160, 206]}

 69%|██████▉   | 689997/1000000 [8:10:10<2:42:31, 31.79it/s]global step 690000, trans_decision ep_re 156.7215498185836

{"global_step": 690000, "eval_re": [129.84482213773245, 139.41053754457874, 
15.229460840264514, 167.3537246564323, 304.57800713423677, 115.11111939764544, 
73.51054249658651, 100.89301990699094, 258.00374390568544, 263.28052016568284], 
"eval_len": [274, 285, 30, 296, 1000, 202, 297, 164, 1000, 1000]}

 70%|██████▉   | 699997/1000000 [8:17:20<2:37:37, 31.72it/s]global step 700000, trans_decision ep_re 142.4888058516229

{"global_step": 700000, "eval_re": [236.998632591852, 40.75528436437098, 
208.1473108923704, 289.3427211567334, 8.092968680587163, 304.1823323411011, 
85.6217232047802, 168.46302332711352, 32.15069807161083, 51.13336388570978], 
"eval_len": [343, 43, 1000, 1000, 23, 1000, 282, 312, 93, 75]}

 71%|███████   | 709997/1000000 [8:24:20<2:30:47, 32.05it/s]global step 710000, trans_decision ep_re 164.68205978217193

{"global_step": 710000, "eval_re": [46.92996067797092, 479.8261136863744, 
335.0905314934608, 45.643277850582656, 39.27317775720425, 43.53998740821257, 
120.76674401480369, 220.42404797397256, 72.57289749454395, 242.7538594645935], 
"eval_len": [94, 796, 780, 222, 68, 55, 236, 1000, 138, 1000]}

 72%|███████▏  | 719999/1000000 [8:31:20<2:26:19, 31.89it/s]global step 720000, trans_decision ep_re 139.0031310527682

{"global_step": 720000, "eval_re": [68.13973564631569, 25.024132682224877, 
24.908514703033745, 37.62923219735846, 28.93430269216665, 467.9219261474269, 
75.75948067070861, 424.73939393179177, 217.1549424459482, 19.819649410707154], 
"eval_len": [144, 38, 34, 83, 151, 930, 194, 1000, 405, 133]}

 73%|███████▎  | 729997/1000000 [8:38:20<2:20:30, 32.03it/s]global step 730000, trans_decision ep_re 160.65812722757374

{"global_step": 730000, "eval_re": [4.38263611551222, 544.6023114452059, 
198.295098191128, 288.8195661997707, 91.54975938860949, 7.649405426026377, 
13.346548901437767, 121.54579283032487, 267.03113848655335, 69.35901529116869], 
"eval_len": [37, 837, 343, 464, 170, 25, 28, 307, 1000, 142]}

 74%|███████▍  | 739997/1000000 [8:45:20<2:15:57, 31.87it/s]global step 740000, trans_decision ep_re 145.97896076863137

{"global_step": 740000, "eval_re": [231.31594737422262, 22.4762505711174, 
246.8047402350042, 6.739821746154947, 207.55250202501378, 8.338170224034997, 
96.7164546637384, 32.97121890576548, 164.17551426260758, 442.6989876786545], 
"eval_len": [1000, 26, 396, 54, 1000, 26, 273, 55, 1000, 931]}

 75%|███████▍  | 749997/1000000 [8:52:20<2:11:12, 31.76it/s]global step 750000, trans_decision ep_re 312.0148627679376

{"global_step": 750000, "eval_re": [542.3264292621305, 345.6087143411981, 
103.44537011089278, 354.4137426784711, 497.8343916522019, 29.656340944848647, 
170.25955915360052, 187.63703859406178, 551.4799240616567, 337.4871168803139], 
"eval_len": [1000, 1000, 194, 1000, 1000, 72, 254, 302, 1000, 1000]}

 76%|███████▌  | 759997/1000000 [8:59:30<2:05:05, 31.98it/s]global step 760000, trans_decision ep_re 146.1751695752284

{"global_step": 760000, "eval_re": [195.0286288468765, 130.7600129681262, 
150.98924217049716, 222.3904818910867, 312.99161576383995, 28.368709649625075, 
22.079404799880272, 113.28933860292011, 73.69152581261208, 212.16273524682], 
"eval_len": [495, 272, 257, 1000, 1000, 34, 28, 174, 120, 1000]}

 77%|███████▋  | 769997/1000000 [9:06:30<2:01:14, 31.62it/s]global step 770000, trans_decision ep_re 196.9693061067845

{"global_step": 770000, "eval_re": [215.97459652836088, 104.1436975533703, 
159.6522228942414, 216.56938150641315, 130.35231668555778, 133.09728139031594, 
271.26362377630824, 128.8376359096423, 383.59884383413714, 226.20346098949793], 
"eval_len": [1000, 320, 275, 461, 266, 271, 473, 325, 756, 1000]}

 78%|███████▊  | 779997/1000000 [9:13:30<1:55:37, 31.71it/s]global step 780000, trans_decision ep_re 248.32903497124306

{"global_step": 780000, "eval_re": [45.14737565456005, 564.823580670149, 
262.1295289980376, 64.31017678175216, 288.3083599866939, 242.10656655089153, 
541.134153848522, 20.773579735193497, 286.4768878962445, 168.0801395903862], 
"eval_len": [86, 1000, 1000, 131, 385, 1000, 1000, 28, 1000, 224]}

 79%|███████▉  | 789997/1000000 [9:20:40<1:49:29, 31.97it/s]global step 790000, trans_decision ep_re 132.79099019407818

{"global_step": 790000, "eval_re": [13.072473119242195, 1.5237796431877582, 
142.62762864321917, 55.018559532624344, 271.98712614600464, 35.19591961182091, 
202.95425780472834, 410.7749621868362, 30.536577145079406, 164.2186181080387], 
"eval_len": [27, 18, 459, 86, 418, 36, 503, 927, 141, 316]}

 80%|███████▉  | 799997/1000000 [9:27:40<1:43:55, 32.07it/s]global step 800000, trans_decision ep_re 168.50543675160594

{"global_step": 800000, "eval_re": [190.6063309186466, 229.36160750301397, 
11.75699918300389, 284.53518757719314, 137.53747644708452, 42.82524581115984, 
186.25952371968265, 113.8393014284579, 43.25760642846417, 445.0750884993523], 
"eval_len": [1000, 455, 12, 1000, 267, 51, 461, 290, 58, 1000]}

 81%|████████  | 809997/1000000 [9:34:40<1:40:04, 31.64it/s]global step 810000, trans_decision ep_re 220.3193790662755

{"global_step": 810000, "eval_re": [111.1256099335828, 294.0563367127122, 
233.0871977136368, 283.14223920418567, 340.075769359064, 198.57548710003093, 
172.51244318629224, 544.8130621289339, 12.866278424658528, 12.939366899658102], 
"eval_len": [1000, 760, 451, 1000, 1000, 345, 1000, 1000, 26, 27]}

 82%|████████▏ | 819997/1000000 [9:41:50<1:33:10, 32.20it/s]global step 820000, trans_decision ep_re 282.56598232738065

{"global_step": 820000, "eval_re": [310.43648494397036, 510.22903927300615, 
260.23715265775854, 417.2108662090006, 262.4060451270718, 288.2462162648478, 
31.184070587044797, 368.49009333514806, 292.82720974696684, 84.39264512899157], 
"eval_len": [1000, 1000, 1000, 903, 537, 1000, 159, 605, 565, 1000]}

 83%|████████▎ | 829997/1000000 [9:49:00<1:28:26, 32.03it/s]global step 830000, trans_decision ep_re 256.609619377056

{"global_step": 830000, "eval_re": [496.2026343505608, 249.28099241662613, 
222.85060016498824, 164.17275497979438, 251.7929515105621, 254.1989793865968, 
522.5877072823703, 21.997549205366482, 283.3599089646835, 99.65211550901113], 
"eval_len": [935, 1000, 1000, 1000, 1000, 1000, 922, 123, 516, 144]}

 84%|████████▍ | 839997/1000000 [9:56:10<1:23:55, 31.77it/s]global step 840000, trans_decision ep_re 234.26545074704853

{"global_step": 840000, "eval_re": [519.7277319030607, 379.9757386377206, 
391.37728190251204, 338.61814181397597, 16.185568096492858, 27.998556299682438, 
173.43981769273145, 78.14325574182578, 363.1213623602631, 54.06705302222069], 
"eval_len": [1000, 1000, 1000, 789, 41, 164, 455, 75, 1000, 239]}

 85%|████████▍ | 849997/1000000 [10:03:10<1:17:58, 32.06it/s]global step 850000, trans_decision ep_re 100.87955702878853

{"global_step": 850000, "eval_re": [1.5255166618109972, 90.14801771182499, 
239.51648776840003, 8.612404555044739, 246.85492595407334, 4.928077768792306, 
149.49124293336357, -26.197574146772006, -14.367787199580535, 
308.2842582809278], "eval_len": [26, 263, 1000, 27, 504, 1000, 239, 439, 1000, 
1000]}

 86%|████████▌ | 859997/1000000 [10:10:20<1:12:41, 32.10it/s]global step 860000, trans_decision ep_re 192.58395942073724

{"global_step": 860000, "eval_re": [113.50827277690517, 154.97843272025912, 
211.9285074128339, 549.7512819635583, 360.6665435315608, 61.506612093348316, 
266.0932796451296, 98.13001620262735, 0.02170179415186302, 109.25494606699812], 
"eval_len": [197, 317, 461, 1000, 1000, 72, 1000, 204, 26, 119]}

 87%|████████▋ | 869997/1000000 [10:17:20<1:07:59, 31.87it/s]global step 870000, trans_decision ep_re 322.78007968381473

{"global_step": 870000, "eval_re": [371.4240726539636, 196.21061753819203, 
418.9292760355376, 351.4058481018963, 239.36504053552144, 532.9597654894792, 
393.61657949722854, 414.31826500725754, 270.6046862969212, 38.9666456821502], 
"eval_len": [1000, 485, 1000, 1000, 696, 1000, 899, 1000, 515, 78]}

 88%|████████▊ | 879997/1000000 [10:24:30<1:02:56, 31.77it/s]global step 880000, trans_decision ep_re 201.80235836564987

{"global_step": 880000, "eval_re": [310.4726612135718, 276.2792854283291, 
182.2893160497947, 28.011042461807502, 274.517537767183, 193.94444174847976, 
-0.8232721150662159, 262.2817380948712, 102.94231120384394, 388.1085218036839], 
"eval_len": [1000, 1000, 1000, 26, 1000, 1000, 20, 723, 245, 1000]}

 89%|████████▉ | 889997/1000000 [10:31:40<56:55, 32.21it/s]global step 890000, trans_decision ep_re 251.25610944118316

{"global_step": 890000, "eval_re": [474.8058957027093, 296.19804043005985, 
392.9823521306488, 3.821408312110681, 270.6125952546289, 440.5400874278922, 
269.55573997447544, 28.785192118969363, 279.21788536497223, 56.041897695365016],
"eval_len": [1000, 622, 835, 37, 427, 716, 456, 49, 1000, 135]}

 90%|████████▉ | 899997/1000000 [10:38:40<51:47, 32.18it/s]global step 900000, trans_decision ep_re 217.11374812039654

{"global_step": 900000, "eval_re": [30.474256151730646, 143.90149608720907, 
45.183216137201235, 532.629202290576, 267.73350683206064, 333.39631813175714, 
21.369042785869144, 391.8162823938557, 352.31687289175056, 52.31728750195505], 
"eval_len": [76, 436, 80, 1000, 1000, 1000, 34, 915, 1000, 135]}

 91%|█████████ | 909997/1000000 [10:45:50<47:37, 31.50it/s]global step 910000, trans_decision ep_re 203.00701434688065

{"global_step": 910000, "eval_re": [94.88795481956583, 199.60106116812932, 
263.33757561840565, 61.20263345808027, 40.17396676477706, 334.3141654210247, 
341.0899874473258, 239.9978272671907, 168.74670081866464, 286.7182706856426], 
"eval_len": [189, 1000, 1000, 200, 90, 924, 814, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [10:53:00<42:03, 31.70it/s]global step 920000, trans_decision ep_re 234.8702807098469

{"global_step": 920000, "eval_re": [200.19021406300018, 83.575612193323, 
445.1649546355376, 40.734091747043934, 5.00841164639394, 247.1394135006258, 
413.36721770375254, 429.5000623447367, 248.99904556304523, 235.0237837010102], 
"eval_len": [1000, 169, 1000, 113, 93, 1000, 1000, 1000, 1000, 782]}

 93%|█████████▎| 929997/1000000 [11:00:10<36:23, 32.06it/s]global step 930000, trans_decision ep_re 239.15393719640662

{"global_step": 930000, "eval_re": [367.8637380369568, 535.8623333332689, 
30.304863480858828, 435.4731654712578, 9.3493898499445, 302.1050467364281, 
105.35930133200125, 238.40552883346334, 348.6724103220812, 18.1435945678053], 
"eval_len": [730, 1000, 176, 1000, 14, 1000, 239, 1000, 1000, 44]}

 94%|█████████▍| 939997/1000000 [11:07:10<31:27, 31.79it/s]global step 940000, trans_decision ep_re 127.31913830088281

{"global_step": 940000, "eval_re": [198.86590168711837, 16.11813882642643, 
288.64845734233506, 68.61736136241711, 0.9149909161380017, -61.415477152880435, 
66.15547742870604, 56.83018198691068, 192.80198754116196, 445.65436307049504], 
"eval_len": [326, 150, 468, 121, 23, 1000, 106, 130, 427, 736]}

 95%|█████████▍| 949997/1000000 [11:14:10<26:03, 31.99it/s]global step 950000, trans_decision ep_re 187.42537451060426

{"global_step": 950000, "eval_re": [232.01009339785585, 33.10241389708802, 
189.93571229624462, 31.069193834075836, 162.40695212607076, 315.6178358117153, 
493.2611401110445, 293.95592064377433, 61.18696827493115, 61.70751471324235], 
"eval_len": [454, 86, 462, 42, 466, 698, 1000, 576, 221, 146]}

 96%|█████████▌| 959997/1000000 [11:21:20<21:06, 31.60it/s]global step 960000, trans_decision ep_re 199.72962055410113

{"global_step": 960000, "eval_re": [155.6768663637257, 363.40225360993674, 
203.49935225885096, 533.0701056099492, 7.9455898984819395, 253.75251339166448, 
-3.6661550099047835, 153.73411641629994, 118.52144211631317, 211.360120885694], 
"eval_len": [279, 1000, 394, 1000, 9, 471, 23, 1000, 1000, 366]}

 97%|█████████▋| 969997/1000000 [11:28:20<15:43, 31.79it/s]global step 970000, trans_decision ep_re 268.2563244225468

{"global_step": 970000, "eval_re": [36.56452551691318, 533.0638963562357, 
3.671708951508707, 329.6437245770594, 396.33867142318707, 437.09747999336497, 
55.36360011119448, 264.2787033510746, 217.70053294834796, 408.84040099658154], 
"eval_len": [106, 1000, 58, 1000, 1000, 1000, 125, 632, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [11:35:30<10:23, 32.07it/s]global step 980000, trans_decision ep_re 156.047022688432

{"global_step": 980000, "eval_re": [171.66190240285013, 322.3213725247852, 
113.86538200835255, 3.4118028856202374, 3.0077481964356974, 55.20095505039073, 
288.6900597006164, 214.66967541206853, 232.380166297396, 155.26116240580478], 
"eval_len": [477, 1000, 205, 14, 27, 132, 1000, 1000, 1000, 485]}

 99%|█████████▉| 989997/1000000 [11:42:40<05:13, 31.92it/s]global step 990000, trans_decision ep_re 271.37219810825655

{"global_step": 990000, "eval_re": [330.94317493810087, 327.3358992165146, 
60.907051077835234, 555.0365541849942, 340.381955606296, 9.196852351360539, 
367.92097644950496, 381.06871987435994, 214.7189656032743, 126.21183178032511], 
"eval_len": [1000, 652, 93, 1000, 1000, 26, 1000, 727, 525, 377]}

100%|█████████▉| 999997/1000000 [11:49:40<00:00, 31.51it/s]global step 1000000, trans_decision ep_re 202.67901017056727

{"global_step": 1000000, "eval_re": [569.5436496836608, 196.09538215647973, 
151.13823834754214, 243.8074648438742, 417.0202676797818, 4.755932658470265, 
14.816699065983496, 136.8014561035584, 174.48617830599463, 118.32483286032742], 
"eval_len": [1000, 1000, 295, 428, 1000, 45, 43, 241, 258, 188]}

100%|██████████| 1000000/1000000 [11:49:53<00:00, 23.48it/s]
