
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [06:30<14:44:53, 18.65it/s]global step 10000, trans_decision ep_re 198.28491687512255

{"global_step": 10000, "eval_re": [175.8363798211711, 141.17937173070666, 
191.43769704804802, 108.90584249628628, 147.0044405503575, 188.2696358292344, 
123.59330207562651, 173.32770366989305, 363.72508545869795, 369.56971007120416],
"eval_len": [34, 27, 37, 21, 28, 36, 24, 33, 67, 72]}

  2%|▏         | 19998/1000000 [18:50<14:45:13, 18.45it/s]global step 20000, trans_decision ep_re 153.3306350318975

{"global_step": 20000, "eval_re": [118.0932677221065, 103.18615019160413, 
119.6237563418315, 145.56388879459143, 304.7840287512561, 145.22179189038124, 
185.80520675713603, 157.4397156370377, 114.12963030294415, 139.45891393008608], 
"eval_len": [23, 20, 23, 28, 58, 28, 35, 30, 22, 27]}

  3%|▎         | 29998/1000000 [31:20<14:36:56, 18.44it/s]global step 30000, trans_decision ep_re 161.76083441281375

{"global_step": 30000, "eval_re": [313.01368049934587, 140.18708903940492, 
167.32917432740825, 158.03968374036972, 163.28990537162778, 146.89650216413906, 
119.68353316205507, 114.72565104965895, 126.47489397893024, 167.9682307951975], 
"eval_len": [59, 27, 32, 31, 31, 28, 23, 22, 24, 32]}

  4%|▍         | 39998/1000000 [43:40<14:24:36, 18.51it/s]global step 40000, trans_decision ep_re 177.22686059740835

{"global_step": 40000, "eval_re": [350.2578940714168, 119.81021266250607, 
189.61898679524526, 133.79921291615057, 114.86758178367747, 186.50289981108037, 
141.3174579163844, 129.99971089718827, 171.02517518334514, 235.06947393708904], 
"eval_len": [68, 23, 36, 26, 22, 36, 27, 25, 33, 44]}

  5%|▍         | 49998/1000000 [56:10<14:16:28, 18.49it/s]global step 50000, trans_decision ep_re 188.46238560854096

{"global_step": 50000, "eval_re": [141.15510812108647, 142.23452950942934, 
135.0050225821599, 157.6217748478738, 179.10581590447683, 390.89921511757814, 
146.49060307872645, 119.81526600000043, 340.9269397552307, 131.3695811688472], 
"eval_len": [27, 27, 26, 30, 34, 71, 28, 23, 67, 25]}

  6%|▌         | 59998/1000000 [1:08:40<14:10:29, 18.42it/s]global step 60000, trans_decision ep_re 202.5783434640336

{"global_step": 60000, "eval_re": [277.72680240191244, 187.7589752206529, 
131.40895798792312, 442.8265319427843, 108.88152242888889, 190.39954703439656, 
175.53408745417005, 141.02194673618664, 168.52059981483347, 201.70446361858768],
"eval_len": [54, 36, 25, 82, 21, 37, 34, 27, 32, 38]}

  7%|▋         | 69998/1000000 [1:21:00<14:02:54, 18.39it/s]global step 70000, trans_decision ep_re 142.73909697195023

{"global_step": 70000, "eval_re": [135.78311808399073, 179.28243621596596, 
120.91789044929514, 156.60931788113126, 155.72613257888142, 156.34887116933984, 
139.63723938016204, 124.54381384834208, 114.46003288801647, 144.08211722437716],
"eval_len": [26, 34, 23, 30, 30, 30, 27, 24, 22, 28]}

  8%|▊         | 79998/1000000 [1:33:30<13:50:49, 18.46it/s]global step 80000, trans_decision ep_re 193.4891715393634

{"global_step": 80000, "eval_re": [178.093429933175, 158.03814154057696, 
103.86528511421963, 193.4753931219573, 146.04828509234966, 379.8167353033688, 
168.53907070107076, 145.5701284381895, 330.70274181775994, 130.7425043309667], 
"eval_len": [34, 30, 20, 37, 28, 70, 32, 28, 63, 25]}

  9%|▉         | 89998/1000000 [1:45:50<13:45:45, 18.37it/s]global step 90000, trans_decision ep_re 162.7112863280752

{"global_step": 90000, "eval_re": [177.44292695392042, 134.85648261510966, 
157.83965610825905, 149.92612324783244, 129.34877274706557, 175.01548716776736, 
155.6680582386374, 275.92529189887955, 161.73572829039344, 109.35433601288722], 
"eval_len": [34, 26, 30, 29, 25, 34, 30, 52, 31, 21]}

 10%|▉         | 99998/1000000 [1:58:20<13:35:20, 18.40it/s]global step 100000, trans_decision ep_re 157.2121667618996

{"global_step": 100000, "eval_re": [124.6648313494399, 114.06103691411835, 
189.17788012773192, 266.6134831229505, 119.79942785069035, 191.12491579818013, 
131.63656040653308, 175.4675099380108, 145.53712823155968, 114.03889387978143], 
"eval_len": [24, 22, 36, 51, 23, 36, 25, 34, 28, 22]}

 11%|█         | 109998/1000000 [2:10:40<13:23:51, 18.45it/s]global step 110000, trans_decision ep_re 156.13753153144623

{"global_step": 110000, "eval_re": [157.6905908204675, 119.5610432731596, 
193.31588208087106, 145.5503587122782, 211.09494167026003, 197.18603666733907, 
129.70464940501193, 152.32121659329297, 130.39970264377519, 124.5508934480069], 
"eval_len": [30, 23, 37, 28, 41, 38, 25, 29, 25, 24]}

 12%|█▏        | 119998/1000000 [2:23:10<13:17:04, 18.40it/s]global step 120000, trans_decision ep_re 262.74271718951

{"global_step": 120000, "eval_re": [380.0039182784626, 179.8481171506001, 
185.9674592175335, 124.7277599484092, 187.1206247106244, 130.79479381956594, 
532.4076845654951, 378.5875416803347, 166.45937274189677, 361.5098997821778], 
"eval_len": [70, 35, 36, 24, 36, 25, 97, 70, 32, 73]}

 13%|█▎        | 129998/1000000 [2:35:30<13:00:54, 18.57it/s]global step 130000, trans_decision ep_re 177.01589644860593

{"global_step": 130000, "eval_re": [119.62381203597027, 164.8420101779331, 
140.14967175076958, 168.5756664574641, 233.32876335029954, 136.46723107246808, 
150.92247720915506, 119.73134653511238, 333.3543221990424, 203.16366369784458], 
"eval_len": [23, 32, 27, 33, 44, 26, 29, 23, 62, 39]}

 14%|█▍        | 139998/1000000 [2:48:00<12:49:37, 18.62it/s]global step 140000, trans_decision ep_re 241.06177147995353

{"global_step": 140000, "eval_re": [120.35181387486732, 446.9857680394164, 
147.86459933364432, 190.23755025676545, 526.1324727624319, 212.51856435118682, 
152.59395951334585, 320.8573382283113, 162.38136004941353, 130.69428839015242], 
"eval_len": [23, 81, 28, 36, 100, 40, 29, 58, 31, 25]}

 15%|█▍        | 149998/1000000 [3:00:20<12:44:55, 18.52it/s]global step 150000, trans_decision ep_re 180.65256220123425

{"global_step": 150000, "eval_re": [183.13656228276113, 109.32350492271347, 
429.72466418418054, 171.68843068724067, 168.7492218133162, 131.10875174901528, 
150.5094664595044, 103.07366819528855, 188.3838587235408, 170.8274929947812], 
"eval_len": [35, 21, 80, 33, 33, 25, 29, 20, 36, 33]}

 16%|█▌        | 159998/1000000 [3:12:50<12:38:49, 18.45it/s]global step 160000, trans_decision ep_re 153.3997845978459

{"global_step": 160000, "eval_re": [104.02008772953447, 131.7637914050514, 
147.50458027846534, 140.68008710257598, 152.75146263571426, 135.40114038166186, 
192.80197632557432, 213.36300868525976, 195.93463878678998, 119.77707264783166],
"eval_len": [20, 25, 28, 27, 29, 26, 37, 40, 38, 23]}

 17%|█▋        | 169998/1000000 [3:25:10<12:23:34, 18.60it/s]global step 170000, trans_decision ep_re 152.72273029681975

{"global_step": 170000, "eval_re": [154.02229451519855, 152.39438707663268, 
140.2438185656174, 119.31504307033393, 178.75740786732288, 174.4293750571363, 
108.69041909289564, 172.07763458628702, 148.21663713332487, 179.08028600344818],
"eval_len": [30, 29, 27, 23, 34, 34, 21, 33, 29, 34]}

 18%|█▊        | 179998/1000000 [3:37:40<12:31:02, 18.20it/s]global step 180000, trans_decision ep_re 152.02788409247447

{"global_step": 180000, "eval_re": [267.36138152455607, 113.6537355865406, 
149.62818853711968, 157.69586504984042, 136.40264555978683, 125.54134841414395, 
135.52425807302953, 159.19919192240803, 133.78196326512236, 141.4902629921972], 
"eval_len": [50, 22, 29, 30, 26, 24, 26, 31, 26, 27]}

 19%|█▉        | 189998/1000000 [3:50:10<12:16:27, 18.33it/s]global step 190000, trans_decision ep_re 182.79221103938417

{"global_step": 190000, "eval_re": [348.42716676520934, 154.8416763571677, 
185.71136863883058, 130.4312904496003, 193.6712388550255, 146.28892338349, 
125.50783361794716, 114.02576722420913, 293.1273973404959, 135.88944776186605], 
"eval_len": [66, 30, 36, 25, 37, 28, 24, 22, 58, 26]}

 20%|█▉        | 199998/1000000 [4:02:30<11:55:37, 18.63it/s]global step 200000, trans_decision ep_re 152.74260857453788

{"global_step": 200000, "eval_re": [125.08063152842874, 145.14269487216728, 
146.70813625858975, 120.35500002217132, 157.94855252033557, 333.1203531930881, 
135.72540143341848, 128.81595249521615, 130.94002870886078, 103.58933471310272],
"eval_len": [24, 28, 28, 23, 30, 64, 26, 25, 25, 20]}

 21%|██        | 209998/1000000 [4:15:00<11:56:24, 18.38it/s]global step 210000, trans_decision ep_re 161.31205871682542

{"global_step": 210000, "eval_re": [161.54314608829864, 172.6045660307067, 
160.5692451365055, 188.7285752987328, 176.84620476282112, 192.90827436485552, 
130.5335575503041, 130.0375733960224, 180.42579903238152, 118.92364550762593], 
"eval_len": [31, 33, 31, 37, 34, 37, 25, 25, 34, 23]}

 22%|██▏       | 219998/1000000 [4:27:20<11:44:04, 18.46it/s]global step 220000, trans_decision ep_re 188.970921255793

{"global_step": 220000, "eval_re": [211.92270277346398, 125.00947319892701, 
320.1087792760991, 135.21769600716146, 145.026316624688, 161.7399166348515, 
219.72575409631375, 180.47719508018554, 190.75319516165814, 199.72818370458162],
"eval_len": [40, 24, 60, 26, 28, 31, 43, 35, 37, 40]}

 23%|██▎       | 229998/1000000 [4:39:50<11:32:25, 18.53it/s]global step 230000, trans_decision ep_re 226.54953173404002

{"global_step": 230000, "eval_re": [129.8292955621165, 159.3990769014063, 
183.1738034650876, 135.07998581942047, 151.0570624365236, 167.56613524654307, 
141.72876163330184, 388.0664896173375, 416.448725813747, 393.14598084491604], 
"eval_len": [25, 31, 35, 26, 29, 32, 27, 72, 75, 72]}

 24%|██▍       | 239998/1000000 [4:52:10<11:26:28, 18.45it/s]global step 240000, trans_decision ep_re 149.90827892116485

{"global_step": 240000, "eval_re": [157.2221058886911, 150.46960434256928, 
130.10323722930417, 171.72878400618688, 134.5245846064258, 160.60859220222164, 
119.56267911391743, 140.12176654039342, 162.1415511067515, 172.59988417518727], 
"eval_len": [30, 29, 25, 33, 26, 31, 23, 27, 31, 33]}

 25%|██▍       | 249998/1000000 [5:04:40<11:12:47, 18.58it/s]global step 250000, trans_decision ep_re 148.68692757833432

{"global_step": 250000, "eval_re": [139.50189841283648, 148.94868873611105, 
197.2779243255676, 136.61640628504557, 120.2912568533664, 146.29812192963433, 
175.81490432071803, 135.37799878001587, 114.24602020958613, 172.4960559304617], 
"eval_len": [27, 29, 38, 26, 23, 28, 34, 26, 22, 33]}

 26%|██▌       | 259998/1000000 [5:17:00<11:02:40, 18.61it/s]global step 260000, trans_decision ep_re 156.3484079568332

{"global_step": 260000, "eval_re": [174.49256217228648, 108.90399869601933, 
155.87833856471653, 346.1255021881089, 125.4908428207637, 120.52463820940999, 
168.05999414188548, 119.43696868429399, 119.77448314709062, 124.79675094375673],
"eval_len": [34, 21, 30, 65, 24, 23, 32, 23, 23, 24]}

 27%|██▋       | 269998/1000000 [5:29:20<10:48:39, 18.76it/s]global step 270000, trans_decision ep_re 178.13721965566066

{"global_step": 270000, "eval_re": [139.76378845310938, 348.9051918911211, 
150.76980832176233, 195.6473027409817, 135.0048112809165, 162.03304111312187, 
167.46818902077243, 126.40353223870495, 161.64408232484456, 193.7324491712719], 
"eval_len": [27, 68, 29, 37, 26, 31, 32, 24, 31, 37]}

 28%|██▊       | 279998/1000000 [5:41:40<10:48:03, 18.52it/s]global step 280000, trans_decision ep_re 166.5596012505692

{"global_step": 280000, "eval_re": [172.73708620210206, 144.0257078134486, 
124.75797230709875, 175.3461419958901, 150.57296630901843, 145.95020579943775, 
252.3777351526356, 154.0061220277034, 179.41715486298915, 166.4049200353679], 
"eval_len": [33, 28, 24, 34, 29, 28, 48, 30, 35, 32]}

 29%|██▉       | 289998/1000000 [5:54:00<10:37:38, 18.56it/s]global step 290000, trans_decision ep_re 178.61679181316043

{"global_step": 290000, "eval_re": [136.66591390397528, 108.35675570961574, 
145.8484368233852, 141.03693412169864, 125.41552391902678, 124.97636344259044, 
192.05571224161164, 364.55193681041044, 278.75845400791246, 168.5018871513778], 
"eval_len": [26, 21, 28, 27, 24, 24, 37, 71, 52, 32]}

 30%|██▉       | 299998/1000000 [6:06:30<10:27:50, 18.58it/s]global step 300000, trans_decision ep_re 162.3963230239555

{"global_step": 300000, "eval_re": [174.9166947677988, 166.42751767269053, 
150.79401826461273, 151.6188890588671, 200.2596984758341, 167.89827956759328, 
141.72234077076425, 174.6307200030082, 181.64817728037013, 114.04689437801596], 
"eval_len": [33, 32, 29, 29, 39, 32, 27, 34, 35, 22]}

 31%|███       | 309998/1000000 [6:18:50<10:19:38, 18.56it/s]global step 310000, trans_decision ep_re 220.02870123490487

{"global_step": 310000, "eval_re": [358.1987483811388, 159.0383424570757, 
156.6697870063133, 120.57573399074494, 136.1016958018433, 146.52036628858968, 
170.0093305769567, 306.0558396446729, 480.37233755156285, 166.7448306501505], 
"eval_len": [67, 31, 30, 23, 26, 28, 33, 58, 98, 32]}

 32%|███▏      | 319998/1000000 [6:31:10<10:08:05, 18.64it/s]global step 320000, trans_decision ep_re 168.19766736902653

{"global_step": 320000, "eval_re": [156.62139749074248, 163.69991575009166, 
134.10379744550153, 139.86804872265424, 156.74967164909117, 114.50852633517887, 
153.82468217809446, 177.23070568667993, 346.34988207033285, 139.02004636189815],
"eval_len": [30, 31, 26, 27, 30, 22, 30, 34, 64, 27]}

 33%|███▎      | 329998/1000000 [6:43:30<10:03:49, 18.49it/s]global step 330000, trans_decision ep_re 169.7920151622946

{"global_step": 330000, "eval_re": [139.77271674828398, 170.39488799558194, 
113.78178955374382, 155.38522039597729, 331.6313723525427, 147.88522564301525, 
166.44946047776983, 194.2928052760518, 102.63401774215205, 175.6926554378277], 
"eval_len": [27, 33, 22, 30, 63, 28, 32, 37, 20, 34]}

 34%|███▍      | 339998/1000000 [6:55:50<9:52:27, 18.57it/s]global step 340000, trans_decision ep_re 230.52439462732576

{"global_step": 340000, "eval_re": [130.050612109494, 125.23533928765244, 
748.4522315172143, 167.43183606294775, 114.54889703053261, 129.55529735361043, 
205.52727345930674, 158.85984040946533, 350.9806718267597, 174.60194721627417], 
"eval_len": [25, 24, 141, 32, 22, 25, 39, 31, 66, 33]}

 35%|███▍      | 349998/1000000 [7:08:10<9:40:13, 18.67it/s]global step 350000, trans_decision ep_re 160.3371005308942

{"global_step": 350000, "eval_re": [140.90470115300496, 114.19044052327348, 
120.01230781263475, 118.90711984456708, 268.4485646084899, 108.33631142165193, 
120.31892184617739, 125.18777490169256, 157.6525092785551, 329.41235391889467], 
"eval_len": [27, 22, 23, 23, 54, 21, 23, 24, 30, 64]}

 36%|███▌      | 359998/1000000 [7:20:30<9:38:45, 18.43it/s]global step 360000, trans_decision ep_re 148.8374774238614

{"global_step": 360000, "eval_re": [130.41858478261574, 255.2947167705403, 
119.97836260024926, 135.77161683203505, 103.34542230075822, 155.86790725233385, 
201.96090341189435, 125.17210321041156, 130.30187360392185, 130.26328347385376],
"eval_len": [25, 48, 23, 26, 20, 30, 39, 24, 25, 25]}

 37%|███▋      | 369998/1000000 [7:32:50<9:13:41, 18.96it/s]global step 370000, trans_decision ep_re 192.0051056666302

{"global_step": 370000, "eval_re": [119.82185063826913, 176.6590977419204, 
193.84542868200603, 262.85803011059164, 148.25928633516386, 188.83094084730394, 
471.4192080324875, 141.44052326490046, 119.95835767520181, 96.95833333845705], 
"eval_len": [23, 34, 37, 50, 28, 36, 91, 27, 23, 19]}

 38%|███▊      | 379998/1000000 [7:45:10<9:16:43, 18.56it/s]global step 380000, trans_decision ep_re 163.50092905056448

{"global_step": 380000, "eval_re": [189.31431019299788, 267.8986781430762, 
163.0163260471941, 114.25214006261787, 164.94827532324572, 136.49919727359853, 
140.9968787286815, 196.7623798184562, 115.04728763840522, 146.27381727737134], 
"eval_len": [36, 53, 31, 22, 32, 26, 27, 38, 22, 28]}

 39%|███▉      | 389998/1000000 [7:57:40<9:27:47, 17.91it/s]global step 390000, trans_decision ep_re 188.52774355306477

{"global_step": 390000, "eval_re": [160.93365587130995, 296.06028039259854, 
301.8958724894256, 147.08013610403913, 177.7545732696428, 166.5526273120246, 
114.43396779755102, 178.0094302738525, 160.35678353095702, 182.20010848924636], 
"eval_len": [31, 58, 59, 28, 34, 32, 22, 34, 31, 35]}

 40%|███▉      | 399998/1000000 [8:10:20<9:07:07, 18.28it/s]global step 400000, trans_decision ep_re 144.72645346695955

{"global_step": 400000, "eval_re": [181.1391985039035, 172.9785342682618, 
161.32139294142564, 164.44702331345795, 108.35960524803764, 109.11567015777783, 
154.45281206922547, 135.80043278795978, 129.61926351168148, 130.03060186786448],
"eval_len": [35, 33, 31, 32, 21, 21, 30, 26, 25, 25]}

 41%|████      | 409998/1000000 [8:22:50<8:59:50, 18.22it/s]global step 410000, trans_decision ep_re 170.49525344291803

{"global_step": 410000, "eval_re": [152.55604608184743, 151.29525277584005, 
170.03955440311813, 144.54280358861888, 172.47301100819286, 124.71389068067751, 
130.41332473336126, 150.4605822119944, 341.7591753083075, 166.69889363722214], 
"eval_len": [29, 29, 33, 28, 33, 24, 25, 29, 65, 32]}

 42%|████▏     | 419998/1000000 [8:35:20<8:46:55, 18.35it/s]global step 420000, trans_decision ep_re 205.9374865978404

{"global_step": 420000, "eval_re": [151.68800985688682, 139.98521837640786, 
481.70599193948163, 173.28853309314974, 175.020505205123, 135.6248190268991, 
119.69534006005725, 166.88887605278885, 180.439586226735, 335.03798614087503], 
"eval_len": [29, 27, 89, 33, 34, 26, 23, 32, 35, 61]}

 43%|████▎     | 429998/1000000 [8:47:50<8:34:24, 18.47it/s]global step 430000, trans_decision ep_re 171.7201083317295

{"global_step": 430000, "eval_re": [119.85244237660115, 321.08401069155826, 
188.4230881538492, 119.47700093160073, 158.0088167239659, 177.19756881287967, 
188.6167524930773, 173.71182689279522, 157.26720348941868, 113.56237275154889], 
"eval_len": [23, 63, 36, 23, 30, 34, 36, 34, 30, 22]}

 44%|████▍     | 439998/1000000 [9:00:10<8:28:52, 18.34it/s]global step 440000, trans_decision ep_re 167.25758910399492

{"global_step": 440000, "eval_re": [124.69488135671008, 296.5047820757888, 
120.12722389318664, 109.3356860878197, 144.81493304863412, 165.58870295549045, 
140.36500716605653, 120.81929839711394, 305.42096577443294, 144.90441028471577],
"eval_len": [24, 57, 23, 21, 28, 32, 27, 23, 57, 28]}

 45%|████▍     | 449998/1000000 [9:12:40<8:30:40, 17.95it/s]global step 450000, trans_decision ep_re 162.45612399266707

{"global_step": 450000, "eval_re": [135.4716267907372, 109.23026905984779, 
130.27072350833214, 171.24585389965608, 172.92590854044494, 160.48391736221714, 
306.91756352454, 125.19963845354505, 166.47309137790984, 146.34264740944047], 
"eval_len": [26, 21, 25, 33, 33, 31, 59, 24, 32, 28]}

 46%|████▌     | 459998/1000000 [9:25:10<8:14:35, 18.20it/s]global step 460000, trans_decision ep_re 138.8353574823438

{"global_step": 460000, "eval_re": [156.16845003153017, 156.33060349484933, 
125.03674752059723, 158.8387547147403, 131.40731846766815, 119.78872259053105, 
125.36711521650335, 146.4436853656235, 140.16973053931022, 128.80244688208452], 
"eval_len": [30, 30, 24, 31, 25, 23, 24, 28, 27, 25]}

 47%|████▋     | 469998/1000000 [9:37:40<8:05:57, 18.18it/s]global step 470000, trans_decision ep_re 139.08104164877363

{"global_step": 470000, "eval_re": [119.885685684214, 162.7122215836759, 
114.77528390855618, 163.01301625100956, 173.86053926146377, 163.065086545076, 
136.33073414710321, 107.80122100213543, 108.83929586660682, 140.52733223789545],
"eval_len": [23, 31, 22, 31, 34, 31, 26, 21, 21, 27]}

 48%|████▊     | 479998/1000000 [9:50:20<7:56:47, 18.18it/s]global step 480000, trans_decision ep_re 173.0959738340734

{"global_step": 480000, "eval_re": [146.1182532090011, 217.65974326535306, 
108.89571593029717, 161.93373604594612, 129.70267469564746, 130.48009827908575, 
385.4526280759796, 157.25914141710695, 147.3733551885524, 146.08439223376456], 
"eval_len": [28, 41, 21, 31, 25, 25, 74, 30, 28, 28]}

 49%|████▉     | 489998/1000000 [10:02:50<7:46:04, 18.24it/s]global step 490000, trans_decision ep_re 198.49081619378973

{"global_step": 490000, "eval_re": [152.62584898042098, 325.39302707456005, 
154.58576587220318, 309.5978114945849, 157.94345521556988, 151.2418622499471, 
190.7681712221833, 130.61702476538946, 181.530028802476, 230.60516626056227], 
"eval_len": [29, 62, 30, 59, 30, 29, 37, 25, 35, 44]}

 50%|████▉     | 499998/1000000 [10:15:20<7:38:27, 18.18it/s]global step 500000, trans_decision ep_re 142.89550043475384

{"global_step": 500000, "eval_re": [188.10001223594315, 203.946278248641, 
136.60656905474397, 108.83792086916009, 140.49459424512614, 130.37226336994152, 
120.30328039279317, 125.20073897768893, 144.77742713890706, 130.31591981459303],
"eval_len": [36, 39, 26, 21, 27, 25, 23, 24, 28, 25]}

 51%|█████     | 509998/1000000 [10:28:00<7:28:39, 18.20it/s]global step 510000, trans_decision ep_re 180.0458549035185

{"global_step": 510000, "eval_re": [136.42787955199228, 177.24635208837702, 
145.041148559115, 162.8226228520592, 314.19231608935144, 182.46737102878814, 
231.34091672697335, 149.0837874686167, 154.7097048259515, 147.1264498439603], 
"eval_len": [26, 34, 28, 31, 62, 35, 44, 29, 30, 28]}

 52%|█████▏    | 519998/1000000 [10:40:30<7:14:57, 18.39it/s]global step 520000, trans_decision ep_re 173.39736343857084

{"global_step": 520000, "eval_re": [113.96832616327417, 166.50414672714814, 
107.52801978169987, 447.2895318769933, 172.07539740298162, 177.4801173174373, 
163.70706064245738, 124.85604863682462, 119.15645574565963, 141.40853009123256],
"eval_len": [22, 32, 21, 84, 33, 34, 31, 24, 23, 27]}

 53%|█████▎    | 529998/1000000 [10:53:00<7:00:41, 18.62it/s]global step 530000, trans_decision ep_re 156.92055720499786

{"global_step": 530000, "eval_re": [125.26411194756182, 115.24362571897277, 
155.286391843513, 136.1361847515165, 203.87411196399, 144.80821583319928, 
177.85635050626087, 150.83814283746304, 172.0916106138501, 187.8068260336511], 
"eval_len": [24, 22, 30, 26, 39, 28, 34, 29, 33, 36]}

 54%|█████▍    | 539998/1000000 [11:05:30<7:05:48, 18.00it/s]global step 540000, trans_decision ep_re 150.83598651010712

{"global_step": 540000, "eval_re": [136.36454118789183, 177.41569294975244, 
152.39433200210254, 120.18628490648778, 150.81244345590991, 129.812626132422, 
156.01109486122442, 199.37781347643255, 125.12197291126664, 160.86306321758107],
"eval_len": [26, 34, 29, 23, 29, 25, 30, 39, 24, 31]}

 55%|█████▍    | 549998/1000000 [11:18:00<6:45:09, 18.51it/s]global step 550000, trans_decision ep_re 198.24136288192793

{"global_step": 550000, "eval_re": [141.1035182585812, 157.37805430455307, 
139.88023279145645, 157.09604901296228, 139.78657608081286, 354.4447436420054, 
148.67025615682627, 381.981314965655, 210.66584799921324, 151.40703560721332], 
"eval_len": [27, 30, 27, 30, 27, 65, 28, 69, 41, 29]}

 56%|█████▌    | 559998/1000000 [11:30:20<6:35:58, 18.52it/s]global step 560000, trans_decision ep_re 165.89913848880133

{"global_step": 560000, "eval_re": [350.7945926406596, 130.04201383276384, 
173.97499104098975, 124.47178687520794, 168.30912641698382, 120.19189258749283, 
145.74904914117843, 170.50306038085128, 125.46681900015005, 149.4880529717358], 
"eval_len": [66, 25, 33, 24, 32, 23, 28, 33, 24, 29]}

 57%|█████▋    | 569998/1000000 [11:42:40<6:26:04, 18.56it/s]global step 570000, trans_decision ep_re 203.0747374529933

{"global_step": 570000, "eval_re": [130.21146607014023, 168.04658089088755, 
209.06986996573826, 169.28391129352465, 202.09776583320712, 130.0176805342435, 
333.6574616886265, 200.95029357566048, 171.46428367555995, 315.94806100234484], 
"eval_len": [25, 32, 40, 33, 38, 25, 64, 38, 33, 60]}

 58%|█████▊    | 579998/1000000 [11:55:10<6:19:28, 18.45it/s]global step 580000, trans_decision ep_re 143.07953141888999

{"global_step": 580000, "eval_re": [130.6489530339989, 162.04235823912896, 
146.24425099727884, 185.80311679971683, 176.6949830159028, 135.67116244250818, 
113.21593337444638, 114.28532860285583, 125.35157147855733, 140.83765620450595],
"eval_len": [25, 31, 28, 36, 34, 26, 22, 22, 24, 27]}

 59%|█████▉    | 589998/1000000 [12:07:30<6:08:34, 18.54it/s]global step 590000, trans_decision ep_re 159.856251762555

{"global_step": 590000, "eval_re": [167.42400120531082, 125.58321418574377, 
209.6262951760031, 198.34792477009125, 173.8700081216384, 112.1277130663226, 
167.17284911278327, 172.6578784796717, 117.61016757988929, 154.14246592809553], 
"eval_len": [32, 24, 40, 38, 33, 22, 32, 33, 23, 29]}

 60%|█████▉    | 599998/1000000 [12:19:50<5:58:47, 18.58it/s]global step 600000, trans_decision ep_re 140.51499437125926

{"global_step": 600000, "eval_re": [120.49451460199832, 159.9672126193291, 
141.63642988023295, 119.89594488939669, 140.3626585189258, 167.7222023160171, 
194.21929184967823, 125.64414388153716, 126.07797908680165, 109.12956606867571],
"eval_len": [23, 31, 27, 23, 27, 32, 37, 24, 24, 21]}

 61%|██████    | 609998/1000000 [12:32:20<5:50:28, 18.55it/s]global step 610000, trans_decision ep_re 177.19939961771124

{"global_step": 610000, "eval_re": [147.38053021944987, 133.03287340355024, 
159.80954854285926, 171.37239207541052, 109.48339171801203, 151.7744770224939, 
172.30691479594623, 129.78775221465375, 316.0488627361977, 280.9972534485388], 
"eval_len": [28, 26, 31, 33, 21, 29, 33, 25, 58, 57]}

 62%|██████▏   | 619998/1000000 [12:44:40<5:40:07, 18.62it/s]global step 620000, trans_decision ep_re 168.11577290216104

{"global_step": 620000, "eval_re": [171.8078244561318, 172.08988840872686, 
145.7849427304223, 184.07766972648596, 161.92774179665548, 145.93922473185341, 
152.56779593474104, 109.08298233846529, 163.8799034379709, 273.9997554601575], 
"eval_len": [33, 33, 28, 35, 31, 28, 29, 21, 31, 55]}

 63%|██████▎   | 629998/1000000 [12:57:00<5:33:53, 18.47it/s]global step 630000, trans_decision ep_re 174.7684188924386

{"global_step": 630000, "eval_re": [167.57961570773924, 173.2643653541485, 
157.96951670255936, 378.83226165907246, 113.12407426749851, 160.74482787729823, 
130.43007154944507, 103.19097528687432, 166.4802480214431, 196.0682324983073], 
"eval_len": [32, 33, 30, 68, 22, 31, 25, 20, 32, 38]}

 64%|██████▍   | 639998/1000000 [13:09:20<5:19:47, 18.76it/s]global step 640000, trans_decision ep_re 177.24636648452207

{"global_step": 640000, "eval_re": [139.7684349536212, 108.9400526137213, 
437.9781965252254, 114.86795506355111, 141.20025811755713, 141.95971610832788, 
183.60279766645, 163.8952359935533, 140.53033001360694, 199.72068778960616], 
"eval_len": [27, 21, 83, 22, 27, 27, 35, 31, 27, 38]}

 65%|██████▍   | 649998/1000000 [13:21:50<5:20:40, 18.19it/s]global step 650000, trans_decision ep_re 157.23546824206133

{"global_step": 650000, "eval_re": [214.615353819137, 131.46108911299285, 
162.679187040034, 145.80617000906108, 102.6711585422531, 166.86413608162027, 
157.04322279712835, 159.96674640190687, 171.6825020996647, 159.56511651681467], 
"eval_len": [41, 25, 31, 28, 20, 32, 30, 31, 33, 31]}

 66%|██████▌   | 659998/1000000 [13:34:10<5:05:04, 18.57it/s]global step 660000, trans_decision ep_re 164.97565354631578

{"global_step": 660000, "eval_re": [140.79708478771317, 103.38999138787929, 
170.60438196536694, 130.4239866756398, 130.30067380256023, 125.2492495600294, 
150.90263424636976, 215.64989096524482, 141.00761066690708, 341.43103140544724],
"eval_len": [27, 20, 33, 25, 25, 24, 29, 41, 27, 64]}

 67%|██████▋   | 669998/1000000 [13:46:30<4:56:36, 18.54it/s]global step 670000, trans_decision ep_re 201.0797688521577

{"global_step": 670000, "eval_re": [141.91330486273466, 266.52283888779704, 
124.631692871449, 165.07595690531835, 372.89210005881273, 216.9203553712478, 
317.88671913455835, 129.91235776793627, 145.83549686689437, 129.2068657948287], 
"eval_len": [27, 49, 24, 32, 72, 41, 61, 25, 28, 25]}

 68%|██████▊   | 679998/1000000 [13:58:50<4:49:24, 18.43it/s]global step 680000, trans_decision ep_re 168.9738046671622

{"global_step": 680000, "eval_re": [188.59090949231555, 161.4375231069716, 
153.01538464105693, 209.2341146919301, 195.8692165142967, 196.12566156929196, 
167.52251571627033, 172.77284641907522, 103.5111252351581, 141.65874928525542], 
"eval_len": [36, 32, 29, 40, 37, 38, 32, 33, 20, 27]}

 69%|██████▉   | 689998/1000000 [14:11:20<4:38:04, 18.58it/s]global step 690000, trans_decision ep_re 174.51621248396484

{"global_step": 690000, "eval_re": [114.36886257020852, 167.34151003554615, 
230.55957919993878, 165.4376865451016, 160.94431644936859, 393.3183292731275, 
136.01288456076335, 144.4602907527655, 135.96320180694852, 96.75546364587997], 
"eval_len": [22, 32, 44, 32, 31, 72, 26, 28, 26, 19]}

 70%|██████▉   | 699998/1000000 [14:23:40<4:31:17, 18.43it/s]global step 700000, trans_decision ep_re 150.01491883254198

{"global_step": 700000, "eval_re": [146.47271922179266, 194.7599221984142, 
102.82611553432876, 161.53605159567297, 161.57078475022493, 173.39969606655407, 
125.15242635535866, 205.10298901244687, 125.74288563388646, 103.5855979567399], 
"eval_len": [28, 37, 20, 31, 31, 33, 24, 39, 24, 20]}

 71%|███████   | 709998/1000000 [14:36:00<4:20:56, 18.52it/s]global step 710000, trans_decision ep_re 157.67375025550777

{"global_step": 710000, "eval_re": [221.98541934437483, 124.57230069739379, 
109.2001974062964, 152.16543169007963, 154.8865665359871, 191.59080506231845, 
168.97190552665322, 146.1100569393646, 156.07261102661784, 151.18220832599195], 
"eval_len": [42, 24, 21, 29, 30, 37, 32, 28, 30, 29]}

 72%|███████▏  | 719998/1000000 [14:48:30<4:11:31, 18.55it/s]global step 720000, trans_decision ep_re 208.62984233347515

{"global_step": 720000, "eval_re": [413.21647467955466, 125.5437955703091, 
146.26998466626782, 158.40997254341406, 437.30329499704874, 242.97317184221492, 
156.56868791293928, 108.70424363888452, 151.73446309351257, 145.57433439060569],
"eval_len": [77, 24, 28, 30, 82, 46, 30, 21, 29, 28]}

 73%|███████▎  | 729998/1000000 [15:00:51<4:00:43, 18.69it/s]global step 730000, trans_decision ep_re 147.06560641790333

{"global_step": 730000, "eval_re": [135.47652211395055, 130.51633059723974, 
124.75728733283856, 136.99260546007676, 168.3816704200637, 130.0645077884199, 
196.94703800590312, 178.00991094074416, 140.9780933475206, 128.53209817227594], 
"eval_len": [26, 25, 24, 27, 32, 25, 38, 34, 27, 25]}

 74%|███████▍  | 739998/1000000 [15:13:11<3:56:23, 18.33it/s]global step 740000, trans_decision ep_re 205.49940944496797

{"global_step": 740000, "eval_re": [140.44664799395596, 172.28515433128783, 
131.17215513814074, 452.91594074143853, 332.63114617738086, 168.11863708113228, 
150.3748992290452, 190.0040917859673, 139.442984188161, 177.60243778317002], 
"eval_len": [27, 33, 25, 85, 66, 32, 29, 36, 27, 34]}

 75%|███████▍  | 749998/1000000 [15:25:41<3:45:18, 18.49it/s]global step 750000, trans_decision ep_re 164.31272924787748

{"global_step": 750000, "eval_re": [135.96862919706803, 136.1985082475436, 
135.29783341404334, 157.23428874722705, 146.06327261660155, 160.27560243004885, 
125.19938895789352, 130.8156109791455, 341.22016442105814, 174.85399346814523], 
"eval_len": [26, 26, 26, 30, 28, 31, 24, 25, 65, 34]}

 76%|███████▌  | 759998/1000000 [15:38:01<3:36:49, 18.45it/s]global step 760000, trans_decision ep_re 161.3509655159204

{"global_step": 760000, "eval_re": [330.42380222387675, 96.3955584266619, 
141.05128398977212, 119.72434437206049, 147.12804052669256, 158.749969773456, 
125.83378598422523, 130.6319608110595, 232.6057093932949, 130.9651996581045], 
"eval_len": [63, 19, 27, 23, 28, 31, 24, 25, 44, 25]}

 77%|███████▋  | 769998/1000000 [15:50:31<3:27:00, 18.52it/s]global step 770000, trans_decision ep_re 169.00972560266308

{"global_step": 770000, "eval_re": [177.64026176251056, 146.94061565487473, 
291.5247883558229, 120.22053698449562, 108.41013421921389, 119.0327867272873, 
185.60573975711642, 188.63058144295033, 140.38853025404515, 211.7032808683136], 
"eval_len": [34, 28, 57, 23, 21, 23, 36, 36, 27, 40]}

 78%|███████▊  | 779998/1000000 [16:02:51<3:17:38, 18.55it/s]global step 780000, trans_decision ep_re 158.24774623823788

{"global_step": 780000, "eval_re": [145.36620555627098, 124.73223967768621, 
141.94106688961182, 114.50247313650658, 109.92083198938043, 139.80437872429874, 
172.32151853150023, 119.8646915557562, 181.97763970351915, 332.0464166178485], 
"eval_len": [28, 24, 27, 22, 21, 27, 33, 23, 35, 63]}

 79%|███████▉  | 789998/1000000 [16:15:21<3:08:36, 18.56it/s]global step 790000, trans_decision ep_re 145.742940602368

{"global_step": 790000, "eval_re": [146.55624691566058, 152.11645296012665, 
130.25288788575207, 182.29224582104487, 103.27333424382147, 157.8043219835065, 
225.53396571693276, 109.05798866124806, 136.53994399937847, 114.00201783620857],
"eval_len": [28, 29, 25, 35, 20, 30, 43, 21, 26, 22]}

 80%|███████▉  | 799998/1000000 [16:27:41<2:59:34, 18.56it/s]global step 800000, trans_decision ep_re 194.75250579079793

{"global_step": 800000, "eval_re": [121.03751389146296, 125.41511004954853, 
124.82758686250877, 205.7907645677968, 119.88798639005684, 118.37126894135778, 
359.2979180931722, 145.7148117860351, 412.5417928491653, 214.6403044768749], 
"eval_len": [23, 24, 24, 39, 23, 23, 68, 28, 77, 41]}

 81%|████████  | 809998/1000000 [16:40:11<2:50:25, 18.58it/s]global step 810000, trans_decision ep_re 148.72455053843694

{"global_step": 810000, "eval_re": [119.74907571401958, 156.91617459226825, 
119.82480841567128, 165.70458381533066, 144.82436718528598, 170.67870438041606, 
197.0880672621109, 157.25894970887927, 135.9203392738514, 119.28043503653588], 
"eval_len": [23, 30, 23, 32, 28, 33, 39, 30, 26, 23]}

 82%|████████▏ | 819998/1000000 [16:52:31<2:42:15, 18.49it/s]global step 820000, trans_decision ep_re 239.61637281610356

{"global_step": 820000, "eval_re": [125.73794715278191, 160.7523937152301, 
125.35390890252306, 327.76990017227683, 332.95955718469304, 207.8305880848372, 
414.16634620343194, 393.65183455095666, 172.63898543868447, 135.3022667556208], 
"eval_len": [24, 31, 24, 64, 63, 40, 78, 76, 33, 26]}

 83%|████████▎ | 829998/1000000 [17:04:51<2:33:02, 18.51it/s]global step 830000, trans_decision ep_re 180.60993609196777

{"global_step": 830000, "eval_re": [136.1841595259734, 168.2155115171725, 
445.67549588062224, 130.2699160495463, 152.29967577544826, 155.42394644019876, 
167.00401849044297, 162.2834347246885, 163.12063880097077, 125.62256371461399], 
"eval_len": [26, 32, 82, 25, 29, 30, 32, 31, 31, 24]}

 84%|████████▍ | 839998/1000000 [17:17:21<2:24:09, 18.50it/s]global step 840000, trans_decision ep_re 171.64092737926404

{"global_step": 840000, "eval_re": [185.17036074618932, 135.93333199187225, 
278.74773011054043, 152.27344683388588, 128.3533542478961, 174.98491207899326, 
182.57525661721638, 114.50159728604967, 157.44184267661592, 206.42744120338102],
"eval_len": [36, 26, 55, 29, 25, 34, 35, 22, 30, 40]}

 85%|████████▍ | 849998/1000000 [17:29:41<2:15:10, 18.49it/s]global step 850000, trans_decision ep_re 181.49773144844113

{"global_step": 850000, "eval_re": [171.3562545768357, 178.18439102346935, 
339.03442904376243, 172.89642638048602, 126.17396073128317, 144.40350796758133, 
224.4206085008018, 125.37542857164779, 164.24540341356817, 168.88690427497562], 
"eval_len": [33, 34, 66, 33, 24, 28, 43, 24, 31, 32]}

 86%|████████▌ | 859998/1000000 [17:42:01<2:05:20, 18.62it/s]global step 860000, trans_decision ep_re 174.2987659716658

{"global_step": 860000, "eval_re": [162.45384748383316, 173.8880516128467, 
134.32747777624917, 115.02848538833459, 125.03111580574479, 367.7030063590786, 
144.30582698265025, 165.4973086460516, 163.2659456981896, 191.48659396367975], 
"eval_len": [31, 33, 26, 22, 24, 71, 28, 32, 31, 37]}

 87%|████████▋ | 869998/1000000 [17:54:21<1:56:07, 18.66it/s]global step 870000, trans_decision ep_re 145.3086326628946

{"global_step": 870000, "eval_re": [206.26797744253744, 150.98899923144847, 
113.830611540816, 129.73286957746294, 113.76879346012328, 172.14306507936794, 
119.77044961278204, 145.428670286975, 181.84923121537773, 119.30565918205525], 
"eval_len": [40, 29, 22, 25, 22, 33, 23, 28, 35, 23]}

 88%|████████▊ | 879998/1000000 [18:06:51<1:47:35, 18.59it/s]global step 880000, trans_decision ep_re 189.05345272981444

{"global_step": 880000, "eval_re": [237.95886726996991, 157.47601303407316, 
170.65838108286962, 130.62114911830378, 151.19031900276718, 130.83611753701635, 
160.61397906300536, 397.50509688933437, 192.0188241972784, 161.6557801035262], 
"eval_len": [46, 30, 33, 25, 29, 25, 31, 74, 37, 31]}

 89%|████████▉ | 889998/1000000 [18:19:11<1:39:13, 18.48it/s]global step 890000, trans_decision ep_re 172.48309261471593

{"global_step": 890000, "eval_re": [131.3295108374836, 157.56396403535814, 
182.6786239280589, 178.2609661026942, 302.84818527160377, 205.13809699743967, 
96.56283569432654, 141.8937367041989, 161.96694799651712, 166.5880585794784], 
"eval_len": [25, 30, 35, 34, 59, 39, 19, 27, 31, 32]}

 90%|████████▉ | 899998/1000000 [18:31:31<1:29:14, 18.68it/s]global step 900000, trans_decision ep_re 148.75828434590449

{"global_step": 900000, "eval_re": [161.93599825686752, 129.71124585991305, 
178.3824508236545, 125.38072326022868, 150.0905339177426, 151.81057769786614, 
131.60687452272896, 130.78819481683806, 172.59036670120435, 155.28587760200094],
"eval_len": [31, 25, 34, 24, 29, 29, 25, 25, 33, 30]}

 91%|█████████ | 909998/1000000 [18:44:01<1:20:32, 18.62it/s]global step 910000, trans_decision ep_re 151.4694073334161

{"global_step": 910000, "eval_re": [186.71865845002873, 160.56072341920637, 
187.09703167131943, 137.88970156126356, 152.68627073645004, 108.80516262976542, 
161.25109296580817, 126.05836809348425, 108.5055965338744, 185.12146727296053], 
"eval_len": [36, 31, 36, 27, 29, 21, 31, 24, 21, 35]}

 92%|█████████▏| 919998/1000000 [18:56:21<1:12:25, 18.41it/s]global step 920000, trans_decision ep_re 148.26553132383125

{"global_step": 920000, "eval_re": [151.96237373313645, 173.06814163553295, 
134.80910227273102, 131.0811018535759, 124.59389857247785, 211.8648138971315, 
108.87413436141327, 142.00583837309856, 174.32258458412298, 130.07332395509212],
"eval_len": [29, 33, 26, 25, 24, 40, 21, 27, 34, 25]}

 93%|█████████▎| 929998/1000000 [19:08:51<1:03:30, 18.37it/s]global step 930000, trans_decision ep_re 202.42701223182516

{"global_step": 930000, "eval_re": [228.01519073483706, 183.63152540316275, 
200.29817210143693, 144.5603277407031, 285.2772104781298, 153.07864586941756, 
150.78561252427173, 169.22484953314162, 120.5797489654275, 388.8188389677234], 
"eval_len": [43, 35, 38, 28, 58, 30, 29, 33, 23, 70]}

 94%|█████████▍| 939998/1000000 [19:21:11<54:07, 18.48it/s]global step 940000, trans_decision ep_re 166.12791293396342

{"global_step": 940000, "eval_re": [136.182911112083, 145.7679004803993, 
130.54930141897063, 114.3017241080005, 135.87803866633266, 147.3831392606004, 
109.32710697658587, 412.63755628454487, 172.3763725176939, 156.87507851442325], 
"eval_len": [26, 28, 25, 22, 26, 28, 21, 77, 33, 30]}

 95%|█████████▍| 949998/1000000 [19:33:41<45:04, 18.49it/s]global step 950000, trans_decision ep_re 225.55425520913985

{"global_step": 950000, "eval_re": [152.13464217257294, 408.89083653771746, 
367.6840683379336, 386.3606100186438, 145.77248764191356, 204.79428548804762, 
156.95710379015947, 124.51971177840959, 163.0879613454007, 145.34084498059968], 
"eval_len": [29, 76, 67, 74, 28, 39, 30, 24, 31, 28]}

 96%|█████████▌| 959998/1000000 [19:46:01<35:43, 18.66it/s]global step 960000, trans_decision ep_re 190.20138581806813

{"global_step": 960000, "eval_re": [162.02435187261335, 277.19713028337054, 
193.10925622920877, 96.51635197866366, 314.9265648406128, 188.32710559025898, 
237.29084553856066, 141.17923504138912, 160.4287856047364, 131.01423120126717], 
"eval_len": [31, 57, 37, 19, 62, 36, 45, 27, 31, 25]}

 97%|█████████▋| 969998/1000000 [19:58:31<27:07, 18.43it/s]global step 970000, trans_decision ep_re 158.66466474274299

{"global_step": 970000, "eval_re": [316.404038714087, 120.19969311528551, 
188.02766938353128, 135.68398288868872, 164.04266004071795, 114.14708518905138, 
125.19390112030057, 128.52057078850072, 153.53925495109843, 140.88779123616808],
"eval_len": [60, 23, 36, 26, 31, 22, 24, 25, 30, 27]}

 98%|█████████▊| 979998/1000000 [20:10:51<18:03, 18.47it/s]global step 980000, trans_decision ep_re 166.0665359891944

{"global_step": 980000, "eval_re": [248.21053749996605, 156.0350866209419, 
157.03504169611404, 130.2204503406013, 131.50417452326758, 130.6314113810098, 
145.84728754768756, 236.09034607622462, 157.07878373986418, 168.01224046626697],
"eval_len": [47, 30, 30, 25, 25, 25, 28, 45, 30, 32]}

 99%|█████████▉| 989998/1000000 [20:23:11<08:56, 18.66it/s]global step 990000, trans_decision ep_re 187.79453982128382

{"global_step": 990000, "eval_re": [156.552946876622, 170.74434787079687, 
184.30098797747982, 174.33430118996483, 124.35629767454466, 161.997462801244, 
140.4876467023236, 180.4120561606787, 401.53002861394805, 183.22932234523546], 
"eval_len": [30, 33, 36, 33, 24, 31, 27, 35, 75, 35]}

100%|█████████▉| 999998/1000000 [20:35:41<00:00, 18.59it/s]global step 1000000, trans_decision ep_re 155.006523711627

{"global_step": 1000000, "eval_re": [167.6991935308178, 171.41651427528384, 
151.22036291404294, 178.9235098454361, 96.69756685752402, 156.9707758002592, 
156.54337340696497, 173.3479688754782, 129.1687151894593, 168.0772564210038], 
"eval_len": [32, 33, 29, 34, 19, 30, 30, 33, 25, 32]}

100%|██████████| 1000000/1000000 [20:35:45<00:00, 13.49it/s]
