
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [06:30<14:45:29, 18.63it/s]global step 10000, trans_decision ep_re 196.83238782137553

{"global_step": 10000, "eval_re": [146.21303934216218, 147.07087797347057, 
130.69029739756598, 255.65789096789254, 363.2963417001064, 274.1755469945778, 
162.13254370371064, 172.09124061860328, 170.2560482778541, 146.74005123781197], 
"eval_len": [28, 28, 25, 48, 68, 51, 31, 33, 33, 28]}

  2%|▏         | 19998/1000000 [18:50<14:37:24, 18.62it/s]global step 20000, trans_decision ep_re 183.37042986143712

{"global_step": 20000, "eval_re": [167.26526655360678, 171.34545463400167, 
346.89765084145847, 141.91266175006717, 166.43447042253584, 181.18531464775856, 
171.25561969634808, 167.73206459219915, 162.49235817997723, 157.18343729641813],
"eval_len": [32, 33, 67, 27, 32, 35, 33, 32, 31, 30]}

  3%|▎         | 29998/1000000 [31:20<14:36:06, 18.45it/s]global step 30000, trans_decision ep_re 215.41348705972828

{"global_step": 30000, "eval_re": [347.44364628595645, 147.34486685981352, 
251.89875536496967, 161.2943285034342, 189.32549742353999, 171.87150760128853, 
394.43838380236684, 172.34880647091006, 176.84029133898943, 141.32878694601425],
"eval_len": [67, 28, 48, 31, 36, 33, 74, 33, 34, 27]}

  4%|▍         | 39998/1000000 [43:40<14:18:26, 18.64it/s]global step 40000, trans_decision ep_re 163.56873981346843

{"global_step": 40000, "eval_re": [176.50709690772177, 151.167250517297, 
172.39742518443484, 151.79913976623698, 130.78513026333619, 161.85530655652784, 
130.82293199056784, 152.02075785951692, 146.9763964385016, 261.3559626505436], 
"eval_len": [34, 29, 33, 29, 25, 31, 25, 29, 28, 50]}

  5%|▍         | 49998/1000000 [56:10<14:14:29, 18.53it/s]global step 50000, trans_decision ep_re 175.92968119963035

{"global_step": 50000, "eval_re": [177.0996387934506, 167.61204481979334, 
187.91810681553267, 202.29897355654867, 214.91147347247826, 187.32496018434443, 
135.76940371126105, 151.81884385824944, 146.04985478944602, 188.49351199519896],
"eval_len": [34, 32, 36, 39, 41, 36, 26, 29, 28, 36]}

  6%|▌         | 59998/1000000 [1:08:30<14:04:29, 18.55it/s]global step 60000, trans_decision ep_re 178.1887194452915

{"global_step": 60000, "eval_re": [157.0281836947807, 141.01235276057233, 
293.65242115443647, 157.93034648046273, 176.8934640018401, 157.36134958847046, 
208.21263621399808, 181.66642419703538, 177.34022068994693, 130.78979567137162],
"eval_len": [30, 27, 55, 30, 34, 30, 40, 35, 34, 25]}

  7%|▋         | 69998/1000000 [1:20:50<13:59:37, 18.46it/s]global step 70000, trans_decision ep_re 174.89239266265787

{"global_step": 70000, "eval_re": [141.3427223603652, 140.61082869851657, 
157.22761113665382, 162.56190209494255, 177.31929306327774, 187.21330522741889, 
162.48452142976444, 135.71240849148347, 287.78709722553117, 196.66423689862475],
"eval_len": [27, 27, 30, 31, 34, 36, 31, 26, 54, 38]}

  8%|▊         | 79998/1000000 [1:33:20<13:49:53, 18.48it/s]global step 80000, trans_decision ep_re 190.85707955689895

{"global_step": 80000, "eval_re": [424.0434885524842, 183.39773598318416, 
141.27472000243975, 167.10928459860858, 130.63767142386362, 166.53221697722412, 
151.7964381583854, 187.5646588083196, 194.12438002079244, 162.0902010436877], 
"eval_len": [79, 35, 27, 32, 25, 32, 29, 36, 37, 31]}

  9%|▉         | 89998/1000000 [1:45:40<13:30:29, 18.71it/s]global step 90000, trans_decision ep_re 182.03782172575137

{"global_step": 90000, "eval_re": [192.161999614144, 167.21247524194476, 
130.74353022611794, 182.27664527040594, 220.6223501466628, 188.58290684108698, 
146.14376982421467, 195.62418036506065, 199.96636449349307, 197.04399523438303],
"eval_len": [37, 32, 25, 35, 42, 36, 28, 38, 38, 38]}

 10%|▉         | 99998/1000000 [1:58:00<13:26:46, 18.59it/s]global step 100000, trans_decision ep_re 161.93934905488942

{"global_step": 100000, "eval_re": [162.11155182296815, 140.6003337288609, 
130.34275308567294, 156.04971957518597, 130.62111811122952, 188.92697728767916, 
182.68407150059826, 177.76769865342098, 183.29332084328638, 166.99594593999208],
"eval_len": [31, 27, 25, 30, 25, 36, 35, 34, 35, 32]}

 11%|█         | 109998/1000000 [2:10:30<13:23:41, 18.46it/s]global step 110000, trans_decision ep_re 179.80498601864014

{"global_step": 110000, "eval_re": [162.46568045494112, 146.44732874559037, 
183.62681073800798, 177.62167409976922, 228.52224045432476, 241.22994603724933, 
186.56191225731547, 151.95933498190905, 146.0807356917605, 173.53419672553355], 
"eval_len": [31, 28, 35, 34, 44, 46, 36, 29, 28, 33]}

 12%|█▏        | 119998/1000000 [2:22:50<13:03:35, 18.72it/s]global step 120000, trans_decision ep_re 196.4621610081726

{"global_step": 120000, "eval_re": [221.05341006781586, 162.77566625984448, 
171.99656217180726, 187.80026746225477, 197.19930041646282, 242.94564891656117, 
166.1892563878028, 254.85079259565717, 187.10701381033033, 172.7036919931896], 
"eval_len": [42, 31, 33, 36, 38, 46, 32, 48, 36, 33]}

 13%|█▎        | 129998/1000000 [2:35:10<13:02:42, 18.53it/s]global step 130000, trans_decision ep_re 175.62842368946636

{"global_step": 130000, "eval_re": [150.9966639059211, 212.76677261786918, 
146.71939944611674, 166.0114418179753, 186.7822126286443, 185.4790970977786, 
203.48463657224798, 172.89138492524444, 141.22308083244837, 189.92954705041768],
"eval_len": [29, 41, 28, 32, 36, 36, 39, 33, 27, 37]}

 14%|█▍        | 139998/1000000 [2:47:30<12:37:15, 18.93it/s]global step 140000, trans_decision ep_re 218.88797098625614

{"global_step": 140000, "eval_re": [168.0117037501243, 152.14769009170314, 
192.4036913592509, 382.8033233883794, 182.6770971072113, 161.2466005024321, 
223.99069538320705, 366.96861671385625, 167.12740626722237, 191.5028852991748], 
"eval_len": [32, 29, 37, 70, 35, 31, 43, 69, 32, 37]}

 15%|█▍        | 149998/1000000 [2:59:50<12:44:54, 18.52it/s]global step 150000, trans_decision ep_re 179.0348119892298

{"global_step": 150000, "eval_re": [146.64180978676637, 352.5043433667045, 
119.90143880408998, 162.46747651345652, 167.0167299562344, 183.06771801538434, 
167.6866421064259, 150.90229635807822, 178.3594756399385, 161.80018934521948], 
"eval_len": [28, 68, 23, 31, 32, 35, 32, 29, 34, 31]}

 16%|█▌        | 159998/1000000 [3:12:10<12:28:22, 18.71it/s]global step 160000, trans_decision ep_re 172.05149003996695

{"global_step": 160000, "eval_re": [177.09316584510114, 151.52018621554743, 
141.81760029730162, 182.5692034529238, 204.01917046070346, 146.57437442818136, 
178.32253531130402, 161.66106393126728, 210.05306805854096, 166.8845323987985], 
"eval_len": [34, 29, 27, 35, 39, 28, 34, 31, 40, 32]}

 17%|█▋        | 169998/1000000 [3:24:30<12:26:55, 18.52it/s]global step 170000, trans_decision ep_re 205.15266280556799

{"global_step": 170000, "eval_re": [391.30405839946746, 183.7647775371557, 
162.31641277762702, 216.62198477730706, 187.92251512312893, 225.3248259191949, 
197.48730839947171, 157.35004328878173, 172.4030992446911, 157.0316025888542], 
"eval_len": [73, 35, 31, 41, 36, 43, 38, 30, 33, 30]}

 18%|█▊        | 179998/1000000 [3:36:50<12:12:57, 18.65it/s]global step 180000, trans_decision ep_re 160.67070081257387

{"global_step": 180000, "eval_re": [146.77394056872913, 178.50138150340578, 
166.83469165816535, 175.19717413920128, 141.34881979693202, 156.34437210376575, 
165.96330997650855, 157.3667534241061, 140.1056822443651, 178.2708827105593], 
"eval_len": [28, 34, 32, 34, 27, 30, 32, 30, 27, 34]}

 19%|█▉        | 189998/1000000 [3:49:10<11:57:25, 18.82it/s]global step 190000, trans_decision ep_re 186.20179069960915

{"global_step": 190000, "eval_re": [146.1632177523057, 194.00809158039755, 
172.25992873891104, 161.97810209886472, 167.7901260342059, 157.73297057966212, 
198.00496029075273, 181.95854320726886, 319.9547741888602, 162.1671925248625], 
"eval_len": [28, 37, 33, 31, 32, 30, 38, 35, 63, 31]}

 20%|█▉        | 199998/1000000 [4:01:30<12:00:54, 18.50it/s]global step 200000, trans_decision ep_re 176.29879125689007

{"global_step": 200000, "eval_re": [162.30497118825417, 141.02845561277945, 
162.9197314950645, 183.70815012727678, 178.19626429232667, 204.0533470411418, 
206.3367360982902, 150.53422651671738, 197.0539177801021, 176.85211241694768], 
"eval_len": [31, 27, 31, 35, 34, 39, 40, 29, 38, 34]}

 21%|██        | 209998/1000000 [4:13:50<11:46:17, 18.64it/s]global step 210000, trans_decision ep_re 181.24249605811423

{"global_step": 210000, "eval_re": [198.22103590834917, 150.9480746990751, 
199.8729262636702, 177.33503571297186, 172.183564537763, 225.17746851562308, 
166.55479697331418, 166.7238850026422, 182.88712281442196, 172.52105015331153], 
"eval_len": [38, 29, 38, 34, 33, 43, 32, 32, 35, 33]}

 22%|██▏       | 219998/1000000 [4:26:10<11:36:51, 18.66it/s]global step 220000, trans_decision ep_re 192.39796224167208

{"global_step": 220000, "eval_re": [130.46816133761016, 196.5370516552831, 
167.24436080392968, 146.51419507522803, 213.4283147635258, 152.12012799302565, 
162.7787943668754, 194.54910944352753, 378.6766024601175, 181.66290451759772], 
"eval_len": [25, 38, 32, 28, 41, 29, 31, 37, 71, 35]}

 23%|██▎       | 229998/1000000 [4:38:30<11:30:54, 18.57it/s]global step 230000, trans_decision ep_re 169.90751373818134

{"global_step": 230000, "eval_re": [151.9186680160948, 157.07252007428534, 
156.966234948378, 207.53571258635097, 171.81207388850422, 192.74273478419923, 
151.7713913299029, 205.7520238793907, 167.2242189794223, 136.27955889528488], 
"eval_len": [29, 30, 30, 40, 33, 37, 29, 40, 32, 26]}

 24%|██▍       | 239998/1000000 [4:50:50<11:20:33, 18.61it/s]global step 240000, trans_decision ep_re 172.21436989233598

{"global_step": 240000, "eval_re": [135.8328506447757, 177.19982045970477, 
154.7458618804012, 184.43309586423266, 151.27003964708916, 193.07209709745808, 
167.35960988084213, 156.79351096965146, 193.8378830812396, 207.59892939796504], 
"eval_len": [26, 34, 30, 35, 29, 37, 32, 30, 37, 40]}

 25%|██▍       | 249998/1000000 [5:03:20<11:12:22, 18.59it/s]global step 250000, trans_decision ep_re 220.54715762465085

{"global_step": 250000, "eval_re": [236.09546538042903, 405.7824862954257, 
200.14357563135138, 147.14651824578422, 186.56732148424845, 361.7181236631682, 
176.08627325850867, 204.3547958349755, 135.64571773934193, 151.93129871327534], 
"eval_len": [45, 76, 38, 28, 36, 68, 34, 39, 26, 29]}

 26%|██▌       | 259998/1000000 [5:15:40<11:03:50, 18.58it/s]global step 260000, trans_decision ep_re 261.4994844888103

{"global_step": 260000, "eval_re": [182.87704504071533, 362.91658810847366, 
170.63460451284072, 379.42205482445155, 162.74036669144905, 171.46818924219826, 
152.08488225297467, 178.9873653305931, 492.0612991599227, 361.80244972448406], 
"eval_len": [35, 70, 33, 70, 31, 33, 29, 34, 88, 68]}

 27%|██▋       | 269998/1000000 [5:28:00<10:48:04, 18.77it/s]global step 270000, trans_decision ep_re 186.5131210728378

{"global_step": 270000, "eval_re": [146.47267103821403, 177.21137162015094, 
181.87036341398755, 220.60066691398941, 172.30232793888825, 120.15281962725118, 
369.69675146804695, 157.02423060654476, 156.89124632768699, 162.9087617736177], 
"eval_len": [28, 34, 35, 42, 33, 23, 69, 30, 30, 31]}

 28%|██▊       | 279998/1000000 [5:40:20<10:39:48, 18.76it/s]global step 280000, trans_decision ep_re 192.41445462821417

{"global_step": 280000, "eval_re": [135.75686354488948, 497.47653064945274, 
161.90482854450363, 146.40393572535433, 135.78472081276152, 146.51749674842543, 
181.3484498164279, 194.562508244889, 135.81781362841983, 188.5713985670182], 
"eval_len": [26, 93, 31, 28, 26, 28, 35, 37, 26, 36]}

 29%|██▉       | 289998/1000000 [5:52:40<10:35:51, 18.61it/s]global step 290000, trans_decision ep_re 190.73704245102198

{"global_step": 290000, "eval_re": [135.475393372764, 178.12170056390082, 
156.69581676387998, 191.77487637022304, 192.59915578560452, 172.27512267226032, 
146.20125748672217, 198.92681290195992, 383.3284484939227, 151.97184009898228], 
"eval_len": [26, 34, 30, 37, 37, 33, 28, 38, 72, 29]}

 30%|██▉       | 299998/1000000 [6:05:00<10:27:59, 18.58it/s]global step 300000, trans_decision ep_re 202.770579061689

{"global_step": 300000, "eval_re": [187.3299449187821, 363.6562061052854, 
196.08665043974565, 157.0132075793059, 151.52490049704363, 177.50086265647712, 
156.96424405313468, 161.55538143759028, 222.21689807253327, 253.857494856992], 
"eval_len": [36, 68, 38, 30, 29, 34, 30, 31, 43, 48]}

 31%|███       | 309998/1000000 [6:17:20<10:19:13, 18.57it/s]global step 310000, trans_decision ep_re 197.45792571891042

{"global_step": 310000, "eval_re": [197.49071431629704, 152.08484276328937, 
188.71723426044753, 198.2203423760795, 157.13615678615272, 141.1026373998791, 
156.7672154069011, 198.71281037361177, 187.73229495322772, 396.6150085532185], 
"eval_len": [38, 29, 36, 38, 30, 27, 30, 38, 36, 75]}

 32%|███▏      | 319998/1000000 [6:29:40<10:10:02, 18.58it/s]global step 320000, trans_decision ep_re 177.36688103877535

{"global_step": 320000, "eval_re": [199.33005290002933, 193.2745698113244, 
227.76651337700957, 181.62834363762937, 141.28329662437588, 151.57062760815415, 
192.16345973845523, 162.37300636302427, 151.3829158790349, 172.89602444871647], 
"eval_len": [38, 37, 43, 35, 27, 29, 37, 31, 29, 33]}

 33%|███▎      | 329998/1000000 [6:42:10<9:58:16, 18.67it/s]global step 330000, trans_decision ep_re 187.49456596778447

{"global_step": 330000, "eval_re": [355.51473341096033, 206.91852945973116, 
156.41675289875948, 162.82933416050963, 146.16998886741877, 210.0543446615023, 
135.34896675168767, 188.09312275886825, 167.42934310784588, 146.17054360056113],
"eval_len": [68, 40, 30, 31, 28, 40, 26, 36, 32, 28]}

 34%|███▍      | 339998/1000000 [6:54:30<9:46:39, 18.75it/s]global step 340000, trans_decision ep_re 154.1493448078008

{"global_step": 340000, "eval_re": [130.84897576094303, 136.61471473104493, 
187.02784372826758, 136.3235157855832, 156.66932701809696, 177.71702884689805, 
168.1034575924003, 151.30326009487288, 156.09028189025716, 140.79504262964412], 
"eval_len": [25, 26, 36, 26, 30, 34, 32, 29, 30, 27]}

 35%|███▍      | 349998/1000000 [7:06:50<9:41:41, 18.62it/s]global step 350000, trans_decision ep_re 207.57923197758836

{"global_step": 350000, "eval_re": [150.67919426650442, 162.42541807445224, 
178.9913910777996, 130.86430818813363, 162.43669958882882, 146.64559883693377, 
151.171037318263, 178.01494183974555, 182.5245178995569, 632.0392126856655], 
"eval_len": [29, 31, 34, 25, 31, 28, 29, 34, 35, 113]}

 36%|███▌      | 359998/1000000 [7:19:10<9:26:28, 18.83it/s]global step 360000, trans_decision ep_re 200.42797936916716

{"global_step": 360000, "eval_re": [156.81444865787222, 177.647418736453, 
542.3543018536087, 182.59386792027902, 152.22761343631277, 156.08677385782323, 
141.44787647558044, 171.7198580276052, 187.30332052084708, 136.08431420528967], 
"eval_len": [30, 34, 104, 35, 29, 30, 27, 33, 36, 26]}

 37%|███▋      | 369998/1000000 [7:31:30<9:05:32, 19.25it/s]global step 370000, trans_decision ep_re 173.71449329121475

{"global_step": 370000, "eval_re": [136.03408842701836, 165.8525522348536, 
177.94841782616476, 161.57388953819984, 172.47955493714795, 241.8985697639204, 
167.97100582960076, 178.1435644126613, 141.92050680521177, 193.3227831373686], 
"eval_len": [26, 32, 34, 31, 33, 46, 32, 34, 27, 37]}

 38%|███▊      | 379998/1000000 [7:43:40<9:11:01, 18.75it/s]global step 380000, trans_decision ep_re 187.96713795203186

{"global_step": 380000, "eval_re": [151.21122778923595, 172.686685410048, 
417.54360105804227, 146.3513175809843, 171.61683558025805, 156.65510868416263, 
146.49753159651192, 198.18728802686655, 172.70029025094348, 146.22149354326544],
"eval_len": [29, 33, 76, 28, 33, 30, 28, 38, 33, 28]}

 39%|███▉      | 389998/1000000 [7:56:10<8:53:41, 19.05it/s]global step 390000, trans_decision ep_re 224.39967154702828

{"global_step": 390000, "eval_re": [182.39041227721373, 324.2018265908703, 
166.76440027807615, 183.08125056844744, 255.43407546770177, 172.54343394618854, 
220.61906288673322, 197.3343858587118, 298.0497793643573, 243.57808823198206], 
"eval_len": [35, 62, 32, 35, 50, 33, 42, 38, 58, 46]}

 40%|███▉      | 399998/1000000 [8:08:20<8:54:42, 18.70it/s]global step 400000, trans_decision ep_re 216.75412426160787

{"global_step": 400000, "eval_re": [431.1593782498795, 363.79048759607133, 
173.545219926867, 197.8881187378059, 151.58516504969657, 161.66246142421107, 
162.26820041126706, 167.17573030673168, 167.36049364374077, 191.10598726980808],
"eval_len": [78, 68, 33, 38, 29, 31, 31, 32, 32, 37]}

 41%|████      | 409998/1000000 [8:20:40<8:43:28, 18.78it/s]global step 410000, trans_decision ep_re 200.16493348537682

{"global_step": 410000, "eval_re": [523.2030877013225, 156.81126603790108, 
178.47957179815637, 125.3705547343153, 167.47392851065973, 156.61260687981857, 
156.44520705635549, 260.6490779620181, 151.72401402197963, 124.88002015124158], 
"eval_len": [96, 30, 34, 24, 32, 30, 30, 52, 29, 24]}

 42%|████▏     | 419998/1000000 [8:33:00<8:35:04, 18.77it/s]global step 420000, trans_decision ep_re 186.70238381982242

{"global_step": 420000, "eval_re": [162.02826615906633, 331.6531111263395, 
173.05723183328772, 141.42929991057906, 162.72329109448444, 247.26371194857566, 
151.0000142169019, 140.7026747389392, 210.5358253362315, 146.63041183381884], 
"eval_len": [31, 64, 33, 27, 31, 47, 29, 27, 40, 28]}

 43%|████▎     | 429998/1000000 [8:45:20<8:32:52, 18.52it/s]global step 430000, trans_decision ep_re 189.47148101581914

{"global_step": 430000, "eval_re": [171.83914022498735, 157.2222137297798, 
152.19228137500815, 177.5891775339038, 177.32411266918004, 147.137262666685, 
161.4125016667982, 192.61199050740112, 171.40151625078212, 385.9846135336659], 
"eval_len": [33, 30, 29, 34, 34, 28, 31, 37, 33, 71]}

 44%|████▍     | 439998/1000000 [8:57:40<8:15:25, 18.84it/s]global step 440000, trans_decision ep_re 201.47864320102204

{"global_step": 440000, "eval_re": [136.22849586413855, 170.96384081081212, 
269.4170129309179, 259.78219484103363, 141.26622491113832, 157.19955750876306, 
381.77724860444715, 140.94711711573925, 162.49545880060538, 194.709280622625], 
"eval_len": [26, 33, 51, 49, 27, 30, 71, 27, 31, 37]}

 45%|████▍     | 449998/1000000 [9:09:50<8:09:00, 18.75it/s]global step 450000, trans_decision ep_re 233.1721821593754

{"global_step": 450000, "eval_re": [130.79573899018925, 473.93487202677085, 
162.27408155399394, 147.06471536695187, 587.8898879064751, 141.54358134366825, 
155.54752801006956, 156.54388967623956, 188.83610708990787, 187.2914196294876], 
"eval_len": [25, 89, 31, 28, 111, 27, 30, 30, 36, 36]}

 46%|████▌     | 459998/1000000 [9:22:10<8:01:07, 18.71it/s]global step 460000, trans_decision ep_re 246.9667546885085

{"global_step": 460000, "eval_re": [340.1538099266255, 209.37221512568277, 
200.65279510269002, 178.10889599668582, 192.3764761265016, 168.14282631473475, 
622.0138457832019, 187.4927923185086, 193.38560586063542, 177.9682843298189], 
"eval_len": [63, 40, 38, 34, 37, 32, 116, 36, 37, 34]}

 47%|████▋     | 469998/1000000 [9:34:30<7:52:46, 18.68it/s]global step 470000, trans_decision ep_re 226.36078066670325

{"global_step": 470000, "eval_re": [145.85954018291537, 151.34278363977577, 
193.09281213059452, 156.92993334689876, 268.4635045227248, 199.48980663424098, 
213.45793417619535, 547.1071766736973, 193.32511568795093, 194.53919967203842], 
"eval_len": [28, 29, 37, 30, 51, 38, 41, 100, 37, 37]}

 48%|████▊     | 479998/1000000 [9:46:50<7:44:05, 18.67it/s]global step 480000, trans_decision ep_re 188.53005149500737

{"global_step": 480000, "eval_re": [162.21683827412846, 172.70217425452006, 
202.94894507077828, 146.75405868283235, 161.82910006762495, 329.5556039267568, 
161.72394626031198, 177.70645396319782, 177.75563879213686, 192.10775565778602],
"eval_len": [31, 33, 39, 28, 31, 62, 31, 34, 34, 37]}

 49%|████▉     | 489998/1000000 [9:59:10<7:33:37, 18.74it/s]global step 490000, trans_decision ep_re 200.21005969688304

{"global_step": 490000, "eval_re": [136.5402092849103, 172.67805443784727, 
130.37506306277032, 176.82157684571106, 430.8622980596785, 167.15032175374097, 
196.5268014351193, 146.47835195515862, 287.4027627584127, 157.26515737548146], 
"eval_len": [26, 33, 25, 34, 79, 32, 38, 28, 54, 30]}

 50%|████▉     | 499998/1000000 [10:11:30<7:27:55, 18.60it/s]global step 500000, trans_decision ep_re 199.08594223035522

{"global_step": 500000, "eval_re": [177.9786918329168, 152.3916405386946, 
162.19199589684933, 172.56386425713401, 208.77545114636632, 181.69450485500803, 
151.37499878361112, 176.8490058905282, 151.7856081922785, 455.2536609101652], 
"eval_len": [34, 29, 31, 33, 40, 35, 29, 34, 29, 88]}

 51%|█████     | 509998/1000000 [10:23:40<7:17:58, 18.65it/s]global step 510000, trans_decision ep_re 189.87505481684312

{"global_step": 510000, "eval_re": [156.5013869289075, 167.38372339760843, 
187.8689090507462, 146.7096479813631, 136.2202110879849, 136.01681994302822, 
187.90208350340555, 183.15400088600697, 455.67837840526, 141.31538698412035], 
"eval_len": [30, 32, 36, 28, 26, 26, 36, 35, 85, 27]}

 52%|█████▏    | 519998/1000000 [10:36:10<7:08:23, 18.67it/s]global step 520000, trans_decision ep_re 207.57735611657367

{"global_step": 520000, "eval_re": [130.5580926656758, 146.56515093866273, 
191.59634491726115, 146.19200549030558, 171.86965517329764, 171.40116313277483, 
199.95457324625517, 140.64426416564555, 442.8455790509744, 334.14673238488405], 
"eval_len": [25, 28, 37, 28, 33, 33, 38, 27, 81, 61]}

 53%|█████▎    | 529998/1000000 [10:48:30<7:02:00, 18.56it/s]global step 530000, trans_decision ep_re 170.48088833579558

{"global_step": 530000, "eval_re": [198.0815039326549, 172.51363029788538, 
166.79246730156015, 171.82083045116545, 182.2071009997898, 161.70922227638107, 
186.2825692798297, 136.31600969006985, 182.8125632759647, 146.2729858526551], 
"eval_len": [38, 33, 32, 33, 35, 31, 36, 26, 35, 28]}

 54%|█████▍    | 539998/1000000 [11:00:40<6:48:23, 18.77it/s]global step 540000, trans_decision ep_re 181.39180547221503

{"global_step": 540000, "eval_re": [209.55751282543437, 182.56328897417615, 
156.72318642796867, 182.86528230573657, 199.1256228420439, 173.07970117376806, 
182.67057541251538, 192.65443598809858, 146.6767028781224, 188.00174589428613], 
"eval_len": [40, 35, 30, 35, 38, 33, 35, 37, 28, 36]}

 55%|█████▍    | 549998/1000000 [11:13:00<6:37:41, 18.86it/s]global step 550000, trans_decision ep_re 261.2239804359572

{"global_step": 550000, "eval_re": [182.50736524536543, 421.139324168155, 
167.89184649764573, 375.4791440320368, 366.7214714658191, 247.99729685890293, 
426.5533264711304, 136.03661913363882, 120.15796096926147, 167.75544951761583], 
"eval_len": [35, 78, 32, 71, 69, 47, 78, 26, 23, 32]}

 56%|█████▌    | 559998/1000000 [11:25:20<6:37:04, 18.47it/s]global step 560000, trans_decision ep_re 207.48191146402934

{"global_step": 560000, "eval_re": [193.22953713023014, 156.90154415359822, 
156.92115983151388, 248.47801272816963, 130.79150028440196, 368.9144124555793, 
166.686658424987, 161.11016652851993, 264.9826458451837, 226.80347725810935], 
"eval_len": [37, 30, 30, 47, 25, 68, 32, 31, 51, 43]}

 57%|█████▋    | 569998/1000000 [11:37:40<6:25:03, 18.61it/s]global step 570000, trans_decision ep_re 169.71028576808007

{"global_step": 570000, "eval_re": [146.25705303791247, 141.81538145472615, 
193.65161895554152, 160.71029447851333, 167.66768537630418, 220.53355477532546, 
172.84090779739302, 168.5547176220873, 157.20309051930673, 167.86855366369065], 
"eval_len": [28, 27, 37, 31, 32, 42, 33, 32, 30, 32]}

 58%|█████▊    | 579998/1000000 [11:50:00<6:12:39, 18.78it/s]global step 580000, trans_decision ep_re 210.9890300640199

{"global_step": 580000, "eval_re": [141.18133570570697, 639.2620954184865, 
156.74195695919127, 166.94467834390252, 182.67415715862592, 136.20366246473762, 
208.8750036734716, 162.7810094505578, 136.4539447458347, 178.77245671968427], 
"eval_len": [27, 109, 30, 32, 35, 26, 40, 31, 26, 34]}

 59%|█████▉    | 589998/1000000 [12:02:20<6:08:28, 18.55it/s]global step 590000, trans_decision ep_re 210.0161318925794

{"global_step": 590000, "eval_re": [141.48103133675272, 167.67398613843244, 
166.4382517712222, 141.17820761187266, 187.58438977462382, 178.3048809401132, 
366.5177986212583, 167.76617820209225, 146.43519898232444, 436.781395547102], 
"eval_len": [27, 32, 32, 27, 36, 34, 72, 32, 28, 81]}

 60%|█████▉    | 599998/1000000 [12:14:40<6:03:43, 18.33it/s]global step 600000, trans_decision ep_re 190.89852690045964

{"global_step": 600000, "eval_re": [157.94009199321653, 375.567749490786, 
177.3471371291501, 168.2326144661223, 218.65246611274804, 157.07619706081454, 
178.05342879667526, 177.03195885185818, 146.85400210214505, 152.22962300108014],
"eval_len": [30, 72, 34, 32, 42, 30, 34, 34, 28, 29]}

 61%|██████    | 609998/1000000 [12:27:00<5:49:47, 18.58it/s]global step 610000, trans_decision ep_re 237.2217849109959

{"global_step": 610000, "eval_re": [166.57066450906575, 157.05522987690105, 
404.66358485431584, 321.6448649635394, 156.86189343804764, 404.78189464063416, 
146.68050155741994, 173.00643052768916, 299.65179035122793, 141.30099439111777],
"eval_len": [32, 30, 77, 63, 30, 75, 28, 33, 59, 27]}

 62%|██████▏   | 619998/1000000 [12:39:20<5:39:16, 18.67it/s]global step 620000, trans_decision ep_re 194.0773744012562

{"global_step": 620000, "eval_re": [191.32808077283082, 146.66689898764557, 
188.09159630593496, 171.80811978054663, 168.00539157743864, 188.7456256692454, 
406.26966977648, 161.9701287865611, 141.1873162053197, 176.70091615055946], 
"eval_len": [37, 28, 36, 33, 32, 36, 76, 31, 27, 34]}

 63%|██████▎   | 629998/1000000 [12:51:40<5:32:07, 18.57it/s]global step 630000, trans_decision ep_re 175.4683220414415

{"global_step": 630000, "eval_re": [146.84327146774456, 140.7886509740912, 
196.55338135122554, 182.97055934507074, 172.71932808289827, 161.9359877831272, 
173.54435110905857, 157.4296811245658, 223.8888399016381, 198.009169274995], 
"eval_len": [28, 27, 38, 35, 33, 31, 33, 30, 43, 38]}

 64%|██████▍   | 639998/1000000 [13:03:50<5:20:39, 18.71it/s]global step 640000, trans_decision ep_re 196.64045128385766

{"global_step": 640000, "eval_re": [161.61758536566302, 146.65201701976406, 
168.49467395866316, 183.33801468684734, 167.5309620949114, 136.30384293617087, 
182.25426043177757, 160.69085448983307, 314.5930807290534, 344.92922112589247], 
"eval_len": [31, 28, 32, 35, 32, 26, 35, 31, 59, 66]}

 65%|██████▍   | 649998/1000000 [13:16:10<5:10:04, 18.81it/s]global step 650000, trans_decision ep_re 181.91646115412863

{"global_step": 650000, "eval_re": [172.09750974628483, 172.40192751412513, 
157.27400420296647, 161.6613653619623, 167.40919298793563, 183.540195743446, 
176.8175751644999, 146.57939486224268, 212.72669822940168, 268.6567477284217], 
"eval_len": [33, 33, 30, 31, 32, 35, 34, 28, 41, 51]}

 66%|██████▌   | 659998/1000000 [13:28:30<5:03:49, 18.65it/s]global step 660000, trans_decision ep_re 212.21767324605943

{"global_step": 660000, "eval_re": [136.5385548580385, 582.2125692788056, 
147.15393680412902, 177.08811360718698, 199.04649659176724, 167.66595907266438, 
155.9457594170106, 194.97846723416797, 194.2562015462997, 167.29067405052444], 
"eval_len": [26, 106, 28, 34, 38, 32, 30, 37, 37, 32]}

 67%|██████▋   | 669998/1000000 [13:40:50<4:53:56, 18.71it/s]global step 670000, trans_decision ep_re 182.69124999776608

{"global_step": 670000, "eval_re": [151.21019502985558, 213.01140804425972, 
161.82925130926714, 189.0202512245203, 252.43581056828228, 167.48444355019328, 
206.77997259946235, 172.32939174762282, 140.5473971922383, 172.26437871195924], 
"eval_len": [29, 41, 31, 36, 48, 32, 40, 33, 27, 33]}

 68%|██████▊   | 679998/1000000 [13:53:10<4:44:38, 18.74it/s]global step 680000, trans_decision ep_re 182.44603375861806

{"global_step": 680000, "eval_re": [171.69098488720525, 141.31095875765217, 
183.50098450746296, 145.76433523972793, 183.17548317994627, 145.31291168108743, 
151.6071163328916, 192.99377205262675, 156.5617405678014, 352.5420503797791], 
"eval_len": [33, 27, 35, 28, 35, 28, 29, 37, 30, 68]}

 69%|██████▉   | 689998/1000000 [14:05:31<4:35:47, 18.73it/s]global step 690000, trans_decision ep_re 225.95422690445966

{"global_step": 690000, "eval_re": [141.39765123109433, 161.36236264807792, 
172.93565131560848, 178.20265919026076, 171.412483794519, 187.8713721087149, 
183.68765355385398, 484.92611308566666, 409.71469285583476, 168.0316292609659], 
"eval_len": [27, 31, 33, 34, 33, 36, 35, 88, 74, 32]}

 70%|██████▉   | 699998/1000000 [14:17:41<4:28:32, 18.62it/s]global step 700000, trans_decision ep_re 192.4714624883573

{"global_step": 700000, "eval_re": [135.92644621762705, 181.63341810205142, 
168.82260837541517, 151.52261769083665, 162.2460607352687, 450.5359409583686, 
141.56517467786068, 182.624797541813, 157.14555394018106, 192.6920066441505], 
"eval_len": [26, 35, 32, 29, 31, 85, 27, 35, 30, 37]}

 71%|███████   | 709998/1000000 [14:30:01<4:17:58, 18.74it/s]global step 710000, trans_decision ep_re 179.06432260813995

{"global_step": 710000, "eval_re": [162.10642339135208, 171.85118051933566, 
218.33776009676205, 145.9909899787222, 172.45074368076354, 146.48054931237033, 
186.75096021493536, 198.66522371210766, 167.9223221167216, 220.08707305832868], 
"eval_len": [31, 33, 42, 28, 33, 28, 36, 38, 32, 42]}

 72%|███████▏  | 719998/1000000 [14:42:21<4:11:08, 18.58it/s]global step 720000, trans_decision ep_re 157.41874751589233

{"global_step": 720000, "eval_re": [141.38265732483586, 166.94932591398054, 
172.4075967644844, 140.95843849391122, 141.44895216187118, 157.11532487682575, 
151.6241800630556, 157.0912443907447, 172.79903806310568, 172.41071710610828], 
"eval_len": [27, 32, 33, 27, 27, 30, 29, 30, 33, 33]}

 73%|███████▎  | 729998/1000000 [14:54:41<4:02:24, 18.56it/s]global step 730000, trans_decision ep_re 168.50764961232656

{"global_step": 730000, "eval_re": [146.30829783440274, 178.44090361001503, 
146.39316387971144, 171.2621459735772, 191.7124503734802, 213.75918943948164, 
146.40715381446344, 146.51662301565725, 161.6643270903153, 182.61224109216144], 
"eval_len": [28, 34, 28, 33, 37, 41, 28, 28, 31, 35]}

 74%|███████▍  | 739998/1000000 [15:07:01<3:51:06, 18.75it/s]global step 740000, trans_decision ep_re 179.8246197379527

{"global_step": 740000, "eval_re": [166.7486009962505, 145.76553976367165, 
145.75295569109267, 232.850923120668, 152.72954850551912, 156.4537624856291, 
316.5915442541964, 141.57244968015348, 146.26397403295934, 193.51689884938682], 
"eval_len": [32, 28, 28, 45, 29, 30, 62, 27, 28, 37]}

 75%|███████▍  | 749998/1000000 [15:19:21<3:43:55, 18.61it/s]global step 750000, trans_decision ep_re 167.47929706161622

{"global_step": 750000, "eval_re": [205.26996045820695, 146.54533387712448, 
146.97483050037627, 151.86916397002926, 183.10599951004122, 183.5332986156513, 
146.44112500669522, 136.14135278345464, 207.63512506629078, 167.27678082829226],
"eval_len": [39, 28, 28, 29, 35, 35, 28, 26, 40, 32]}

 76%|███████▌  | 759998/1000000 [15:31:41<3:33:00, 18.78it/s]global step 760000, trans_decision ep_re 221.72340917111143

{"global_step": 760000, "eval_re": [157.73736534464717, 184.40984998383448, 
178.52735961537832, 173.16423493239935, 161.07579188401485, 141.1061163738512, 
177.754910727703, 188.90199313461525, 703.2797896252063, 151.27668008946424], 
"eval_len": [30, 35, 34, 33, 31, 27, 34, 36, 133, 29]}

 77%|███████▋  | 769998/1000000 [15:43:51<3:24:26, 18.75it/s]global step 770000, trans_decision ep_re 165.98412676507863

{"global_step": 770000, "eval_re": [130.81402562652886, 171.55609722587758, 
204.33009740686103, 161.77445962309383, 146.3882250122131, 125.34642793062054, 
156.1934686829158, 193.57857518441443, 181.34275142967874, 188.51713952858228], 
"eval_len": [25, 33, 39, 31, 28, 24, 30, 37, 35, 36]}

 78%|███████▊  | 779998/1000000 [15:56:11<3:15:54, 18.72it/s]global step 780000, trans_decision ep_re 218.99073363566498

{"global_step": 780000, "eval_re": [199.00010027234504, 172.7084734799909, 
175.51731413462858, 146.5225986447993, 187.41590592993998, 676.1524438337897, 
157.49587116144082, 176.8427383840488, 136.349892756719, 161.9019977589479], 
"eval_len": [38, 33, 34, 28, 36, 131, 30, 34, 26, 31]}

 79%|███████▉  | 789998/1000000 [16:08:31<3:08:46, 18.54it/s]global step 790000, trans_decision ep_re 221.72402650998052

{"global_step": 790000, "eval_re": [167.8423978895458, 162.23607840778985, 
445.4981902084119, 167.55293573266633, 220.1920687971083, 157.2085660951314, 
172.88344172874417, 172.74239958713608, 183.79129101212288, 367.2928956411485], 
"eval_len": [32, 31, 82, 32, 42, 30, 33, 33, 35, 68]}

 80%|███████▉  | 799998/1000000 [16:20:51<2:56:16, 18.91it/s]global step 800000, trans_decision ep_re 201.06898390318568

{"global_step": 800000, "eval_re": [151.4513501748576, 162.67449520765706, 
172.21190058176833, 161.32839315259443, 321.16791968825595, 177.4522264994021, 
388.7383874129183, 151.86302417251486, 150.84083158612106, 172.96131055576703], 
"eval_len": [29, 31, 33, 31, 61, 34, 72, 29, 29, 33]}

 81%|████████  | 809998/1000000 [16:33:01<2:51:10, 18.50it/s]global step 810000, trans_decision ep_re 189.19202604078254

{"global_step": 810000, "eval_re": [151.3711557160157, 125.43212972474488, 
219.4695437780856, 172.86053153807654, 152.00202255792752, 208.7371450400823, 
161.3686435177737, 365.36486623262203, 172.94612073070346, 162.36810157179372], 
"eval_len": [29, 24, 42, 33, 29, 40, 31, 69, 33, 31]}

 82%|████████▏ | 819998/1000000 [16:45:21<2:40:43, 18.67it/s]global step 820000, trans_decision ep_re 161.0465722814085

{"global_step": 820000, "eval_re": [157.4348240118735, 152.46158897139557, 
141.07019992773112, 174.3452174763918, 166.96932040899264, 172.89674513552657, 
156.22590252313475, 191.0177246138947, 156.9185013577878, 141.1256983873564], 
"eval_len": [30, 29, 27, 33, 32, 33, 30, 37, 30, 27]}

 83%|████████▎ | 829998/1000000 [16:57:41<2:30:29, 18.83it/s]global step 830000, trans_decision ep_re 180.63585445295377

{"global_step": 830000, "eval_re": [167.7567071394791, 171.45996844087216, 
141.61145588982876, 151.5974021155, 255.01982512394054, 156.98271763186065, 
192.33913996839175, 177.37390961928384, 193.62448367885935, 198.5929349215213], 
"eval_len": [32, 33, 27, 29, 48, 30, 37, 34, 37, 38]}

 84%|████████▍ | 839998/1000000 [17:10:01<2:21:49, 18.80it/s]global step 840000, trans_decision ep_re 175.44974074255552

{"global_step": 840000, "eval_re": [131.04192690330635, 199.3987073647315, 
183.86551897427432, 219.00104003054423, 151.95587858197507, 209.92845157063942, 
156.99169048920413, 183.03141536242686, 156.5778969668359, 162.70488118161734], 
"eval_len": [25, 38, 35, 42, 29, 40, 30, 35, 30, 31]}

 85%|████████▍ | 849998/1000000 [17:22:11<2:13:35, 18.72it/s]global step 850000, trans_decision ep_re 176.59136346104998

{"global_step": 850000, "eval_re": [130.9208342880884, 168.01971823409573, 
384.39493601006524, 136.18625670335118, 230.1383002238049, 135.59230731079364, 
152.14386375618184, 151.61101304225227, 146.25217441412548, 130.65423062774107],
"eval_len": [25, 32, 71, 26, 44, 26, 29, 29, 28, 25]}

 86%|████████▌ | 859998/1000000 [17:34:31<2:04:22, 18.76it/s]global step 860000, trans_decision ep_re 196.64136344408445

{"global_step": 860000, "eval_re": [178.01442065752843, 167.77749641343848, 
188.13771646734426, 182.68961828961187, 146.47323425832994, 194.31731782968637, 
353.9460719909028, 172.61936386632715, 209.60748166487997, 172.8309130027951], 
"eval_len": [34, 32, 36, 35, 28, 37, 67, 33, 40, 33]}

 87%|████████▋ | 869998/1000000 [17:46:51<1:55:29, 18.76it/s]global step 870000, trans_decision ep_re 190.1285054566234

{"global_step": 870000, "eval_re": [167.28079912484523, 216.60255004694253, 
166.68809664617993, 383.35037428035963, 152.42119626036842, 140.6798118722745, 
151.61423526514946, 151.23228519295003, 157.18609073808682, 214.22961513907728],
"eval_len": [32, 41, 32, 72, 29, 27, 29, 29, 30, 41]}

 88%|████████▊ | 879998/1000000 [17:59:11<1:47:41, 18.57it/s]global step 880000, trans_decision ep_re 181.57003945561434

{"global_step": 880000, "eval_re": [240.09118426942956, 161.48891315444502, 
198.5279897611953, 178.46599169062455, 161.76858539481105, 162.05920137167735, 
135.87987974630065, 177.69225840170978, 216.84313990284153, 182.88325086310834],
"eval_len": [46, 31, 38, 34, 31, 31, 26, 34, 41, 35]}

 89%|████████▉ | 889998/1000000 [18:11:31<1:37:53, 18.73it/s]global step 890000, trans_decision ep_re 210.87863945289888

{"global_step": 890000, "eval_re": [167.05454531766307, 162.33902200883497, 
140.87001381244926, 140.5016459769208, 151.59281620706884, 703.3224072559369, 
151.69369835761393, 152.13580322743115, 177.55229496575822, 161.72414739931156],
"eval_len": [32, 31, 27, 27, 29, 125, 29, 29, 34, 31]}

 90%|████████▉ | 899998/1000000 [18:23:51<1:29:03, 18.72it/s]global step 900000, trans_decision ep_re 200.44992854056522

{"global_step": 900000, "eval_re": [189.11544039287662, 478.8660178166731, 
197.0905798016454, 192.82186971723425, 156.48267458150787, 167.72277347441485, 
130.7448161555251, 147.03896693945936, 177.42951546172606, 167.18663106458965], 
"eval_len": [36, 99, 38, 37, 30, 32, 25, 28, 34, 32]}

 91%|█████████ | 909998/1000000 [18:36:11<1:21:03, 18.51it/s]global step 910000, trans_decision ep_re 247.57132155811183

{"global_step": 910000, "eval_re": [368.51989027199414, 141.2496509535595, 
183.7339497860423, 114.51778375461534, 394.9588130047802, 172.06072366403956, 
167.41702517504027, 407.2821633553514, 163.12841585963398, 362.84479975606155], 
"eval_len": [68, 27, 35, 22, 73, 33, 32, 76, 31, 69]}

 92%|█████████▏| 919998/1000000 [18:48:31<1:11:11, 18.73it/s]global step 920000, trans_decision ep_re 189.67707917022702

{"global_step": 920000, "eval_re": [162.95754133035695, 172.75251447991255, 
151.00724390281513, 217.88831639235994, 151.20067911216844, 235.6644721683934, 
155.97288464038624, 167.32372347872854, 335.7293947541783, 146.27402144297062], 
"eval_len": [31, 33, 29, 42, 29, 45, 30, 32, 64, 28]}

 93%|█████████▎| 929998/1000000 [19:00:51<1:02:59, 18.52it/s]global step 930000, trans_decision ep_re 215.59896685807522

{"global_step": 930000, "eval_re": [125.25741464576191, 187.15539084831474, 
136.5152871945285, 187.4175408383199, 188.93378718723895, 187.35285232977492, 
328.4239459996171, 167.49930002963342, 373.8084583856412, 273.6256911219214], 
"eval_len": [24, 36, 26, 36, 36, 36, 64, 32, 70, 52]}

 94%|█████████▍| 939998/1000000 [19:13:11<54:19, 18.41it/s]global step 940000, trans_decision ep_re 234.29991038112263

{"global_step": 940000, "eval_re": [167.58818522711854, 213.06097974136412, 
326.5443290571399, 162.49634576293033, 141.32869235006012, 194.1211929253842, 
161.31208435791768, 157.34421135952255, 641.7263880413808, 177.47669498840776], 
"eval_len": [32, 41, 64, 31, 27, 37, 31, 30, 120, 34]}

 95%|█████████▍| 949998/1000000 [19:25:31<44:45, 18.62it/s]global step 950000, trans_decision ep_re 212.99890428303624

{"global_step": 950000, "eval_re": [204.20925227757786, 173.14921090266506, 
392.59271193825714, 167.4283069863556, 167.37344325636982, 342.7141526589689, 
157.4872697403359, 177.225210624805, 181.56966382951958, 166.2398206155076], 
"eval_len": [39, 33, 73, 32, 32, 66, 30, 34, 35, 32]}

 96%|█████████▌| 959998/1000000 [19:37:51<35:49, 18.61it/s]global step 960000, trans_decision ep_re 158.77564209636768

{"global_step": 960000, "eval_re": [171.44426413641793, 182.19190382638678, 
141.40097916903792, 161.82480048457023, 156.7319316359857, 151.25701301632992, 
157.40676353416666, 172.87194621580164, 141.2379767616219, 151.38884218335835], 
"eval_len": [33, 35, 27, 31, 30, 29, 30, 33, 27, 29]}

 97%|█████████▋| 969998/1000000 [19:50:11<26:47, 18.66it/s]global step 970000, trans_decision ep_re 171.62243472858924

{"global_step": 970000, "eval_re": [188.96109121301117, 162.08845812538266, 
202.4189374157604, 161.90458732913334, 210.04267976945866, 141.39965803484156, 
141.62884349593602, 193.8017746397312, 178.34335317547675, 135.63496408716068], 
"eval_len": [36, 31, 39, 31, 40, 27, 27, 37, 34, 26]}

 98%|█████████▊| 979998/1000000 [20:02:31<17:56, 18.58it/s]global step 980000, trans_decision ep_re 227.34535044709702

{"global_step": 980000, "eval_re": [166.53405603137494, 178.15167822908805, 
141.16368842945693, 157.0971684973722, 182.49336179607423, 152.4383343883913, 
168.1130904008929, 427.2517088606225, 151.77695557436255, 548.4334622633348], 
"eval_len": [32, 34, 27, 30, 35, 29, 32, 79, 29, 99]}

 99%|█████████▉| 989998/1000000 [20:14:51<08:54, 18.72it/s]global step 990000, trans_decision ep_re 189.4845048858329

{"global_step": 990000, "eval_re": [497.4158546418574, 140.09534407010852, 
157.0999991769805, 162.13901080315935, 151.17417704688756, 161.58105962325232, 
141.0507497425191, 161.8920306765525, 175.83099899305805, 146.56582408395388], 
"eval_len": [92, 27, 30, 31, 29, 31, 27, 31, 34, 28]}

100%|█████████▉| 999998/1000000 [20:27:11<00:00, 19.05it/s]global step 1000000, trans_decision ep_re 222.97815284386758

{"global_step": 1000000, "eval_re": [173.72316571770347, 141.23205899785114, 
172.4005559171604, 146.52321241720293, 188.07339128256362, 171.5343712518996, 
194.5501604291766, 157.245627428494, 717.2396719511496, 167.2593130454741], 
"eval_len": [33, 27, 33, 28, 36, 33, 37, 30, 124, 32]}

100%|██████████| 1000000/1000000 [20:27:16<00:00, 13.58it/s]
