
{
    'exp_name': 'VDPO',
    'env': 'Humanoid-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.2
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9998/1000000 [03:53<9:27:46, 29.06it/s]global step 10000, trans_decision ep_re 275.0032009847281

{"global_step": 10000, "eval_re": [494.0385219437165, 302.79173468352883, 
293.794403101169, 349.33474347383867, 401.6393052609192, 115.7647017968579, 
306.61188701035996, 263.1569374516817, 120.49709998731335, 102.40267513789537], 
"eval_len": [92, 63, 54, 65, 84, 23, 57, 49, 23, 20]}

  2%|▏         | 19999/1000000 [11:34<9:33:10, 28.50it/s]global step 20000, trans_decision ep_re 198.6272274121158

{"global_step": 20000, "eval_re": [96.2419090736536, 118.755000661895, 
101.48020167014784, 276.11480231355944, 273.609559331366, 102.84902132145514, 
116.65737487983752, 271.5441130727407, 331.1897400948258, 297.8305517016767], 
"eval_len": [19, 23, 20, 53, 52, 20, 23, 51, 61, 56]}

  3%|▎         | 29998/1000000 [19:15<9:19:04, 28.92it/s]global step 30000, trans_decision ep_re 194.8334945523217

{"global_step": 30000, "eval_re": [475.6970275040582, 352.7074328492315, 
141.83776390189712, 89.1316732500861, 122.03913939173046, 96.24109409216578, 
141.4203540466752, 102.94695803685786, 102.92422954234159, 323.38927290817315], 
"eval_len": [90, 68, 27, 18, 24, 19, 27, 20, 20, 64]}

  4%|▍         | 39997/1000000 [26:56<9:16:57, 28.73it/s]global step 40000, trans_decision ep_re 235.0382009413168

{"global_step": 40000, "eval_re": [327.1495404318647, 238.80818337122028, 
107.93251748122243, 306.7795458111384, 477.2557951890319, 352.3241904418784, 
113.29762924610452, 95.43829287653007, 118.11557341106628, 213.28074115311085], 
"eval_len": [63, 47, 21, 56, 90, 66, 22, 19, 23, 43]}

  5%|▍         | 49997/1000000 [34:50<9:11:55, 28.69it/s]global step 50000, trans_decision ep_re 268.7653139535344

{"global_step": 50000, "eval_re": [100.84396962822832, 134.4881500768597, 
542.262202450886, 140.1526452369388, 84.26400352205191, 326.24448603626445, 
441.39553131335794, 337.062209209196, 460.9720764273362, 119.96786563422467], 
"eval_len": [20, 26, 105, 27, 17, 61, 83, 63, 84, 23]}

  6%|▌         | 59999/1000000 [42:30<9:08:54, 28.54it/s]global step 60000, trans_decision ep_re 228.30718714560408

{"global_step": 60000, "eval_re": [349.09720443576526, 379.9165337166475, 
290.8141699291393, 84.22476988748258, 108.09384474235637, 161.50015942729277, 
372.1831127274726, 344.3513703369317, 96.64010043281743, 96.25060582013502], 
"eval_len": [62, 69, 53, 17, 21, 31, 67, 66, 19, 19]}

  7%|▋         | 69998/1000000 [50:10<8:56:29, 28.89it/s]global step 70000, trans_decision ep_re 322.11500171672407

{"global_step": 70000, "eval_re": [89.87638201733243, 474.4452519944792, 
368.4366374734468, 100.82903933274136, 372.60148045311456, 436.37184124899295, 
281.1429056972196, 324.5757500638067, 368.08278688777, 404.78794199833686], 
"eval_len": [18, 93, 68, 20, 68, 82, 59, 61, 69, 77]}

  8%|▊         | 79997/1000000 [57:42<8:58:26, 28.48it/s]global step 80000, trans_decision ep_re 244.95639354043593

{"global_step": 80000, "eval_re": [393.2724662649173, 110.0723801655515, 
256.3775511670118, 448.75928765180015, 89.97072050305641, 249.86847823139846, 
150.72703456012576, 220.21344272997055, 255.15858253308446, 275.14399159744295],
"eval_len": [72, 22, 49, 83, 18, 48, 29, 41, 48, 51]}

  9%|▉         | 89999/1000000 [1:05:24<8:47:52, 28.73it/s]global step 90000, trans_decision ep_re 238.73979921755057

{"global_step": 90000, "eval_re": [140.12766262707527, 111.97415446870544, 
311.4143892878325, 123.37998593378407, 276.5984707908327, 130.00439470397288, 
382.887083622603, 447.0982920774031, 356.6788214193677, 107.23473724392939], 
"eval_len": [27, 22, 56, 24, 51, 25, 70, 84, 66, 21]}

 10%|▉         | 99999/1000000 [1:13:04<8:47:30, 28.44it/s]global step 100000, trans_decision ep_re 334.72584462037673

{"global_step": 100000, "eval_re": [326.5638115163365, 355.46305824410484, 
738.2462455168456, 306.76867387582274, 390.46756846329123, 118.88894216399892, 
119.27709731691378, 358.56418281552396, 370.8841429775631, 262.1347233133668], 
"eval_len": [59, 66, 141, 57, 73, 23, 23, 64, 68, 52]}

 11%|█         | 109997/1000000 [1:20:44<8:36:20, 28.73it/s]global step 110000, trans_decision ep_re 250.59829902750465

{"global_step": 110000, "eval_re": [314.6084064550892, 296.7296135145571, 
145.06554127720833, 142.59364523091625, 371.19557991612714, 355.6287755490351, 
90.72393756195838, 162.86941739161077, 108.02019663382787, 518.5478767447165], 
"eval_len": [58, 60, 28, 27, 69, 65, 18, 30, 21, 103]}

 12%|█▏        | 119999/1000000 [1:28:40<8:26:28, 28.96it/s]global step 120000, trans_decision ep_re 268.8949015955598

{"global_step": 120000, "eval_re": [90.53860677562616, 150.4667781240128, 
694.9805461256307, 463.2411303360061, 96.53070778995615, 130.68396976583819, 
95.22477745709921, 350.7426682240647, 508.5252167369201, 108.01461462044438], 
"eval_len": [18, 29, 130, 89, 19, 25, 19, 64, 112, 21]}

 13%|█▎        | 129999/1000000 [1:36:10<8:33:25, 28.24it/s]global step 130000, trans_decision ep_re 263.1425810875956

{"global_step": 130000, "eval_re": [96.22919673054416, 314.646372702961, 
558.4518538671828, 156.75538845301304, 109.85144005918163, 124.0864530740376, 
342.63828181382337, 118.49219765884887, 373.28318959858524, 436.991436917778], 
"eval_len": [19, 57, 102, 30, 22, 24, 64, 24, 73, 85]}

 14%|█▍        | 139999/1000000 [1:43:52<8:10:10, 29.24it/s]global step 140000, trans_decision ep_re 244.85740828390672

{"global_step": 140000, "eval_re": [433.85674376212637, 111.54467370845333, 
95.89661080690173, 108.22260225577782, 117.26543276439543, 428.90930399354346, 
123.1216472510319, 376.9730416746374, 330.9602113314144, 321.82381529078543], 
"eval_len": [80, 22, 19, 21, 23, 81, 24, 69, 62, 61]}

 15%|█▍        | 149999/1000000 [1:51:40<8:22:33, 28.19it/s]global step 150000, trans_decision ep_re 247.3621261241285

{"global_step": 150000, "eval_re": [412.1158998729328, 96.7226109750782, 
199.4679044862124, 579.3373468447414, 101.64538132959437, 141.47573380019716, 
160.48082950117163, 118.30445770718029, 340.5037924656121, 323.56730425856506], 
"eval_len": [77, 19, 37, 120, 20, 27, 30, 23, 63, 61]}

 16%|█▌        | 159998/1000000 [1:59:16<8:04:04, 28.92it/s]global step 160000, trans_decision ep_re 160.6336220158204

{"global_step": 160000, "eval_re": [123.68960615770285, 144.9375419998406, 
364.4256281161215, 146.53493001117042, 133.70388474845225, 102.58710178482085, 
261.5303208808513, 96.7540322051431, 113.11908629316942, 119.05408796093162], 
"eval_len": [24, 28, 76, 28, 26, 20, 49, 19, 22, 23]}

 17%|█▋        | 169999/1000000 [2:07:00<8:01:07, 28.75it/s]global step 170000, trans_decision ep_re 273.07366037514873

{"global_step": 170000, "eval_re": [582.7505103930025, 95.57620692954426, 
130.1477509113555, 106.01126709050008, 304.5354950345002, 487.44628626472246, 
417.67867502243604, 102.57301448316282, 407.94105123420076, 96.07634638806256], 
"eval_len": [119, 19, 25, 21, 58, 89, 78, 20, 89, 19]}

 18%|█▊        | 179999/1000000 [2:14:46<7:57:23, 28.63it/s]global step 180000, trans_decision ep_re 237.77594466995865

{"global_step": 180000, "eval_re": [84.12753821286023, 351.8906087515433, 
144.21346708409965, 763.6535353665039, 146.6446274254174, 89.9814316762278, 
262.583304281618, 310.1453063071766, 101.38675842134545, 123.13286917279416], 
"eval_len": [17, 65, 28, 149, 28, 18, 51, 59, 20, 24]}

 19%|█▉        | 189999/1000000 [2:22:30<7:51:29, 28.63it/s]global step 190000, trans_decision ep_re 322.5177943928427

{"global_step": 190000, "eval_re": [431.91867528862605, 139.97155373622363, 
119.97059126138699, 337.235627560411, 321.3351222451518, 420.0509766956389, 
425.7516218779304, 319.5105166788153, 349.3259287133515, 360.1073298708915], 
"eval_len": [80, 27, 23, 62, 61, 88, 80, 67, 63, 65]}

 20%|█▉        | 199999/1000000 [2:30:15<7:46:29, 28.58it/s]global step 200000, trans_decision ep_re 220.17122179295114

{"global_step": 200000, "eval_re": [124.06841813953946, 390.2712263961321, 
299.0774960040857, 189.7264170762295, 317.4512875011367, 355.1683988908288, 
89.556737033359, 112.37110765127363, 167.2323709840093, 156.78875825291723], 
"eval_len": [24, 74, 57, 36, 61, 67, 18, 22, 31, 30]}

 21%|██        | 209999/1000000 [2:38:10<7:45:13, 28.30it/s]global step 210000, trans_decision ep_re 198.83809730685977

{"global_step": 210000, "eval_re": [426.63355902138824, 111.77853050257565, 
125.19550870721014, 124.8903508774831, 113.38638019148162, 337.5706899553811, 
202.8775339343814, 101.58955254877412, 154.45421232019385, 290.0046550097286], 
"eval_len": [82, 22, 24, 24, 22, 63, 37, 20, 30, 58]}

 22%|██▏       | 219998/1000000 [2:45:42<7:34:11, 28.62it/s]global step 220000, trans_decision ep_re 298.6239654941518

{"global_step": 220000, "eval_re": [95.77285336387168, 107.99244679909333, 
394.51453734279886, 422.6347720246309, 295.1668735611837, 413.9168266736142, 
309.9289722641198, 375.5590393153334, 123.49034202148785, 447.262991575384], 
"eval_len": [19, 21, 75, 77, 56, 77, 57, 71, 24, 84]}

 23%|██▎       | 229997/1000000 [2:53:31<7:32:53, 28.34it/s]global step 230000, trans_decision ep_re 241.98263353929957

{"global_step": 230000, "eval_re": [113.09162931200574, 111.38584686302563, 
151.0229690468856, 301.668217490119, 285.85167162176094, 292.7736109136823, 
344.08639118753797, 466.26960289374756, 84.18045325615257, 269.4959428080778], 
"eval_len": [22, 22, 29, 57, 54, 53, 62, 90, 17, 53]}

 24%|██▍       | 239997/1000000 [3:01:30<7:28:55, 28.22it/s]global step 240000, trans_decision ep_re 230.73628947853757

{"global_step": 240000, "eval_re": [275.0880543142664, 112.24613714256793, 
294.4164124140458, 449.08473156187205, 101.81481041296155, 90.11072773856243, 
114.86431098950973, 295.51552214711614, 317.6337086276092, 256.5884794368643], 
"eval_len": [51, 22, 56, 83, 20, 18, 23, 58, 60, 49]}

 25%|██▍       | 249999/1000000 [3:09:06<7:27:18, 27.94it/s]global step 250000, trans_decision ep_re 270.5961003618826

{"global_step": 250000, "eval_re": [119.3312520298425, 101.94861690546144, 
339.89058471610315, 424.7228693883604, 477.6641269570385, 105.28995667613258, 
419.2597673527194, 282.284874473972, 333.6730787496547, 101.89587636954191], 
"eval_len": [23, 20, 64, 77, 91, 21, 75, 51, 64, 20]}

 26%|██▌       | 259997/1000000 [3:16:53<7:13:53, 28.43it/s]global step 260000, trans_decision ep_re 321.58499067585615

{"global_step": 260000, "eval_re": [307.7963858139068, 664.1690827683905, 
109.01748660988584, 101.29874438459458, 567.9481841805839, 410.64194073335096, 
326.7318187794004, 141.61736968376312, 265.4439316318659, 321.1849621728196], 
"eval_len": [59, 121, 22, 20, 105, 74, 68, 27, 51, 59]}

 27%|██▋       | 269999/1000000 [3:24:45<7:12:31, 28.13it/s]global step 270000, trans_decision ep_re 274.7997389110884

{"global_step": 270000, "eval_re": [481.1588114381945, 340.09546720381934, 
368.20916998241165, 89.91454518251551, 90.67522747779051, 309.3287074821428, 
95.3552238138568, 363.26786969049016, 234.0793391460075, 375.91302769365484], 
"eval_len": [97, 62, 69, 18, 18, 58, 19, 67, 44, 72]}

 28%|██▊       | 279998/1000000 [3:32:50<7:03:17, 28.35it/s]global step 280000, trans_decision ep_re 233.22948474311025

{"global_step": 280000, "eval_re": [377.0434930190882, 137.35179679511984, 
286.8084498575681, 285.63252270328707, 107.17846306650873, 290.6163090716619, 
400.5216460127874, 261.05637487777614, 84.22913348602385, 101.8566585412814], 
"eval_len": [80, 26, 54, 52, 21, 55, 79, 50, 17, 20]}

 29%|██▉       | 289999/1000000 [3:40:30<7:05:52, 27.79it/s]global step 290000, trans_decision ep_re 381.739883195115

{"global_step": 290000, "eval_re": [420.3262106334043, 283.81945046629727, 
498.6644791854576, 180.49463486867532, 316.5722979186297, 554.0701923860084, 
414.7558972340435, 114.04122843593142, 562.6616735772257, 471.99276724547695], 
"eval_len": [76, 53, 95, 34, 58, 103, 76, 22, 113, 91]}

 30%|██▉       | 299997/1000000 [3:48:23<6:58:31, 27.88it/s]global step 300000, trans_decision ep_re 162.13015146513368

{"global_step": 300000, "eval_re": [214.6142330772862, 276.4589348710778, 
101.88264019464754, 134.03302957703255, 89.27966872753088, 158.910691700773, 
103.18126590736728, 349.28780718410883, 84.16747925120862, 109.48576416030438], 
"eval_len": [42, 52, 20, 26, 18, 31, 20, 63, 17, 22]}

 31%|███       | 309997/1000000 [3:56:15<6:53:30, 27.81it/s]global step 310000, trans_decision ep_re 258.83764762560065

{"global_step": 310000, "eval_re": [335.698378838938, 135.33323693538182, 
323.1737953237309, 132.74518848595406, 90.57710716449687, 536.3952538940088, 
309.04415459009715, 418.7532256985335, 160.95847454662112, 145.69766077824445], 
"eval_len": [62, 26, 60, 26, 18, 99, 56, 76, 31, 28]}

 32%|███▏      | 319998/1000000 [4:04:20<6:39:16, 28.38it/s]global step 320000, trans_decision ep_re 275.04334283634364

{"global_step": 320000, "eval_re": [132.94474057989083, 127.74834517731236, 
136.922944764709, 622.2216527211872, 112.91487250322324, 654.6917630762443, 
119.05357330750226, 90.05682512265507, 358.7661489353778, 395.112562175334], 
"eval_len": [26, 25, 26, 124, 22, 124, 23, 18, 67, 83]}

 33%|███▎      | 329998/1000000 [4:12:10<6:37:35, 28.09it/s]global step 330000, trans_decision ep_re 318.11812434170287

{"global_step": 330000, "eval_re": [365.1456451752132, 157.13501484652153, 
332.8956654271555, 102.81741869595533, 84.65806788674844, 379.32304225956256, 
362.5681352566475, 539.5947375752078, 429.9652602041664, 427.07825608985024], 
"eval_len": [64, 30, 64, 20, 17, 76, 69, 97, 82, 78]}

 34%|███▍      | 339999/1000000 [4:19:51<6:37:13, 27.69it/s]global step 340000, trans_decision ep_re 313.9490100287319

{"global_step": 340000, "eval_re": [142.27853821261988, 709.2077411495989, 
235.9148601336831, 316.891281437745, 570.648959038694, 457.9360409814469, 
356.7365670852017, 89.81651533348348, 175.95511276180844, 84.10448415303745], 
"eval_len": [28, 128, 46, 58, 113, 83, 67, 18, 34, 17]}

 35%|███▍      | 349999/1000000 [4:27:44<6:26:31, 28.03it/s]global step 350000, trans_decision ep_re 205.70932625003056

{"global_step": 350000, "eval_re": [100.78478776719706, 291.9978475222325, 
134.6112783009271, 318.3922125269538, 96.0111441623206, 90.22587442312195, 
112.9639829392335, 452.11432174925164, 112.00018305508048, 347.991630053987], 
"eval_len": [20, 55, 26, 61, 19, 18, 22, 84, 22, 66]}

 36%|███▌      | 359997/1000000 [4:35:36<6:24:41, 27.73it/s]global step 360000, trans_decision ep_re 268.7599701866936

{"global_step": 360000, "eval_re": [265.2247439486053, 96.05929526439373, 
268.0995890458395, 378.95053114461325, 120.27236173229042, 249.46827063318867, 
373.3788611456979, 330.47809335847444, 493.0896519933083, 112.57830360052424], 
"eval_len": [50, 19, 52, 68, 23, 47, 69, 62, 89, 22]}

 37%|███▋      | 369999/1000000 [4:43:32<6:19:11, 27.69it/s]global step 370000, trans_decision ep_re 246.89207506905922

{"global_step": 370000, "eval_re": [146.9304537159045, 122.03005938428029, 
105.704651116569, 187.14373900655568, 506.7931762851622, 108.02261900177432, 
364.00431967734966, 266.36391085931024, 131.83542316102447, 530.0923984826621], 
"eval_len": [28, 24, 21, 36, 95, 21, 68, 53, 26, 96]}

 38%|███▊      | 379998/1000000 [4:51:23<6:05:55, 28.24it/s]global step 380000, trans_decision ep_re 254.57224297021062

{"global_step": 380000, "eval_re": [102.05908828443114, 102.4242740381926, 
123.30316787159329, 448.6746360307553, 227.5890717405279, 782.6992162944309, 
255.96028803865462, 208.5402562556159, 182.57190690394563, 111.90052424395886], 
"eval_len": [20, 20, 24, 79, 46, 145, 48, 42, 34, 22]}

 39%|███▉      | 389999/1000000 [4:59:14<6:02:54, 28.01it/s]global step 390000, trans_decision ep_re 190.75849865968956

{"global_step": 390000, "eval_re": [501.00492332638277, 281.07777920460023, 
118.1036946135639, 128.1572033218332, 90.7692368273306, 101.47097145755761, 
117.59954855626333, 108.08366717726031, 89.53495950921251, 371.7830026028914], 
"eval_len": [88, 52, 23, 25, 18, 20, 23, 21, 18, 78]}

 40%|███▉      | 399997/1000000 [5:07:05<5:58:32, 27.89it/s]global step 400000, trans_decision ep_re 186.90897391842353

{"global_step": 400000, "eval_re": [131.83202423660163, 89.44948404657111, 
113.63459701106873, 130.65361618662263, 123.8486205661185, 463.00063423593537, 
101.88890132323306, 306.15605479425705, 290.9174177669688, 117.70838901685836], 
"eval_len": [25, 18, 22, 25, 24, 89, 20, 55, 55, 23]}

 41%|████      | 409998/1000000 [5:14:55<5:47:34, 28.29it/s]global step 410000, trans_decision ep_re 313.7633348938949

{"global_step": 410000, "eval_re": [102.43313401408587, 495.77012847348226, 
145.43136271990303, 483.177961901943, 90.06060846848894, 280.12466991419706, 
120.97080383790282, 347.7282478708022, 592.9355765540585, 479.00085518408497], 
"eval_len": [20, 89, 28, 93, 18, 52, 24, 66, 105, 86]}

 42%|████▏     | 419997/1000000 [5:22:51<5:46:14, 27.92it/s]global step 420000, trans_decision ep_re 436.2442211565055

{"global_step": 420000, "eval_re": [365.7711058857524, 986.4304085828027, 
427.8245168223799, 90.43875267854752, 151.75171353893975, 107.33625636279923, 
597.1435686154261, 280.6902252509929, 772.993438458714, 582.0622253687004], 
"eval_len": [68, 193, 81, 18, 29, 21, 108, 52, 149, 107]}

 43%|████▎     | 429999/1000000 [5:30:45<5:40:53, 27.87it/s]global step 430000, trans_decision ep_re 251.69155691235193

{"global_step": 430000, "eval_re": [312.150814812007, 263.5180838228796, 
114.20402618373963, 430.10545708766057, 89.9643593834933, 90.4274684721043, 
452.8371000389816, 290.5753431912846, 100.78628879456434, 372.3466273368039], 
"eval_len": [64, 50, 22, 80, 18, 18, 82, 53, 20, 69]}

 44%|████▍     | 439997/1000000 [5:38:36<5:38:18, 27.59it/s]global step 440000, trans_decision ep_re 131.94781735486043

{"global_step": 440000, "eval_re": [84.19980297067684, 280.04875865613616, 
89.84586155839709, 111.16732984832377, 107.59940843754592, 100.144578416805, 
122.94772400076734, 231.0232399830427, 96.13908648445961, 96.36238319244977], 
"eval_len": [17, 55, 18, 22, 21, 20, 24, 45, 19, 19]}

 45%|████▍     | 449997/1000000 [5:46:40<5:34:11, 27.43it/s]global step 450000, trans_decision ep_re 241.52826164859465

{"global_step": 450000, "eval_re": [107.50572663535789, 103.12681425388442, 
263.772646352792, 288.6749219739437, 94.98779772510656, 297.3483702880724, 
268.4862518087677, 90.41297545419653, 694.7260477479014, 206.24106424592395], 
"eval_len": [21, 20, 52, 56, 19, 58, 51, 18, 132, 39]}

 46%|████▌     | 459999/1000000 [5:54:21<5:23:18, 27.84it/s]global step 460000, trans_decision ep_re 257.7107535466937

{"global_step": 460000, "eval_re": [372.7828891313143, 177.27921642953237, 
114.29465139336284, 733.2388822346875, 301.42623632293703, 400.8417370123619, 
138.50319020255807, 129.42917569147383, 95.61246809553512, 113.69908895317405], 
"eval_len": [67, 33, 22, 139, 58, 87, 27, 25, 19, 22]}

 47%|████▋     | 469998/1000000 [6:02:12<5:12:29, 28.27it/s]global step 470000, trans_decision ep_re 395.48517339278266

{"global_step": 470000, "eval_re": [944.2677458086622, 650.1001355948073, 
354.22381281938493, 89.41381682327301, 246.34582908626368, 221.47674553593563, 
468.26690091113096, 256.33229520427125, 352.0419260362848, 372.3825261078127], 
"eval_len": [177, 121, 66, 18, 47, 43, 92, 49, 62, 68]}

 48%|████▊     | 479998/1000000 [6:10:03<5:01:28, 28.75it/s]global step 480000, trans_decision ep_re 268.7382126579047

{"global_step": 480000, "eval_re": [249.01461771220053, 94.90405327543162, 
127.24256692115101, 493.6556955682999, 371.3118605063681, 361.52844576461393, 
632.6980890765758, 101.83443516389126, 118.22398207988309, 136.96838051063148], 
"eval_len": [46, 19, 25, 95, 67, 65, 119, 20, 23, 26]}

 49%|████▉     | 489997/1000000 [6:17:52<5:00:55, 28.25it/s]global step 490000, trans_decision ep_re 284.85374308170225

{"global_step": 490000, "eval_re": [334.052834120764, 313.7221207280421, 
117.71359774409086, 471.5492738134368, 339.2895363578478, 96.12206934585889, 
599.1007244723171, 332.4977236983448, 136.97768207519147, 107.51186846112907], 
"eval_len": [63, 61, 23, 87, 64, 19, 107, 63, 27, 21]}

 50%|████▉     | 499998/1000000 [6:25:50<4:47:54, 28.94it/s]global step 500000, trans_decision ep_re 260.96988685437543

{"global_step": 500000, "eval_re": [96.60687436739101, 290.6293111340308, 
318.0002609265112, 90.49393257499385, 884.8883026299383, 95.49235102905045, 
160.77832762249923, 107.91824351478265, 450.7488994974626, 114.14236524709388], 
"eval_len": [19, 57, 59, 18, 180, 19, 31, 21, 83, 22]}

 51%|█████     | 509998/1000000 [6:33:23<4:43:06, 28.85it/s]global step 510000, trans_decision ep_re 307.2959669742853

{"global_step": 510000, "eval_re": [274.00546115076196, 396.85729461585726, 
749.0613986711984, 567.0953714497031, 136.1125249304152, 107.18824668889742, 
393.7421693618815, 211.66341276833325, 123.02586767277818, 114.20792243302701], 
"eval_len": [52, 71, 139, 107, 26, 21, 72, 39, 24, 22]}

 52%|█████▏    | 519999/1000000 [6:41:06<4:44:58, 28.07it/s]global step 520000, trans_decision ep_re 361.80136148532745

{"global_step": 520000, "eval_re": [375.84848600705794, 119.59422583888676, 
129.79306906596318, 858.1779364438685, 96.87197030775349, 672.2583053932051, 
244.5275130492864, 526.9368301859455, 106.9923833290598, 487.0128952322477], 
"eval_len": [71, 23, 25, 161, 19, 130, 46, 94, 21, 92]}

 53%|█████▎    | 529999/1000000 [6:48:50<4:33:38, 28.63it/s]global step 530000, trans_decision ep_re 169.38753551050507

{"global_step": 530000, "eval_re": [509.31190364259743, 119.31658841919372, 
140.6822030559565, 124.72051377363354, 96.4430139541188, 95.09192573568019, 
124.04100173403359, 286.7572560729127, 107.87521627524852, 89.63573244167553], 
"eval_len": [90, 23, 27, 24, 19, 19, 24, 53, 21, 18]}

 54%|█████▍    | 539997/1000000 [6:56:33<4:28:13, 28.58it/s]global step 540000, trans_decision ep_re 526.9548282257181

{"global_step": 540000, "eval_re": [358.8229919957137, 464.99148642949245, 
405.7596470368885, 830.5260262100043, 643.2664572554601, 384.5090596476353, 
468.70811673892814, 1527.3664964718323, 84.24636422452615, 101.35163624669964], 
"eval_len": [66, 84, 70, 158, 113, 70, 92, 283, 17, 20]}

 55%|█████▍    | 549997/1000000 [7:04:30<4:22:26, 28.58it/s]global step 550000, trans_decision ep_re 235.23804410728545

{"global_step": 550000, "eval_re": [97.07446485463275, 378.43091446526665, 
98.08609511064226, 417.44425972547145, 344.68159910229195, 291.3269108917813, 
113.61920801443138, 367.21493904610975, 107.9071865310175, 136.59486333120958], 
"eval_len": [19, 68, 19, 73, 62, 57, 22, 67, 21, 27]}

 56%|█████▌    | 559997/1000000 [7:12:02<4:19:57, 28.21it/s]global step 560000, trans_decision ep_re 401.6561147112535

{"global_step": 560000, "eval_re": [102.69160739340542, 253.47938402748517, 
105.87108515258602, 1165.0531480029194, 379.9168499641336, 1508.774954499189, 
120.97308451255238, 155.20421467396108, 113.72690253515819, 110.86991635114511],
"eval_len": [20, 50, 21, 233, 69, 272, 24, 30, 22, 22]}

 57%|█████▋    | 569997/1000000 [7:19:46<4:11:55, 28.45it/s]global step 570000, trans_decision ep_re 241.18081727358572

{"global_step": 570000, "eval_re": [823.9267616876896, 89.55219218758512, 
117.55816503752352, 326.3403857100503, 236.0820555310009, 133.86322985402916, 
124.34328441079013, 340.61099113110635, 90.18350960286148, 129.34759758322036], 
"eval_len": [144, 18, 23, 61, 48, 26, 24, 62, 18, 25]}

 58%|█████▊    | 579999/1000000 [7:27:30<4:01:35, 28.97it/s]global step 580000, trans_decision ep_re 343.36263970089186

{"global_step": 580000, "eval_re": [282.03157798134924, 445.23440286574305, 
244.9785202446588, 552.2050063234786, 407.2127696273021, 320.13719104169303, 
174.56989200637952, 470.41230361743607, 113.13526247458125, 423.70947082629647],
"eval_len": [54, 87, 47, 101, 76, 60, 33, 87, 22, 76]}

 59%|█████▉    | 589999/1000000 [7:35:15<4:00:37, 28.40it/s]global step 590000, trans_decision ep_re 287.02607021715426

{"global_step": 590000, "eval_re": [122.24944988197358, 96.05645909088892, 
517.9293693020024, 96.28475180172984, 117.31204482349693, 1034.9041377687422, 
178.789025923313, 293.9905341833464, 114.01112356057193, 298.73380583547777], 
"eval_len": [24, 19, 105, 19, 23, 193, 34, 55, 22, 59]}

 60%|█████▉    | 599999/1000000 [7:43:10<3:52:34, 28.66it/s]global step 600000, trans_decision ep_re 265.65477872679367

{"global_step": 600000, "eval_re": [249.07311420205744, 636.4843142074603, 
102.41910083563488, 150.56760306295698, 107.32093590655039, 164.0938213798113, 
295.9917085358848, 362.0601620945103, 492.84780561615935, 95.68922142691113], 
"eval_len": [46, 114, 20, 29, 21, 32, 55, 66, 90, 19]}

 61%|██████    | 609998/1000000 [7:50:42<3:45:04, 28.88it/s]global step 610000, trans_decision ep_re 307.42774501429807

{"global_step": 610000, "eval_re": [272.8772171758997, 89.18970073637576, 
478.4526677264738, 405.07737192000474, 119.99871580130554, 108.20160586482996, 
289.41749451348176, 328.47130715267446, 886.1453145637145, 96.44605468822061], 
"eval_len": [49, 18, 87, 83, 24, 21, 57, 58, 189, 19]}

 62%|██████▏   | 619999/1000000 [7:58:26<3:40:27, 28.73it/s]global step 620000, trans_decision ep_re 285.7433745677112

{"global_step": 620000, "eval_re": [140.93543173658279, 108.05080572311608, 
339.58762153268606, 90.20814288614851, 373.6001549916491, 357.69085997208157, 
176.70452300825832, 427.45957556884844, 747.2511063410897, 95.9455239166518], 
"eval_len": [27, 21, 62, 18, 66, 69, 34, 79, 142, 19]}

 63%|██████▎   | 629999/1000000 [8:06:20<3:37:12, 28.39it/s]global step 630000, trans_decision ep_re 320.7707340490276

{"global_step": 630000, "eval_re": [915.0668565350106, 108.57820849276645, 
90.00228578501739, 329.4592940583831, 315.9964897610675, 553.2771062193762, 
580.5503070180628, 89.709422562543, 106.84276295254745, 118.22460710550212], 
"eval_len": [178, 21, 18, 63, 60, 100, 126, 18, 21, 23]}

 64%|██████▍   | 639998/1000000 [8:13:53<3:27:57, 28.85it/s]global step 640000, trans_decision ep_re 282.40955354728806

{"global_step": 640000, "eval_re": [110.94206144736827, 402.2288782610966, 
386.83441497715273, 388.89070092437646, 102.41782999077216, 311.5102656064251, 
408.9595389799953, 108.13339222788814, 101.67613779545144, 502.5023152623537], 
"eval_len": [22, 88, 72, 71, 20, 58, 73, 21, 20, 91]}

 65%|██████▍   | 649997/1000000 [8:21:50<3:23:45, 28.63it/s]global step 650000, trans_decision ep_re 390.6679083453347

{"global_step": 650000, "eval_re": [663.6692614825201, 343.4664238188972, 
344.2003625698566, 257.8376344076241, 389.45687559210825, 308.8275711172744, 
96.65284109865755, 281.2731064909789, 117.94076377750005, 1103.3542430979294], 
"eval_len": [116, 64, 64, 46, 71, 56, 19, 56, 23, 213]}

 66%|██████▌   | 659999/1000000 [8:29:21<3:15:37, 28.97it/s]global step 660000, trans_decision ep_re 193.63798177409197

{"global_step": 660000, "eval_re": [206.07445076926507, 108.5324601345185, 
284.2820049501127, 265.04743295170675, 331.86310074435, 122.28486313034021, 
286.04850937614356, 84.22743145244914, 102.32927419074115, 145.69029004129243], 
"eval_len": [42, 21, 53, 51, 66, 24, 55, 17, 20, 28]}

 67%|██████▋   | 669997/1000000 [8:37:04<3:11:19, 28.75it/s]global step 670000, trans_decision ep_re 430.94512968135575

{"global_step": 670000, "eval_re": [140.2236645896263, 1071.7127276299725, 
504.92758539985016, 395.8372405228143, 368.82222787648874, 96.93573416568049, 
426.46001977863216, 526.6161233364874, 304.01085376477545, 473.90511974923], 
"eval_len": [27, 213, 90, 77, 71, 19, 78, 101, 56, 88]}

 68%|██████▊   | 679999/1000000 [8:45:00<3:06:08, 28.65it/s]global step 680000, trans_decision ep_re 295.2864648015022

{"global_step": 680000, "eval_re": [632.4917193569313, 233.89151525494214, 
289.33577918650406, 84.17813782279293, 377.6333865022304, 102.25295980542698, 
89.32701765263633, 95.8828816329267, 344.10460467278455, 703.7666461278462], 
"eval_len": [121, 45, 54, 17, 69, 20, 18, 19, 62, 133]}

 69%|██████▉   | 689999/1000000 [8:52:32<3:01:15, 28.50it/s]global step 690000, trans_decision ep_re 353.2513995269867

{"global_step": 690000, "eval_re": [598.9333435454489, 584.8917288077674, 
611.2376918831069, 89.28385522309574, 107.56258636614773, 291.7613239411101, 
331.1310365594491, 240.4280827289194, 102.00825220025203, 575.2760940145696], 
"eval_len": [106, 110, 111, 18, 21, 54, 61, 48, 20, 125]}

 70%|██████▉   | 699997/1000000 [9:00:16<2:59:12, 27.90it/s]global step 700000, trans_decision ep_re 319.5369160109098

{"global_step": 700000, "eval_re": [576.1728880683866, 90.52307518995958, 
711.6706481235797, 133.9551730929549, 90.20380772813162, 653.3275344372076, 
263.2859928711394, 226.6444504357484, 324.7940794089329, 124.79151075305775], 
"eval_len": [111, 18, 143, 26, 18, 118, 51, 45, 63, 24]}

 71%|███████   | 709998/1000000 [9:08:10<2:47:07, 28.92it/s]global step 710000, trans_decision ep_re 264.86970103642034

{"global_step": 710000, "eval_re": [344.8822658072115, 111.59182027028379, 
156.59801420144564, 250.11300027462926, 141.37519472677582, 327.82957803497186, 
329.9720403415485, 746.014642398424, 107.66170625041144, 132.6587480585016], 
"eval_len": [61, 22, 30, 49, 27, 62, 59, 150, 21, 26]}

 72%|███████▏  | 719999/1000000 [9:15:43<2:41:32, 28.89it/s]global step 720000, trans_decision ep_re 245.72575781892343

{"global_step": 720000, "eval_re": [100.5901488653813, 448.15507293598785, 
521.2854914987225, 90.96183143861728, 102.29527156281107, 140.38823866229916, 
96.0456300905997, 304.51353474214756, 568.8674783334951, 84.15488005917274], 
"eval_len": [20, 80, 99, 18, 20, 27, 19, 58, 103, 17]}

 73%|███████▎  | 729999/1000000 [9:23:26<2:38:35, 28.38it/s]global step 730000, trans_decision ep_re 260.40781869700885

{"global_step": 730000, "eval_re": [643.9833506148293, 297.98592435784286, 
445.34064555875744, 95.93734485834939, 172.01692270953487, 89.84183846938424, 
321.87934289148046, 107.77670988957952, 84.2159654577349, 345.10014216259555], 
"eval_len": [126, 54, 82, 19, 33, 18, 63, 21, 17, 66]}

 74%|███████▍  | 739998/1000000 [9:31:10<2:30:44, 28.75it/s]global step 740000, trans_decision ep_re 208.11857779689666

{"global_step": 740000, "eval_re": [130.2506347434252, 302.6633247240082, 
107.25491550057325, 376.13789428543487, 102.66882117729773, 113.0883291353039, 
96.3641640901923, 107.60904158260956, 113.92765490117733, 631.2209978289444], 
"eval_len": [25, 55, 21, 70, 20, 22, 19, 21, 22, 118]}

 75%|███████▍  | 749997/1000000 [9:38:57<2:26:45, 28.39it/s]global step 750000, trans_decision ep_re 161.01936728536072

{"global_step": 750000, "eval_re": [102.4134792387605, 124.68171037160751, 
152.08914216860123, 231.7337888965703, 216.2514456041253, 107.31657245275694, 
113.98964642063007, 96.1263942796562, 185.21556778453572, 280.37592563636326], 
"eval_len": [20, 24, 29, 46, 45, 21, 22, 19, 35, 54]}

 76%|███████▌  | 759999/1000000 [9:46:50<2:22:16, 28.11it/s]global step 760000, trans_decision ep_re 318.61443621297076

{"global_step": 760000, "eval_re": [300.0136717933222, 89.14452922848471, 
155.49929328188267, 248.16135219342257, 1202.8569932005266, 129.43364589260517, 
151.71011031304707, 634.7552494462296, 178.6453932594435, 95.92412352074348], 
"eval_len": [55, 18, 30, 49, 249, 25, 29, 113, 34, 19]}

 77%|███████▋  | 769997/1000000 [9:54:25<2:15:42, 28.25it/s]global step 770000, trans_decision ep_re 392.79585646941075

{"global_step": 770000, "eval_re": [90.44738757297152, 293.7217973230345, 
579.0561013727755, 226.46080671173556, 1523.0809198670083, 396.4235126448275, 
84.12042392929423, 129.81566060779005, 270.7726090437272, 334.05934562094325], 
"eval_len": [18, 60, 115, 44, 284, 69, 17, 25, 51, 65]}

 78%|███████▊  | 779999/1000000 [10:02:10<2:09:29, 28.32it/s]global step 780000, trans_decision ep_re 352.5767465663032

{"global_step": 780000, "eval_re": [129.56374781963595, 90.11512903746697, 
272.19582031777213, 1227.2414022322828, 494.86218937135453, 329.27293101380405, 
299.45338032825344, 317.04909435374736, 112.98217550229595, 253.0315956864188], 
"eval_len": [25, 18, 52, 224, 96, 62, 59, 61, 22, 50]}

 79%|███████▉  | 789998/1000000 [10:09:55<2:03:02, 28.45it/s]global step 790000, trans_decision ep_re 255.3050439849235

{"global_step": 790000, "eval_re": [659.3835579111815, 385.83907723231863, 
95.96569810028208, 648.2910740258671, 89.33194891473565, 105.81191270235293, 
114.60361688736418, 112.22774041125727, 233.00974257238133, 108.58607109149409],
"eval_len": [119, 73, 19, 136, 18, 21, 22, 22, 46, 21]}

 80%|███████▉  | 799997/1000000 [10:17:50<1:58:01, 28.24it/s]global step 800000, trans_decision ep_re 392.5716115278231

{"global_step": 800000, "eval_re": [89.19985348329142, 307.78251523052734, 
985.2882005029588, 95.38377402480997, 392.17908669064127, 569.564046480409, 
805.4934183509445, 188.277641735853, 403.20916895339326, 89.3384098254029], 
"eval_len": [18, 58, 215, 19, 84, 107, 151, 36, 83, 18]}

 81%|████████  | 809997/1000000 [10:25:24<1:52:08, 28.24it/s]global step 810000, trans_decision ep_re 300.62580286068044

{"global_step": 810000, "eval_re": [315.8701365897177, 130.92206948166753, 
124.47806883403287, 108.01821111837093, 198.12709550526003, 168.80547633981243, 
1066.4185284499072, 556.1961992180629, 236.53550135722998, 100.88674171274252], 
"eval_len": [58, 25, 24, 21, 38, 32, 208, 111, 47, 20]}

 82%|████████▏ | 819998/1000000 [10:33:20<1:44:43, 28.65it/s]global step 820000, trans_decision ep_re 311.1697417867274

{"global_step": 820000, "eval_re": [339.51456708828425, 853.6249735210918, 
89.24920279745353, 97.38636455768525, 84.26578196616833, 102.35910812882547, 
1088.721289061516, 138.76878681777663, 102.23668027777082, 215.5706636507014], 
"eval_len": [64, 157, 18, 19, 17, 20, 209, 27, 20, 44]}

 83%|████████▎ | 829997/1000000 [10:40:53<1:40:02, 28.32it/s]global step 830000, trans_decision ep_re 216.58698151110212

{"global_step": 830000, "eval_re": [113.90832337083133, 359.246752524137, 
114.47293751629205, 337.2492009856047, 103.18569163557297, 310.37423497284294, 
320.30668412090165, 102.05677383477617, 95.97979012302457, 309.0894260270379], 
"eval_len": [22, 68, 22, 64, 20, 58, 65, 20, 19, 56]}

 84%|████████▍ | 839999/1000000 [10:48:50<1:33:31, 28.51it/s]global step 840000, trans_decision ep_re 253.9106032517381

{"global_step": 840000, "eval_re": [320.8093525574736, 279.07208602011895, 
605.6038227398104, 95.76833599855387, 123.96133658566548, 347.2601715018109, 
400.8956456079041, 124.32088625333564, 126.34365362582001, 115.07074162688855], 
"eval_len": [60, 52, 106, 19, 24, 66, 76, 24, 24, 23]}

 85%|████████▍ | 849999/1000000 [10:56:40<1:27:54, 28.44it/s]global step 850000, trans_decision ep_re 436.70802143675627

{"global_step": 850000, "eval_re": [97.14570507920286, 430.8520970868071, 
254.14340141586706, 351.3997706103909, 261.23878301940323, 286.4601459596865, 
1852.2302192999907, 404.830353413377, 333.1284590729402, 95.65127940989717], 
"eval_len": [19, 79, 47, 66, 50, 53, 340, 84, 61, 19]}

 86%|████████▌ | 859998/1000000 [11:04:11<1:20:56, 28.83it/s]global step 860000, trans_decision ep_re 321.35221950556246

{"global_step": 860000, "eval_re": [128.22224935335865, 89.56349422750448, 
95.48273332017132, 89.79795419756024, 429.7018296073369, 95.4964251162597, 
281.6608971221671, 1027.6477240139322, 765.0080967631544, 210.94079133417978], 
"eval_len": [25, 18, 19, 18, 82, 19, 52, 189, 141, 40]}

 87%|████████▋ | 869997/1000000 [11:11:55<1:16:11, 28.44it/s]global step 870000, trans_decision ep_re 160.66902150488713

{"global_step": 870000, "eval_re": [106.94573161037552, 380.7370632822037, 
96.59546507186111, 111.70954707324518, 111.50950914282477, 279.9999966411589, 
95.1103411029719, 124.88901036263745, 106.46118645234534, 192.7323643092474], 
"eval_len": [21, 74, 19, 22, 22, 55, 19, 24, 21, 36]}

 88%|████████▊ | 879998/1000000 [11:19:50<1:09:24, 28.81it/s]global step 880000, trans_decision ep_re 239.69308467335628

{"global_step": 880000, "eval_re": [134.11018799250908, 135.73797472728887, 
89.69589623335277, 1186.0116494856193, 101.91203206289487, 175.39996030430507, 
210.91332768015815, 176.49098630006034, 90.24498706917105, 96.41384487820305], 
"eval_len": [26, 26, 18, 208, 20, 34, 39, 35, 18, 19]}

 89%|████████▉ | 889998/1000000 [11:27:24<1:04:36, 28.37it/s]global step 890000, trans_decision ep_re 242.51933541670238

{"global_step": 890000, "eval_re": [107.33591201633594, 123.28814419590559, 
428.64152473975935, 421.9951458715251, 274.11478177168755, 95.33959367527498, 
244.95428656260944, 96.73824301997405, 134.27409059556283, 498.5116317183886], 
"eval_len": [21, 24, 80, 77, 56, 19, 48, 19, 26, 94]}

 90%|████████▉ | 899999/1000000 [11:35:12<58:43, 28.38it/s]global step 900000, trans_decision ep_re 232.84738549287277

{"global_step": 900000, "eval_re": [392.5435436808256, 302.23393236644495, 
89.88243434564933, 496.3452364849979, 113.47437000056335, 89.24841923581687, 
431.59093360851443, 119.69888283457959, 175.27369346227226, 118.1824089090636], 
"eval_len": [73, 55, 18, 86, 22, 18, 96, 23, 33, 23]}

 91%|█████████ | 909997/1000000 [11:42:56<51:59, 28.85it/s]global step 910000, trans_decision ep_re 327.6402924610508

{"global_step": 910000, "eval_re": [102.36391784237824, 1193.5496683123454, 
242.10922562369979, 286.02808534337424, 479.97586957914797, 159.30851405571428, 
281.5082081848982, 305.66498764909346, 123.66690593746443, 102.22754208239267], 
"eval_len": [20, 220, 46, 55, 93, 30, 53, 57, 24, 20]}

 92%|█████████▏| 919999/1000000 [11:50:50<46:54, 28.42it/s]global step 920000, trans_decision ep_re 355.3001607292023

{"global_step": 920000, "eval_re": [108.47966424805284, 107.7949302788548, 
96.96842698123315, 101.84343446896708, 471.404893696003, 838.0452849632635, 
106.62144227574142, 689.5517285184595, 731.9949335109791, 300.2968683504686], 
"eval_len": [21, 21, 19, 20, 93, 167, 21, 125, 139, 57]}

 93%|█████████▎| 929999/1000000 [11:58:23<41:00, 28.45it/s]global step 930000, trans_decision ep_re 299.71328418034756

{"global_step": 930000, "eval_re": [262.8461024751626, 382.1832384566529, 
762.9039955124346, 174.14763484238176, 532.9602929249684, 108.05312145636019, 
284.28379502492356, 95.99922442013278, 95.7013932600398, 298.0540434304189], 
"eval_len": [50, 69, 136, 33, 102, 21, 54, 19, 19, 58]}

 94%|█████████▍| 939997/1000000 [12:06:20<35:12, 28.41it/s]global step 940000, trans_decision ep_re 446.025038863031

{"global_step": 940000, "eval_re": [112.40394606076087, 177.2896761999492, 
823.1913837086335, 883.3901162501403, 108.87032076829922, 605.9719991094126, 
972.8654573039091, 473.55995834751474, 103.56870666238008, 199.13882421930938], 
"eval_len": [22, 34, 162, 167, 21, 107, 191, 82, 20, 38]}

 95%|█████████▍| 949999/1000000 [12:14:00<28:40, 29.07it/s]global step 950000, trans_decision ep_re 336.2541718698526

{"global_step": 950000, "eval_re": [507.8930087665771, 184.42040970022887, 
113.97567848342071, 123.39537280523629, 626.33469714726, 566.1490430229729, 
101.40824175930054, 108.61886563133378, 354.71344786913227, 675.6329535130634], 
"eval_len": [94, 35, 22, 24, 129, 107, 20, 21, 65, 143]}

 96%|█████████▌| 959999/1000000 [12:21:40<23:03, 28.91it/s]global step 960000, trans_decision ep_re 405.53000390285007

{"global_step": 960000, "eval_re": [558.5831689656235, 449.3570443470179, 
117.9536428108385, 353.3725984573873, 922.8971445660703, 102.120617716357, 
658.3783732881651, 422.8386869347807, 101.75628832913083, 368.0424736131295], 
"eval_len": [109, 81, 23, 64, 177, 20, 131, 85, 20, 69]}

 97%|█████████▋| 969999/1000000 [12:29:20<17:43, 28.20it/s]global step 970000, trans_decision ep_re 169.96441487614348

{"global_step": 970000, "eval_re": [89.21701576400191, 96.18857598077686, 
744.163015457479, 107.54472685613821, 109.0088851451148, 95.40681699799632, 
108.68274560858336, 108.13118327444904, 90.34848665079512, 150.95269702609997], 
"eval_len": [18, 19, 132, 21, 21, 19, 21, 21, 18, 29]}

 98%|█████████▊| 979999/1000000 [12:36:47<11:35, 28.76it/s]global step 980000, trans_decision ep_re 180.59213550457443

{"global_step": 980000, "eval_re": [255.06483751929608, 119.26275453215962, 
107.32841200884867, 113.08803194021337, 594.5028956092532, 112.33135112797552, 
96.16135833395995, 136.2323651676735, 102.31917272616414, 169.63017608020016], 
"eval_len": [49, 23, 21, 22, 114, 22, 19, 26, 20, 32]}

 99%|█████████▉| 989997/1000000 [12:44:40<05:46, 28.84it/s]global step 990000, trans_decision ep_re 335.0448483373478

{"global_step": 990000, "eval_re": [821.190368918692, 292.4066098280497, 
137.99520110878007, 666.6262395173222, 322.8178737602567, 126.94200415159453, 
128.82275154863, 402.4654951956425, 360.13028489691465, 91.05165444759587], 
"eval_len": [174, 55, 27, 128, 58, 25, 25, 72, 68, 18]}

100%|█████████▉| 999999/1000000 [12:52:11<00:00, 28.91it/s]global step 1000000, trans_decision ep_re 255.88030245801087

{"global_step": 1000000, "eval_re": [123.53684926931412, 324.55214840060984, 
96.1400716387012, 157.06147541938492, 95.90203047211268, 573.6257189508492, 
107.89665862227959, 299.98520051579476, 438.31291872481506, 341.7899525662475], 
"eval_len": [24, 59, 19, 30, 19, 104, 21, 62, 90, 65]}

100%|██████████| 1000000/1000000 [12:52:23<00:00, 21.58it/s]
