
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.25
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:30<10:30:57, 26.15it/s]global step 10000, trans_decision ep_re -277.0999233788087

{"global_step": 10000, "eval_re": [-281.7626939635392, -258.8229174573669, 
-284.4351850397378, -286.2483690015876, -293.7213344820097, -290.5207055116812, 
-221.06266126557995, -279.938709869549, -293.10549178515686, 
-281.38116541187793], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [13:30<10:15:48, 26.52it/s]global step 20000, trans_decision ep_re -110.0492069732928

{"global_step": 20000, "eval_re": [-137.88665564223237, -39.67521700428325, 
15.101437960236254, -123.12879272773627, -142.07681591892728, 
-62.683353663044194, -296.1492332906317, -45.641277212531655, 
-115.20430568958969, -153.14785654418802], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [22:30<10:23:25, 25.93it/s]global step 30000, trans_decision ep_re 72.0345041759832

{"global_step": 30000, "eval_re": [135.77979025681597, 65.60588688425966, 
46.28356467021225, 23.79061240570971, 114.45666473887295, 102.46871074187807, 
62.46486330377124, 97.03482617734286, 16.134206965752618, 56.325915615216786], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [31:30<10:13:44, 26.07it/s]global step 40000, trans_decision ep_re 135.70673629283652

{"global_step": 40000, "eval_re": [214.82505792739462, 113.21201104219236, 
341.46068608576826, 45.132837922090545, 28.437581173566578, 170.44297362640077, 
160.35165579879347, 203.8876875350401, 50.148882710214174, 29.167989106904514], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [40:30<9:58:06, 26.47it/s]global step 50000, trans_decision ep_re 182.98243462741507

{"global_step": 50000, "eval_re": [-12.260976370952937, 336.05153512988244, 
38.095989940664424, -4.371266067086859, 229.59213843101216, -23.663072701863836,
375.0554068005155, 194.26477121097273, 246.34114953905154, 450.7186703619557], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59998/1000000 [49:30<9:55:49, 26.29it/s]global step 60000, trans_decision ep_re 193.81259396736866

{"global_step": 60000, "eval_re": [138.06862497493466, 327.74678712339005, 
-8.725892984129352, 355.58423151802384, 333.02208651442663, 175.343117204462, 
106.3915631876667, 328.3759188024996, 90.61193294234904, 91.70757039006337], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [58:20<9:53:33, 26.11it/s]global step 70000, trans_decision ep_re 352.19509586945026

{"global_step": 70000, "eval_re": [401.24916864387063, 472.0365698815872, 
282.16050949043284, 462.90820174028187, 216.70046575694252, 266.7613405329811, 
461.4207407974226, 252.4298684550995, 355.6330773745362, 350.65101602134797], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:07:20<9:46:19, 26.15it/s]global step 80000, trans_decision ep_re 263.016512652765

{"global_step": 80000, "eval_re": [108.09834243675088, 193.81189009783955, 
430.6256310417096, 49.834284140656905, 409.46290530301195, 316.63468831919045, 
310.97685616336975, 237.68677248987277, 261.76395230740457, 311.2698042278438], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:16:20<9:39:41, 26.16it/s]global step 90000, trans_decision ep_re 350.1164211177751

{"global_step": 90000, "eval_re": [294.787860468408, 539.0599208671848, 
447.60623667083644, 463.01814000104883, 278.4588657312814, 496.83657054413163, 
385.05171210645454, 386.8738598500284, 213.52311740621892, -4.05207246784275], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:25:20<9:33:58, 26.13it/s]global step 100000, trans_decision ep_re 358.88705706105105

{"global_step": 100000, "eval_re": [463.7457388219124, 411.6366202679587, 
394.7164911082635, 332.33052996773574, 294.15694050529316, 421.58886606243226, 
335.97194099877066, 274.0815352403694, 194.4286614057457, 466.2132462320293], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109999/1000000 [1:34:20<9:32:08, 25.93it/s]global step 110000, trans_decision ep_re 441.5277358599001

{"global_step": 110000, "eval_re": [283.27433766471415, 488.1449340803042, 
489.9593034889689, 385.83283000903975, 517.6453295801907, 355.2250133786737, 
418.5918949135327, 484.32095868558304, 501.7430367908825, 490.53972000711104], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:43:20<9:24:14, 25.99it/s]global step 120000, trans_decision ep_re 411.5085504884088

{"global_step": 120000, "eval_re": [432.66294301102863, 359.7440450317314, 
497.09549980451885, 446.55509912282497, 477.9284765386424, 445.99366620497733, 
319.4915046304772, 709.14668498472, -115.6089392277339, 542.0765247829007], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129998/1000000 [1:52:20<9:04:57, 26.61it/s]global step 130000, trans_decision ep_re 378.44841852619436

{"global_step": 130000, "eval_re": [572.2341337068667, 460.6457642796132, 
437.5692659810745, 522.8430657564776, 468.4842492774426, 481.556767232339, 
357.3181290891447, -134.44558484401736, 555.0623336245642, 63.216061158438166], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139998/1000000 [2:01:20<9:04:07, 26.34it/s]global step 140000, trans_decision ep_re 525.2454071519799

{"global_step": 140000, "eval_re": [437.5651901288768, 512.3361955690899, 
745.3776467527504, 546.5633680720251, 501.8022748985297, 485.52828444107183, 
575.8864666370742, 433.9732837725158, 486.6634683214965, 526.7578929263694], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149998/1000000 [2:10:20<8:53:09, 26.57it/s]global step 150000, trans_decision ep_re 452.1516799917589

{"global_step": 150000, "eval_re": [493.71413899792316, 465.9308549902522, 
574.3094729661096, 556.3787619610378, 475.1480430214853, 455.4673103836272, 
472.97011444972736, 512.8054772769799, 668.0461405636278, -153.25351469318124], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [2:19:20<8:55:19, 26.15it/s]global step 160000, trans_decision ep_re 359.3969534516504

{"global_step": 160000, "eval_re": [-166.8434941995037, 562.6572981948844, 
366.9238746814574, 713.7785149063341, -219.27267523397327, 410.218058578586, 
429.71148404763494, 523.1470626169914, 382.78121264783994, 590.8681982762528], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169999/1000000 [2:28:20<8:49:08, 26.14it/s]global step 170000, trans_decision ep_re 351.6902705567647

{"global_step": 170000, "eval_re": [510.22768555370595, 290.6619627074266, 
516.601906944386, 745.6936599064792, 406.68556895038, 521.4739935559986, 
-331.9509878901902, -250.95613138657387, 380.5456225544683, 727.9194246715663], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:37:20<8:48:10, 25.88it/s]global step 180000, trans_decision ep_re 442.6976748886676

{"global_step": 180000, "eval_re": [387.3181600480273, 287.37716605991466, 
523.5390379228163, 148.01119698491206, 561.9350402394851, 461.9766440305215, 
482.65969455040755, 544.4173762395975, 536.0428682009881, 493.6995646100069], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [2:46:20<8:38:03, 26.06it/s]global step 190000, trans_decision ep_re 430.05604105563054

{"global_step": 190000, "eval_re": [517.1961081148783, 596.063241946552, 
662.2797442403333, 566.5137282411973, 540.5590359758475, 623.0147012207195, 
668.6663591565386, 538.8457854130908, -253.8947314557304, -158.68356229712074], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [2:55:20<8:31:24, 26.07it/s]global step 200000, trans_decision ep_re 560.4902794984495

{"global_step": 200000, "eval_re": [618.6807583187739, 438.1794689963482, 
489.84559103885607, 502.3272286724709, 712.4122930522121, 610.0602380930454, 
437.58587348817485, 609.6258782476738, 624.0674780193798, 562.1179870575605], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:04:20<8:28:07, 25.91it/s]global step 210000, trans_decision ep_re 570.719926646184

{"global_step": 210000, "eval_re": [860.0484540241731, 633.34862169086, 
-41.12542057132597, 625.7878788813085, 631.1743908693446, 518.6268425325143, 
432.0984860955839, 571.1502175837868, 911.0025448224524, 565.0872505331427], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [3:13:20<8:24:58, 25.74it/s]global step 220000, trans_decision ep_re 609.7803503395752

{"global_step": 220000, "eval_re": [753.7466531591397, 567.3316386152775, 
561.5057179419057, 511.1046644771015, 533.663440110016, 382.6046600960243, 
717.2154051857625, 459.48513716410673, 943.6615207563623, 667.4846658900564], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [3:22:20<8:10:20, 26.17it/s]global step 230000, trans_decision ep_re 530.6028763375363

{"global_step": 230000, "eval_re": [520.1141451073614, 519.4045459657591, 
516.7061066175919, 635.8932125560399, 576.8146866935017, 366.8645207220957, 
505.7934986473408, 499.1209626793174, 570.5382557489665, 594.7788286373882], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239997/1000000 [3:31:20<8:06:32, 26.03it/s]global step 240000, trans_decision ep_re 536.3905017133902

{"global_step": 240000, "eval_re": [555.3805525829442, 482.393049624041, 
591.3824066371798, 454.6784334588267, 415.56447414993073, 571.6032333093627, 
480.24187844800383, 693.6745852330007, 527.3631181518554, 591.6232855387572], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249997/1000000 [3:40:20<7:59:23, 26.08it/s]global step 250000, trans_decision ep_re 527.6578393184609

{"global_step": 250000, "eval_re": [458.4258649938891, 557.8301353159126, 
544.9383523427819, 495.0324580580631, 533.1081721894091, 514.927986759736, 
587.0105208362334, 497.3547330276176, 553.8087665104495, 534.1414031505169], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259997/1000000 [3:49:20<8:04:04, 25.48it/s]global step 260000, trans_decision ep_re 565.2435923107749

{"global_step": 260000, "eval_re": [613.9069710557394, 489.160674927904, 
592.7048928661794, 485.9866280712139, 580.986701848252, 571.1013172800699, 
538.6979217791201, 676.1302349916222, 650.0315113716177, 453.7290689160312], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [3:58:20<7:44:10, 26.21it/s]global step 270000, trans_decision ep_re 590.1110353416797

{"global_step": 270000, "eval_re": [614.9830024767866, 484.70798947376306, 
766.3118600705963, 596.8644522797426, 515.1354893457584, 559.8028398554617, 
600.76830771038, 615.417729390066, 569.5042442758185, 577.6144385384241], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [4:07:20<7:36:33, 26.28it/s]global step 280000, trans_decision ep_re 505.00974550576905

{"global_step": 280000, "eval_re": [502.26456521231694, 560.8206087146865, 
467.6043235816927, -97.04110116668461, 846.1896062306945, 535.2463308003354, 
582.8933310509505, 601.8065555521559, 445.00342735815065, 605.3098077233914], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [4:16:30<7:36:06, 25.94it/s]global step 290000, trans_decision ep_re 601.3562390491317

{"global_step": 290000, "eval_re": [663.2494377192796, 560.4491627018783, 
573.9984165248765, 763.8796249445169, 879.2113162412278, 832.5366389630966, 
503.7409342744187, 485.0683741937869, 131.45271896007304, 619.9757659681617], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299998/1000000 [4:25:30<7:19:05, 26.57it/s]global step 300000, trans_decision ep_re 608.7779689891065

{"global_step": 300000, "eval_re": [701.8652561740224, 619.2618654173675, 
896.5518885765189, 540.9909104740682, 542.5098995116898, 513.7594258182448, 
521.5273657596703, 749.7991081707653, 488.53002955857744, 512.9839404301401], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [4:34:30<7:26:00, 25.78it/s]global step 310000, trans_decision ep_re 584.5315688093937

{"global_step": 310000, "eval_re": [523.5777151463482, 562.5891225089445, 
702.4556138049618, 246.15449608973876, 665.8937441950015, 536.4801574163752, 
725.412066921408, 623.639168849134, 617.20521573707, 641.9083874249546], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [4:43:40<7:15:30, 26.02it/s]global step 320000, trans_decision ep_re 658.4045805418292

{"global_step": 320000, "eval_re": [740.745134345519, 525.6161281291495, 
529.140543830557, 610.8931086558867, 623.4200276689247, 762.9590224640237, 
680.8623359969893, 771.1563415417794, 556.902828852595, 782.3503339328669], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329998/1000000 [4:52:40<7:04:40, 26.30it/s]global step 330000, trans_decision ep_re 575.373388876261

{"global_step": 330000, "eval_re": [489.540250420167, 717.4346735033148, 
295.60528243534384, 592.219950569501, 571.733604735758, 536.239325166129, 
626.63689681684, 724.7345870721691, 614.5889248479853, 585.0003931954008], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [5:01:40<7:09:59, 25.58it/s]global step 340000, trans_decision ep_re 487.8494813160849

{"global_step": 340000, "eval_re": [571.1889311454584, 173.00276413595037, 
297.5538895055929, 544.7835830638807, 459.1065423157293, 651.752304363539, 
550.5062959594266, 429.95210303513034, 745.9160040320197, 454.73239560412213], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [5:10:50<6:57:27, 25.95it/s]global step 350000, trans_decision ep_re 647.6509327878416

{"global_step": 350000, "eval_re": [747.2320932787632, 723.0732287152017, 
510.47084258159515, 775.9787739881754, 643.9677123469353, 561.5216843470429, 
612.9748118872551, 751.8977624281968, 685.1653168073697, 464.2271014978808], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [5:19:50<6:49:28, 26.05it/s]global step 360000, trans_decision ep_re 561.5471553601923

{"global_step": 360000, "eval_re": [650.2289496033175, 574.4446833352807, 
576.6947790042134, 531.3701538015257, 642.8835985711829, 447.6410615952655, 
682.2887531808371, 519.3360176829227, 578.8049723852936, 411.77858444208374], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [5:28:40<6:40:55, 26.19it/s]global step 370000, trans_decision ep_re 622.1323743029556

{"global_step": 370000, "eval_re": [904.0287424065124, 384.35592139413455, 
609.177024223562, 655.134007322287, 585.7533673891169, 533.1943643147989, 
699.247604216885, 463.16433725534944, 720.9195927222136, 666.3487817846964], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [5:37:40<6:37:26, 26.00it/s]global step 380000, trans_decision ep_re 548.9915322305155

{"global_step": 380000, "eval_re": [611.3478415282144, 531.2666694926409, 
630.8933183827791, 534.8480192121802, 305.63528427501154, 547.3491189308186, 
502.0920492299292, 763.5884422194176, 370.0085408483055, 692.8860381858589], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389999/1000000 [5:46:40<6:30:34, 26.03it/s]global step 390000, trans_decision ep_re 707.6218682241639

{"global_step": 390000, "eval_re": [701.8084948892744, 837.1940607886996, 
621.2800593801355, 500.496586686523, 850.9514756835242, 1036.5619867979392, 
621.7219409357596, 521.1385969263807, 509.2772028899286, 875.7882772634729], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [5:55:40<6:22:05, 26.17it/s]global step 400000, trans_decision ep_re 568.8474529633727

{"global_step": 400000, "eval_re": [553.9291048068974, 609.61960119933, 
629.7881426708702, 497.04374286565036, 614.7815772989089, 563.9354564052679, 
597.2289073230758, 576.8532854298928, 453.78495510389394, 591.5097565299393], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409998/1000000 [6:04:40<6:13:00, 26.36it/s]global step 410000, trans_decision ep_re 555.2217158740773

{"global_step": 410000, "eval_re": [641.306371676334, -109.92613569264188, 
738.1908530484006, 621.5099439697673, 677.1527522123043, 691.3124805909293, 
492.7941231505707, 542.0448619149481, 713.7956815027159, 544.0362263674442], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [6:13:40<6:05:15, 26.47it/s]global step 420000, trans_decision ep_re 799.8414939807012

{"global_step": 420000, "eval_re": [596.3521858305993, 669.452980269386, 
580.8953015740353, 643.4724060572809, 871.1847394908752, 1030.8090165913038, 
1179.1538702802227, 1048.681777642937, 558.5635738499694, 819.8490882204023], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429998/1000000 [6:22:40<6:02:44, 26.19it/s]global step 430000, trans_decision ep_re 637.864581447658

{"global_step": 430000, "eval_re": [603.8951992594206, 744.115583170992, 
714.6340033609617, 454.434088446704, 602.9303675784315, 728.539433510875, 
689.446113592388, 578.7135609990278, 542.0034632936242, 719.9340012641549], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439998/1000000 [6:31:40<5:54:10, 26.35it/s]global step 440000, trans_decision ep_re 525.4309931684586

{"global_step": 440000, "eval_re": [631.8283865143447, 562.2424264182988, 
433.7101431577793, 625.1961188811983, 522.2908456348486, 486.9448270356106, 
510.9622270381347, 670.6556723593511, 336.983525796738, 473.4957588482808], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449998/1000000 [6:40:40<5:46:14, 26.47it/s]global step 450000, trans_decision ep_re 715.3815793841155

{"global_step": 450000, "eval_re": [1070.4885802517992, 637.2255757496232, 
539.9617504856217, 695.8966904618864, 974.654343467215, 401.16138605021183, 
644.8477435461033, 788.2559856413734, 708.9138582387737, 692.4098799485462], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459997/1000000 [6:49:50<5:43:12, 26.22it/s]global step 460000, trans_decision ep_re 637.2589194222494

{"global_step": 460000, "eval_re": [668.2398206723523, 531.5875963469715, 
699.5492558640432, 461.6274980584257, 644.7649546041737, 804.5103290418516, 
590.4868230857846, 708.5447485796516, 618.9804032943223, 644.297764674917], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [6:58:40<5:37:58, 26.14it/s]global step 470000, trans_decision ep_re 641.5920352421278

{"global_step": 470000, "eval_re": [825.6503150361121, 534.7549339363613, 
517.026798371227, 548.2022045805168, 751.7851044878215, 729.6942499161868, 
615.9548689176403, 691.3409711568567, 613.3955193611498, 588.1153866574056], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479998/1000000 [7:07:40<5:28:40, 26.37it/s]global step 480000, trans_decision ep_re 654.4969884595116

{"global_step": 480000, "eval_re": [660.5974679251251, 559.143324623266, 
814.6731475306494, 900.747204671258, 535.803802112301, 672.5277759647237, 
676.5988649873917, 547.451184987571, 661.2264478908289, 516.200663902001], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489998/1000000 [7:16:40<5:23:56, 26.24it/s]global step 490000, trans_decision ep_re 422.28338308970035

{"global_step": 490000, "eval_re": [409.9494667189177, 624.7877709398083, 
497.51116722928634, 444.4217115691892, 516.7418399285076, 198.9282609352554, 
587.1601807949573, 445.9840795507184, 536.0946348239705, -38.74528159360752], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499998/1000000 [7:25:40<5:15:30, 26.41it/s]global step 500000, trans_decision ep_re 651.4386002371132

{"global_step": 500000, "eval_re": [634.5260932513843, 666.7868707128483, 
567.4695173913279, 777.6795510373436, 639.7700690916047, 812.6774379222305, 
640.462233252926, 527.2335771032486, 664.35099863429, 583.4296539739286], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509997/1000000 [7:34:40<5:12:14, 26.16it/s]global step 510000, trans_decision ep_re 630.6556317184211

{"global_step": 510000, "eval_re": [588.3341433003089, 782.8201184140446, 
516.5867812817215, 840.8053197209902, 651.4669377921622, 609.0450884010138, 
403.7721687557823, 773.0754623342946, 565.0248366668192, 575.625460517073], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [7:43:40<5:04:09, 26.30it/s]global step 520000, trans_decision ep_re 608.1643837509372

{"global_step": 520000, "eval_re": [883.9772332269107, 703.2765829622045, 
514.1253570264797, 601.3895885187328, 541.7388735763063, 445.56515959228506, 
737.7264634392492, 507.45969102926057, 555.3674901079644, 591.0173980299776], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529997/1000000 [7:52:40<5:00:24, 26.08it/s]global step 530000, trans_decision ep_re 587.3989390045972

{"global_step": 530000, "eval_re": [511.07923373179887, 604.8640986352289, 
662.9283001459445, 626.5558579505112, 493.50780197173634, 402.2864854434957, 
598.9289942640976, 591.4141184551781, 716.0454897886509, 666.3790096593307], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [8:01:40<4:54:21, 26.05it/s]global step 540000, trans_decision ep_re 496.9807247814656

{"global_step": 540000, "eval_re": [602.1117288877995, 267.0469891746501, 
307.4978878127528, 667.7039899499215, 603.2864983675637, 465.9269617536844, 
402.13074297671704, 614.6350654352889, 400.29360720462597, 639.1737762516519], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [8:10:40<4:46:49, 26.15it/s]global step 550000, trans_decision ep_re 683.9510764525086

{"global_step": 550000, "eval_re": [1221.7758445606778, 562.4196864906502, 
477.97171626273706, 758.164562082943, 656.666970019084, 607.8160626460899, 
720.8119315934833, 569.4222945959519, 583.4264779792529, 681.0352182942152], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [8:19:40<4:38:47, 26.30it/s]global step 560000, trans_decision ep_re 689.4159310772175

{"global_step": 560000, "eval_re": [732.3932006843703, 555.1406498754184, 
635.2944579347696, 501.1963641992106, 504.54182285079685, 804.3776866598874, 
624.8869555364123, 577.7692105547923, 1009.1372305507574, 949.4217319257591], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [8:28:30<4:31:58, 26.35it/s]global step 570000, trans_decision ep_re 732.7084602072864

{"global_step": 570000, "eval_re": [671.5471863529302, 590.2481506799314, 
780.6570033506277, 651.2101518609023, 991.4288201654025, 455.542215688314, 
815.8766547071257, 841.4726330181085, 833.0338249993173, 696.0679612502046], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [8:37:30<4:23:36, 26.55it/s]global step 580000, trans_decision ep_re 591.0398802289299

{"global_step": 580000, "eval_re": [700.9626354382539, 676.5215196453414, 
723.2606847889094, 597.23322598203, 421.36182644716916, 521.0573199540324, 
468.5299823017237, 601.0441405729213, 590.6717679045819, 609.7556992543354], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [8:46:30<4:20:33, 26.23it/s]global step 590000, trans_decision ep_re 523.9901273248072

{"global_step": 590000, "eval_re": [558.8543773669139, 516.4045406949457, 
635.3044230470753, 421.3657344644572, 745.1270223151082, 539.3887467747046, 
670.9433397212363, 240.51715014945788, 352.46365707684566, 559.5322816373272], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [8:55:30<4:13:09, 26.33it/s]global step 600000, trans_decision ep_re 525.242228587119

{"global_step": 600000, "eval_re": [568.9791326178532, 541.5571300661439, 
506.07042412629306, 379.98734028766864, 692.8135473333206, 443.1310601924836, 
442.37452473457626, 490.4999824030904, 669.561918841142, 517.4472252686181], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609998/1000000 [9:04:20<4:04:06, 26.63it/s]global step 610000, trans_decision ep_re 615.8450523302961

{"global_step": 610000, "eval_re": [747.9494792885937, 563.5853164061028, 
528.852565270506, 838.4628085922025, 494.27688998834003, 431.12805504741874, 
530.8634100995575, 626.5106577368093, 762.1399467073963, 634.6813941660338], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [9:13:10<3:59:50, 26.41it/s]global step 620000, trans_decision ep_re 670.0778372122031

{"global_step": 620000, "eval_re": [617.8098325550718, 836.4125878639433, 
713.720332025721, 573.6665588672557, 717.3124778499966, 747.5193351780882, 
600.7341201160715, 495.7134264758868, 733.0234205714569, 664.8662806185388], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629998/1000000 [9:22:10<3:52:13, 26.56it/s]global step 630000, trans_decision ep_re 656.9113067668819

{"global_step": 630000, "eval_re": [444.05250914071524, 612.7447328735476, 
897.5108930186893, 683.1802524634699, 747.4709073528236, 631.6826158477635, 
700.8766777304397, 631.4065548674286, 467.92781296888114, 752.26011140506], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [9:31:00<3:46:54, 26.44it/s]global step 640000, trans_decision ep_re 631.8269989674361

{"global_step": 640000, "eval_re": [547.9823437347251, 713.7688463263745, 
559.4993387678304, 552.2989173520691, 597.9650631387525, 660.1505403015283, 
549.0383128172105, 616.2459864727805, 839.6532004352478, 681.6674403278422], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649998/1000000 [9:40:00<3:41:40, 26.32it/s]global step 650000, trans_decision ep_re 596.3869882624995

{"global_step": 650000, "eval_re": [618.3543867449664, 565.3519649707779, 
220.57145872714273, 681.9684976934442, 813.1157555135084, 558.8909869940188, 
455.39726281706993, 827.3605034495192, 558.4431721260291, 664.4158935885176], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [9:48:50<3:34:23, 26.43it/s]global step 660000, trans_decision ep_re 502.48268135963065

{"global_step": 660000, "eval_re": [648.4568168537254, 557.2316710906402, 
600.4066597787677, 398.6083346525134, 496.14398131160397, 563.669040808503, 
496.5748994075788, 367.9210599230098, 114.80681883924986, 781.0075309307138], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [9:57:50<3:27:07, 26.55it/s]global step 670000, trans_decision ep_re 613.1588421569229

{"global_step": 670000, "eval_re": [606.935543312187, 514.5304108035263, 
561.7558049832278, 667.4131430869413, 683.184524140814, 568.3797386706565, 
547.5580928516995, 705.1595520813858, 638.6351661957466, 638.036445443045], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [10:06:40<3:22:34, 26.33it/s]global step 680000, trans_decision ep_re 641.6149656336894

{"global_step": 680000, "eval_re": [588.7411492371531, 608.8048161743067, 
732.4456434171949, 414.8850111415972, 573.4158515121683, 875.616419736746, 
680.1004009673563, 680.1409812442269, 628.8969718267276, 633.1024110794182], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [10:15:40<3:17:10, 26.20it/s]global step 690000, trans_decision ep_re 535.158459069782

{"global_step": 690000, "eval_re": [525.0855488968223, 459.90310909171495, 
553.7908785249722, 521.0696452600968, 589.7719009626953, 659.9722029460756, 
357.8982961407507, 600.4809155373331, 583.9096922893295, 499.70240104803], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [10:24:30<3:10:40, 26.22it/s]global step 700000, trans_decision ep_re 540.1695109278638

{"global_step": 700000, "eval_re": [510.1175402946587, 422.5186134393214, 
697.9392154192124, 524.2650991639833, 606.6185295145723, 554.0636646215634, 
597.620488836051, 549.5566704969875, 469.6251596692688, 469.370127823019], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709998/1000000 [10:33:30<3:02:26, 26.49it/s]global step 710000, trans_decision ep_re 591.1515280067179

{"global_step": 710000, "eval_re": [799.5189088031061, 646.6861640906487, 
634.5553094199308, 969.9329949865524, 446.7466435067395, 28.15563461383363, 
590.1640018036416, 516.3722732193592, 407.447325562592, 871.9360240607755], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [10:42:20<2:56:52, 26.38it/s]global step 720000, trans_decision ep_re 539.4935580319477

{"global_step": 720000, "eval_re": [642.35369029703, 593.2939633197835, 
469.8472318611085, 697.7587693161024, 501.45394548946996, 467.38076932567105, 
416.37331619424435, 588.1215347035854, 339.63814436545425, 678.714215447027], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [10:51:20<2:51:05, 26.30it/s]global step 730000, trans_decision ep_re 628.1023943503988

{"global_step": 730000, "eval_re": [645.0208008731144, 710.8858489583156, 
508.67894840013633, 776.918855446222, 605.5102659734493, 573.9029815006469, 
581.8017286554124, 680.8372923433184, 687.8180249729937, 509.64919638037816], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [11:00:20<2:45:05, 26.25it/s]global step 740000, trans_decision ep_re 404.41582161875726

{"global_step": 740000, "eval_re": [314.6134878196325, 536.4286757810345, 
475.64290482510114, 585.0326517520402, 319.49477838434825, 275.1216733069508, 
379.069210456435, 325.0401679520855, 377.6162450484282, 456.098420861517], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749998/1000000 [11:09:10<2:37:50, 26.40it/s]global step 750000, trans_decision ep_re 531.7397178058638

{"global_step": 750000, "eval_re": [524.4252540897778, 578.0091558601707, 
547.5328392888358, 439.70286435012065, 688.103606930866, 632.4688050331058, 
440.22346582900985, 507.5840212318711, 482.2445179746052, 477.1026474702754], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [11:18:10<2:32:10, 26.29it/s]global step 760000, trans_decision ep_re 555.7006772988091

{"global_step": 760000, "eval_re": [425.05764236291645, 777.6097055639066, 
515.9393583512252, 596.8511673984799, 566.4985533223221, 522.0484348064668, 
523.0564492148087, 551.0765710631994, 487.5303757868505, 591.3385151179153], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [11:27:00<2:25:15, 26.39it/s]global step 770000, trans_decision ep_re 664.438817627183

{"global_step": 770000, "eval_re": [897.0043485513845, 514.6570547379114, 
707.4385698010882, 546.3178699154671, 609.1004307700433, 831.2399435496148, 
825.6061353730547, 605.4205473648453, 622.8710640553292, 484.7322121530917], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [11:36:00<2:17:15, 26.72it/s]global step 780000, trans_decision ep_re 595.9462276596413

{"global_step": 780000, "eval_re": [494.11507604067066, 667.487062807671, 
491.761620723355, 424.0961613317748, 850.1001068272119, 592.2842755202273, 
682.5842310225281, 588.648870359875, 512.7055268154724, 655.679345147627], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789998/1000000 [11:44:50<2:10:47, 26.76it/s]global step 790000, trans_decision ep_re 622.8350765056973

{"global_step": 790000, "eval_re": [603.310493789464, 571.680285098245, 
614.2689963790843, 636.4721780920809, 930.6510324538974, 706.9521681250596, 
843.9311345281372, 462.4269744507166, 789.6836843679739, 68.97381777231494], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [11:53:50<2:05:04, 26.65it/s]global step 800000, trans_decision ep_re 444.09826894078196

{"global_step": 800000, "eval_re": [552.0605522802738, 351.1964294264152, 
443.3970750026152, 432.42793417232775, 517.438187101969, 483.3816727958157, 
477.0778337581441, 654.9819195482287, 623.1487383447453, -94.12765302271526], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809998/1000000 [12:02:40<1:58:58, 26.62it/s]global step 810000, trans_decision ep_re 558.3092908692479

{"global_step": 810000, "eval_re": [461.5575173538231, 644.4891035067341, 
616.0459990301525, 470.00381219426805, 592.5989004835884, 572.1061149314027, 
929.2545666684675, 416.1710990565817, 297.3819618394799, 583.4838336279813], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [12:11:30<1:53:56, 26.33it/s]global step 820000, trans_decision ep_re 527.3910584240579

{"global_step": 820000, "eval_re": [655.3322240220521, 453.32017994277044, 
392.23639958719025, 395.97758601618926, 436.61947063517124, 396.31215292875584, 
554.1273958945404, 546.9289232875335, 686.8913958715075, 756.1648560548686], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829998/1000000 [12:20:30<1:46:17, 26.66it/s]global step 830000, trans_decision ep_re 537.8872754519964

{"global_step": 830000, "eval_re": [443.4387265034673, 577.234047154685, 
628.1469444999794, 591.8812850884142, 682.8939290658435, 310.43414496966096, 
595.808386985862, 485.70670618325386, 673.3531122729893, 389.9754717958098], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [12:29:20<1:39:33, 26.78it/s]global step 840000, trans_decision ep_re 619.0274823523354

{"global_step": 840000, "eval_re": [546.6196647419292, 578.8087761833825, 
582.9332604891486, 522.8065365010988, 715.4739275579471, 488.6429374370748, 
554.6855799998705, 878.2148399025631, 546.0413653242414, 776.0479353860986], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849998/1000000 [12:38:20<1:33:22, 26.77it/s]global step 850000, trans_decision ep_re 594.6349605166836

{"global_step": 850000, "eval_re": [476.1219459459541, 569.5591572347527, 
746.8738674761918, 478.7077060291986, 494.9426977738006, 548.58508066857, 
644.8798260300093, 496.90839565726304, 654.9188358282054, 834.8520925228913], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [12:47:10<1:28:23, 26.40it/s]global step 860000, trans_decision ep_re 520.224892989392

{"global_step": 860000, "eval_re": [536.9819141349174, 572.3186873767587, 
713.7373095800526, 441.6931872210657, 404.69765386332875, 609.0827795465732, 
463.1653139325103, 668.5597291386366, 299.4833842064341, 492.5289708936411], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [12:56:00<1:22:30, 26.26it/s]global step 870000, trans_decision ep_re 543.2006463185575

{"global_step": 870000, "eval_re": [186.239295681843, 521.1431719648111, 
555.4168087162349, 297.8365117090105, 688.2854597278196, 639.3801581199227, 
613.2347058600544, 591.3132467598996, 836.1771920333902, 502.9799126125884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879998/1000000 [13:05:00<1:15:20, 26.54it/s]global step 880000, trans_decision ep_re 538.0009110207167

{"global_step": 880000, "eval_re": [538.9897500613687, 538.1225679931875, 
604.3106183938307, 596.5421898667158, 491.89972629760723, 494.4040886404902, 
444.55348306594374, 444.998074444349, 557.1230770982944, 669.0655343453792], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [13:13:50<1:09:42, 26.30it/s]global step 890000, trans_decision ep_re 615.8495792790494

{"global_step": 890000, "eval_re": [735.7291209450327, 734.765281508285, 
572.8306085701108, 530.1413443953372, 488.0054673807665, 547.132127655665, 
731.3475355840632, 585.3442693822931, 621.2291706265236, 611.9708667424171], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899998/1000000 [13:22:50<1:02:05, 26.85it/s]global step 900000, trans_decision ep_re 555.1000319729659

{"global_step": 900000, "eval_re": [597.7787527433456, 577.7469438082497, 
675.9274375327406, 564.9518003980251, 466.71916933395676, 377.66985728318315, 
588.0278411065144, 670.4466071260341, 503.3968980962387, 528.3350123013706], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [13:31:40<56:56, 26.35it/s]global step 910000, trans_decision ep_re 467.9984360725724

{"global_step": 910000, "eval_re": [612.8965820033542, 530.8968613487083, 
389.22291197382447, -89.67370512710276, 479.76831721019045, 601.5121782653341, 
415.88371601001074, 717.504933373038, 545.3950713670599, 476.5774943013065], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919998/1000000 [13:40:40<50:13, 26.55it/s]global step 920000, trans_decision ep_re 600.4042273345992

{"global_step": 920000, "eval_re": [438.12097242397834, 582.5016876179706, 
488.41261100505557, 623.6827081961775, 700.9777979111718, 747.9913327363164, 
626.8687768043611, 675.5360121587097, 656.1546637820019, 463.7957107102501], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929998/1000000 [13:49:40<44:54, 25.98it/s]global step 930000, trans_decision ep_re 465.19219784040996

{"global_step": 930000, "eval_re": [550.5591890418096, 152.22326837879652, 
512.3498277328608, 415.97958464649656, 586.2122299587943, 520.4549208673853, 
404.0863529452955, 509.1209884012811, 445.390336851079, 555.5452795803], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939998/1000000 [13:58:40<37:53, 26.39it/s]global step 940000, trans_decision ep_re 617.7144989556144

{"global_step": 940000, "eval_re": [676.8270861905334, 591.3665976238957, 
552.6198839111033, 704.5590624967414, 610.7091592814107, 575.7272687226593, 
529.0949010643532, 642.6966235188473, 514.0465853388914, 779.4978214077088], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [14:07:30<31:58, 26.07it/s]global step 950000, trans_decision ep_re 383.3808218828309

{"global_step": 950000, "eval_re": [769.6153792890852, 608.6643526568739, 
749.1634607281637, 328.3221783240175, -315.75813236062004, 284.65847277071276, 
596.239810838323, 414.2387594683473, 377.1531515071856, 21.510785606219336], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [14:16:30<25:44, 25.90it/s]global step 960000, trans_decision ep_re 410.765416569231

{"global_step": 960000, "eval_re": [613.196042378437, 560.5925971910468, 
222.9417758859239, 655.841086103976, 281.6660016394713, 539.0134973222197, 
95.63817363320197, 286.57970173631617, 414.4473095504665, 437.7379802512504], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969997/1000000 [14:25:30<19:15, 25.97it/s]global step 970000, trans_decision ep_re 489.2507478472735

{"global_step": 970000, "eval_re": [414.7899135873838, 454.4790127707155, 
561.9062623716558, 669.4257150588307, 501.4094161696066, 490.63579593573405, 
423.603031127431, 594.3925747651737, 317.79732357238055, 464.06843311382374], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [14:34:30<12:45, 26.14it/s]global step 980000, trans_decision ep_re 626.1895118076433

{"global_step": 980000, "eval_re": [581.6967859136737, 821.020414741964, 
593.3322910897908, 712.7590631322003, 493.9881740604091, 561.1259576670441, 
377.93192038243484, 755.3314596211242, 684.9453204521174, 679.7637310156734], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [14:43:20<06:24, 26.00it/s]global step 990000, trans_decision ep_re 651.8641978859903

{"global_step": 990000, "eval_re": [780.1571951585638, 802.8122773166693, 
716.5745387773949, 559.4260977974684, 561.7220070726313, 668.1775103693853, 
609.6001108748702, 565.6825645780416, 600.1464539092406, 654.3432230056382], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [14:52:20<00:00, 26.06it/s]global step 1000000, trans_decision ep_re 529.8736287017216

{"global_step": 1000000, "eval_re": [874.9242543357808, 612.027616984575, 
510.6110403313724, 619.7146009994073, 612.6644175371587, 582.931622691694, 
577.8389303020493, 594.3108345671054, -330.68424084487253, 644.3972101129461], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [14:52:54<00:00, 18.67it/s]
