
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:30<13:08:57, 20.91it/s]global step 10000, trans_decision ep_re 301.4927334028053

{"global_step": 10000, "eval_re": [382.06300233132447, 337.28237416285947, 
350.92828266940575, 396.8528591978447, 302.72547394986486, 325.6870735615147, 
209.96004126729824, 173.74077339367832, 301.6267126318151, 234.0607408624471], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [16:50<12:51:34, 21.17it/s]global step 20000, trans_decision ep_re 121.0837121146872

{"global_step": 20000, "eval_re": [60.20678329370208, 74.58442696929558, 
302.73596742895296, 95.1471249773546, 45.614917769331385, 124.07289010213236, 
48.22248253225549, 222.45078927746135, 21.556231519742816, 216.2455072766433], 
"eval_len": [116, 298, 799, 285, 63, 347, 68, 425, 110, 1000]}

  3%|▎         | 29998/1000000 [27:40<12:37:29, 21.34it/s]global step 30000, trans_decision ep_re 167.88999124625855

{"global_step": 30000, "eval_re": [121.06714481364097, 221.4969838541826, 
151.69798865320917, 488.86989503509625, 136.86084759282375, 11.228887922629623, 
67.22012449493928, 146.56064094122024, 250.66140154036154, 83.23599761448216], 
"eval_len": [378, 481, 306, 1000, 1000, 45, 73, 329, 506, 1000]}

  4%|▍         | 39999/1000000 [38:40<12:39:41, 21.06it/s]global step 40000, trans_decision ep_re 197.3311405122964

{"global_step": 40000, "eval_re": [40.46331669784233, 329.5490888259551, 
319.60173880150415, 56.72023092012761, 30.36230603588364, 185.19901254133163, 
193.27670957819493, 52.2122709166899, 333.6568538675183, 432.26987693791654], 
"eval_len": [74, 1000, 696, 257, 168, 1000, 1000, 137, 1000, 1000]}

  5%|▍         | 49998/1000000 [49:40<12:24:15, 21.27it/s]global step 50000, trans_decision ep_re 214.63680179197877

{"global_step": 50000, "eval_re": [240.76220262136104, 115.88114400316086, 
165.90763005749685, 88.39070079563098, 351.0490749820306, 28.535302951638407, 
432.4972787955533, 517.9597986704633, 148.31221958808055, 57.072665454371695], 
"eval_len": [425, 251, 400, 216, 620, 41, 1000, 1000, 242, 78]}

  6%|▌         | 59999/1000000 [1:00:40<12:28:58, 20.92it/s]global step 60000, trans_decision ep_re 272.19607623618015

{"global_step": 60000, "eval_re": [116.82919474029308, 160.54654679760156, 
268.9237755086939, 260.6900635899379, 233.23234364616232, 430.77164570890557, 
403.2574503978046, 423.25639592938114, 390.38992909535955, 34.063416947662084], 
"eval_len": [160, 257, 1000, 1000, 1000, 619, 725, 703, 1000, 99]}

  7%|▋         | 69999/1000000 [1:11:40<12:09:29, 21.25it/s]global step 70000, trans_decision ep_re 234.4124903645855

{"global_step": 70000, "eval_re": [30.835292513866854, 470.9950189521298, 
137.72764540697642, 400.9107847812964, 160.10520728758502, 124.14171090201374, 
25.775048490925673, 95.80865191157572, 471.66868457878144, 426.1568588207036], 
"eval_len": [56, 1000, 155, 585, 285, 191, 71, 241, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:22:30<12:20:48, 20.70it/s]global step 80000, trans_decision ep_re 427.2310677589524

{"global_step": 80000, "eval_re": [530.7679542872417, 57.92311912329694, 
37.687955647033675, 499.79138420940876, 561.3284295669107, 427.5590198693743, 
520.6623640374802, 636.2730669769497, 595.4334972692757, 404.8838866025523], 
"eval_len": [1000, 93, 83, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [1:33:50<12:02:09, 21.00it/s]global step 90000, trans_decision ep_re 308.0337534507583

{"global_step": 90000, "eval_re": [592.2769915278134, 546.5888776600062, 
124.56376534039264, 565.5058078149024, 18.4982076925808, 34.61551932248923, 
39.096207202613385, 176.22768224401156, 551.9233323066148, 431.04114339615825], 
"eval_len": [1000, 1000, 85, 1000, 39, 48, 58, 225, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:44:50<11:47:11, 21.21it/s]global step 100000, trans_decision ep_re 343.2372541987537

{"global_step": 100000, "eval_re": [508.51405736854707, 27.794217938441268, 
527.8953471227371, 590.801476943114, 654.4156888570901, 69.22467168919076, 
157.46689039673944, 714.6304019483725, 45.62150012302368, 136.008289600281], 
"eval_len": [1000, 63, 817, 840, 1000, 99, 215, 1000, 54, 1000]}

 11%|█         | 109999/1000000 [1:55:50<11:39:44, 21.20it/s]global step 110000, trans_decision ep_re 171.38624528492045

{"global_step": 110000, "eval_re": [262.25380449348364, 87.84506508035791, 
66.60612126242022, 482.9730682835834, 105.63846580074724, 234.46599483997346, 
14.642381096893288, 360.1715976134669, 70.05807499461116, 29.207879383667095], 
"eval_len": [377, 533, 120, 1000, 240, 344, 39, 1000, 94, 50]}

 12%|█▏        | 119999/1000000 [2:06:40<11:36:36, 21.05it/s]global step 120000, trans_decision ep_re 254.679973611285

{"global_step": 120000, "eval_re": [32.62924216049894, 398.35000865546795, 
173.71661083698285, 618.1738590730632, 160.09313933240128, 203.02508390493344, 
116.99744808779973, 459.73907887867307, 122.50542583445, 261.56983934857993], 
"eval_len": [77, 1000, 380, 1000, 288, 242, 148, 845, 127, 290]}

 13%|█▎        | 129999/1000000 [2:17:30<11:22:08, 21.26it/s]global step 130000, trans_decision ep_re 250.67543740578412

{"global_step": 130000, "eval_re": [172.03182706809528, 205.91313369537036, 
494.56220242431823, 9.453284320262515, 340.2577656912342, 267.4269746099399, 
377.70291864167245, 236.64971557859317, 86.51023024802716, 316.2463217803277], 
"eval_len": [227, 206, 1000, 64, 1000, 280, 449, 264, 125, 505]}

 14%|█▍        | 139999/1000000 [2:28:20<11:21:12, 21.04it/s]global step 140000, trans_decision ep_re 307.9545558695684

{"global_step": 140000, "eval_re": [15.518330515151158, 417.1219163180307, 
565.5941589182723, 431.03936201883016, 25.088509832341916, 543.6160048197046, 
514.9888674913465, 88.2023951420182, 140.5753444325247, 337.80066920746407], 
"eval_len": [47, 1000, 1000, 1000, 58, 1000, 1000, 148, 291, 358]}

 15%|█▍        | 149998/1000000 [2:39:20<11:09:17, 21.17it/s]global step 150000, trans_decision ep_re 374.17852542723165

{"global_step": 150000, "eval_re": [358.75542642316185, 576.1813772073768, 
488.9270051280986, 38.270306767828146, 833.7673297080503, 131.2638594676817, 
437.68977779977143, 199.23889785813864, 558.298771423819, 119.3925024883906], 
"eval_len": [404, 830, 1000, 46, 1000, 275, 536, 177, 1000, 129]}

 16%|█▌        | 159998/1000000 [2:50:10<10:52:06, 21.47it/s]global step 160000, trans_decision ep_re 223.05963620122512

{"global_step": 160000, "eval_re": [17.790608150187143, 766.428393778838, 
78.28819228504106, 21.834254137536014, 46.167658162555355, 212.653569617304, 
232.83536476175806, 41.80225402342454, 417.51223324679376, 395.28383384881295], 
"eval_len": [38, 1000, 99, 57, 200, 278, 302, 49, 671, 1000]}

 17%|█▋        | 169999/1000000 [3:01:00<10:49:59, 21.28it/s]global step 170000, trans_decision ep_re 271.29575367957625

{"global_step": 170000, "eval_re": [84.20496200752828, 119.71265169415894, 
707.469052862192, 323.64738624706905, 191.3145025525322, 162.04095537563418, 
16.710135262224163, 653.8504272652181, 370.1361556099266, 83.87130791927859], 
"eval_len": [153, 159, 1000, 368, 244, 135, 47, 720, 405, 111]}

 18%|█▊        | 179998/1000000 [3:11:40<10:43:39, 21.23it/s]global step 180000, trans_decision ep_re 289.0071533403388

{"global_step": 180000, "eval_re": [254.7433902057395, 68.53169767506623, 
459.3059402185206, 198.54154648237505, 960.7270686972638, 78.90984496105014, 
23.965299368281126, 41.51206654162421, 642.2186871130615, 161.61599214040592], 
"eval_len": [268, 99, 533, 264, 1000, 94, 40, 57, 728, 211]}

 19%|█▉        | 189999/1000000 [3:22:30<10:33:34, 21.31it/s]global step 190000, trans_decision ep_re 358.8330899737043

{"global_step": 190000, "eval_re": [470.9998856135518, 870.5616007770208, 
570.0044172659303, 136.61954568123014, 414.1840328440895, 72.04649984321405, 
128.54565062283567, 315.5040799984757, 181.93516212846316, 427.93002496223176], 
"eval_len": [676, 1000, 813, 169, 437, 146, 142, 283, 195, 738]}

 20%|█▉        | 199999/1000000 [3:33:20<10:30:07, 21.16it/s]global step 200000, trans_decision ep_re 325.0094042323019

{"global_step": 200000, "eval_re": [767.6142426199833, 37.98389522890316, 
17.71839843098086, 438.27511255015145, 63.716396850286046, 487.0383987748481, 
677.7713413740037, 208.97756499228578, 475.4975439755239, 75.50114752605262], 
"eval_len": [1000, 39, 38, 479, 107, 489, 1000, 296, 1000, 94]}

 21%|██        | 209998/1000000 [3:44:10<10:22:39, 21.15it/s]global step 210000, trans_decision ep_re 474.17181223776777

{"global_step": 210000, "eval_re": [538.8462429011537, 404.7668951879928, 
365.74439429417157, 910.715147649845, 572.3851831654921, 36.58800295587114, 
198.89449409548098, 683.314232501851, 455.84599756599147, 574.6175320598273], 
"eval_len": [1000, 414, 336, 1000, 625, 42, 211, 1000, 487, 1000]}

 22%|██▏       | 219999/1000000 [3:55:10<10:13:26, 21.19it/s]global step 220000, trans_decision ep_re 171.53962585141306

{"global_step": 220000, "eval_re": [148.48708027733758, 97.9326777105095, 
245.425316505228, 133.2979955947811, 78.8659256230792, 577.4457921229632, 
145.10204614298814, 28.752254868154772, 155.17249581912984, 104.9146738499591], 
"eval_len": [164, 120, 295, 186, 93, 1000, 138, 62, 248, 126]}

 23%|██▎       | 229999/1000000 [4:05:50<10:05:06, 21.21it/s]global step 230000, trans_decision ep_re 400.259543862977

{"global_step": 230000, "eval_re": [159.48269378610092, 32.62949599243939, 
427.3147695158599, 1105.6492123477115, 304.401118858617, 441.94154142905603, 
697.1028760859098, 157.25720976512974, 215.80711183923813, 461.0094090097072], 
"eval_len": [177, 69, 721, 1000, 343, 391, 1000, 160, 236, 495]}

 24%|██▍       | 239999/1000000 [4:16:40<10:01:25, 21.06it/s]global step 240000, trans_decision ep_re 569.1825812117485

{"global_step": 240000, "eval_re": [889.975126671849, 163.12758337197053, 
272.0113743844373, 773.3099735657395, 816.33511350702, 858.25233798166, 
180.44866351995125, 871.0582492584895, 567.5164815248244, 299.7909083315438], 
"eval_len": [1000, 236, 307, 1000, 918, 1000, 175, 1000, 713, 388]}

 25%|██▍       | 249999/1000000 [4:27:30<9:56:10, 20.97it/s]global step 250000, trans_decision ep_re 198.85474960091196

{"global_step": 250000, "eval_re": [169.89430685267132, 92.2457594940171, 
18.61828424069201, 254.87807399867947, 143.5668956975084, 647.0589477258999, 
35.82293828775273, 81.46722233883094, 38.050822414723456, 506.94424495834426], 
"eval_len": [274, 121, 45, 190, 127, 643, 42, 122, 52, 568]}

 26%|██▌       | 259999/1000000 [4:38:20<9:40:07, 21.26it/s]global step 260000, trans_decision ep_re 647.0704613984346

{"global_step": 260000, "eval_re": [1088.1568170065448, 1087.4858516270883, 
794.6876120137241, 1004.5649997698836, 400.51343296599293, 423.40303527985265, 
204.2937464306, 702.397920965527, 708.1041215853878, 57.09707633974611], 
"eval_len": [1000, 1000, 1000, 1000, 397, 1000, 246, 1000, 800, 66]}

 27%|██▋       | 269999/1000000 [4:49:20<9:33:45, 21.21it/s]global step 270000, trans_decision ep_re 458.0810354289223

{"global_step": 270000, "eval_re": [673.9501015900125, 227.33563156963993, 
143.51513638687328, 531.7301059869937, 145.28861087265844, 1030.2001686398764, 
151.05229255055607, 513.8395247081897, 574.8104827754241, 589.088299208999], 
"eval_len": [1000, 326, 162, 1000, 138, 971, 212, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [5:00:10<9:23:10, 21.31it/s]global step 280000, trans_decision ep_re 454.1029065326785

{"global_step": 280000, "eval_re": [249.35743301572262, 590.5320058768071, 
232.1444210688727, 844.6366513497982, 37.88984068991193, 784.315293374717, 
550.9914320075111, 880.16933238222, 162.15155075140757, 208.8411048098171], 
"eval_len": [311, 1000, 231, 1000, 50, 729, 633, 1000, 164, 326]}

 29%|██▉       | 289999/1000000 [5:11:10<9:22:13, 21.05it/s]global step 290000, trans_decision ep_re 405.0440929904373

{"global_step": 290000, "eval_re": [730.5811703979557, 11.716627618415218, 
691.1407405228616, 41.29284485656855, 662.49198202341, 812.4632593434668, 
251.37307724630816, 628.8045021908335, 164.5843888329806, 55.992336871573315], 
"eval_len": [1000, 22, 797, 1000, 1000, 872, 313, 1000, 238, 112]}

 30%|██▉       | 299999/1000000 [5:22:00<9:14:34, 21.04it/s]global step 300000, trans_decision ep_re 435.88272368647984

{"global_step": 300000, "eval_re": [123.54733291431789, 685.5059470852968, 
183.30443159628453, 898.4137431613221, 737.8059202337688, 405.4648648448979, 
612.7535768181675, 74.87069379829124, 445.2887673614032, 191.87195905104937], 
"eval_len": [157, 1000, 181, 909, 969, 452, 624, 99, 424, 225]}

 31%|███       | 309999/1000000 [5:32:50<9:01:12, 21.25it/s]global step 310000, trans_decision ep_re 471.3969683030781

{"global_step": 310000, "eval_re": [749.0006840128549, 67.95837124388704, 
537.8365296636644, 571.3803129310529, 783.0745217440676, 192.7699862915628, 
156.07253051349983, 991.1831145130524, 69.13211053056284, 595.5615215865774], 
"eval_len": [1000, 64, 541, 1000, 793, 239, 195, 1000, 68, 738]}

 32%|███▏      | 319999/1000000 [5:43:50<8:52:59, 21.26it/s]global step 320000, trans_decision ep_re 410.7850214915419

{"global_step": 320000, "eval_re": [937.8966888146466, 799.1095106071072, 
222.02887974543432, 801.3640770202519, 335.84020907551906, 166.80034933231423, 
85.46384276429337, 55.96002122290806, 371.32975123100505, 332.0568851019397], 
"eval_len": [945, 771, 286, 1000, 387, 415, 82, 146, 407, 297]}

 33%|███▎      | 329999/1000000 [5:54:40<8:45:06, 21.27it/s]global step 330000, trans_decision ep_re 644.7383724288483

{"global_step": 330000, "eval_re": [945.2626689561214, 874.3452110926698, 
930.2765815916111, 149.33929159932322, 88.94533820694609, 968.8104217050018, 
814.2443779872449, 139.10069756836097, 660.5566920811424, 876.5024435000615], 
"eval_len": [1000, 1000, 1000, 117, 80, 1000, 1000, 158, 1000, 915]}

 34%|███▍      | 339998/1000000 [6:05:40<8:36:37, 21.29it/s]global step 340000, trans_decision ep_re 393.5009158000002

{"global_step": 340000, "eval_re": [833.3029538370716, 142.83217059271348, 
20.326197005592093, 900.4235725993301, 27.496317159025715, 592.7751906105335, 
397.55545703012956, 167.11871035557357, 823.5020570539428, 29.67653175608954], 
"eval_len": [806, 240, 51, 1000, 54, 644, 512, 167, 930, 45]}

 35%|███▍      | 349998/1000000 [6:16:30<8:28:04, 21.32it/s]global step 350000, trans_decision ep_re 284.1555988724408

{"global_step": 350000, "eval_re": [494.1202926146982, 29.727208182949507, 
910.0209256151045, 1.1773492540828605, 561.0885576162447, 171.1498105928963, 
294.9483665493797, 57.88371773272911, 23.68800415629426, 297.7517564100286], 
"eval_len": [1000, 69, 1000, 48, 1000, 204, 276, 194, 114, 350]}

 36%|███▌      | 359999/1000000 [6:27:20<8:26:04, 21.08it/s]global step 360000, trans_decision ep_re 429.74482070823217

{"global_step": 360000, "eval_re": [176.97880326384214, 470.40974104250733, 
334.2643945094618, 449.0743221044085, 833.4220133944685, 48.32723492827024, 
149.1541073571789, 111.68423770381624, 883.0142934036759, 841.1190593746924], 
"eval_len": [209, 421, 384, 522, 1000, 122, 168, 167, 1000, 1000]}

 37%|███▋      | 369998/1000000 [6:38:10<8:11:16, 21.37it/s]global step 370000, trans_decision ep_re 502.2249268749221

{"global_step": 370000, "eval_re": [451.93596927987187, 528.882790632515, 
576.1260658239555, 473.5233768536729, 663.937104864211, 583.0787504255686, 
247.00950547972056, 665.2229638416663, 493.9570353240987, 338.5757062239406], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 362, 1000, 1000, 554]}

 38%|███▊      | 379999/1000000 [6:49:20<8:08:40, 21.15it/s]global step 380000, trans_decision ep_re 519.2441449710252

{"global_step": 380000, "eval_re": [579.6516494079465, 45.46080235398622, 
688.1173336820416, 748.7575741652471, 482.2968862231785, 758.1334885748109, 
555.8590369806772, 825.5253573854887, 227.8156863999774, 280.82363453689743], 
"eval_len": [1000, 51, 1000, 1000, 1000, 967, 561, 1000, 331, 267]}

 39%|███▉      | 389999/1000000 [7:00:20<8:02:08, 21.09it/s]global step 390000, trans_decision ep_re 219.5709202558202

{"global_step": 390000, "eval_re": [13.678166960201477, 144.7426284093967, 
85.78550625426084, 496.10718842983715, 178.17147531304656, 356.6112671196103, 
104.94624857156907, 218.64066173385592, 566.7959703604546, 30.230089405969643], 
"eval_len": [17, 199, 116, 685, 412, 402, 126, 249, 692, 50]}

 40%|███▉      | 399998/1000000 [7:11:00<7:46:57, 21.42it/s]global step 400000, trans_decision ep_re 389.2085926685393

{"global_step": 400000, "eval_re": [605.139940764588, 31.686283885862135, 
432.0285523702497, 100.24323649733083, 642.0785315292693, 723.3511032398432, 
830.9229327676218, 191.94555478809485, 252.67049793003844, 82.01929291249513], 
"eval_len": [648, 51, 508, 115, 1000, 1000, 1000, 168, 315, 80]}

 41%|████      | 409999/1000000 [7:21:50<7:44:25, 21.17it/s]global step 410000, trans_decision ep_re 480.389380062384

{"global_step": 410000, "eval_re": [272.8416548010854, 877.1867988441039, 
424.05308300814045, 140.25295609552552, 571.080679291861, 422.0173698289823, 
840.016191954148, 196.02030557028155, 675.8944598764017, 384.5303013533109], 
"eval_len": [349, 958, 420, 145, 549, 1000, 1000, 242, 713, 1000]}

 42%|████▏     | 419998/1000000 [7:32:50<7:33:27, 21.32it/s]global step 420000, trans_decision ep_re 393.60623353214703

{"global_step": 420000, "eval_re": [37.49213370733674, 426.6583950112401, 
240.43840992721812, 821.1828741443352, 509.75332815669384, 237.3586036421081, 
576.4740892515642, 551.9966423685692, 186.13985851432545, 348.5680005980788], 
"eval_len": [79, 365, 218, 1000, 1000, 312, 1000, 1000, 292, 353]}

 43%|████▎     | 429999/1000000 [7:43:40<7:32:13, 21.01it/s]global step 430000, trans_decision ep_re 376.9610933957914

{"global_step": 430000, "eval_re": [850.3433431683926, 19.349303763907674, 
502.70964677304187, 338.1639129389964, 443.8863482444877, 99.03051939815732, 
496.524282379224, 21.43882866695777, 381.64287844912946, 616.5218701756197], 
"eval_len": [1000, 56, 539, 302, 535, 128, 565, 30, 1000, 593]}

 44%|████▍     | 439999/1000000 [7:54:30<7:18:31, 21.28it/s]global step 440000, trans_decision ep_re 383.89513363231475

{"global_step": 440000, "eval_re": [863.2388211818648, 152.19523341884096, 
145.6546335980681, 105.9805315153837, 167.8103309987831, 430.57121148029785, 
34.43710921165797, 726.9942047423375, 798.2672380547523, 413.80202212116143], 
"eval_len": [1000, 174, 187, 152, 193, 720, 48, 644, 865, 433]}

 45%|████▍     | 449999/1000000 [8:05:20<7:11:23, 21.25it/s]global step 450000, trans_decision ep_re 556.552748119024

{"global_step": 450000, "eval_re": [78.49213119231969, 609.92488747341, 
319.7372819492484, 523.9039700219098, 607.0820902957715, 1145.7259584335839, 
284.8828999316509, 747.0673529305162, 495.7441094908403, 752.9667994709902], 
"eval_len": [86, 628, 322, 567, 570, 1000, 306, 822, 1000, 762]}

 46%|████▌     | 459998/1000000 [8:16:20<7:02:02, 21.33it/s]global step 460000, trans_decision ep_re 459.6061740521237

{"global_step": 460000, "eval_re": [288.0254229746856, 114.13848029770597, 
195.49195803043318, 229.40528541429634, 1098.7984464574138, 954.7710648946028, 
75.61791386323901, 270.0891008835534, 1120.909544636243, 248.81452306906303], 
"eval_len": [338, 107, 277, 288, 1000, 1000, 68, 317, 1000, 252]}

 47%|████▋     | 469998/1000000 [8:27:10<6:55:36, 21.25it/s]global step 470000, trans_decision ep_re 322.66766834936664

{"global_step": 470000, "eval_re": [751.9067109733516, 117.76641562036593, 
127.59679321458897, 274.9737706015649, 142.76181672908004, 21.810773142344672, 
399.85866200462, -16.40842581810209, 605.2737262549298, 801.1364407709227], 
"eval_len": [744, 132, 108, 323, 231, 36, 435, 128, 778, 841]}

 48%|████▊     | 479999/1000000 [8:38:00<6:45:29, 21.37it/s]global step 480000, trans_decision ep_re 392.77052607764057

{"global_step": 480000, "eval_re": [306.746958646299, 556.8064054440387, 
128.43921899382647, 611.2146298429892, 662.8539725678796, 385.4792083205792, 
363.3386912483323, 345.6372146067839, 139.09266461549834, 428.09629649017864], 
"eval_len": [338, 1000, 215, 719, 722, 419, 345, 375, 165, 497]}

 49%|████▉     | 489999/1000000 [8:48:50<6:42:11, 21.13it/s]global step 490000, trans_decision ep_re 420.7485101832064

{"global_step": 490000, "eval_re": [880.3132942664004, 116.83685037994432, 
28.984483918241217, 784.9665234740665, 784.365389784241, 837.2637287916017, 
126.55908234548718, 112.69287173148018, 434.9131068550656, 100.58977028553578], 
"eval_len": [1000, 136, 68, 1000, 805, 977, 149, 114, 407, 124]}

 50%|████▉     | 499998/1000000 [8:59:40<6:29:12, 21.41it/s]global step 500000, trans_decision ep_re 616.7135106489455

{"global_step": 500000, "eval_re": [994.696794262886, 780.1141175649648, 
47.45577678776925, 730.8420419068652, 747.9462284674819, 889.1528522798749, 
818.9542844643391, 48.109519795656375, 132.97193238431743, 976.8915585753003], 
"eval_len": [1000, 780, 46, 756, 722, 1000, 829, 61, 192, 1000]}

 51%|█████     | 509999/1000000 [9:10:40<6:29:12, 20.98it/s]global step 510000, trans_decision ep_re 554.5846464691758

{"global_step": 510000, "eval_re": [894.2684280359258, 222.1801364485933, 
107.07335745303301, 653.9845497371706, 695.5918351160828, 494.932414595326, 
796.9434492034065, 139.7029003701839, 777.0757778377396, 764.0936158942968], 
"eval_len": [1000, 234, 102, 1000, 771, 535, 1000, 166, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [9:21:30<6:17:46, 21.18it/s]global step 520000, trans_decision ep_re 337.8827813301833

{"global_step": 520000, "eval_re": [123.76066993245479, 230.28525400128692, 
81.2804469508351, 381.94403192484447, 481.11254371192973, 30.04263215010062, 
1014.4465059210012, 672.9000810265576, 43.71995794123619, 319.3356897415863], 
"eval_len": [172, 200, 86, 432, 1000, 40, 1000, 1000, 70, 455]}

 53%|█████▎    | 529999/1000000 [9:32:20<6:08:06, 21.28it/s]global step 530000, trans_decision ep_re 526.23976028324

{"global_step": 530000, "eval_re": [860.1501586980658, 1033.0971917035797, 
980.0260906733414, 775.5656409396121, 261.2370417451145, 870.2748770285879, 
356.74143022305765, 23.103761407807514, 74.36647563863258, 27.834934774601535], 
"eval_len": [1000, 1000, 946, 804, 281, 1000, 487, 34, 131, 50]}

 54%|█████▍    | 539998/1000000 [9:43:20<5:57:26, 21.45it/s]global step 540000, trans_decision ep_re 377.3944324333122

{"global_step": 540000, "eval_re": [518.8277838946685, 192.52289561395233, 
279.55305606172124, 115.51896181310815, 934.3513576797077, 137.68760632285128, 
839.3700456857222, 27.017401213148357, 703.6341366465714, 25.46107940167116], 
"eval_len": [431, 255, 210, 108, 1000, 118, 1000, 34, 1000, 47]}

 55%|█████▍    | 549998/1000000 [9:54:10<5:55:24, 21.10it/s]global step 550000, trans_decision ep_re 570.0194112252866

{"global_step": 550000, "eval_re": [693.8936428141883, 371.939880187914, 
666.4800308510754, 695.7604461809885, 682.2117029568818, 526.3593003416329, 
332.8457780226334, 792.6560159147806, 421.52102402353466, 516.5262909592359], 
"eval_len": [1000, 391, 1000, 821, 849, 562, 378, 1000, 343, 1000]}

 56%|█████▌    | 559998/1000000 [10:05:10<5:46:34, 21.16it/s]global step 560000, trans_decision ep_re 160.45919000426815

{"global_step": 560000, "eval_re": [201.0627284985848, 116.55258736030989, 
56.709779810920715, 33.96668861631157, 286.43078297631314, 80.50083552875448, 
315.1238264464288, 339.168820808163, 113.57970296629455, 61.49614703060067], 
"eval_len": [178, 104, 59, 31, 299, 67, 1000, 531, 138, 97]}

 57%|█████▋    | 569999/1000000 [10:15:50<5:37:19, 21.25it/s]global step 570000, trans_decision ep_re 414.1193787794

{"global_step": 570000, "eval_re": [172.82339122780354, 164.91757274647742, 
34.21416225952337, 395.2368342228554, 345.8879156503251, 609.6231382805757, 
703.1407921237889, 697.1467689096543, 680.5689271293571, 337.63428524363894], 
"eval_len": [292, 176, 39, 509, 382, 1000, 1000, 1000, 1000, 469]}

 58%|█████▊    | 579998/1000000 [10:26:50<5:27:17, 21.39it/s]global step 580000, trans_decision ep_re 214.6730565905294

{"global_step": 580000, "eval_re": [78.78521762552957, 410.844939332774, 
164.2141434934314, 41.61691688634445, 461.3251446805281, 554.7198585757235, 
131.53012493201928, 36.626288705585, 91.01187961904643, 176.05605205431232], 
"eval_len": [65, 1000, 177, 47, 1000, 592, 141, 40, 134, 264]}

 59%|█████▉    | 589999/1000000 [10:37:40<5:24:45, 21.04it/s]global step 590000, trans_decision ep_re 299.4151910535972

{"global_step": 590000, "eval_re": [94.2453789583215, 79.727506089982, 
456.11535744836596, 365.9004755721877, 895.3606042525437, 163.6301966363829, 
16.78268657419512, 642.8876398010342, 258.2204302512512, 21.281634951707858], 
"eval_len": [76, 79, 671, 1000, 1000, 168, 50, 594, 334, 50]}

 60%|█████▉    | 599999/1000000 [10:48:30<5:14:15, 21.21it/s]global step 600000, trans_decision ep_re 432.8272214283561

{"global_step": 600000, "eval_re": [541.6876316580467, 394.4656722051016, 
635.5614190359277, 679.0948974693126, 388.4242687742249, 140.94921993377386, 
755.4926016667267, 146.794793964428, 86.64760249696198, 559.1541070790568], 
"eval_len": [724, 443, 1000, 1000, 495, 171, 1000, 197, 100, 1000]}

 61%|██████    | 609998/1000000 [10:59:20<5:04:28, 21.35it/s]global step 610000, trans_decision ep_re 287.09234014314706

{"global_step": 610000, "eval_re": [574.5121616948891, 83.01356774233734, 
13.522020453452157, 568.9512193986011, 354.19979329783916, 230.4664231964961, 
403.55299073023434, 419.27226073572393, 192.05655240275723, 31.37641177914083], 
"eval_len": [1000, 91, 57, 1000, 402, 393, 387, 490, 193, 41]}

 62%|██████▏   | 619998/1000000 [11:10:10<4:55:53, 21.40it/s]global step 620000, trans_decision ep_re 490.3061061866765

{"global_step": 620000, "eval_re": [528.4192239332253, 266.00487220611956, 
531.4906818682085, 956.7404243501337, 306.029930792171, 554.6303914690105, 
496.7150474626316, 530.9403279605577, 22.757762787079695, 709.332399037627], 
"eval_len": [626, 317, 1000, 1000, 332, 1000, 586, 776, 28, 1000]}

 63%|██████▎   | 629998/1000000 [11:21:10<4:52:41, 21.07it/s]global step 630000, trans_decision ep_re 522.021479304459

{"global_step": 630000, "eval_re": [354.26505892536954, 825.1923297363403, 
771.0976017669047, 559.0674986461362, 754.3456363407734, 169.63687679273912, 
808.4420367742358, 149.15873078579168, 401.45049261889756, 427.5585306574019], 
"eval_len": [464, 1000, 1000, 622, 844, 261, 1000, 215, 433, 528]}

 64%|██████▍   | 639999/1000000 [11:32:10<4:44:35, 21.08it/s]global step 640000, trans_decision ep_re 372.26914835613377

{"global_step": 640000, "eval_re": [559.4815541733262, 860.4920029550885, 
810.3985877997754, 481.18248495127676, 134.2963070348615, 15.389607622089667, 
297.86539457271164, 30.952264289393142, 256.3089777761172, 276.3243023866978], 
"eval_len": [622, 928, 1000, 659, 110, 18, 377, 46, 320, 263]}

 65%|██████▍   | 649999/1000000 [11:43:00<4:35:28, 21.18it/s]global step 650000, trans_decision ep_re 416.0680970767

{"global_step": 650000, "eval_re": [695.1924274456404, 52.118645449871686, 
156.24948577141964, 276.3003584667728, 593.2473719533243, 291.0643023680473, 
556.815674948369, 763.5090942680325, 370.08814696327806, 406.0954631322443], 
"eval_len": [780, 67, 108, 268, 843, 363, 605, 1000, 367, 406]}

 66%|██████▌   | 659998/1000000 [11:53:50<4:24:23, 21.43it/s]global step 660000, trans_decision ep_re 346.2347024587454

{"global_step": 660000, "eval_re": [641.0393630778334, 15.417852547059947, 
253.9602185637723, 147.92187595406415, 741.1179105277344, 470.027309930904, 
110.50982071878953, 593.4872705549712, 177.70485714254235, 311.16054556978287], 
"eval_len": [1000, 49, 348, 134, 798, 634, 171, 923, 336, 384]}

 67%|██████▋   | 669998/1000000 [12:04:40<4:19:42, 21.18it/s]global step 670000, trans_decision ep_re 474.8934992253427

{"global_step": 670000, "eval_re": [527.746976378762, 829.7162649616256, 
347.7482700267199, 298.39901386499497, 105.25762620722953, 390.12815711146555, 
649.0137410156383, 701.0171508951998, 85.73391111171344, 814.1738806800772], 
"eval_len": [452, 848, 378, 319, 191, 377, 1000, 1000, 157, 701]}

 68%|██████▊   | 679999/1000000 [12:15:40<4:12:35, 21.11it/s]global step 680000, trans_decision ep_re 315.19078321950644

{"global_step": 680000, "eval_re": [185.25274868401843, 120.96078406073077, 
502.32071199369346, 757.6044393533934, 70.13039298623016, 125.27162812521414, 
592.6327170356552, 379.6338381606465, 380.6241458725757, 37.47642592290625], 
"eval_len": [305, 151, 513, 1000, 98, 231, 1000, 429, 386, 57]}

 69%|██████▉   | 689999/1000000 [12:26:30<4:04:51, 21.10it/s]global step 690000, trans_decision ep_re 574.6719614595559

{"global_step": 690000, "eval_re": [701.2472317624859, 584.6063602622228, 
301.11873401135404, 729.3871260060906, 623.9278758918786, 554.0484327508052, 
609.7497506305675, 511.6337310225012, 346.221418302749, 784.7789539549043], 
"eval_len": [763, 694, 530, 1000, 1000, 676, 773, 521, 314, 1000]}

 70%|██████▉   | 699999/1000000 [12:37:30<3:55:50, 21.20it/s]global step 700000, trans_decision ep_re 304.64545988918087

{"global_step": 700000, "eval_re": [839.4226936350785, 76.53543287757338, 
24.996435132084606, 1214.4446557190183, 286.2968389321189, 64.45681719338552, 
123.84252506843139, 77.492831238524, 226.37197698806125, 112.59439210753256], 
"eval_len": [1000, 121, 53, 1000, 313, 115, 106, 97, 297, 158]}

 71%|███████   | 709999/1000000 [12:48:10<3:51:02, 20.92it/s]global step 710000, trans_decision ep_re 523.1059552945613

{"global_step": 710000, "eval_re": [428.37669185840366, 93.2842987258621, 
737.1567045533279, 807.8396044127181, 458.3183785871617, 682.7640240982239, 
480.5092358924662, 47.16411733914818, 812.2584308760983, 683.3880666022033], 
"eval_len": [498, 150, 1000, 1000, 646, 1000, 1000, 60, 1000, 891]}

 72%|███████▏  | 719999/1000000 [12:59:10<3:42:11, 21.00it/s]global step 720000, trans_decision ep_re 482.517015096694

{"global_step": 720000, "eval_re": [720.7455172604293, 435.57633201932924, 
8.473679857268346, 842.6870950464113, 352.9704525165109, 411.8982388363004, 
891.9011435047277, 458.70239215848943, -0.9544684169896546, 703.1697681844621], 
"eval_len": [765, 416, 49, 842, 390, 690, 944, 497, 48, 1000]}

 73%|███████▎  | 729998/1000000 [13:10:10<3:29:57, 21.43it/s]global step 730000, trans_decision ep_re 524.8100996062418

{"global_step": 730000, "eval_re": [788.4883782443515, 463.6064731483283, 
111.99862452905612, 603.625586051271, 894.6022194507415, 554.4798037281339, 
828.3530840956981, 223.18126945291962, 357.8277564966696, 421.93780086524794], 
"eval_len": [1000, 1000, 114, 759, 1000, 1000, 1000, 270, 492, 460]}

 74%|███████▍  | 739999/1000000 [13:21:10<3:24:19, 21.21it/s]global step 740000, trans_decision ep_re 308.1981809473315

{"global_step": 740000, "eval_re": [23.465226219801178, 193.33089759311676, 
803.206882333369, 106.72554962489586, 401.1918264707613, 5.387091389083379, 
685.2932594185089, 59.77300457189271, 503.9832295892153, 299.62484226267054], 
"eval_len": [54, 181, 1000, 212, 361, 37, 744, 125, 459, 288]}

 75%|███████▍  | 749999/1000000 [13:32:00<3:16:25, 21.21it/s]global step 750000, trans_decision ep_re 430.41262211874044

{"global_step": 750000, "eval_re": [108.80936336599575, 503.61268709613813, 
224.17567929790687, 267.7612512146282, 1021.011790027344, 43.65162539672458, 
194.24932368556784, 277.296651867092, 691.6758258386917, 971.8820233973153], 
"eval_len": [129, 1000, 207, 264, 1000, 71, 223, 616, 1000, 819]}

 76%|███████▌  | 759998/1000000 [13:42:50<3:08:22, 21.23it/s]global step 760000, trans_decision ep_re 335.7939347114447

{"global_step": 760000, "eval_re": [332.95565595209627, 1049.2532282388318, 
20.71952388703593, 62.45553757143747, 393.9595383144681, 710.0615246951564, 
22.141355331194127, 726.4337117941884, 9.412404075733095, 30.546867254305198], 
"eval_len": [284, 1000, 49, 61, 338, 1000, 43, 775, 19, 55]}

 77%|███████▋  | 769999/1000000 [13:53:40<3:00:11, 21.27it/s]global step 770000, trans_decision ep_re 293.79886407498594

{"global_step": 770000, "eval_re": [101.57268801326583, 409.4180994344048, 
303.3931968831068, 113.38999581808946, 167.03684678644598, 1198.6257746256879, 
373.4280851922161, 432.1628403325426, -280.0768916780389, 119.03800534213914], 
"eval_len": [84, 403, 319, 138, 248, 1000, 337, 508, 1000, 91]}

 78%|███████▊  | 779999/1000000 [14:04:30<2:53:22, 21.15it/s]global step 780000, trans_decision ep_re 377.13591149411053

{"global_step": 780000, "eval_re": [228.38200116815645, 323.8685519415597, 
228.32321903504854, 201.47588773092707, 64.10165136495223, 1006.7604907378549, 
230.89305429675562, 682.5250827639043, 528.1402799300218, 276.8888959719249], 
"eval_len": [274, 360, 276, 242, 122, 1000, 312, 757, 1000, 1000]}

 79%|███████▉  | 789998/1000000 [14:15:20<2:43:55, 21.35it/s]global step 790000, trans_decision ep_re 338.2734936214008

{"global_step": 790000, "eval_re": [13.488863423567555, 167.64525762901778, 
70.35187742533557, 476.90780928092124, 347.95731664821335, 43.193142695197444, 
451.7833403568539, 786.1662955928757, 243.6137117443964, 781.627321417629], 
"eval_len": [45, 199, 85, 487, 404, 60, 498, 1000, 303, 1000]}

 80%|███████▉  | 799998/1000000 [14:26:11<2:36:57, 21.24it/s]global step 800000, trans_decision ep_re 519.0942700725549

{"global_step": 800000, "eval_re": [572.3179409744704, 636.6597595284242, 
189.38668835319197, 592.2307289138355, 428.7909354245784, 952.5437194570471, 
227.8340861176047, 183.326431778378, 594.798073330441, 813.0543368475778], 
"eval_len": [795, 785, 151, 728, 749, 1000, 210, 263, 1000, 1000]}

 81%|████████  | 809998/1000000 [14:37:11<2:27:58, 21.40it/s]global step 810000, trans_decision ep_re 314.3646581424241

{"global_step": 810000, "eval_re": [422.06186931564565, 547.7331300923365, 
633.0160341076319, 77.62497541349366, 327.02483098696746, 98.0771374884208, 
92.91785798837108, 63.8990944934644, 798.6578985945488, 82.63375294336066], 
"eval_len": [570, 531, 1000, 94, 339, 124, 95, 63, 809, 92]}

 82%|████████▏ | 819999/1000000 [14:48:01<2:20:52, 21.29it/s]global step 820000, trans_decision ep_re 321.70212209023765

{"global_step": 820000, "eval_re": [84.79932416533634, 86.26434491565863, 
438.9815735917077, 144.9675117264566, 331.3203392379246, 591.8281658585497, 
373.6217663384547, 231.95956615576674, 922.1939860160273, 11.084642896494184], 
"eval_len": [99, 168, 422, 187, 296, 500, 300, 242, 1000, 47]}

 83%|████████▎ | 829999/1000000 [14:58:41<2:14:53, 21.00it/s]global step 830000, trans_decision ep_re 431.93591824652486

{"global_step": 830000, "eval_re": [189.99484888763064, 524.8383459758547, 
842.9719404344426, 121.31816936351899, 150.82499963346254, 110.08300618543129, 
280.83734635139575, 1156.7332202648538, 387.9912734839427, 553.7660318847161], 
"eval_len": [250, 649, 822, 132, 221, 123, 265, 1000, 486, 1000]}

 84%|████████▍ | 839999/1000000 [15:09:41<2:05:54, 21.18it/s]global step 840000, trans_decision ep_re 181.7058335230146

{"global_step": 840000, "eval_re": [50.37216194096659, 26.58895863214266, 
41.74841297982774, 460.5441521632647, 515.3117628008843, 50.36245386547107, 
220.75982724893785, 57.552354118116355, 32.43961814430034, 361.37863333623443], 
"eval_len": [69, 54, 40, 559, 526, 89, 235, 83, 47, 344]}

 85%|████████▍ | 849998/1000000 [15:20:21<1:57:02, 21.36it/s]global step 850000, trans_decision ep_re 341.24662370329577

{"global_step": 850000, "eval_re": [706.7018667578236, 212.10072242982383, 
430.6929469187836, 246.34650318294186, 697.7331613105544, 762.9421883732359, 
27.239444083730064, 183.32515839882055, 27.553743532721963, 117.83050204452198],
"eval_len": [1000, 168, 455, 247, 1000, 924, 46, 175, 108, 110]}

 86%|████████▌ | 859998/1000000 [15:31:11<1:49:07, 21.38it/s]global step 860000, trans_decision ep_re 346.5232131315778

{"global_step": 860000, "eval_re": [192.10955862750208, 111.59507378606189, 
75.14152828083645, 447.35960907278303, 732.5093881428284, 242.03837706412142, 
499.48314631160855, 604.4329821417366, 199.73249259284273, 360.8299752954564], 
"eval_len": [223, 152, 116, 522, 1000, 284, 1000, 610, 305, 370]}

 87%|████████▋ | 869998/1000000 [15:42:01<1:42:15, 21.19it/s]global step 870000, trans_decision ep_re 459.6190342052981

{"global_step": 870000, "eval_re": [941.6442780723223, 608.9142350161853, 
325.94911782776416, 223.42307660726254, 180.72809936027082, 460.65003653473013, 
880.8744493457122, 33.02764515684263, 923.8709869555219, 17.108417176369226], 
"eval_len": [1000, 802, 322, 228, 195, 1000, 1000, 42, 1000, 38]}

 88%|████████▊ | 879999/1000000 [15:53:01<1:34:47, 21.10it/s]global step 880000, trans_decision ep_re 236.1619387944126

{"global_step": 880000, "eval_re": [28.361736205683872, 142.3877373922356, 
143.26805872862545, 281.08784006070164, 356.89897301024, 51.808738800331824, 
233.5130820909264, 16.150979176402846, 1036.9396154568885, 71.20262702208998], 
"eval_len": [103, 124, 138, 265, 351, 65, 266, 56, 1000, 155]}

 89%|████████▉ | 889998/1000000 [16:03:41<1:25:58, 21.32it/s]global step 890000, trans_decision ep_re 509.7251485695936

{"global_step": 890000, "eval_re": [55.00299119845394, 954.3111036565371, 
808.8124352411587, 981.0685036059389, 755.3185776868442, 172.90528776719327, 
165.1067553214065, 834.1629618157286, 272.37827381695865, 98.18459558571605], 
"eval_len": [56, 1000, 929, 1000, 694, 175, 153, 1000, 283, 165]}

 90%|████████▉ | 899999/1000000 [16:14:41<1:19:14, 21.03it/s]global step 900000, trans_decision ep_re 558.0757470657638

{"global_step": 900000, "eval_re": [724.1577778874023, 545.0197722806726, 
189.64074430704224, 231.54063514466762, 478.9488896969901, 810.4615984281069, 
301.99273740901907, 623.4287801914661, 776.5746433883353, 898.9918919239366], 
"eval_len": [1000, 524, 224, 321, 1000, 1000, 408, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [16:25:41<1:11:30, 20.97it/s]global step 910000, trans_decision ep_re 376.6887287774237

{"global_step": 910000, "eval_re": [190.37047111718053, 202.6483908282316, 
1117.5833921803248, 572.253651009257, 742.9445564803214, 112.59471370231752, 
189.08306441068822, 379.6200684928535, 91.73115516127054, 168.05782439179183], 
"eval_len": [224, 239, 1000, 1000, 1000, 111, 264, 382, 75, 165]}

 92%|█████████▏| 919999/1000000 [16:36:31<1:02:48, 21.23it/s]global step 920000, trans_decision ep_re 580.3127775348345

{"global_step": 920000, "eval_re": [1016.1470985537302, 306.7249820627329, 
884.9505725645158, 9.336342257731586, 470.5200731299963, 736.4655068195119, 
124.46001215746995, 836.39250897498, 921.8217373326372, 496.3089414950391], 
"eval_len": [1000, 366, 1000, 23, 489, 747, 150, 936, 1000, 558]}

 93%|█████████▎| 929999/1000000 [16:47:21<55:00, 21.21it/s]global step 930000, trans_decision ep_re 334.9454162946496

{"global_step": 930000, "eval_re": [78.97194756955432, 286.82929524185033, 
226.353796725447, 556.3275727411985, 470.6997108917571, 354.64830565260934, 
549.6712121096895, 291.11839858538525, 114.10390105371691, 420.7300223752878], 
"eval_len": [70, 293, 264, 598, 510, 372, 802, 311, 205, 1000]}

 94%|█████████▍| 939999/1000000 [16:58:11<47:05, 21.24it/s]global step 940000, trans_decision ep_re 474.6814395328289

{"global_step": 940000, "eval_re": [827.777812443548, 366.9436129541519, 
217.61610281291172, 14.55975159317723, 149.06415790626295, 745.5016941435891, 
603.1483155926484, 997.9621297731253, 815.2473729959449, 8.993445112929079], 
"eval_len": [1000, 266, 383, 42, 168, 914, 1000, 1000, 1000, 39]}

 95%|█████████▍| 949999/1000000 [17:09:11<39:38, 21.03it/s]global step 950000, trans_decision ep_re 477.7766124128847

{"global_step": 950000, "eval_re": [761.0299695730401, 855.5273962590899, 
98.02246020703767, 382.7196447799333, 365.8723218832921, 950.9087863523439, 
515.9460231215286, 62.75865808496524, 638.2037443159954, 146.77711955162096], 
"eval_len": [763, 1000, 95, 344, 458, 1000, 1000, 148, 764, 142]}

 96%|█████████▌| 959999/1000000 [17:20:01<31:29, 21.17it/s]global step 960000, trans_decision ep_re 449.1371610704359

{"global_step": 960000, "eval_re": [161.92720950914523, 840.0258469301091, 
1056.434961599053, 194.57832505705142, 312.98146114052344, 429.122037616737, 
333.63203256668646, 600.6541255838323, 82.69453126683872, 479.32107943438297], 
"eval_len": [266, 761, 1000, 194, 270, 471, 353, 619, 120, 441]}

 97%|█████████▋| 969999/1000000 [17:30:51<23:46, 21.03it/s]global step 970000, trans_decision ep_re 621.2483970422544

{"global_step": 970000, "eval_re": [948.1658182467663, 384.2674079931056, 
423.9770947546555, 963.9975858329597, 25.379049057688825, 637.5628681510616, 
283.58033078869573, 961.9600312223353, 653.9068585785828, 929.6869257966931], 
"eval_len": [1000, 410, 402, 1000, 35, 696, 364, 1000, 682, 949]}

 98%|█████████▊| 979998/1000000 [17:41:51<15:30, 21.51it/s]global step 980000, trans_decision ep_re 484.80300414396186

{"global_step": 980000, "eval_re": [538.3362710428249, 86.87824374753718, 
412.3408707617076, 538.0663793022378, 228.31513438389678, 923.7406189724431, 
194.82002387407837, 382.0442616609835, 741.0081186972258, 802.4801189966837], 
"eval_len": [1000, 136, 408, 671, 178, 1000, 204, 419, 765, 1000]}

 99%|█████████▉| 989999/1000000 [17:52:51<07:56, 21.00it/s]global step 990000, trans_decision ep_re 511.96594700489106

{"global_step": 990000, "eval_re": [41.46590018030217, 90.95094195385028, 
914.4360550895282, 139.26443255114452, 1084.3677139460005, 112.8821946123363, 
945.1287574026882, 340.98607924770545, 768.974081368482, 681.203313696873], 
"eval_len": [73, 65, 1000, 140, 971, 116, 1000, 1000, 814, 695]}

100%|█████████▉| 999998/1000000 [18:03:41<00:00, 21.26it/s]global step 1000000, trans_decision ep_re 248.1060265518645

{"global_step": 1000000, "eval_re": [193.58421026633135, 386.76196618409335, 
29.290937455243753, 160.00287179601685, 47.61264824102344, 71.37286407750605, 
717.2123464395447, 652.1489753687557, 50.92677109448854, 172.1466745956406], 
"eval_len": [234, 342, 29, 188, 80, 84, 1000, 1000, 77, 176]}

100%|██████████| 1000000/1000000 [18:03:55<00:00, 15.38it/s]
