
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:20<10:09:00, 27.09it/s]global step 10000, trans_decision ep_re 21.719036905703906

{"global_step": 10000, "eval_re": [37.96310436725473, 58.515816641508486, 
32.19832291690271, 16.527975112532115, -197.68607945404142, 58.88770183445003, 
58.58735754703166, 13.837399144306707, 33.988780604340285, 104.36999034275375], 
"eval_len": [136, 104, 70, 50, 1000, 382, 1000, 32, 41, 1000]}

  2%|▏         | 19997/1000000 [12:50<10:06:31, 26.93it/s]global step 20000, trans_decision ep_re 28.348219612898447

{"global_step": 20000, "eval_re": [140.85653773472305, 23.32361666333417, 
-5.388865969093718, 34.53403877222893, 17.77869115873846, 9.172764679984375, 
88.73873056755127, -33.33989066891892, 18.594155928029608, -10.787582737592745],
"eval_len": [400, 180, 44, 726, 220, 293, 1000, 1000, 45, 347]}

  3%|▎         | 29999/1000000 [21:10<10:04:34, 26.74it/s]global step 30000, trans_decision ep_re 71.9119546450351

{"global_step": 30000, "eval_re": [8.921938825642123, 49.27774726137474, 
204.1366744455731, 85.19678667601474, 33.08315952944957, 33.75219332264427, 
-0.9121063558165936, 116.45721976233015, 179.10578418924504, 
10.100148793893839], "eval_len": [66, 177, 1000, 247, 76, 138, 84, 483, 498, 
1000]}

  4%|▍         | 39999/1000000 [29:40<9:54:56, 26.89it/s]global step 40000, trans_decision ep_re 149.28529187630494

{"global_step": 40000, "eval_re": [314.2972135277399, 32.58748309041939, 
76.91362277963727, 80.54531787860375, 309.47781681910266, 0.6605154621473142, 
22.686484501939248, 287.31401521841997, 357.18707663253576, 11.183372852504203],
"eval_len": [1000, 37, 191, 254, 1000, 72, 50, 1000, 1000, 179]}

  5%|▍         | 49999/1000000 [38:00<9:45:30, 27.04it/s]global step 50000, trans_decision ep_re 80.18099968104082

{"global_step": 50000, "eval_re": [22.480364661603172, 6.516314858360916, 
149.445411840481, 207.47367074469912, 12.948346944810103, 141.49716155272438, 
169.493721131156, -5.003393357429892, 10.213428020363954, 86.74497041363945], 
"eval_len": [82, 58, 334, 659, 32, 351, 1000, 179, 32, 209]}

  6%|▌         | 59998/1000000 [46:20<9:38:14, 27.09it/s]global step 60000, trans_decision ep_re 234.2121100565404

{"global_step": 60000, "eval_re": [339.63755723783254, 32.374379008237845, 
347.67294658357525, 196.79827297359245, 119.28920912084497, 98.6662289435035, 
459.7022360583901, 262.4259035268876, 204.86943615244854, 280.68493096009115], 
"eval_len": [1000, 65, 1000, 1000, 175, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [55:00<9:35:37, 26.93it/s]global step 70000, trans_decision ep_re 159.8849330854518

{"global_step": 70000, "eval_re": [79.44897082236169, 160.39982532238426, 
245.2052824307781, 84.46519836463328, 97.4568840261881, 50.480848494680885, 
92.11651242958193, 236.59359934826475, 156.9920412358822, 395.6901683797629], 
"eval_len": [96, 197, 463, 90, 128, 61, 109, 518, 177, 547]}

  8%|▊         | 79998/1000000 [1:03:20<9:19:14, 27.42it/s]global step 80000, trans_decision ep_re 229.26851568072325

{"global_step": 80000, "eval_re": [235.28853966667674, 57.29415393058156, 
141.08964006006886, 45.319530205329386, 148.6656412387788, 490.93880710764785, 
399.58583956915, 352.4389569125932, 328.4258989123364, 93.6381492040702], 
"eval_len": [435, 124, 273, 91, 252, 827, 1000, 465, 1000, 125]}

  9%|▉         | 89999/1000000 [1:11:50<9:22:48, 26.95it/s]global step 90000, trans_decision ep_re 299.50554905419006

{"global_step": 90000, "eval_re": [169.22469737166693, 110.96090094832566, 
423.8932476128892, 320.07458134658395, 196.98943509277976, 655.077143387033, 
483.1958463307228, 274.6204433448087, 295.07850123394536, 65.94069387314589], 
"eval_len": [273, 194, 602, 444, 246, 1000, 443, 358, 1000, 106]}

 10%|▉         | 99999/1000000 [1:20:10<9:18:22, 26.86it/s]global step 100000, trans_decision ep_re 264.89045831841355

{"global_step": 100000, "eval_re": [508.5705775734138, 299.76363906412456, 
83.39852058114562, 159.00001105349318, 375.85363743323603, 140.51169878675125, 
342.69186396483445, 350.1210371712914, 242.98745739190332, 146.00614016394186], 
"eval_len": [964, 352, 122, 212, 500, 220, 1000, 414, 286, 254]}

 11%|█         | 109998/1000000 [1:28:40<9:04:45, 27.23it/s]global step 110000, trans_decision ep_re 315.2929061981693

{"global_step": 110000, "eval_re": [160.38572738194026, 232.8685382849955, 
363.38604332630143, 713.9910936540675, 142.81775091070327, 362.7981643837364, 
377.02760308374144, 187.29424181381674, 50.85635001924609, 561.5035491231445], 
"eval_len": [204, 372, 605, 1000, 194, 1000, 531, 172, 104, 688]}

 12%|█▏        | 119999/1000000 [1:37:00<8:56:45, 27.32it/s]global step 120000, trans_decision ep_re 273.5319957381424

{"global_step": 120000, "eval_re": [627.0930463828748, 341.00112721896073, 
434.3623491802318, 17.516823058975287, 279.4856015049074, 185.82512837836927, 
367.82474795281746, 382.6054510517956, 74.92040974713927, 24.685272905352548], 
"eval_len": [1000, 366, 530, 37, 362, 303, 428, 1000, 84, 42]}

 13%|█▎        | 129997/1000000 [1:45:20<8:48:21, 27.44it/s]global step 130000, trans_decision ep_re 298.30486276410653

{"global_step": 130000, "eval_re": [319.9391438593145, 308.7717557390076, 
144.00292752290196, 41.42842848075843, 477.36499672657897, 135.37979224980606, 
953.7049336876227, 195.70344021892703, 46.60449646381015, 360.14871269233794], 
"eval_len": [1000, 431, 168, 79, 1000, 239, 986, 678, 65, 431]}

 14%|█▍        | 139998/1000000 [1:53:50<8:42:22, 27.44it/s]global step 140000, trans_decision ep_re 175.5393358296033

{"global_step": 140000, "eval_re": [48.372937071485985, 233.653830936924, 
27.80848042643383, 444.7369160238064, 53.59389144787029, 521.502331306102, 
214.5054366228327, 49.25445693075445, 14.038376330803299, 147.92670119902004], 
"eval_len": [68, 242, 46, 691, 62, 1000, 206, 90, 34, 149]}

 15%|█▍        | 149999/1000000 [2:02:00<8:41:33, 27.16it/s]global step 150000, trans_decision ep_re 62.6358376964915

{"global_step": 150000, "eval_re": [78.37022625659478, 10.351483215497252, 
33.90797309564044, 57.189782596031286, 90.60997317086012, 116.23516240406958, 
47.02124281470861, 35.45091442026324, 21.779977681951426, 135.44164130929826], 
"eval_len": [111, 32, 55, 72, 112, 114, 58, 29, 117, 172]}

 16%|█▌        | 159999/1000000 [2:10:10<8:39:31, 26.95it/s]global step 160000, trans_decision ep_re 248.4074611906823

{"global_step": 160000, "eval_re": [134.6954634250346, 27.248644424536934, 
404.32276816125335, 125.21209827669932, 112.5392606796229, 517.8825969817333, 
470.2078600000036, 321.8818600241168, 73.50980616163788, 296.5742537721844], 
"eval_len": [153, 31, 461, 152, 182, 1000, 1000, 375, 111, 293]}

 17%|█▋        | 169997/1000000 [2:18:40<8:28:24, 27.21it/s]global step 170000, trans_decision ep_re 324.6261346337928

{"global_step": 170000, "eval_re": [153.94976016050873, 194.63855348944358, 
728.043235781155, 261.7990948423742, 494.2891568298842, 155.95418862331636, 
347.1086854507334, 266.3916544395778, 172.35767741025762, 471.72933931067763], 
"eval_len": [224, 243, 914, 344, 1000, 186, 526, 481, 237, 1000]}

 18%|█▊        | 179997/1000000 [2:27:00<8:22:31, 27.20it/s]global step 180000, trans_decision ep_re 451.61373152045246

{"global_step": 180000, "eval_re": [773.1726786195637, 688.1069443928466, 
689.0726487744819, 437.7551875329394, 278.0616239926545, 315.47629297290354, 
486.28163777594654, 148.95549848388274, 267.315695860421, 431.9391067988843], 
"eval_len": [1000, 910, 821, 1000, 390, 1000, 507, 170, 277, 1000]}

 19%|█▉        | 189998/1000000 [2:35:30<8:10:49, 27.50it/s]global step 190000, trans_decision ep_re 133.78960441501104

{"global_step": 190000, "eval_re": [119.2823933742672, 80.83245955825362, 
154.514003350197, 85.85989965609035, 96.43240420523227, 68.2712766726656, 
569.9451561101171, 76.79725427630348, 51.287713021427116, 34.67348392555673], 
"eval_len": [137, 97, 175, 164, 117, 92, 1000, 121, 104, 33]}

 20%|█▉        | 199999/1000000 [2:43:40<8:07:53, 27.33it/s]global step 200000, trans_decision ep_re 203.36378519018973

{"global_step": 200000, "eval_re": [189.34161669723184, 502.89204885531456, 
124.64260383134656, 521.9068424909507, 125.81153776328243, 88.69306230091979, 
53.74178609253481, 94.97772518353344, 167.1964788442168, 164.43414984256606], 
"eval_len": [233, 544, 124, 567, 128, 102, 67, 220, 227, 181]}

 21%|██        | 209998/1000000 [2:52:00<7:59:18, 27.47it/s]global step 210000, trans_decision ep_re 319.8869730106005

{"global_step": 210000, "eval_re": [73.46004657983546, 707.6803081643583, 
499.5826892106369, 359.22668443458275, 580.055678443127, 152.13084243162038, 
85.42381323564477, 349.918143728462, 163.3547002086184, 228.03682366911943], 
"eval_len": [66, 1000, 619, 342, 636, 163, 111, 446, 118, 231]}

 22%|██▏       | 219997/1000000 [3:00:20<7:58:13, 27.18it/s]global step 220000, trans_decision ep_re 296.59339307993747

{"global_step": 220000, "eval_re": [46.936095944915266, 293.59029352431986, 
283.73633577933316, 293.66797197045605, 246.16119594400337, 85.45841219876831, 
587.6613751344252, 534.7935430883606, 116.86025865051607, 477.06844856427705], 
"eval_len": [73, 280, 200, 400, 295, 89, 1000, 669, 168, 1000]}

 23%|██▎       | 229997/1000000 [3:08:30<7:52:24, 27.17it/s]global step 230000, trans_decision ep_re 339.216879293068

{"global_step": 230000, "eval_re": [507.5073209779996, 230.76714781613376, 
96.02017468126546, 496.54048673457345, 575.2597243411734, 495.7996920246158, 
373.5949736744969, 181.922329109204, 401.40612186090414, 33.350821710313596], 
"eval_len": [1000, 302, 130, 501, 558, 1000, 550, 239, 441, 152]}

 24%|██▍       | 239998/1000000 [3:17:00<7:38:44, 27.61it/s]global step 240000, trans_decision ep_re 363.9128043304794

{"global_step": 240000, "eval_re": [323.4587861967735, 942.9001455548083, 
105.54226163733865, 320.45153619319666, 11.055158639142823, 712.3038266448423, 
256.9712575709262, 607.1670483584865, 90.98347841469626, 268.2945440945826], 
"eval_len": [384, 957, 103, 489, 31, 805, 399, 706, 219, 321]}

 25%|██▍       | 249999/1000000 [3:25:20<7:41:35, 27.08it/s]global step 250000, trans_decision ep_re 295.7161867292236

{"global_step": 250000, "eval_re": [79.46376937991795, 64.88227698815341, 
436.97502039285445, 29.48786513110352, 41.22085172432253, 225.37089185173926, 
282.6227904982264, 27.8967852918493, 712.0905008569401, 1057.1511151771288], 
"eval_len": [139, 117, 1000, 37, 51, 166, 316, 69, 1000, 1000]}

 26%|██▌       | 259999/1000000 [3:33:40<7:31:30, 27.32it/s]global step 260000, trans_decision ep_re 262.77085090634023

{"global_step": 260000, "eval_re": [428.21126358178185, 111.19992788140208, 
260.65865533013795, 68.71997966387354, 301.6230391594653, 260.98619261392247, 
496.72063865186374, 12.853764814773577, 657.9775466323429, 28.757500733838793], 
"eval_len": [383, 182, 324, 93, 282, 383, 1000, 95, 754, 44]}

 27%|██▋       | 269997/1000000 [3:42:00<7:28:50, 27.11it/s]global step 270000, trans_decision ep_re 258.6237733184096

{"global_step": 270000, "eval_re": [109.71730676056086, 376.7888848513515, 
942.528344788812, 106.31958606806637, 8.293824632085121, 121.41451656385782, 
256.97198503950557, 133.13046446791427, 103.26481812763105, 427.8080018843115], 
"eval_len": [175, 397, 807, 147, 68, 107, 333, 179, 147, 1000]}

 28%|██▊       | 279999/1000000 [3:50:10<7:21:21, 27.19it/s]global step 280000, trans_decision ep_re 301.90042883429777

{"global_step": 280000, "eval_re": [120.99963489953737, 163.87765782963424, 
226.66663141191756, 64.26837068034081, 491.6742258752649, 228.94243066494002, 
491.41273937727055, 409.21104172320986, 503.9541823355541, 317.9973735453087], 
"eval_len": [117, 285, 258, 72, 595, 350, 1000, 429, 514, 333]}

 29%|██▉       | 289999/1000000 [3:58:30<7:38:31, 25.81it/s]global step 290000, trans_decision ep_re 365.1109734854155

{"global_step": 290000, "eval_re": [209.0909177042503, 74.864612836273, 
437.68966047026043, 209.1407901383217, 477.0664207734989, 326.95179489122165, 
931.6933457027296, 506.67932889582914, 390.9187403575579, 87.01412308421287], 
"eval_len": [182, 65, 406, 208, 550, 308, 1000, 1000, 404, 106]}

 30%|██▉       | 299999/1000000 [4:06:50<7:05:09, 27.44it/s]global step 300000, trans_decision ep_re 300.18749353329036

{"global_step": 300000, "eval_re": [225.13381737618656, 413.6742910836858, 
48.246354222294656, 398.2309164981435, 95.87572462673953, 327.249095136634, 
446.45033927817656, 629.0404196649465, 76.86327363919852, 341.11070380689745], 
"eval_len": [251, 1000, 95, 1000, 89, 325, 481, 1000, 119, 1000]}

 31%|███       | 309998/1000000 [4:15:20<6:56:33, 27.61it/s]global step 310000, trans_decision ep_re 296.89163523800005

{"global_step": 310000, "eval_re": [78.90931314139291, 303.10058792811543, 
852.5572532347011, 204.45955850878613, 122.89051459222377, 54.50818039011738, 
244.06811905194672, 658.3248962976423, 322.81747049482, 127.2804587402541], 
"eval_len": [89, 311, 1000, 169, 124, 62, 291, 1000, 321, 143]}

 32%|███▏      | 319999/1000000 [4:23:30<6:57:00, 27.18it/s]global step 320000, trans_decision ep_re 291.78895746184696

{"global_step": 320000, "eval_re": [105.11122225005589, 820.4783624355516, 
364.7885696329352, 40.16021669922102, 172.42065782950058, 118.63703534614544, 
91.3683901402452, 504.9252395939364, 164.07826386277145, 535.9216168281066], 
"eval_len": [132, 840, 473, 1000, 252, 188, 100, 1000, 239, 480]}

 33%|███▎      | 329998/1000000 [4:32:00<6:46:16, 27.49it/s]global step 330000, trans_decision ep_re 258.4641117102499

{"global_step": 330000, "eval_re": [275.3821006149987, 651.8689008833568, 
278.29681134350864, 211.72676461757337, 100.72509436498602, 151.20577075047294, 
392.40078096269053, 268.71712495907383, 246.65266625513237, 7.665102350706309], 
"eval_len": [219, 1000, 263, 249, 127, 162, 395, 247, 266, 30]}

 34%|███▍      | 339999/1000000 [4:40:10<6:44:02, 27.22it/s]global step 340000, trans_decision ep_re 202.43014619966783

{"global_step": 340000, "eval_re": [526.8517436625326, 132.4564080487017, 
96.99637906745792, 116.93720971716775, 68.14942630754325, 486.84837121514744, 
10.242304303334535, 463.41162510876785, 52.50747511069781, 69.9005194553272], 
"eval_len": [590, 148, 139, 160, 56, 493, 39, 509, 100, 89]}

 35%|███▍      | 349997/1000000 [4:48:30<6:36:57, 27.29it/s]global step 350000, trans_decision ep_re 373.1690502651911

{"global_step": 350000, "eval_re": [485.4442954887954, 755.2490226141597, 
175.3084332567494, 39.91917077548526, 251.92698304936022, 682.6970582893404, 
176.81297799021357, 453.2519797450018, 213.70133210406172, 497.37924933874376], 
"eval_len": [479, 1000, 252, 52, 214, 622, 185, 392, 212, 1000]}

 36%|███▌      | 359998/1000000 [4:56:50<6:28:25, 27.46it/s]global step 360000, trans_decision ep_re 367.6251753504697

{"global_step": 360000, "eval_re": [5.245862238469447, 436.736743933033, 
510.0251189975128, 33.03044567529196, 550.6749837354522, 246.3124701889145, 
381.81630385588335, 325.92703303579896, 728.3219189681137, 458.16087287622776], 
"eval_len": [53, 1000, 616, 65, 719, 201, 413, 350, 1000, 370]}

 37%|███▋      | 369997/1000000 [5:05:10<6:26:31, 27.17it/s]global step 370000, trans_decision ep_re 347.60911531937666

{"global_step": 370000, "eval_re": [297.49822525325345, 168.51153267029426, 
1067.7249674097254, 144.78628332029643, 386.5899032223683, 303.82473465305964, 
402.45954862338243, 79.92364781692247, 590.2062705524247, 34.566039672040226], 
"eval_len": [332, 187, 966, 197, 1000, 276, 1000, 110, 1000, 44]}

 38%|███▊      | 379999/1000000 [5:13:30<6:21:52, 27.06it/s]global step 380000, trans_decision ep_re 362.9242007464086

{"global_step": 380000, "eval_re": [16.498487697241174, 27.646859150916054, 
858.044893231546, 151.75251034796977, 483.0712868776881, 155.65392254321674, 
397.1866542451037, 624.3912305954952, 695.8301633505123, 219.16599942439643], 
"eval_len": [42, 93, 1000, 149, 483, 157, 590, 683, 689, 210]}

 39%|███▉      | 389999/1000000 [5:21:50<6:13:30, 27.22it/s]global step 390000, trans_decision ep_re 306.5624331207399

{"global_step": 390000, "eval_re": [32.6975628524709, 220.33030094090788, 
107.59271543489973, 612.8399831478685, 199.92743325599358, 451.3524547866359, 
194.3526477386522, 668.6644439665639, 554.7037382483968, 23.163050835009773], 
"eval_len": [60, 311, 113, 706, 378, 446, 274, 1000, 663, 26]}

 40%|███▉      | 399999/1000000 [5:30:10<6:08:47, 27.12it/s]global step 400000, trans_decision ep_re 300.74793621347237

{"global_step": 400000, "eval_re": [559.0537898790554, 78.67336920963325, 
111.30586472717319, 318.14275194864405, 596.0005905966492, 401.2444605295963, 
484.0879451264824, 67.64313320369087, 169.95888767864886, 221.36856923515032], 
"eval_len": [642, 98, 86, 328, 652, 1000, 359, 93, 290, 256]}

 41%|████      | 409999/1000000 [5:38:30<6:00:32, 27.27it/s]global step 410000, trans_decision ep_re 227.47389190446302

{"global_step": 410000, "eval_re": [506.12486725290796, 201.96273380905467, 
575.4419729240074, 318.89328147686166, 64.84627747465076, 141.70686071763043, 
114.18629214807603, 35.19705859318571, 105.79669608047871, 210.58287856777687], 
"eval_len": [1000, 154, 604, 368, 91, 187, 96, 61, 103, 321]}

 42%|████▏     | 419999/1000000 [5:46:50<5:54:52, 27.24it/s]global step 420000, trans_decision ep_re 356.67866071101867

{"global_step": 420000, "eval_re": [394.02528834267395, 253.2544355324773, 
694.8568247155849, 948.7533004832045, 134.9256053787169, 12.470360493462744, 
356.6856780303013, 155.03622304658342, 187.1562736317159, 429.62261745546596], 
"eval_len": [405, 247, 599, 1000, 1000, 33, 335, 218, 142, 403]}

 43%|████▎     | 429999/1000000 [5:55:10<5:48:27, 27.26it/s]global step 430000, trans_decision ep_re 441.893150469972

{"global_step": 430000, "eval_re": [477.3177823965615, 869.8023024707494, 
53.243650838099754, 957.954311862987, 753.2517423603655, 183.99211087777678, 
10.968362134077145, 105.67108534342105, 377.7823533795738, 628.9478030361076], 
"eval_len": [540, 1000, 63, 924, 1000, 168, 32, 109, 367, 628]}

 44%|████▍     | 439997/1000000 [6:03:30<5:45:01, 27.05it/s]global step 440000, trans_decision ep_re 237.57159912859908

{"global_step": 440000, "eval_re": [9.79026611023685, 25.63072243101041, 
262.9968907602985, 14.562837863774199, 512.6580256801992, 828.6568174263745, 
64.64291131168116, 138.83931342285666, 186.5392683683781, 331.398937911181], 
"eval_len": [26, 52, 273, 33, 487, 1000, 79, 145, 267, 1000]}

 45%|████▍     | 449999/1000000 [6:11:50<5:39:11, 27.03it/s]global step 450000, trans_decision ep_re 293.11383648339574

{"global_step": 450000, "eval_re": [139.1093592343703, 598.7684817865892, 
125.14980049027918, 302.3959275047756, 28.11032961591631, 497.4552718356983, 
103.39184463826398, 394.9670831517792, 490.80014541101576, 250.99012116526964], 
"eval_len": [192, 635, 157, 358, 39, 1000, 119, 1000, 1000, 234]}

 46%|████▌     | 459998/1000000 [6:20:10<5:24:05, 27.77it/s]global step 460000, trans_decision ep_re 380.2410655888092

{"global_step": 460000, "eval_re": [19.351105254048278, 445.346542873195, 
167.4278814350125, 485.97968915057106, 325.22459079771744, 763.723264799108, 
211.7479009576675, 386.1974827310226, 353.0143820933607, 644.3978157963886], 
"eval_len": [49, 513, 136, 1000, 386, 799, 292, 437, 379, 672]}

 47%|████▋     | 469998/1000000 [6:28:30<5:21:13, 27.50it/s]global step 470000, trans_decision ep_re 573.2193969829152

{"global_step": 470000, "eval_re": [985.5575326870971, 496.2834438746818, 
42.75764670183635, 46.42311970083038, 869.4010999638962, 650.7679033847415, 
889.3419419759606, 953.9390700183351, 783.8566363270265, 13.865575194747512], 
"eval_len": [1000, 1000, 73, 93, 1000, 766, 1000, 1000, 715, 63]}

 48%|████▊     | 479997/1000000 [6:37:00<5:18:56, 27.17it/s]global step 480000, trans_decision ep_re 408.78099644370786

{"global_step": 480000, "eval_re": [320.06783990852165, 534.2856195073709, 
38.17235968794463, 537.1469241475413, 134.25231616098407, 455.9117186113235, 
530.475811531575, 591.7560343357491, 205.63751327685316, 740.1038272692152], 
"eval_len": [470, 648, 44, 699, 138, 474, 1000, 699, 260, 1000]}

 49%|████▉     | 489998/1000000 [6:45:20<5:04:16, 27.94it/s]global step 490000, trans_decision ep_re 173.19257338852518

{"global_step": 490000, "eval_re": [469.1873056999159, 57.51205636380369, 
60.83547835189722, 665.3922804969108, 177.0288716624347, 202.79554397120833, 
-865.7905273045126, 526.2368593346382, 203.30693846386828, 235.42092684508688], 
"eval_len": [416, 67, 113, 719, 150, 157, 1000, 1000, 185, 235]}

 50%|████▉     | 499997/1000000 [6:53:40<5:05:51, 27.25it/s]global step 500000, trans_decision ep_re 167.9327282467662

{"global_step": 500000, "eval_re": [48.80348058684458, 58.424917352478, 
156.4994052389803, 338.16500331620017, 73.12522874785462, 329.05418582902894, 
72.16071986371927, 207.83642138248584, 266.30045481984337, 128.95746533022705], 
"eval_len": [66, 71, 126, 388, 106, 1000, 72, 169, 1000, 1000]}

 51%|█████     | 509999/1000000 [7:02:00<5:01:12, 27.11it/s]global step 510000, trans_decision ep_re 184.80047709467902

{"global_step": 510000, "eval_re": [558.2352861841556, 153.5874426667496, 
69.85319856197928, 77.42413899505264, 259.15228449375576, 15.844409677854282, 
186.8138876609999, 162.91162008540158, 30.040426788252258, 334.1420758325891], 
"eval_len": [1000, 174, 53, 69, 215, 71, 212, 208, 46, 384]}

 52%|█████▏    | 519999/1000000 [7:10:10<4:53:47, 27.23it/s]global step 520000, trans_decision ep_re 407.2045485531306

{"global_step": 520000, "eval_re": [439.1850213517743, 382.9747219030423, 
358.8840938509041, 897.0574506581593, 662.6640559436174, 123.57258799109397, 
537.3653743981752, 420.97897266682867, 190.12342164351588, 59.23978512419494], 
"eval_len": [689, 1000, 298, 1000, 1000, 146, 501, 1000, 202, 80]}

 53%|█████▎    | 529999/1000000 [7:18:40<4:48:54, 27.11it/s]global step 530000, trans_decision ep_re 265.4193612864707

{"global_step": 530000, "eval_re": [110.33625012618388, 398.76991877020424, 
439.40311120634993, 148.58187445057044, 435.17432918865177, 245.8696564299162, 
366.83377400401764, 273.4652691304835, 10.30983795480177, 225.44959160352747], 
"eval_len": [1000, 470, 434, 165, 475, 636, 1000, 748, 30, 1000]}

 54%|█████▍    | 539999/1000000 [7:27:00<4:41:30, 27.23it/s]global step 540000, trans_decision ep_re 340.6992340888208

{"global_step": 540000, "eval_re": [170.04289332758262, 637.1787797346699, 
349.38919257553016, 127.71832855567357, 282.7951154348993, 433.04272806101073, 
37.508218756503084, 231.90998734851638, 451.5179685418927, 685.8891285519295], 
"eval_len": [154, 689, 402, 132, 358, 481, 101, 240, 1000, 814]}

 55%|█████▍    | 549999/1000000 [7:35:20<4:34:29, 27.32it/s]global step 550000, trans_decision ep_re 486.09802323302245

{"global_step": 550000, "eval_re": [477.81060821170854, 522.347795700085, 
921.169394507269, 522.091469557493, 493.5581439988055, 127.8306080861807, 
379.47570292785724, 63.02959183989655, 635.7603230898019, 717.906594411127], 
"eval_len": [462, 617, 774, 456, 428, 161, 329, 101, 1000, 623]}

 56%|█████▌    | 559998/1000000 [7:43:31<4:26:09, 27.55it/s]global step 560000, trans_decision ep_re 150.1973718647027

{"global_step": 560000, "eval_re": [83.11770550569061, 621.2277834480169, 
6.9746085248448395, 52.74392401642864, 90.97864167794596, 39.47227750275938, 
444.8996133882846, -4.928413553636383, 65.54574494841698, 101.94183318827537], 
"eval_len": [91, 490, 31, 96, 115, 88, 364, 42, 99, 166]}

 57%|█████▋    | 569999/1000000 [7:52:00<4:23:09, 27.23it/s]global step 570000, trans_decision ep_re 644.822470149559

{"global_step": 570000, "eval_re": [1027.8511181426456, 756.0753748658831, 
655.0842075377592, 893.6989593180739, 85.91794133174493, 649.2932823219791, 
20.196180918055532, 262.535758955088, 1036.509117705545, 1061.062760398816], 
"eval_len": [1000, 818, 555, 788, 86, 585, 30, 291, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [8:00:20<4:14:18, 27.53it/s]global step 580000, trans_decision ep_re 255.66951624491338

{"global_step": 580000, "eval_re": [177.6151693611677, 68.31423232749522, 
21.18929564487394, 223.93857833034775, 163.24275200700112, 552.7665662051677, 
190.01497278746498, 573.3014656917899, 92.04544635305022, 494.26668374077553], 
"eval_len": [151, 66, 23, 257, 174, 1000, 257, 615, 179, 1000]}

 59%|█████▉    | 589999/1000000 [8:08:40<4:11:49, 27.13it/s]global step 590000, trans_decision ep_re 287.1225191695601

{"global_step": 590000, "eval_re": [411.4145179040944, 152.0579268214292, 
-68.78821877435213, 415.2870072177669, 304.5012557995637, 30.827458327491076, 
242.18707028805986, 699.1589788179822, 184.2142830789478, 500.36491221461796], 
"eval_len": [1000, 179, 1000, 362, 422, 75, 246, 666, 209, 1000]}

 60%|█████▉    | 599998/1000000 [8:17:10<4:01:20, 27.62it/s]global step 600000, trans_decision ep_re 438.6555308338483

{"global_step": 600000, "eval_re": [741.5445858843965, 35.32266545523687, 
304.6158689914558, 75.78106687973117, 170.53028810463363, 161.58714258763078, 
785.7656619647508, 779.3650767929904, 618.9449839481596, 713.0979677294976], 
"eval_len": [1000, 50, 341, 123, 147, 218, 817, 721, 626, 1000]}

 61%|██████    | 609997/1000000 [8:25:30<3:59:54, 27.09it/s]global step 610000, trans_decision ep_re 309.1730850315424

{"global_step": 610000, "eval_re": [148.7802551439928, 160.83739283328666, 
90.1011277459936, 769.2558644717952, 107.51516502141997, 67.33013558574285, 
166.20715166130682, 558.9072791038731, 507.22192308462496, 515.574555663388], 
"eval_len": [207, 133, 97, 1000, 117, 81, 136, 1000, 442, 419]}

 62%|██████▏   | 619999/1000000 [8:33:50<3:51:16, 27.38it/s]global step 620000, trans_decision ep_re 237.43214722064013

{"global_step": 620000, "eval_re": [552.7352763917431, 165.44780728013646, 
26.70627580010457, 36.930315972577496, 114.40104570288881, 364.8536108331063, 
63.74457573349946, 51.436874972173065, 460.436580350372, 537.6291091697999], 
"eval_len": [1000, 168, 49, 59, 119, 378, 101, 121, 1000, 529]}

 63%|██████▎   | 629998/1000000 [8:42:10<3:45:20, 27.37it/s]global step 630000, trans_decision ep_re 184.6230573903853

{"global_step": 630000, "eval_re": [335.8173211400499, 105.58350152036606, 
383.6871073479288, 36.713468590198836, 109.3092517854228, 53.185299070257, 
4.501965551172873, 424.85697173377486, 242.17776902957195, 150.39791813510996], 
"eval_len": [441, 126, 432, 50, 142, 94, 32, 371, 227, 154]}

 64%|██████▍   | 639998/1000000 [8:50:20<3:38:26, 27.47it/s]global step 640000, trans_decision ep_re 270.92560869627835

{"global_step": 640000, "eval_re": [933.8742489727746, 132.01578383142203, 
175.4214380546442, 201.20970073279804, 88.22029725107036, 127.83662018758339, 
228.83287729007063, 12.864443125373235, 686.0843377990424, 122.89633971800481], 
"eval_len": [775, 142, 238, 1000, 110, 280, 258, 14, 603, 157]}

 65%|██████▍   | 649997/1000000 [8:58:40<3:33:54, 27.27it/s]global step 650000, trans_decision ep_re 403.8116129125141

{"global_step": 650000, "eval_re": [448.8875153568654, 243.47248837346186, 
682.2478532207639, 274.63693898760164, 149.83045667854864, 909.5290811483053, 
21.374226688815902, 183.2943469625465, 298.7036404731815, 826.1395812350503], 
"eval_len": [450, 306, 624, 303, 112, 969, 30, 190, 376, 1000]}

 66%|██████▌   | 659997/1000000 [9:07:00<3:27:28, 27.31it/s]global step 660000, trans_decision ep_re 280.08317048533706

{"global_step": 660000, "eval_re": [12.491057288045688, 110.07107179077563, 
677.9590518546433, 827.9551349807969, 114.37974733978595, -78.39225221248287, 
268.02547822016555, 269.2907167553178, 63.788801711289324, 535.2628971250335], 
"eval_len": [38, 204, 1000, 793, 146, 1000, 269, 250, 77, 577]}

 67%|██████▋   | 669997/1000000 [9:15:20<3:24:02, 26.96it/s]global step 670000, trans_decision ep_re 271.73278286152674

{"global_step": 670000, "eval_re": [425.5478259862157, 25.28247114788553, 
233.5502037405723, 143.85959111057204, 524.6893296176443, 101.2825641885542, 
198.99550747266338, 539.4715298070649, 481.9390081657661, 42.70979737832924], 
"eval_len": [509, 58, 240, 201, 1000, 131, 268, 651, 448, 61]}

 68%|██████▊   | 679998/1000000 [9:23:40<3:16:39, 27.12it/s]global step 680000, trans_decision ep_re 436.6524844270959

{"global_step": 680000, "eval_re": [0.6786682560310442, 112.84186538792854, 
822.3172868640185, 310.75475402422455, 484.8803630270628, 786.3555974518034, 
278.28957465016344, 539.867496463225, 562.9795298313012, 467.55970831520085], 
"eval_len": [232, 136, 1000, 398, 542, 1000, 546, 697, 1000, 482]}

 69%|██████▉   | 689999/1000000 [9:32:10<3:10:33, 27.11it/s]global step 690000, trans_decision ep_re 256.907429746591

{"global_step": 690000, "eval_re": [228.49863122150413, 176.4617468657354, 
729.3828476746917, 118.85334148353016, 102.00610490966054, 258.3811269151612, 
227.23252219822976, 99.20749064853682, 215.48913979036828, 413.5613457584919], 
"eval_len": [221, 384, 1000, 164, 98, 258, 260, 1000, 339, 443]}

 70%|██████▉   | 699998/1000000 [9:40:30<3:01:22, 27.57it/s]global step 700000, trans_decision ep_re 445.15546685444554

{"global_step": 700000, "eval_re": [475.8688403097965, 249.2519249700235, 
398.4140486145149, 580.6729875860102, 525.3022932346472, 49.81851439511673, 
725.3216692885022, 746.9456595204945, 324.656158407377, 375.30257221797206], 
"eval_len": [1000, 192, 448, 489, 554, 96, 1000, 856, 590, 420]}

 71%|███████   | 709999/1000000 [9:48:50<2:58:28, 27.08it/s]global step 710000, trans_decision ep_re 544.7734122565153

{"global_step": 710000, "eval_re": [876.8548205247918, 819.8133303872003, 
534.2817418848226, 478.50252983492913, 180.44103482795117, 345.2731525028427, 
790.7115032733828, 748.3740071282776, 161.33722105239457, 512.1447811485584], 
"eval_len": [1000, 875, 555, 1000, 197, 409, 1000, 1000, 192, 605]}

 72%|███████▏  | 719997/1000000 [9:57:20<2:49:31, 27.53it/s]global step 720000, trans_decision ep_re 195.6366869073101

{"global_step": 720000, "eval_re": [524.0063803241878, 60.48417125945925, 
413.4804505041417, 100.22366681441615, 47.258612302103586, 400.94391968921224, 
7.96636520495232, 51.38049510419466, 98.94431555206587, 251.6784923183672], 
"eval_len": [1000, 102, 1000, 127, 111, 330, 33, 76, 156, 188]}

 73%|███████▎  | 729999/1000000 [10:05:30<2:43:38, 27.50it/s]global step 730000, trans_decision ep_re 293.3316719059852

{"global_step": 730000, "eval_re": [52.422367833368654, 80.76624384834578, 
89.61687360858247, 330.8862058687268, 564.3991580193921, 60.745006162216086, 
227.5293539685112, 566.8938921464699, 85.79212786545034, 874.2654897387889], 
"eval_len": [63, 91, 93, 378, 831, 109, 403, 1000, 275, 1000]}

 74%|███████▍  | 739997/1000000 [10:13:50<2:37:53, 27.45it/s]global step 740000, trans_decision ep_re 446.178828345697

{"global_step": 740000, "eval_re": [51.64240675840996, 783.0506768618801, 
862.3353129679429, 36.21915399260083, 482.9536475436304, 138.0594433963895, 
505.0018021730178, 469.40349477697913, 578.6043323761658, 554.5180126099542], 
"eval_len": [75, 717, 1000, 75, 1000, 175, 429, 434, 1000, 565]}

 75%|███████▍  | 749998/1000000 [10:22:10<2:30:53, 27.62it/s]global step 750000, trans_decision ep_re 308.3800397862923

{"global_step": 750000, "eval_re": [214.24651275697386, 538.310309528931, 
312.50462656967636, 107.32020088520076, 712.9906478094774, 576.2840516671519, 
191.2345754461284, 18.00281415531793, 173.08288089863277, 239.82377814543239], 
"eval_len": [268, 668, 378, 208, 1000, 1000, 326, 31, 212, 264]}

 76%|███████▌  | 759999/1000000 [10:30:30<2:26:13, 27.36it/s]global step 760000, trans_decision ep_re 514.63940003545

{"global_step": 760000, "eval_re": [456.5986803537952, 958.0286276108584, 
868.1061955949307, 214.39650281125265, 102.66193240757364, 516.224939159698, 
91.47322153924674, 265.0318723514331, 1031.698693829588, 642.1733346961236], 
"eval_len": [1000, 1000, 919, 1000, 139, 694, 158, 260, 1000, 1000]}

 77%|███████▋  | 769997/1000000 [10:38:50<2:20:19, 27.32it/s]global step 770000, trans_decision ep_re 369.7276182805571

{"global_step": 770000, "eval_re": [346.31035759598166, 366.6991480607052, 
300.77258178348563, 121.04464523699592, 699.3623229455271, 829.6903880183985, 
513.0555290446883, 394.0898912671665, 102.08127587755718, 24.1700429750649], 
"eval_len": [390, 1000, 364, 98, 1000, 1000, 1000, 341, 139, 29]}

 78%|███████▊  | 779998/1000000 [10:47:10<2:12:01, 27.77it/s]global step 780000, trans_decision ep_re 314.21955454386034

{"global_step": 780000, "eval_re": [29.191808555785396, 47.61276808787542, 
87.40372996276612, 787.9473832754211, 660.2940615224655, 179.15174146594129, 
351.3732375016661, 155.90278561620377, 733.5637702229956, 109.75425922748299], 
"eval_len": [84, 88, 146, 1000, 726, 215, 386, 170, 1000, 133]}

 79%|███████▉  | 789998/1000000 [10:55:30<2:06:12, 27.73it/s]global step 790000, trans_decision ep_re 477.7534981108889

{"global_step": 790000, "eval_re": [965.3932248689262, 121.26902192110644, 
338.1316284868454, 252.5610116234078, 272.5435780521478, 296.8437298956232, 
214.46652723218034, 1071.2998797362634, 533.4143195360939, 711.6120597562946], 
"eval_len": [1000, 257, 317, 225, 348, 267, 282, 1000, 517, 695]}

 80%|███████▉  | 799997/1000000 [11:03:50<2:01:40, 27.39it/s]global step 800000, trans_decision ep_re 419.5849277307449

{"global_step": 800000, "eval_re": [944.0105960972559, 74.06851795453544, 
98.27838731144833, 312.09698258099075, 246.15021477105287, 7.805465747629574, 
504.5185671247366, 919.4222952327701, 501.22867558669043, 588.2695749003384], 
"eval_len": [977, 135, 101, 372, 321, 31, 558, 1000, 619, 1000]}

 81%|████████  | 809997/1000000 [11:12:10<1:55:52, 27.33it/s]global step 810000, trans_decision ep_re 273.57758167320327

{"global_step": 810000, "eval_re": [318.086723336925, 83.6626208202592, 
31.719959412732656, 802.7016195426901, 351.51824162467335, 196.144476179195, 
296.7567146022616, 51.78990938408114, 450.9132465785197, 152.4823052506951], 
"eval_len": [350, 85, 51, 669, 304, 190, 295, 59, 504, 179]}

 82%|████████▏ | 819999/1000000 [11:20:20<1:49:54, 27.30it/s]global step 820000, trans_decision ep_re 562.2315957204173

{"global_step": 820000, "eval_re": [628.0274818575775, 1003.7590482105247, 
214.65418231887975, 367.09987822481776, 673.9670494993472, 804.6107061416507, 
553.2306449673324, 422.816727023437, 936.3587085763845, 17.791530384221428], 
"eval_len": [1000, 1000, 327, 328, 782, 1000, 1000, 1000, 1000, 32]}

 83%|████████▎ | 829997/1000000 [11:28:50<1:43:04, 27.49it/s]global step 830000, trans_decision ep_re 399.7263095429619

{"global_step": 830000, "eval_re": [239.99107250949785, 542.2054106187848, 
65.3040128808254, 791.2704297282452, 59.236231127036326, 920.159894013367, 
50.764022323251325, 539.6136251301573, 681.2768904988145, 107.44150659964002], 
"eval_len": [350, 1000, 117, 1000, 109, 1000, 99, 1000, 1000, 175]}

 84%|████████▍ | 839999/1000000 [11:37:10<1:37:47, 27.27it/s]global step 840000, trans_decision ep_re 517.4403200500068

{"global_step": 840000, "eval_re": [538.450310891461, 314.62165432351065, 
712.811409788631, 333.7097436932113, 629.6100307141077, 646.767629702883, 
672.2596075103409, 769.2560847337332, 446.00231492927503, 110.91441421291445], 
"eval_len": [821, 422, 1000, 432, 1000, 676, 1000, 1000, 1000, 121]}

 85%|████████▍ | 849997/1000000 [11:45:40<1:31:03, 27.45it/s]global step 850000, trans_decision ep_re 453.5647859906024

{"global_step": 850000, "eval_re": [757.5440947172569, 164.49984001960127, 
731.6977710123888, 113.42645101047157, 558.1096530337596, 624.7784122673459, 
409.63779052988906, 723.9110214176211, 424.01644144257267, 28.026384455116805], 
"eval_len": [962, 195, 737, 99, 751, 1000, 675, 934, 504, 55]}

 86%|████████▌ | 859998/1000000 [11:54:00<1:24:07, 27.74it/s]global step 860000, trans_decision ep_re 366.0432883124325

{"global_step": 860000, "eval_re": [184.21033430627932, 500.15693063713417, 
358.0054405500724, 1071.6964173505864, -9.939251195070202, 228.3217861970939, 
106.26168397736463, 823.3422062923061, 379.25650682341995, 19.12082818513828], 
"eval_len": [305, 555, 396, 1000, 61, 275, 143, 757, 385, 38]}

 87%|████████▋ | 869999/1000000 [12:02:20<1:18:49, 27.48it/s]global step 870000, trans_decision ep_re 465.995789290844

{"global_step": 870000, "eval_re": [166.08321435200216, 266.5939469578453, 
92.73337003519308, 937.5650833538627, 228.35522514294075, 895.869772104341, 
926.7955775743745, 75.41438972906026, 171.6184955467597, 898.9288181120603], 
"eval_len": [181, 260, 132, 1000, 323, 1000, 1000, 134, 161, 1000]}

 88%|████████▊ | 879999/1000000 [12:10:40<1:13:35, 27.18it/s]global step 880000, trans_decision ep_re 318.46469039915536

{"global_step": 880000, "eval_re": [468.8532827475187, 86.58114310293243, 
63.78075775633068, 228.26481109722116, 947.9248428743169, 142.5600063896966, 
142.6080194481733, 342.04084463507115, 62.15544968259141, 699.8777462577013], 
"eval_len": [1000, 90, 97, 275, 1000, 166, 153, 325, 78, 794]}

 89%|████████▉ | 889999/1000000 [12:18:50<1:06:50, 27.43it/s]global step 890000, trans_decision ep_re 340.3558229673485

{"global_step": 890000, "eval_re": [275.184758038206, 76.29187356836803, 
309.6357675367279, 41.09631772136788, 1109.460734513181, 472.7351667749865, 
438.9760872680327, 140.73658190973552, 119.8660177460811, 419.5749245967986], 
"eval_len": [292, 97, 252, 99, 1000, 427, 473, 118, 146, 499]}

 90%|████████▉ | 899999/1000000 [12:27:10<1:01:05, 27.28it/s]global step 900000, trans_decision ep_re 244.00977709854905

{"global_step": 900000, "eval_re": [448.50630227909386, 244.6755456355699, 
24.076007412346584, 38.057889288521665, 142.93797374080728, 184.90517259490878, 
343.78413782634937, 211.59173923674996, 285.33785865594103, 516.2251443152024], 
"eval_len": [1000, 232, 28, 114, 121, 229, 401, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [12:35:30<54:36, 27.46it/s]global step 910000, trans_decision ep_re 425.22762661381864

{"global_step": 910000, "eval_re": [959.0890125384756, 517.3514878082602, 
336.97924887776685, 379.26043053649784, 256.92949592397656, 263.7018130326752, 
459.9231907393435, 201.29501165418316, 380.72902395391645, 497.017551073091], 
"eval_len": [1000, 1000, 332, 392, 261, 340, 1000, 209, 442, 505]}

 92%|█████████▏| 919997/1000000 [12:43:50<48:45, 27.34it/s]global step 920000, trans_decision ep_re 396.12443918943217

{"global_step": 920000, "eval_re": [909.3486968623919, 22.76781139440585, 
349.8013801743351, 688.2541900096473, 20.776766681751177, 733.6125922121561, 
759.1601773239678, 97.36776215475035, 93.06532168702506, 287.08969339389097], 
"eval_len": [1000, 28, 336, 1000, 53, 709, 889, 103, 120, 316]}

 93%|█████████▎| 929998/1000000 [12:52:10<41:34, 28.06it/s]global step 930000, trans_decision ep_re 323.96836914364746

{"global_step": 930000, "eval_re": [346.33834330302534, 708.9485385889536, 
564.21459442468, 85.66607667350475, 308.5276394008261, 424.63093313535796, 
340.84537124227717, 151.22647862684963, 72.37600310035282, 236.909712940647], 
"eval_len": [391, 766, 584, 122, 383, 1000, 292, 184, 107, 386]}

 94%|█████████▍| 939999/1000000 [13:00:20<36:41, 27.26it/s]global step 940000, trans_decision ep_re 500.2856543085383

{"global_step": 940000, "eval_re": [1025.3541093891772, 674.3838761149166, 
670.4500760996426, 402.7551548587776, 149.19062253218934, 196.89832662485344, 
70.88711126179635, 389.54390823623265, 999.9083858228674, 423.4849721449298], 
"eval_len": [1000, 1000, 608, 1000, 147, 240, 70, 469, 1000, 404]}

 95%|█████████▍| 949998/1000000 [13:08:50<29:49, 27.94it/s]global step 950000, trans_decision ep_re 401.456092017065

{"global_step": 950000, "eval_re": [252.0091198804186, 596.0940621292332, 
81.65956321496212, 901.4811264387671, 419.96705319927304, 113.21655837147091, 
363.33564908854714, 304.23213059054257, 965.2372153872718, 17.32844187016296], 
"eval_len": [220, 663, 95, 1000, 1000, 116, 508, 328, 1000, 22]}

 96%|█████████▌| 959998/1000000 [13:17:00<23:48, 28.00it/s]global step 960000, trans_decision ep_re 420.3631324246543

{"global_step": 960000, "eval_re": [386.63890203285234, 564.0777285418726, 
217.56499937747967, 115.99671760929739, 400.17779059265234, 988.0673419190093, 
85.23388499608447, 438.0140801073256, 937.3264603138589, 70.53341875611024], 
"eval_len": [483, 1000, 258, 155, 453, 1000, 104, 1000, 1000, 95]}

 97%|█████████▋| 969998/1000000 [13:25:20<17:58, 27.83it/s]global step 970000, trans_decision ep_re 526.5157558481945

{"global_step": 970000, "eval_re": [73.41375136560069, 818.2219743950827, 
79.30470034150187, 169.39644412071155, 545.5688427828125, 954.5953969653249, 
874.2655718375693, 763.3790779871302, 81.28663090199895, 905.725167784212], 
"eval_len": [121, 1000, 101, 211, 1000, 1000, 923, 791, 106, 1000]}

 98%|█████████▊| 979997/1000000 [13:33:50<12:07, 27.51it/s]global step 980000, trans_decision ep_re 565.6025100014929

{"global_step": 980000, "eval_re": [536.7811242732223, 433.543293287655, 
425.7376806851718, 991.6892118332682, 891.0889313509921, 371.59823553629235, 
91.35703008878178, 557.2646009621526, 434.2572386523338, 922.7077533450596], 
"eval_len": [490, 441, 467, 1000, 1000, 432, 135, 1000, 490, 1000]}

 99%|█████████▉| 989999/1000000 [13:42:10<06:04, 27.44it/s]global step 990000, trans_decision ep_re 287.2477318171664

{"global_step": 990000, "eval_re": [422.13042207666837, 292.66256695375023, 
480.78413421819664, 167.99018118853337, 450.35154729369225, 225.48544358810096, 
93.53936791345348, 345.79287865110683, 216.60225897707414, 177.13851731108807], 
"eval_len": [586, 269, 1000, 228, 421, 238, 113, 1000, 235, 155]}

100%|█████████▉| 999997/1000000 [13:50:30<00:00, 27.51it/s]global step 1000000, trans_decision ep_re 490.22906671916246

{"global_step": 1000000, "eval_re": [616.279246710599, 954.4307227397527, 
226.42362973699727, 756.7009363821658, 476.64072240428834, 274.91573737083183, 
1073.8387599249268, 313.0619329235116, 101.02322618502379, 108.97575281352721], 
"eval_len": [514, 1000, 243, 624, 439, 254, 894, 357, 145, 137]}

100%|██████████| 1000000/1000000 [13:50:41<00:00, 20.06it/s]
