
{
    'exp_name': 'VDPO',
    'env': 'Hopper-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 4,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)'
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [02:28<5:35:52, 49.13it/s]global step 10000, trans_decision ep_re 214.5162860511146

{"global_step": 10000, "eval_re": [233.26295946941428, 224.2284030787919, 
60.61114194254857, 232.70943110115226, 231.48114963385896, 233.45628839321833, 
230.54088563133195, 233.51671660739754, 231.2726518341406, 234.0832328192913], 
"eval_len": [106, 100, 37, 104, 104, 105, 104, 105, 104, 106]}

  2%|▏         | 19997/1000000 [07:12<6:53:20, 39.52it/s]global step 20000, trans_decision ep_re 351.76084870765357

{"global_step": 20000, "eval_re": [283.61644396706015, 271.59829433178714, 
486.893032907487, 19.56944113267374, 499.9389882880003, 183.36698283734677, 
493.24636889601817, 421.36644263686975, 280.9948875965872, 577.0176044827052], 
"eval_len": [122, 115, 177, 57, 181, 93, 175, 162, 123, 202]}

  3%|▎         | 29999/1000000 [12:00<7:20:46, 36.68it/s]global step 30000, trans_decision ep_re 816.008856715467

{"global_step": 30000, "eval_re": [26.6503005188759, 235.3064814053323, 
1077.865739241045, 979.9010964264999, 1616.9251462058912, 117.12935253350591, 
1369.7290568938408, 1174.3207201054438, 384.3589596728902, 1177.901714151346], 
"eval_len": [23, 124, 398, 369, 593, 65, 494, 414, 178, 418]}

  4%|▍         | 39995/1000000 [16:33<5:23:48, 49.41it/s]global step 40000, trans_decision ep_re 1153.6034657960802

{"global_step": 40000, "eval_re": [2214.172200998751, 163.78227733448517, 
676.0297701088872, 1693.1618825465628, 1330.7422348810924, 711.3457599006559, 
1761.657368145613, 533.196025889883, 157.0197364737749, 2294.9274016810987], 
"eval_len": [703, 87, 264, 567, 428, 265, 553, 252, 85, 704]}

  5%|▍         | 49997/1000000 [21:17<6:43:18, 39.26it/s]global step 50000, trans_decision ep_re 702.6391054324587

{"global_step": 50000, "eval_re": [1356.795109892024, 1091.549733456513, 
211.66784639408775, 1110.7042264361276, 319.1090732988218, 870.8557492766705, 
1132.0117418236894, 72.05449886734247, 724.1829698073851, 137.4601050719244], 
"eval_len": [438, 362, 101, 365, 158, 279, 370, 44, 244, 81]}

  6%|▌         | 59999/1000000 [26:03<5:08:18, 50.82it/s]global step 60000, trans_decision ep_re 1084.4537366772952

{"global_step": 60000, "eval_re": [629.0209447234831, 429.87869936201724, 
1464.179580734285, 172.28013665296606, 557.2744848864158, 875.0959428742744, 
2656.626724417851, 2158.104869244411, 421.2758253707093, 1480.8001585065383], 
"eval_len": [221, 186, 534, 90, 220, 331, 943, 704, 171, 577]}

  7%|▋         | 69995/1000000 [30:15<4:48:14, 53.78it/s]global step 70000, trans_decision ep_re 1044.6770332676017

{"global_step": 70000, "eval_re": [949.740543569227, 2466.355856248606, 
353.1210805643961, 1090.7844554313979, 327.22930707995107, 991.3087951347469, 
1568.121118831361, 467.50532857028827, 1210.035174605684, 1022.5686726403575], 
"eval_len": [318, 835, 148, 358, 140, 347, 547, 191, 396, 351]}

  8%|▊         | 79997/1000000 [34:33<4:45:35, 53.69it/s]global step 80000, trans_decision ep_re 882.7635939130972

{"global_step": 80000, "eval_re": [143.8751562913637, 638.0629271574358, 
2944.36786055474, 580.4445218466516, 917.5684090173328, 662.9468895990559, 
300.9879858275974, 181.7178555497058, 880.2663176391615, 1577.3980156479277], 
"eval_len": [73, 225, 857, 202, 319, 227, 131, 92, 293, 519]}

  9%|▉         | 89994/1000000 [38:43<4:43:47, 53.44it/s]global step 90000, trans_decision ep_re 1628.83166260997

{"global_step": 90000, "eval_re": [954.8204093978583, 1458.5770538736676, 
2730.5387107324495, 2359.0170447372343, 1813.2087673082797, 3216.1352105225806, 
1512.8525871788731, 110.85832878733976, 1168.1654216621903, 964.1430918992285], 
"eval_len": [308, 462, 827, 733, 576, 1000, 466, 59, 393, 346]}

 10%|▉         | 99996/1000000 [42:54<4:38:35, 53.84it/s]global step 100000, trans_decision ep_re 1166.8663704361675

{"global_step": 100000, "eval_re": [1630.4654562563994, 906.8021134006157, 
945.3986127861692, 419.70958367052816, 1305.3974225571665, 941.5666173127621, 
1638.8727932891652, 178.20815004668222, 3305.5357060398383, 396.70724900234757],
"eval_len": [501, 283, 297, 165, 397, 313, 514, 91, 1000, 154]}

 11%|█         | 109995/1000000 [47:04<4:36:00, 53.74it/s]global step 110000, trans_decision ep_re 950.7253777474525

{"global_step": 110000, "eval_re": [961.2581615916804, 1129.3466751866756, 
1032.6286365008762, 186.9627045758847, 1239.138077264178, 1287.4823298718568, 
1283.1108134921953, 1228.5666254933274, 184.75242700403697, 974.0073264938144], 
"eval_len": [300, 347, 321, 93, 358, 367, 367, 358, 93, 300]}

 12%|█▏        | 119996/1000000 [51:06<4:36:46, 52.99it/s]global step 120000, trans_decision ep_re 1300.9930390107586

{"global_step": 120000, "eval_re": [867.2447988787134, 2042.9066342181486, 
2008.547118651449, 336.96207694125764, 2993.83198428841, 2494.0737486726653, 
219.2578337175451, 1285.7795351576124, 170.67265459710023, 590.6540049846863], 
"eval_len": [270, 626, 649, 141, 903, 772, 102, 417, 88, 213]}

 13%|█▎        | 129996/1000000 [55:24<4:29:18, 53.84it/s]global step 130000, trans_decision ep_re 987.0256698307865

{"global_step": 130000, "eval_re": [602.4501854878214, 1461.9960672218028, 
641.5831529632136, 379.37669110605106, 63.58308776765468, 1393.96789417962, 
2770.2900662892084, 666.1842354344482, 943.8159507272281, 947.0093671308161], 
"eval_len": [213, 445, 214, 152, 42, 436, 812, 241, 295, 302]}

 14%|█▍        | 139997/1000000 [59:38<4:26:49, 53.72it/s]global step 140000, trans_decision ep_re 1178.285693774367

{"global_step": 140000, "eval_re": [1237.2874728151567, 1762.9707151390933, 
657.8314612363733, 1210.8986091859413, 1457.2583432494102, 1739.077739772806, 
650.1968295710597, 1213.7491402961787, 932.0632634959721, 921.5233629816784], 
"eval_len": [362, 488, 228, 364, 458, 545, 226, 375, 294, 291]}

 15%|█▍        | 149999/1000000 [1:03:49<4:24:51, 53.49it/s]global step 150000, trans_decision ep_re 1042.9525185611417

{"global_step": 150000, "eval_re": [1234.5457546370496, 32.951195422570244, 
1063.3731060762407, 1238.1140195837322, 1330.0284308370674, 1279.337850717286, 
1203.4288750106175, 1784.4908831693847, 1100.1697092194693, 163.0853609379989], 
"eval_len": [381, 24, 316, 378, 389, 360, 345, 511, 335, 81]}

 16%|█▌        | 159994/1000000 [1:07:55<4:20:32, 53.73it/s]global step 160000, trans_decision ep_re 1287.843527667164

{"global_step": 160000, "eval_re": [130.55003565687375, 3505.076611499173, 
2122.83269189107, 122.45885075069346, 2855.3012285946347, 428.09524401479416, 
381.119625316886, 44.87303091496426, 2937.5489335010416, 350.57902453150774], 
"eval_len": [68, 1000, 617, 65, 821, 151, 146, 30, 831, 138]}

 17%|█▋        | 169995/1000000 [1:12:02<4:16:56, 53.84it/s]global step 170000, trans_decision ep_re 500.7364466940461

{"global_step": 170000, "eval_re": [643.7242330328816, 933.9606059873012, 
82.08162951162382, 1318.5088634541803, 209.70078307641347, 188.26175037888518, 
165.57353953935666, 1188.1293403907848, 96.88002017290142, 180.54370139613314], 
"eval_len": [217, 292, 50, 387, 106, 93, 79, 364, 54, 82]}

 18%|█▊        | 179996/1000000 [1:16:14<4:14:33, 53.69it/s]global step 180000, trans_decision ep_re 930.5715952133787

{"global_step": 180000, "eval_re": [386.72050126853105, 661.0828568674917, 
953.1550793573257, 811.0157791355506, 26.778474738994436, 1517.3576706037652, 
1739.6850902914946, 1850.026685577542, 418.84134441268753, 941.0524698804046], 
"eval_len": [147, 216, 295, 262, 22, 463, 524, 546, 175, 292]}

 19%|█▉        | 189995/1000000 [1:20:10<4:10:31, 53.89it/s]global step 190000, trans_decision ep_re 646.6888045833973

{"global_step": 190000, "eval_re": [300.14038675793427, 547.2727895456217, 
944.811319951047, 1237.055749353238, 1256.0730832961638, 949.1251222358962, 
51.43984845899362, 807.141540664316, 163.12154537633094, 210.70666019443243], 
"eval_len": [129, 208, 295, 393, 382, 298, 36, 266, 81, 95]}

 20%|█▉        | 199997/1000000 [1:24:19<7:22:47, 30.11it/s]global step 200000, trans_decision ep_re 544.166803842738

{"global_step": 200000, "eval_re": [33.920714788124286, 623.3467749614832, 
997.1576602752026, 47.66511614280851, 1074.5289634474514, 125.41145581410693, 
962.7205735340181, 972.6319050373326, 179.03609261592766, 425.24878181092373], 
"eval_len": [32, 215, 313, 34, 302, 64, 310, 300, 89, 165]}

 21%|██        | 209997/1000000 [1:28:28<4:05:43, 53.58it/s]global step 210000, trans_decision ep_re 1029.7365469109068

{"global_step": 210000, "eval_re": [187.39058030211064, 2556.6569879626595, 
959.66877343178, 977.2724850068983, 1467.7660635882946, 662.7756130487785, 
175.86186902537193, 972.7381457788674, 2218.107496828026, 119.12745413628157], 
"eval_len": [90, 704, 306, 315, 403, 230, 89, 309, 629, 64]}

 22%|██▏       | 219999/1000000 [1:32:33<4:01:17, 53.88it/s]global step 220000, trans_decision ep_re 1056.243706984117

{"global_step": 220000, "eval_re": [1325.8015665654677, 312.54103011070737, 
1268.6111225418658, 1586.560699401352, 1295.4175316436065, 755.746575717302, 
960.2986138598785, 1007.2737539531374, 1400.4774217088675, 649.7087543389854], 
"eval_len": [366, 127, 354, 413, 344, 248, 295, 306, 379, 219]}

 23%|██▎       | 229994/1000000 [1:36:40<3:59:27, 53.59it/s]global step 230000, trans_decision ep_re 680.4735683809486

{"global_step": 230000, "eval_re": [705.1411475361126, 384.90376145547503, 
1029.8688690760507, 154.58536240533925, 269.2135922703257, 1374.3172105228177, 
478.74896254010656, 711.2700217097555, 379.4425273523729, 1317.2442289411288], 
"eval_len": [267, 150, 315, 76, 120, 384, 180, 258, 151, 398]}

 24%|██▍       | 239995/1000000 [1:40:51<3:58:52, 53.03it/s]global step 240000, trans_decision ep_re 1350.642120588805

{"global_step": 240000, "eval_re": [1054.44407272065, 1568.2816898233266, 
1295.6945669727743, 1569.2556454177516, 1300.6638775359513, 1513.938007195115, 
1386.825426899431, 1965.9526029017243, 151.63806232510692, 1699.7272540962213], 
"eval_len": [324, 473, 362, 419, 366, 406, 382, 533, 74, 488]}

 25%|██▍       | 249996/1000000 [1:45:17<4:28:43, 46.52it/s]global step 250000, trans_decision ep_re 997.8770909816687

{"global_step": 250000, "eval_re": [456.4263693917997, 1285.736352662604, 
1462.871971377111, 1506.985732097791, 1211.5597724914808, 710.1663937096334, 
1336.8374157026687, 1426.0355063058514, 277.6877997364006, 304.46359634134524], 
"eval_len": [183, 383, 412, 420, 382, 234, 371, 402, 117, 127]}

 26%|██▌       | 259995/1000000 [1:49:13<3:48:55, 53.87it/s]global step 260000, trans_decision ep_re 1021.0628917876962

{"global_step": 260000, "eval_re": [1330.4087183378076, 954.8315490476908, 
751.8422126262637, 1438.1379139800167, 505.64659344916953, 448.15404418689474, 
1564.8244481367742, 360.3225829252972, 1620.0092903015056, 1236.4515648855415], 
"eval_len": [361, 311, 248, 410, 183, 170, 416, 143, 432, 353]}

 27%|██▋       | 269995/1000000 [1:53:19<3:47:45, 53.42it/s]global step 270000, trans_decision ep_re 878.9559449823737

{"global_step": 270000, "eval_re": [1264.375138507095, 824.8709561011237, 
1106.7044875977133, 361.01512129503845, 939.9609674088334, 1281.0856977238302, 
601.4272710480759, 407.8992430523506, 651.8023757705454, 1350.4181913191305], 
"eval_len": [386, 248, 299, 142, 264, 371, 205, 155, 218, 370]}

 28%|██▊       | 279996/1000000 [1:57:24<3:46:32, 52.97it/s]global step 280000, trans_decision ep_re 823.4893442528158

{"global_step": 280000, "eval_re": [1674.1852428124655, 957.7054125889459, 
1260.9660291059163, 970.8145951942722, 389.4046894338733, 384.70712396657, 
389.728206775211, 961.8720465599675, 958.8671964056015, 286.64289968533586], 
"eval_len": [455, 295, 361, 303, 147, 148, 148, 274, 300, 122]}

 29%|██▉       | 289996/1000000 [2:01:29<3:39:01, 54.03it/s]global step 290000, trans_decision ep_re 1522.9542749088766

{"global_step": 290000, "eval_re": [1081.3387063554792, 403.695175449666, 
711.533822989873, 428.78276111754775, 1845.240876655733, 2860.7379649439886, 
1076.232527454278, 985.0255009672868, 2994.658278586577, 2842.2971345683363], 
"eval_len": [335, 154, 240, 150, 482, 821, 340, 309, 871, 812]}

 30%|██▉       | 299998/1000000 [2:05:37<3:36:29, 53.89it/s]global step 300000, trans_decision ep_re 1465.685919936982

{"global_step": 300000, "eval_re": [398.8063852600992, 2379.5377176744614, 
1556.4684737870723, 398.93352334474025, 2175.6326616357333, 3071.6741296608366, 
1833.946955280075, 1012.1202700921403, 414.06828023076037, 1415.6708024039012], 
"eval_len": [138, 604, 463, 149, 645, 876, 549, 318, 154, 438]}

 31%|███       | 309994/1000000 [2:09:43<3:33:16, 53.92it/s]global step 310000, trans_decision ep_re 781.5042618537872

{"global_step": 310000, "eval_re": [665.9616158464445, 702.5511425945332, 
774.3415699629044, 2060.363522562261, 270.02059846905127, 770.589303521232, 
177.46343911268355, 379.57349267777107, 1581.737134959241, 432.4407988317502], 
"eval_len": [222, 234, 245, 579, 112, 241, 88, 143, 446, 164]}

 32%|███▏      | 319995/1000000 [2:13:57<3:30:15, 53.90it/s]global step 320000, trans_decision ep_re 1154.6224450690204

{"global_step": 320000, "eval_re": [404.1121446063449, 1624.0545765666918, 
377.65413805062605, 1527.080265793302, 2572.8969209331185, 988.2759607076324, 
17.809068553411443, 1556.7300634721644, 1698.1264867010932, 779.4848253058213], 
"eval_len": [152, 431, 146, 408, 682, 305, 28, 422, 457, 247]}

 33%|███▎      | 329995/1000000 [2:17:49<3:27:39, 53.77it/s]global step 330000, trans_decision ep_re 1380.3255114082822

{"global_step": 330000, "eval_re": [1028.222460219921, 2608.667687102249, 
1904.3572751147406, 1969.3873093048974, 185.02567726787544, 176.639468285424, 
1695.749222327002, 1661.735323525929, 1203.873371583849, 1369.597319350934], 
"eval_len": [320, 710, 505, 552, 90, 87, 449, 457, 347, 394]}

 34%|███▍      | 339997/1000000 [2:22:07<3:24:07, 53.89it/s]global step 340000, trans_decision ep_re 981.710958488469

{"global_step": 340000, "eval_re": [1035.6290753861315, 377.01743631170757, 
671.8235762505607, 458.1404037599419, 1910.1261371989535, 1482.1641865417664, 
700.4266424370556, 1500.9741667789904, 1014.2271844776031, 666.5807757419806], 
"eval_len": [319, 144, 233, 177, 534, 460, 249, 471, 314, 228]}

 35%|███▍      | 349994/1000000 [2:25:57<3:19:50, 54.21it/s]global step 350000, trans_decision ep_re 1016.2657376422209

{"global_step": 350000, "eval_re": [1133.7905631136198, 986.938105400659, 
1376.8488213873127, 797.1491968916818, 1674.8635378398362, 1469.9478477615378, 
610.781565212713, 422.03283350165924, 646.4658346912775, 1043.8390706219104], 
"eval_len": [333, 276, 389, 257, 438, 391, 207, 154, 203, 311]}

 36%|███▌      | 359996/1000000 [2:30:01<3:20:05, 53.31it/s]global step 360000, trans_decision ep_re 721.6412031608317

{"global_step": 360000, "eval_re": [377.3486226456971, 263.56027279545106, 
1596.465580779399, 1305.5145162540084, 304.26494048347433, 90.7410165542531, 
1017.4280239526561, 182.60904347341113, 1070.1733046820707, 1008.3067099878951],
"eval_len": [138, 110, 411, 381, 118, 52, 310, 88, 326, 286]}

 37%|███▋      | 369998/1000000 [2:34:03<3:15:21, 53.75it/s]global step 370000, trans_decision ep_re 684.7270790701393

{"global_step": 370000, "eval_re": [2290.4054758969187, 192.28507490681335, 
424.47676086008823, 919.5580364575052, 1374.032840171469, 498.045196349799, 
503.19156322219425, 442.4388255238714, 84.96745279944422, 117.86956451329083], 
"eval_len": [582, 91, 160, 289, 423, 163, 164, 165, 48, 70]}

 38%|███▊      | 379999/1000000 [2:38:04<3:12:56, 53.56it/s]global step 380000, trans_decision ep_re 962.2367192596915

{"global_step": 380000, "eval_re": [764.90943823355, 1334.9575185153997, 
188.2818254675895, 622.6060223451137, 594.7669017791213, 1246.8332695937445, 
1409.4056629997135, 1414.551758665872, 189.78933890545423, 1856.2654560913568], 
"eval_len": [224, 412, 90, 207, 199, 327, 371, 378, 89, 514]}

 39%|███▉      | 389995/1000000 [2:42:08<3:09:37, 53.62it/s]global step 390000, trans_decision ep_re 917.9116087936396

{"global_step": 390000, "eval_re": [1063.7442873528285, 1579.3814107766796, 
21.102063899703886, 400.6012936024608, 1917.858721096954, 1017.6799069672858, 
1091.3404252742394, 356.44555756149174, 137.06826873390793, 1593.894152670845], 
"eval_len": [293, 440, 19, 148, 536, 279, 321, 136, 69, 413]}

 40%|███▉      | 399996/1000000 [2:46:10<3:05:37, 53.87it/s]global step 400000, trans_decision ep_re 834.3146374240841

{"global_step": 400000, "eval_re": [691.5277627201245, 887.6532463378521, 
1020.7279819758247, 429.88090085706136, 1018.6641431423786, 430.7964220271884, 
967.0953294160134, 823.8532156163784, 1202.2665336501282, 870.6808384978914], 
"eval_len": [223, 260, 312, 161, 317, 158, 303, 244, 322, 252]}

 41%|████      | 409998/1000000 [2:50:13<3:02:55, 53.76it/s]global step 410000, trans_decision ep_re 585.537918551569

{"global_step": 410000, "eval_re": [431.87892608665953, 1075.4888891292037, 
718.7702629919523, 465.6875126481703, 155.44066665738674, 401.19647740975, 
111.3006317089404, 1156.4223006412963, 1161.7253213111383, 177.46819693119207], 
"eval_len": [153, 317, 232, 164, 74, 146, 61, 305, 311, 83]}

 42%|████▏     | 419994/1000000 [2:54:14<2:59:34, 53.83it/s]global step 420000, trans_decision ep_re 1078.255440180781

{"global_step": 420000, "eval_re": [1203.1710324369774, 1798.891191523766, 
1059.253711734486, 18.773500736758592, 1205.5022315078372, 1722.2893323667388, 
684.7173181914704, 1111.0461597344713, 431.4634594436897, 1547.4464641316163], 
"eval_len": [350, 480, 282, 19, 316, 470, 221, 303, 154, 413]}

 43%|████▎     | 429996/1000000 [2:58:17<2:57:33, 53.50it/s]global step 430000, trans_decision ep_re 636.1844277105668

{"global_step": 430000, "eval_re": [283.50027493703067, 412.6069579914522, 
649.445747377069, 724.6942349508093, 717.4425358146988, 1896.577286079571, 
979.875318819543, 329.93190315018916, 179.33768126651438, 188.4323367187894], 
"eval_len": [120, 150, 219, 235, 233, 496, 301, 129, 85, 88]}

 44%|████▍     | 439998/1000000 [3:02:18<2:53:18, 53.85it/s]global step 440000, trans_decision ep_re 811.4842684679213

{"global_step": 440000, "eval_re": [727.6472416354521, 1200.3614186333534, 
597.8460226115253, 1043.2915891558728, 5.352136977367347, 1493.642485432234, 
709.2556674855174, 670.5644400358867, 1225.530821770916, 441.3508609410885], 
"eval_len": [227, 336, 179, 315, 19, 404, 232, 228, 324, 163]}

 45%|████▍     | 449994/1000000 [3:06:21<2:50:49, 53.66it/s]global step 450000, trans_decision ep_re 583.7370352189233

{"global_step": 450000, "eval_re": [363.4596130097734, 1053.1539251753122, 
373.5975846065278, 137.55666198887596, 389.9671045424271, 561.9196448684924, 
177.82169367680348, 1048.2863398367183, 687.0978762963657, 1044.509908187936], 
"eval_len": [139, 300, 135, 69, 146, 213, 82, 281, 223, 311]}

 46%|████▌     | 459996/1000000 [3:10:22<2:48:01, 53.56it/s]global step 460000, trans_decision ep_re 799.5398959375473

{"global_step": 460000, "eval_re": [1272.0910070163832, 543.5406855661189, 
1779.0926348738662, 20.742937363405044, 1062.5804634309661, 436.76428773532126, 
158.42374421672733, 1458.9985526698354, 1101.0780556937896, 162.0865908090592], 
"eval_len": [356, 190, 474, 20, 316, 158, 74, 381, 328, 76]}

 47%|████▋     | 469996/1000000 [3:14:25<2:46:30, 53.05it/s]global step 470000, trans_decision ep_re 744.9675505566372

{"global_step": 470000, "eval_re": [739.1497025230394, 188.41994487979684, 
1016.0683195860182, 8.741718257109799, 1044.3836468367092, 816.5044860361544, 
388.31007879738206, 921.7390509775074, 1322.4609779451519, 1003.8975797275026], 
"eval_len": [215, 87, 289, 12, 276, 232, 132, 255, 341, 270]}

 48%|████▊     | 479995/1000000 [3:18:27<2:42:12, 53.43it/s]global step 480000, trans_decision ep_re 543.11624300559

{"global_step": 480000, "eval_re": [16.805672225272367, 438.9269117027436, 
1312.6952101300885, 88.21368636738259, 687.9678984756489, 166.23100593927714, 
1597.5611630004023, 330.9046435639717, 62.72184597515101, 729.1343926759631], 
"eval_len": [17, 156, 375, 50, 219, 76, 429, 128, 42, 233]}

 49%|████▉     | 489995/1000000 [3:22:28<2:39:36, 53.26it/s]global step 490000, trans_decision ep_re 837.3197342003389

{"global_step": 490000, "eval_re": [447.1633878410599, 1421.2524515095754, 
1121.9927361932785, 1441.9550360847918, 10.969191807660131, 679.7801769339475, 
1605.3918663232362, 1001.7432588636311, 179.13425379230895, 463.8149826538982], 
"eval_len": [162, 388, 310, 375, 15, 222, 424, 301, 84, 167]}

 50%|████▉     | 499995/1000000 [3:26:32<2:36:41, 53.18it/s]global step 500000, trans_decision ep_re 1171.237583416891

{"global_step": 500000, "eval_re": [710.5619739583669, 906.8363910294329, 
1312.0710963486306, 628.7271887597072, 1862.6957357225456, 2397.178084233653, 
717.4308865402954, 1780.9049574700362, 1034.5114545211156, 361.4580655851284], 
"eval_len": [225, 250, 367, 204, 520, 630, 235, 466, 277, 135]}

 51%|█████     | 509997/1000000 [3:30:48<2:31:46, 53.81it/s]global step 510000, trans_decision ep_re 763.4541214944545

{"global_step": 510000, "eval_re": [16.94393201932458, 2000.1749054139527, 
677.6331112026055, 135.10647720104737, 683.0886555929889, 1285.2128036525883, 
975.8392513085582, 983.7203261331911, 863.9206290981481, 12.901123322141071], 
"eval_len": [17, 514, 221, 67, 218, 365, 285, 278, 247, 15]}

 52%|█████▏    | 519994/1000000 [3:34:39<2:28:06, 54.01it/s]global step 520000, trans_decision ep_re 1199.7411313166074

{"global_step": 520000, "eval_re": [2234.719639846255, 1209.3237889123702, 
718.2795630918068, 5.179573741802005, 181.39745827896826, 1889.5528278505155, 
1583.2606420188142, 2231.3441927181925, 766.2079257486812, 1178.1457009586663], 
"eval_len": [579, 330, 226, 15, 84, 465, 402, 578, 223, 307]}

 53%|█████▎    | 529996/1000000 [3:38:44<2:25:38, 53.79it/s]global step 530000, trans_decision ep_re 602.6680109182481

{"global_step": 530000, "eval_re": [1312.795318128047, 660.6995169279928, 
187.14605163380756, 178.1002157165975, 1047.395332163046, 370.61341084958707, 
450.97712113465315, 657.2211092754608, 981.0067561028056, 180.72527725048332], 
"eval_len": [366, 215, 87, 84, 282, 127, 156, 213, 269, 83]}

 54%|█████▍    | 539997/1000000 [3:42:46<2:24:33, 53.03it/s]global step 540000, trans_decision ep_re 831.3329567458537

{"global_step": 540000, "eval_re": [50.470535898368865, 73.70805414203251, 
921.9647923243746, 21.02186799466906, 159.73667194834525, 689.2377844279231, 
717.60887550491, 3288.373734582932, 1027.4763707545571, 1363.7308798804243], 
"eval_len": [31, 43, 284, 20, 76, 221, 227, 903, 321, 391]}

 55%|█████▍    | 549998/1000000 [3:46:50<2:19:36, 53.72it/s]global step 550000, trans_decision ep_re 741.024351939496

{"global_step": 550000, "eval_re": [449.38056432585364, 978.907146712736, 
11.082602590835704, 677.3972611640401, 389.86060037246205, 2067.5260522260555, 
341.1446999784413, 727.7273080308149, 1345.6054172556132, 421.6118667381076], 
"eval_len": [160, 310, 15, 222, 144, 555, 129, 228, 372, 147]}

 56%|█████▌    | 559994/1000000 [3:50:51<2:15:33, 54.10it/s]global step 560000, trans_decision ep_re 848.8785289649035

{"global_step": 560000, "eval_re": [853.594375470268, 1260.0190365488727, 
2060.169228814602, 1011.593120816084, 509.86613098026805, 441.295524938934, 
386.1069825076989, 989.3058665877933, 234.1854142934703, 742.6496086910445], 
"eval_len": [264, 361, 528, 324, 174, 153, 140, 261, 99, 234]}

 57%|█████▋    | 569996/1000000 [3:54:54<2:12:49, 53.95it/s]global step 570000, trans_decision ep_re 939.8108122804639

{"global_step": 570000, "eval_re": [784.4916437618408, 1349.5455532779965, 
1751.6252282417504, 698.0562125016785, 422.5921363151011, 171.85907201720497, 
78.58444658150391, 11.07828010860151, 929.451416345126, 3200.8241336538367], 
"eval_len": [236, 348, 482, 214, 150, 78, 49, 15, 290, 802]}

 58%|█████▊    | 579998/1000000 [3:58:56<2:09:36, 54.01it/s]global step 580000, trans_decision ep_re 955.9982132868909

{"global_step": 580000, "eval_re": [680.3434947383346, 167.85264972950594, 
1326.3752404866736, 1935.46922558746, 143.12641391099842, 106.54086904064532, 
856.1194076884314, 1451.0790196224177, 1047.478365233692, 1845.5974468307493], 
"eval_len": [220, 78, 370, 479, 68, 56, 277, 364, 296, 454]}

 59%|█████▉    | 589999/1000000 [4:03:08<2:07:15, 53.70it/s]global step 590000, trans_decision ep_re 954.6715082893876

{"global_step": 590000, "eval_re": [116.48413107070813, 801.1886705403065, 
1996.6565587463124, 3611.957109784859, 140.30456530405377, 838.2096339264423, 
537.2763578557079, 344.7200597879929, 305.1055983043662, 854.8123975731276], 
"eval_len": [59, 258, 601, 981, 68, 274, 188, 129, 118, 276]}

 60%|█████▉    | 599996/1000000 [4:07:06<2:24:01, 46.29it/s]global step 600000, trans_decision ep_re 934.198970538802

{"global_step": 600000, "eval_re": [896.6393088528218, 1259.7642625007445, 
457.43708203714345, 109.71905820659144, 118.05992268098174, 1438.4110330467436, 
672.168621210175, 2015.5149075713412, 1304.4517758509803, 1069.8237334304954], 
"eval_len": [251, 357, 160, 57, 59, 372, 210, 497, 336, 285]}

 61%|██████    | 609997/1000000 [4:11:19<2:00:28, 53.95it/s]global step 610000, trans_decision ep_re 1493.619302789164

{"global_step": 610000, "eval_re": [941.4067139194959, 898.9839923509683, 
2273.176139742464, 678.1289183498471, 1352.960535761512, 2705.5484089411843, 
2686.0057802123847, 193.42804705420338, 3017.528434202024, 189.02605735755566], 
"eval_len": [289, 252, 640, 219, 402, 703, 731, 86, 843, 85]}

 62%|██████▏   | 619996/1000000 [4:15:29<1:57:55, 53.70it/s]global step 620000, trans_decision ep_re 990.7225942538851

{"global_step": 620000, "eval_re": [355.2083909815583, 2484.7896893945785, 
1820.3535712132118, 900.4152695549203, 695.7788369354062, 1906.5260924688089, 
882.6886597312281, 33.21261152278861, 103.9207022302464, 724.3321185061038], 
"eval_len": [134, 733, 541, 287, 221, 571, 262, 30, 70, 231]}

 63%|██████▎   | 629996/1000000 [4:19:20<1:54:17, 53.95it/s]global step 630000, trans_decision ep_re 1249.76156259065

{"global_step": 630000, "eval_re": [1557.4158525611074, 1908.7022349755018, 
408.1552848812266, 531.1113426721369, 2177.29044344226, 1564.0268165493665, 
959.2229507155962, 1309.4310774964977, 1362.841555232286, 719.4180673805221], 
"eval_len": [430, 507, 140, 171, 547, 386, 281, 381, 361, 227]}

 64%|██████▍   | 639997/1000000 [4:23:24<1:50:31, 54.29it/s]global step 640000, trans_decision ep_re 931.5178287972624

{"global_step": 640000, "eval_re": [2088.1463872015815, 183.51370902574598, 
623.6048182521879, 883.2483388204647, 794.8667378298494, 944.5731255704799, 
1073.7331170921639, 1446.7258561281471, 902.4172353285337, 374.3489627234687], 
"eval_len": [587, 80, 199, 276, 251, 274, 315, 368, 270, 134]}

 65%|██████▍   | 649999/1000000 [4:27:39<1:48:31, 53.76it/s]global step 650000, trans_decision ep_re 780.5809724194421

{"global_step": 650000, "eval_re": [1472.1734405110954, 58.23941630661996, 
139.32428413504758, 1017.059470530284, 159.62056601262887, 61.29544038391871, 
1776.9034337354603, 1157.0021911518386, 676.6864022923019, 1287.5050791352253], 
"eval_len": [361, 41, 67, 305, 73, 42, 518, 295, 207, 324]}

 66%|██████▌   | 659996/1000000 [4:31:31<1:45:48, 53.56it/s]global step 660000, trans_decision ep_re 677.5362482285915

{"global_step": 660000, "eval_re": [11.240279152233743, 1269.4789930358393, 
70.79131852303217, 1015.037466086036, 61.88736350925798, 1525.0772764793646, 
149.90546453652343, 1866.977796515686, 363.85256858936003, 441.113955858582], 
"eval_len": [15, 358, 46, 293, 47, 372, 70, 470, 130, 148]}

 67%|██████▋   | 669997/1000000 [4:35:31<1:41:25, 54.22it/s]global step 670000, trans_decision ep_re 583.8403646194713

{"global_step": 670000, "eval_re": [704.4555819168487, 818.6116016532801, 
920.410080184742, 145.2149829671548, 1457.457024217663, 347.07342038163034, 
524.311379556759, 140.37225420389308, 218.31677469135764, 562.1805464213845], 
"eval_len": [209, 229, 288, 69, 431, 129, 176, 68, 94, 186]}

 68%|██████▊   | 679998/1000000 [4:39:32<1:38:52, 53.94it/s]global step 680000, trans_decision ep_re 512.1482152529535

{"global_step": 680000, "eval_re": [679.7068371113542, 290.9446206779822, 
448.053795324844, 931.7599151280168, 279.6762871240011, 957.9326873242742, 
113.85282985384181, 568.2083838101547, 746.8664422956773, 104.48035387938809], 
"eval_len": [210, 110, 158, 267, 106, 279, 58, 184, 235, 54]}

 69%|██████▉   | 689998/1000000 [4:43:32<1:35:19, 54.20it/s]global step 690000, trans_decision ep_re 557.7623268652445

{"global_step": 690000, "eval_re": [61.78993249720381, 60.87605987871105, 
1448.7674472771594, 357.8177409916945, 243.20466514318662, 394.44710062748544, 
351.1270416987548, 1705.0668114501968, 136.1679624585779, 818.3585066294748], 
"eval_len": [43, 42, 409, 128, 100, 142, 125, 444, 66, 258]}

 70%|██████▉   | 699999/1000000 [4:47:32<1:32:52, 53.84it/s]global step 700000, trans_decision ep_re 456.78272563874697

{"global_step": 700000, "eval_re": [74.381384564746, 69.80976664148567, 
1587.7732398578949, 718.9850139298703, 413.11652195164356, 70.91227562832499, 
420.00037761013044, 135.1438977621343, 406.80690874682637, 670.897869694413], 
"eval_len": [48, 46, 447, 217, 143, 46, 144, 65, 138, 207]}

 71%|███████   | 709995/1000000 [4:51:32<1:29:53, 53.77it/s]global step 710000, trans_decision ep_re 908.2462188158943

{"global_step": 710000, "eval_re": [84.9747943550649, 398.158019946242, 
1333.8071590896338, 1362.5862406574636, 719.369035217533, 371.7414563383179, 
144.92804611404802, 907.4421632759102, 1926.7852226595737, 1832.6700505051551], 
"eval_len": [51, 135, 330, 377, 213, 123, 69, 246, 464, 444]}

 72%|███████▏  | 719997/1000000 [4:55:35<1:26:17, 54.08it/s]global step 720000, trans_decision ep_re 983.779078731932

{"global_step": 720000, "eval_re": [1558.4979254369525, 1530.2841852570107, 
1398.634220792058, 134.7340878281894, 314.2514864972303, 767.303945053782, 
773.9702298496676, 1395.455062679138, 613.1065441501445, 1351.5530997751468], 
"eval_len": [385, 374, 361, 64, 116, 247, 235, 345, 201, 339]}

 73%|███████▎  | 729999/1000000 [4:59:50<1:23:25, 53.94it/s]global step 730000, trans_decision ep_re 765.870555732991

{"global_step": 730000, "eval_re": [201.37154780447685, 971.4603388410613, 
147.815189644181, 1681.7936279033388, 737.23777656403, 427.04981769996357, 
359.5108345608893, 78.22320597138203, 1543.7740540780535, 1510.4691642625326], 
"eval_len": [92, 261, 72, 415, 228, 150, 128, 48, 391, 374]}

 74%|███████▍  | 739995/1000000 [5:03:40<1:20:19, 53.95it/s]global step 740000, trans_decision ep_re 577.4177475763685

{"global_step": 740000, "eval_re": [321.71659911037415, 113.66383124617246, 
1185.4605688085583, 407.5042386556042, 988.7867375501373, 552.6113563807094, 
920.0906300311989, 403.3576983136295, 150.25196361536302, 730.7338520519381], 
"eval_len": [116, 57, 297, 144, 293, 177, 254, 138, 70, 195]}

 75%|███████▍  | 749997/1000000 [5:07:41<1:17:12, 53.97it/s]global step 750000, trans_decision ep_re 1463.7855910334097

{"global_step": 750000, "eval_re": [1299.2522126401038, 3281.101749985102, 
1229.9963635177924, 426.601671071696, 930.2494188380371, 2269.3095203746207, 
227.84171062445864, 1165.406307182664, 611.8587205715862, 3196.2382355280342], 
"eval_len": [337, 867, 375, 147, 285, 557, 92, 298, 190, 811]}

 76%|███████▌  | 759999/1000000 [5:11:48<1:14:47, 53.48it/s]global step 760000, trans_decision ep_re 656.2668864442086

{"global_step": 760000, "eval_re": [19.896936989134797, 1265.0543801579079, 
999.60799378323, 1772.4764537120745, 316.87888417624185, 1036.4150158051252, 
328.43147936696937, 63.82611234448225, 740.8082588746043, 19.273349232314562], 
"eval_len": [20, 362, 262, 491, 115, 311, 118, 44, 224, 20]}

 77%|███████▋  | 769995/1000000 [5:16:00<1:11:24, 53.68it/s]global step 770000, trans_decision ep_re 1295.1854929706537

{"global_step": 770000, "eval_re": [25.454061652796987, 205.4165340268418, 
1221.1815390517816, 1482.6855861166325, 2965.2604849726918, 1286.363543151167, 
1252.8306045641314, 2135.9810866958437, 901.5328738942185, 1475.1486155804328], 
"eval_len": [28, 93, 309, 409, 757, 352, 370, 534, 269, 412]}

 78%|███████▊  | 779996/1000000 [5:19:54<1:07:45, 54.11it/s]global step 780000, trans_decision ep_re 858.497728687276

{"global_step": 780000, "eval_re": [411.4711592403874, 690.741361138921, 
9.027563563278072, 184.62102770148314, 2389.8287499556254, 1152.8671605880143, 
1055.6280089425018, 946.4921410985324, 1518.8987442323455, 225.40137041166938], 
"eval_len": [143, 223, 18, 79, 601, 332, 286, 279, 371, 91]}

 79%|███████▉  | 789998/1000000 [5:23:56<1:05:07, 53.74it/s]global step 790000, trans_decision ep_re 716.3232219542099

{"global_step": 790000, "eval_re": [311.5151889756044, 1396.1853175765186, 
698.0493400336857, 113.70316996399822, 1503.6478167185173, 873.4459036998185, 
400.5262820489649, 342.39441049258687, 880.338245233504, 643.4265447989012], 
"eval_len": [114, 375, 220, 67, 402, 266, 139, 147, 258, 204]}

 80%|███████▉  | 799994/1000000 [5:27:56<1:02:13, 53.58it/s]global step 800000, trans_decision ep_re 863.5710647364228

{"global_step": 800000, "eval_re": [356.05561266369267, 346.1188393416324, 
636.7195877766773, 2622.7986890391553, 1479.624350272854, 12.292076886001892, 
1818.624517635898, 240.77765099837413, 188.6389572468191, 934.0603655031239], 
"eval_len": [135, 124, 200, 727, 364, 15, 446, 98, 83, 285]}

 81%|████████  | 809996/1000000 [5:32:10<58:57, 53.72it/s]global step 810000, trans_decision ep_re 1032.3614172152188

{"global_step": 810000, "eval_re": [358.05980545643934, 828.3339510514988, 
23.860653632501, 1161.501712384763, 169.10867510814091, 1835.554811298037, 
484.6079240703907, 1385.0086556958456, 350.00014552694967, 3727.577837927623], 
"eval_len": [123, 250, 23, 332, 75, 519, 160, 344, 122, 1000]}

 82%|████████▏ | 819996/1000000 [5:36:02<55:32, 54.02it/s]global step 820000, trans_decision ep_re 620.9968269674362

{"global_step": 820000, "eval_re": [555.1733818709362, 934.1778093111021, 
283.07260679275515, 897.4137781190014, 123.67151807073157, 881.7089569397327, 
207.0984653539309, 1083.8626345669493, 394.2509625315268, 849.538156117696], 
"eval_len": [168, 238, 115, 230, 60, 227, 85, 268, 132, 228]}

 83%|████████▎ | 829995/1000000 [5:40:04<52:35, 53.88it/s]global step 830000, trans_decision ep_re 675.3458084024425

{"global_step": 830000, "eval_re": [195.67956002912067, 939.1337021026415, 
297.17221281439276, 7.626247620689412, 364.42319402435345, 914.1241879743974, 
1805.4635482938434, 1469.6125078163275, 152.25683770822533, 607.9660856404331], 
"eval_len": [83, 271, 109, 11, 131, 258, 458, 364, 70, 188]}

 84%|████████▍ | 839997/1000000 [5:44:03<1:03:21, 42.09it/s]global step 840000, trans_decision ep_re 528.9256339124697

{"global_step": 840000, "eval_re": [356.9938761824914, 1233.0921256704642, 
676.2808880671395, 512.9557358145756, 15.894446275098467, 596.2529775986832, 
198.07903810587135, 199.7854725245914, 1109.524612261502, 390.39716662428], 
"eval_len": [129, 348, 212, 170, 19, 191, 86, 85, 322, 138]}

 85%|████████▍ | 849998/1000000 [5:48:03<46:18, 53.98it/s]global step 850000, trans_decision ep_re 334.21963298458707

{"global_step": 850000, "eval_re": [45.633816431395594, 12.41808021431216, 
1043.3378667401814, 81.47912616920394, 136.57299664804373, 13.587363191026741, 
1269.978067097518, 131.0975671010916, 147.71878224293906, 460.3726640101585], 
"eval_len": [40, 20, 264, 52, 68, 17, 351, 67, 78, 162]}

 86%|████████▌ | 859999/1000000 [5:52:01<43:13, 53.97it/s]global step 860000, trans_decision ep_re 537.541131283001

{"global_step": 860000, "eval_re": [13.614350591846705, 1096.0799425223736, 
920.3963208099369, 386.7962469780802, 128.17744314848701, 280.5285413787501, 
1297.4023393334646, 1016.0163901734294, 11.362607648157546, 225.03713024548443],
"eval_len": [17, 313, 272, 136, 64, 110, 340, 308, 16, 91]}

 87%|████████▋ | 869995/1000000 [5:56:02<40:10, 53.94it/s]global step 870000, trans_decision ep_re 973.9519872665203

{"global_step": 870000, "eval_re": [1980.9353833103787, 120.03496998746733, 
280.06585761106027, 393.23972136948277, 1489.633648542283, 1119.1464774516692, 
591.8638420554432, 1526.4966391705702, 65.33283351947881, 2172.7704996473685], 
"eval_len": [492, 59, 112, 137, 414, 277, 182, 377, 57, 516]}

 88%|████████▊ | 879997/1000000 [6:00:05<37:05, 53.93it/s]global step 880000, trans_decision ep_re 651.2836852828291

{"global_step": 880000, "eval_re": [666.3456364228913, 163.27442418453555, 
2821.7407016171255, 906.0086279649655, 694.2282442053963, 276.3650619298151, 
632.8267774177662, 106.67359474656665, 228.85135369131058, 16.522430647918778], 
"eval_len": [210, 74, 707, 277, 212, 104, 198, 56, 97, 24]}

 89%|████████▉ | 889998/1000000 [6:04:06<34:00, 53.90it/s]global step 890000, trans_decision ep_re 680.1298044515858

{"global_step": 890000, "eval_re": [86.63427794885936, 745.138508344383, 
135.03029196522166, 1529.9058655492906, 662.6522366318704, 991.7894230386572, 
16.31311893572484, 983.0366706478275, 504.1169805340803, 1146.680670919942], 
"eval_len": [52, 204, 65, 371, 194, 261, 18, 269, 158, 284]}

 90%|████████▉ | 899994/1000000 [6:08:07<30:45, 54.20it/s]global step 900000, trans_decision ep_re 560.9932872143833

{"global_step": 900000, "eval_re": [154.3354153830886, 564.3710294824391, 
677.8263583786719, 54.476541931525766, 1022.0448839345321, 673.4598981123567, 
12.993753389191204, 882.0477216191202, 770.2151513943004, 798.1621185186065], 
"eval_len": [72, 166, 189, 40, 257, 192, 16, 228, 215, 213]}

 91%|█████████ | 909996/1000000 [6:12:07<27:44, 54.09it/s]global step 910000, trans_decision ep_re 848.0381816837

{"global_step": 910000, "eval_re": [18.992728061522012, 1048.5104866309507, 
1176.3421490270327, 898.584849478023, 1840.1241629824794, 12.970587519039807, 
990.8981761827029, 586.7593008272338, 869.1427080235004, 1038.0566681045145], 
"eval_len": [22, 260, 283, 238, 443, 17, 246, 164, 222, 255]}

 92%|█████████▏| 919998/1000000 [6:16:09<24:46, 53.82it/s]global step 920000, trans_decision ep_re 752.7496894546747

{"global_step": 920000, "eval_re": [1291.346454196692, 139.57180771127332, 
757.5567978081378, 199.8976841477849, 1526.9109549503314, 390.1315792057392, 
1223.9967184981879, 938.0627658043682, 943.981655240425, 116.04047698380725], 
"eval_len": [343, 66, 213, 84, 377, 133, 300, 255, 247, 57]}

 93%|█████████▎| 929999/1000000 [6:20:11<22:23, 52.11it/s]global step 930000, trans_decision ep_re 718.9264459654224

{"global_step": 930000, "eval_re": [13.36309040688457, 579.673052996807, 
375.7593691119064, 51.437889799551755, 941.3086695954092, 931.766549733799, 
398.3345751915974, 540.8989103056289, 950.0758876459875, 2406.6464648666524], 
"eval_len": [17, 177, 130, 40, 244, 241, 138, 169, 246, 601]}

 94%|█████████▍| 939995/1000000 [6:24:10<18:29, 54.07it/s]global step 940000, trans_decision ep_re 608.9814611190329

{"global_step": 940000, "eval_re": [256.5062630907646, 721.6293785112008, 
132.73298740424622, 6.658297119245385, 10.047317138315101, 601.7706734007463, 
924.3168609966323, 576.1610016747215, 2122.9800992734054, 737.0117325810526], 
"eval_len": [100, 220, 65, 18, 15, 183, 258, 167, 516, 207]}

 95%|█████████▍| 949995/1000000 [6:28:21<15:22, 54.21it/s]global step 950000, trans_decision ep_re 983.5783793063129

{"global_step": 950000, "eval_re": [409.23795797843985, 1018.5019389573682, 
549.0674750026466, 1121.4477736093772, 1872.664801849251, 604.4856137592874, 
1653.106275129758, 323.37183965063906, 298.9048087495507, 1984.9953083768114], 
"eval_len": [143, 268, 155, 286, 463, 178, 410, 115, 109, 472]}

 96%|█████████▌| 959996/1000000 [6:32:11<12:27, 53.50it/s]global step 960000, trans_decision ep_re 670.3625813580306

{"global_step": 960000, "eval_re": [328.5619097184345, 1066.632060361365, 
674.875614793008, 10.431129034021854, 1325.63542544086, 1551.1541014836835, 
1157.7803742222002, 11.942465210809525, 13.450352527767203, 563.1623807881567], 
"eval_len": [112, 260, 199, 15, 315, 376, 293, 16, 17, 164]}

 97%|█████████▋| 969997/1000000 [6:36:12<09:14, 54.11it/s]global step 970000, trans_decision ep_re 994.1218317279945

{"global_step": 970000, "eval_re": [129.4933771098647, 310.4168521799583, 
1615.4641597407626, 1017.5669904539066, 1336.9074650962789, 477.3852204891168, 
381.9054668430136, 1924.1076929427697, 717.9617097651927, 2030.009382659082], 
"eval_len": [63, 110, 419, 273, 326, 154, 135, 461, 193, 546]}

 98%|█████████▊| 979997/1000000 [6:40:13<06:08, 54.26it/s]global step 980000, trans_decision ep_re 979.3241786882984

{"global_step": 980000, "eval_re": [249.2905180593486, 622.7270535777398, 
132.09224217760152, 2041.521245876308, 1930.6213657794797, 1507.0144588542682, 
278.8746787789696, 1528.908198770381, 295.5681124158346, 1206.6239125930533], 
"eval_len": [96, 182, 65, 487, 465, 359, 104, 361, 113, 294]}

 99%|█████████▉| 989999/1000000 [6:44:13<03:04, 54.21it/s]global step 990000, trans_decision ep_re 477.15003691676003

{"global_step": 990000, "eval_re": [387.3962141984399, 9.241423801602545, 
1232.0856396345246, 897.6187669560952, 387.8979434421968, 14.006403561645136, 
685.236003765214, 124.89053332739485, 1010.79537558547, 22.332064895017265], 
"eval_len": [132, 22, 318, 232, 132, 17, 197, 60, 252, 22]}

100%|█████████▉| 999995/1000000 [6:48:11<00:00, 53.65it/s]global step 1000000, trans_decision ep_re 365.181676020406

{"global_step": 1000000, "eval_re": [334.46232579157765, 46.65994903710244, 
12.61500053714315, 550.6410659444557, 538.861111937271, 660.8538423192118, 
13.541024429508163, 37.234564525670265, 1070.8844372541957, 386.06343842792364],
"eval_len": [120, 36, 16, 156, 164, 184, 17, 31, 263, 131]}

100%|██████████| 1000000/1000000 [6:48:19<00:00, 40.82it/s]
