
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.05
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:40<8:34:51, 32.05it/s]global step 10000, trans_decision ep_re 761.8308353881994

{"global_step": 10000, "eval_re": [756.0023776183916, 754.9394646587854, 
769.1660174129519, 765.4631991051571, 759.6120046101322, 762.5782766658629, 
760.7157384407659, 770.7619752646012, 762.0462866961155, 757.0230134092309], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [11:00<8:34:17, 31.76it/s]global step 20000, trans_decision ep_re 130.74553482130173

{"global_step": 20000, "eval_re": [34.42081072872714, 15.730130644244895, 
542.5806846722991, 23.173360399252672, 13.830401943154671, 27.651414824392987, 
37.47966177560749, 97.08557299573125, 22.367443385022998, 493.1358668445839], 
"eval_len": [27, 27, 1000, 36, 33, 26, 52, 93, 28, 1000]}

  3%|▎         | 29997/1000000 [18:00<8:34:07, 31.45it/s]global step 30000, trans_decision ep_re 378.18883033378563

{"global_step": 30000, "eval_re": [652.5604113284793, 655.2608192930513, 
681.4809449966953, 467.1482237364173, 20.497954072924223, 342.06768950863295, 
248.2200601629949, 71.18189251695443, 420.207819545788, 223.26248817591866], 
"eval_len": [1000, 1000, 787, 1000, 53, 340, 1000, 74, 1000, 250]}

  4%|▍         | 39997/1000000 [25:00<8:23:43, 31.76it/s]global step 40000, trans_decision ep_re 493.5624221973885

{"global_step": 40000, "eval_re": [709.1316416833444, 910.7569391793564, 
247.60642852946648, 295.70064898679055, 746.3395895189526, 650.9010837189356, 
259.2809730081327, 527.8376238439396, 547.3388530452942, 40.730440459672955], 
"eval_len": [1000, 1000, 277, 255, 1000, 1000, 201, 1000, 1000, 41]}

  5%|▍         | 49997/1000000 [32:10<8:19:07, 31.72it/s]global step 50000, trans_decision ep_re 428.2799883850081

{"global_step": 50000, "eval_re": [122.71817140782224, 54.36085763094584, 
674.2993615500395, 407.74823963773565, 340.95182383226023, 211.21198638920868, 
458.8354846206808, 1009.7709093941354, 172.20749780088647, 830.6955515863662], 
"eval_len": [87, 55, 520, 301, 246, 183, 315, 816, 120, 1000]}

  6%|▌         | 59997/1000000 [39:10<8:12:47, 31.79it/s]global step 60000, trans_decision ep_re 546.8657841216701

{"global_step": 60000, "eval_re": [686.3127408228381, 324.57666291407446, 
733.3034591051173, 92.92157081833004, 341.876717574363, 178.73464935428274, 
811.6286238257605, 484.238125242796, 908.5430537729005, 906.5222377862385], 
"eval_len": [460, 218, 1000, 72, 242, 150, 1000, 314, 675, 679]}

  7%|▋         | 69997/1000000 [46:10<8:14:12, 31.36it/s]global step 70000, trans_decision ep_re 713.1990810413187

{"global_step": 70000, "eval_re": [1411.3644564112913, 575.3481290878109, 
1577.6017805569425, 184.42750782809117, 754.4911867917762, 22.111498716678973, 
778.1447283848374, 197.15583311313148, 287.1643920881795, 1344.1812974344462], 
"eval_len": [1000, 1000, 1000, 122, 491, 29, 470, 168, 195, 1000]}

  8%|▊         | 79997/1000000 [53:20<8:03:25, 31.72it/s]global step 80000, trans_decision ep_re 519.5392740125693

{"global_step": 80000, "eval_re": [37.187779451805866, 130.37967379499986, 
282.61110322593163, 923.5130875579238, 945.1343736024444, 190.1369997333029, 
462.0073376269195, 227.06294999488097, 167.21730664850247, 1830.1421284889823], 
"eval_len": [40, 105, 174, 1000, 1000, 132, 297, 139, 112, 1000]}

  9%|▉         | 89997/1000000 [1:00:20<8:00:23, 31.57it/s]global step 90000, trans_decision ep_re 841.319342280692

{"global_step": 90000, "eval_re": [240.4462056544366, 81.77267750329695, 
1525.0636119688802, 897.5711082199263, 1162.5058077810472, 1508.5766516168453, 
294.98332505647545, 1546.405555439667, 882.3012559732193, 273.56722359312585], 
"eval_len": [191, 66, 1000, 556, 1000, 1000, 212, 1000, 614, 198]}

 10%|▉         | 99997/1000000 [1:07:30<7:50:24, 31.89it/s]global step 100000, trans_decision ep_re 603.6584032927956

{"global_step": 100000, "eval_re": [233.37925074498952, 665.8267795909776, 
182.34867623058804, 883.5001629427936, 1420.5169648338474, 1462.2565698198844, 
260.1370772403831, 206.1050921551184, 571.3539106175123, 151.1595487518613], 
"eval_len": [149, 419, 121, 511, 842, 1000, 177, 124, 395, 108]}

 11%|█         | 109997/1000000 [1:14:20<7:52:24, 31.40it/s]global step 110000, trans_decision ep_re 683.9774888592779

{"global_step": 110000, "eval_re": [114.31772934912934, 714.9246731920379, 
446.52911689837106, 1466.6401342108168, 1584.9894846210948, 27.97201941526048, 
1389.5655488494444, 78.0957091845367, 666.8065458156939, 349.93392705639326], 
"eval_len": [91, 367, 246, 785, 1000, 32, 800, 93, 1000, 240]}

 12%|█▏        | 119997/1000000 [1:21:30<7:42:09, 31.74it/s]global step 120000, trans_decision ep_re 551.153681468787

{"global_step": 120000, "eval_re": [1118.7741444746184, 960.7300584181903, 
80.63648458379295, 393.68542613463705, 335.7303980272369, 83.51066084587491, 
326.74144786149697, 171.70823777541315, 765.3970584695867, 1274.6228980970222], 
"eval_len": [581, 1000, 53, 201, 185, 51, 177, 108, 377, 1000]}

 13%|█▎        | 129997/1000000 [1:28:30<7:34:00, 31.94it/s]global step 130000, trans_decision ep_re 890.3581049150125

{"global_step": 130000, "eval_re": [180.26493591949782, 66.47625202304543, 
706.013853898114, 1380.2914683964348, 1056.8383880496363, 2095.2505778772615, 
927.2760567771359, 410.1367045049855, 1888.605424456631, 192.42738724738354], 
"eval_len": [105, 44, 369, 654, 491, 974, 394, 196, 1000, 95]}

 14%|█▍        | 139997/1000000 [1:35:30<7:27:09, 32.05it/s]global step 140000, trans_decision ep_re 563.2704799791395

{"global_step": 140000, "eval_re": [358.8036270083014, 775.4663060099658, 
977.7559557131457, 785.3175992045069, 29.75956831408215, 271.41261988243525, 
575.5000428059451, 887.2719593406492, 886.132839588394, 85.28428192396937], 
"eval_len": [302, 402, 504, 463, 37, 151, 305, 429, 434, 71]}

 15%|█▍        | 149997/1000000 [1:42:20<7:30:45, 31.43it/s]global step 150000, trans_decision ep_re 1519.8626836985504

{"global_step": 150000, "eval_re": [2077.961204273348, 1421.2817838921208, 
1904.1295113323092, 679.8424958446749, 2001.9805604186522, 1949.4802884603098, 
1853.3911465854633, 347.25920509242985, 2057.959032771277, 905.341608314918], 
"eval_len": [1000, 1000, 1000, 424, 1000, 1000, 1000, 196, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:49:40<7:22:24, 31.65it/s]global step 160000, trans_decision ep_re 1750.741248244201

{"global_step": 160000, "eval_re": [2195.143466403557, 2195.632864478702, 
2096.9072757500153, 1639.2997407619168, 2289.9511991773843, 1299.4507629610323, 
2322.1447153551776, 772.3815403438372, 430.8665976068998, 2265.6343196034877], 
"eval_len": [1000, 1000, 1000, 723, 1000, 611, 1000, 372, 229, 1000]}

 17%|█▋        | 169997/1000000 [1:56:50<7:15:12, 31.79it/s]global step 170000, trans_decision ep_re 1680.6797143505569

{"global_step": 170000, "eval_re": [2058.5639637997915, 1451.315418425, 
772.9845088284677, 2237.3182649552896, 2230.0787408702836, 1279.889219529574, 
316.6912835944661, 2151.5557700653326, 1981.5178261182418, 2326.882147319125], 
"eval_len": [1000, 666, 373, 1000, 1000, 628, 142, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:04:00<7:12:30, 31.60it/s]global step 180000, trans_decision ep_re 1821.1561812017067

{"global_step": 180000, "eval_re": [1978.9521453796924, 2116.367347515223, 
2103.941930934951, 1456.775733521417, 1752.4989412174525, 1193.6342916457315, 
1297.6025955160487, 2052.7974085950345, 2038.050732687497, 2220.9406850040164], 
"eval_len": [1000, 1000, 1000, 701, 943, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:11:20<7:06:54, 31.62it/s]global step 190000, trans_decision ep_re 1101.7979899835295

{"global_step": 190000, "eval_re": [491.34614443108353, 730.6881518123961, 
233.57629370180388, 306.2248737998735, 224.97987981436629, 2302.1693330293742, 
2521.2125568493693, 2273.1540846007842, 722.317215000727, 1212.3113667955183], 
"eval_len": [214, 360, 138, 175, 116, 1000, 1000, 1000, 352, 1000]}

 20%|█▉        | 199997/1000000 [2:18:30<7:00:39, 31.70it/s]global step 200000, trans_decision ep_re 1536.9346912149658

{"global_step": 200000, "eval_re": [1224.7469782002722, 320.0034996396627, 
1686.1867786279367, 2192.641400489487, 1013.6183185288759, 2326.082857042963, 
2092.440188090564, 2149.8939270618102, 147.49903236714476, 2216.233932100941], 
"eval_len": [589, 147, 1000, 1000, 451, 1000, 1000, 1000, 100, 1000]}

 21%|██        | 209997/1000000 [2:25:40<6:57:31, 31.53it/s]global step 210000, trans_decision ep_re 1432.9890209947096

{"global_step": 210000, "eval_re": [2230.472524346795, 221.88891408222926, 
2019.936108420185, 2250.366504044867, 1513.806260916924, 701.4596915131527, 
415.35254371317137, 1475.7245082702636, 1212.8355651284173, 2288.0475895110926],
"eval_len": [1000, 114, 862, 1000, 746, 330, 165, 717, 1000, 1000]}

 22%|██▏       | 219997/1000000 [2:32:50<6:47:31, 31.90it/s]global step 220000, trans_decision ep_re 1929.9863114929271

{"global_step": 220000, "eval_re": [1520.3221586997681, 2431.9179789229047, 
2343.9036768924657, 1997.5301662462184, 706.4909885360204, 1175.685414059246, 
2247.5398747556687, 2357.292924081618, 2402.2789687888053, 2116.9009639465603], 
"eval_len": [629, 1000, 1000, 817, 1000, 489, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [2:40:00<6:46:09, 31.60it/s]global step 230000, trans_decision ep_re 838.3817036513408

{"global_step": 230000, "eval_re": [474.90191620682145, 693.431025346167, 
287.4961436590146, 621.3723237504184, 716.7441469560862, 2174.7566601171043, 
1403.8711255478304, 1096.799068125776, 469.02034266271727, 445.42428414147287], 
"eval_len": [196, 323, 132, 271, 255, 1000, 563, 472, 206, 214]}

 24%|██▍       | 239997/1000000 [2:47:00<6:39:10, 31.73it/s]global step 240000, trans_decision ep_re 2127.3890543385537

{"global_step": 240000, "eval_re": [2321.2199169487512, 2344.8303166216697, 
2122.376892386839, 2283.8652185052215, 2091.178549636875, 1171.796780242625, 
2311.3476892564295, 2240.3880391067705, 2159.559597271992, 2227.3275434083625], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 603, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249997/1000000 [2:54:20<6:34:54, 31.65it/s]global step 250000, trans_decision ep_re 1809.8828849807155

{"global_step": 250000, "eval_re": [1049.7440257149237, 2488.6861918956915, 
2220.3443718651665, 2505.1311026284416, 2480.6862957000317, 1416.7168224385375, 
802.9910483323033, 459.0309173558204, 2451.7126391401966, 2223.7854347360435], 
"eval_len": [450, 1000, 910, 1000, 1000, 614, 354, 224, 1000, 1000]}

 26%|██▌       | 259997/1000000 [3:01:30<6:33:06, 31.37it/s]global step 260000, trans_decision ep_re 1879.9365489817114

{"global_step": 260000, "eval_re": [1873.7203229933345, 2190.310185970016, 
2095.7064853138745, 1935.918114388081, 1911.5284027782218, 1691.8938504875323, 
1956.9401922498066, 1189.0748873118246, 1857.8970139904209, 2096.3760343340036],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 595, 1000, 1000]}

 27%|██▋       | 269997/1000000 [3:08:50<6:24:12, 31.67it/s]global step 270000, trans_decision ep_re 1709.1732685239863

{"global_step": 270000, "eval_re": [1330.5834451822818, 1658.050497014667, 
1026.7839641809774, 860.2627450514043, 1945.7733605716503, 2266.222649591312, 
2323.019886328071, 2323.349580562626, 1203.4808784098193, 2154.205678347051], 
"eval_len": [616, 745, 1000, 1000, 1000, 1000, 1000, 1000, 457, 1000]}

 28%|██▊       | 279997/1000000 [3:16:10<6:17:14, 31.81it/s]global step 280000, trans_decision ep_re 1397.6953498870512

{"global_step": 280000, "eval_re": [845.6405489060473, 1073.6331239458389, 
2181.878448066871, 371.50867360134833, 2468.2067189692953, 599.2416750616438, 
2231.8082640010916, 2238.6545919386754, 102.53620747398215, 1863.8452469057167],
"eval_len": [421, 1000, 970, 146, 1000, 266, 1000, 1000, 52, 894]}

 29%|██▉       | 289997/1000000 [3:23:10<6:15:30, 31.51it/s]global step 290000, trans_decision ep_re 1944.9016656735748

{"global_step": 290000, "eval_re": [2313.0658566060806, 2214.203215345503, 
1001.3356195952608, 378.8214006610348, 2469.8286519071016, 2385.9908740186725, 
2356.998019147601, 2240.7028095536457, 1903.2556879128767, 2184.814521987974], 
"eval_len": [1000, 1000, 1000, 211, 1000, 1000, 1000, 1000, 876, 1000]}

 30%|██▉       | 299997/1000000 [3:30:30<6:07:21, 31.76it/s]global step 300000, trans_decision ep_re 1890.7895224252286

{"global_step": 300000, "eval_re": [303.9730551816323, 2321.573204231408, 
2275.7949094603537, 2396.103146203826, 2362.4215148487206, 1411.766233397097, 
970.1861592841526, 2210.6234475177325, 2195.5116209320167, 2459.941933195347], 
"eval_len": [121, 1000, 1000, 1000, 969, 607, 434, 970, 1000, 1000]}

 31%|███       | 309997/1000000 [3:37:40<6:04:21, 31.56it/s]global step 310000, trans_decision ep_re 2164.001176633276

{"global_step": 310000, "eval_re": [462.21493782491314, 2544.188136921604, 
2571.761355173517, 2626.440213587734, 2365.972952785351, 2208.5645676007416, 
1333.6405631833559, 2593.9023479550538, 2426.120316775395, 2507.206374525094], 
"eval_len": [217, 1000, 1000, 985, 1000, 1000, 575, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [3:45:00<5:58:34, 31.61it/s]global step 320000, trans_decision ep_re -120.02276663828067

{"global_step": 320000, "eval_re": [48.33455216781452, -681.2219321158333, 
444.85329116361254, 650.6773509788813, -690.4717503355548, -635.1722026645033, 
17.834282389869564, 25.64770854730398, 60.02390714674053, -440.73287366113794], 
"eval_len": [98, 1000, 1000, 1000, 1000, 1000, 90, 61, 58, 1000]}

 33%|███▎      | 329997/1000000 [3:52:10<5:51:28, 31.77it/s]global step 330000, trans_decision ep_re 1059.9995483664459

{"global_step": 330000, "eval_re": [2374.706336223845, 173.96268295265045, 
103.88427372930822, 2201.416361047298, 2328.6647791199707, 820.8526185616076, 
653.8149684559083, 182.10456200662398, 1656.722024248541, 103.86687731870698], 
"eval_len": [1000, 85, 60, 1000, 1000, 370, 1000, 105, 1000, 57]}

 34%|███▍      | 339997/1000000 [3:59:10<5:51:23, 31.30it/s]global step 340000, trans_decision ep_re 1491.5563319900618

{"global_step": 340000, "eval_re": [1362.4474432878426, 127.03013324683788, 
2366.050079580044, 871.5452315908161, 2520.5240990709303, 380.3523258736496, 
2034.5006644673865, 2480.1657337703405, 301.9606807637141, 2470.9869282490586], 
"eval_len": [613, 66, 1000, 376, 1000, 188, 827, 1000, 155, 1000]}

 35%|███▍      | 349997/1000000 [4:06:20<5:41:24, 31.73it/s]global step 350000, trans_decision ep_re 1364.4131045302229

{"global_step": 350000, "eval_re": [106.90094131299225, 702.2865752676937, 
471.0376428262068, 1578.6367678711629, 1784.44601321061, 2016.658142396866, 
2261.065080691504, 2641.094834373178, 1024.4310088461216, 1057.5740385058946], 
"eval_len": [68, 328, 255, 650, 816, 827, 1000, 1000, 402, 530]}

 36%|███▌      | 359997/1000000 [4:13:30<5:37:50, 31.57it/s]global step 360000, trans_decision ep_re 887.4552351703067

{"global_step": 360000, "eval_re": [177.1572878498302, 232.60678003017222, 
693.7928338600952, 283.3537815956799, 1271.1833989200416, 2034.1932850182575, 
212.30210681188467, 1526.2794276344307, 2253.9957743959767, 189.68767558669845],
"eval_len": [87, 105, 297, 126, 475, 860, 96, 1000, 1000, 89]}

 37%|███▋      | 369997/1000000 [4:20:30<5:30:00, 31.82it/s]global step 370000, trans_decision ep_re 1602.8013485039112

{"global_step": 370000, "eval_re": [1551.4494855720172, 2459.2142124244547, 
2632.6403508694366, 2486.8228456872075, 1088.0219108110227, 89.28783738950443, 
2329.3368034879873, 1088.6821898727922, 963.1088412692409, 1339.4490076554491], 
"eval_len": [1000, 1000, 1000, 1000, 416, 48, 1000, 436, 1000, 535]}

 38%|███▊      | 379997/1000000 [4:27:40<5:25:41, 31.73it/s]global step 380000, trans_decision ep_re 1849.9467570362722

{"global_step": 380000, "eval_re": [1714.8958239995832, 2034.8747289044302, 
2334.093772600508, 2134.13876881425, 2477.0975477018205, 268.3378725701107, 
793.2196011299648, 2267.5376639700703, 2223.5722138653127, 2251.6995768066713], 
"eval_len": [757, 769, 1000, 1000, 1000, 121, 392, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [4:34:50<5:20:26, 31.73it/s]global step 390000, trans_decision ep_re 1904.7638306519427

{"global_step": 390000, "eval_re": [1389.406899252186, 2352.5096008816763, 
2306.035493030972, 272.6594782874191, 1481.2382470705993, 2273.551010610595, 
2182.455188087795, 2311.7658823703864, 2326.83805449908, 2151.1784524287177], 
"eval_len": [621, 1000, 1000, 138, 1000, 1000, 970, 1000, 1000, 1000]}

 40%|███▉      | 399997/1000000 [4:42:10<5:15:03, 31.74it/s]global step 400000, trans_decision ep_re 1583.7452664430655

{"global_step": 400000, "eval_re": [661.7492674595454, 2160.769173221186, 
2095.0390839011466, 644.338326598036, 2346.753608159429, 941.7220658113339, 
2044.8965342608608, 2209.5005373121235, 742.5397969447058, 1990.1442707622882], 
"eval_len": [347, 1000, 1000, 1000, 1000, 1000, 1000, 994, 1000, 1000]}

 41%|████      | 409997/1000000 [4:49:20<5:08:18, 31.89it/s]global step 410000, trans_decision ep_re 1676.6580132405222

{"global_step": 410000, "eval_re": [2351.9502555610165, 2576.6891716753107, 
206.3814809262463, 1997.3701152493663, 676.2764856593222, 560.0873182302039, 
1377.447902414317, 2396.002577772448, 2348.555713092898, 2275.819111824094], 
"eval_len": [1000, 1000, 120, 887, 261, 238, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [4:56:30<5:02:27, 31.96it/s]global step 420000, trans_decision ep_re 1002.8829774482649

{"global_step": 420000, "eval_re": [1813.2040626235673, 180.655180496361, 
2451.962460378387, 882.0191041422221, 901.1113961260124, 965.9468287811069, 
523.8826046895931, 1099.010608144127, 1050.3674368636307, 160.67009223764057], 
"eval_len": [1000, 126, 1000, 389, 427, 1000, 231, 1000, 454, 81]}

 43%|████▎     | 429997/1000000 [5:03:40<4:54:58, 32.21it/s]global step 430000, trans_decision ep_re 1644.813821568206

{"global_step": 430000, "eval_re": [2181.836243757276, 1826.2742289956414, 
794.8804371855103, 2291.721684299329, 2062.0257389949247, 1233.4321120040001, 
2059.3856615279187, 266.4645172145345, 1455.9218468389652, 2276.195744863962], 
"eval_len": [1000, 1000, 378, 1000, 1000, 1000, 944, 122, 732, 1000]}

 44%|████▍     | 439997/1000000 [5:10:50<4:51:48, 31.99it/s]global step 440000, trans_decision ep_re 1970.1874209592702

{"global_step": 440000, "eval_re": [1831.1574709756721, 2432.6678841585854, 
2355.066195987159, 1001.8107151396595, 2348.8086754279448, 2387.4447642790537, 
2320.2758244195484, 1304.0482315685204, 1290.0477425503964, 2430.546705086163], 
"eval_len": [751, 1000, 1000, 399, 1000, 1000, 1000, 546, 541, 1000]}

 45%|████▍     | 449997/1000000 [5:18:00<4:46:57, 31.94it/s]global step 450000, trans_decision ep_re -528.9625342672264

{"global_step": 450000, "eval_re": [15.603307706356407, 12.877716072168251, 
-1134.0887836359666, -938.7535624514607, 72.67904728233847, -340.1437656280136, 
24.768431849134316, -893.5665495250172, -1149.3377780659562, -959.663406275847],
"eval_len": [23, 24, 1000, 1000, 59, 1000, 24, 1000, 1000, 1000]}

 46%|████▌     | 459997/1000000 [5:25:00<4:44:15, 31.66it/s]global step 460000, trans_decision ep_re 42.83284435006074

{"global_step": 460000, "eval_re": [14.391511457050623, 81.54613474537, 
58.72608774165974, 52.44554903608528, 123.28291000191942, -49.90027835734719, 
14.9768315819072, 73.31097733039724, 42.790102703837064, 16.75861725972804], 
"eval_len": [24, 90, 60, 49, 109, 1000, 25, 1000, 62, 49]}

 47%|████▋     | 469997/1000000 [5:32:00<4:38:15, 31.75it/s]global step 470000, trans_decision ep_re 1775.8177997791481

{"global_step": 470000, "eval_re": [1944.244711767982, 1233.9347006443556, 
1650.930818312274, 2052.007758303722, 2288.5760163762634, 1967.7838022277595, 
1714.2125323630025, 2240.7260050963077, 1986.5865008602534, 679.175151839563], 
"eval_len": [717, 500, 639, 839, 1000, 715, 783, 1000, 848, 290]}

 48%|████▊     | 479997/1000000 [5:39:10<4:28:51, 32.23it/s]global step 480000, trans_decision ep_re 804.2723103001072

{"global_step": 480000, "eval_re": [172.18480635844108, 435.533831924418, 
131.64405287819872, 1859.670538826605, 1007.4912723100682, 943.849175250633, 
21.744050650385862, 950.5450517650795, 1364.0245081960365, 1156.0358148412058], 
"eval_len": [124, 192, 81, 744, 371, 1000, 27, 446, 649, 495]}

 49%|████▉     | 489997/1000000 [5:46:00<4:24:49, 32.10it/s]global step 490000, trans_decision ep_re 1897.420663258983

{"global_step": 490000, "eval_re": [977.4242270675638, 66.86936489473992, 
1304.0492832295865, 2423.616264852411, 2279.8480304340433, 2415.1690525604668, 
2416.422591573273, 2717.3459446398047, 2157.6708977873213, 2215.7909755506203], 
"eval_len": [486, 1000, 582, 941, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499997/1000000 [5:53:20<4:17:04, 32.42it/s]global step 500000, trans_decision ep_re 1233.286912295529

{"global_step": 500000, "eval_re": [329.5282185650364, 1614.7329004781207, 
2440.2721035658205, 944.2466295578739, 2668.3066664224016, 1943.8907675991284, 
1487.6259916698457, 109.3667630353084, 342.5557869573612, 452.34329510439363], 
"eval_len": [158, 709, 1000, 404, 1000, 757, 602, 62, 160, 207]}

 51%|█████     | 509997/1000000 [6:00:20<4:15:45, 31.93it/s]global step 510000, trans_decision ep_re 1340.9245866772358

{"global_step": 510000, "eval_re": [2219.804674471473, 492.7844400725613, 
527.0276887790576, 945.0148360319989, 743.1281176667612, 675.5876846156956, 
2203.9923451550103, 2179.7958427753483, 1996.8004872799404, 1425.309749924511], 
"eval_len": [934, 203, 224, 379, 342, 345, 1000, 869, 855, 633]}

 52%|█████▏    | 519997/1000000 [6:07:20<4:11:43, 31.78it/s]global step 520000, trans_decision ep_re 1430.7943215701796

{"global_step": 520000, "eval_re": [2516.697226490689, 2326.0232673795185, 
1568.938222650566, 863.890971062514, 926.6498275464621, 704.9764646706182, 
2338.4211179955278, 640.041751552989, 1422.937241254959, 999.3671250979534], 
"eval_len": [1000, 1000, 1000, 390, 397, 292, 1000, 272, 580, 419]}

 53%|█████▎    | 529997/1000000 [6:14:30<4:06:32, 31.77it/s]global step 530000, trans_decision ep_re 1478.6089615255937

{"global_step": 530000, "eval_re": [2231.114711258765, 970.1798684505337, 
2454.441838185661, 835.0361741315877, 2221.7751039812065, 610.0404857005311, 
385.9191029260435, 1988.5747097718997, 1491.9918432894267, 1597.0157775602838], 
"eval_len": [1000, 1000, 1000, 303, 783, 259, 158, 1000, 1000, 612]}

 54%|█████▍    | 539999/1000000 [6:21:30<3:58:44, 32.11it/s]global step 540000, trans_decision ep_re 1784.0664685064025

{"global_step": 540000, "eval_re": [1389.5546525849015, 2372.664244453065, 
2378.960810389391, 1577.384181790623, 2291.752880195277, 1945.6342630074405, 
575.9417060899475, 1471.9112627711818, 2538.6140065930945, 1298.2466771891013], 
"eval_len": [593, 1000, 1000, 1000, 971, 785, 245, 1000, 995, 557]}

 55%|█████▍    | 549997/1000000 [6:28:40<3:52:02, 32.32it/s]global step 550000, trans_decision ep_re 1850.192883185548

{"global_step": 550000, "eval_re": [2391.1484156892666, 2409.5009064553883, 
2544.136417976388, 216.51702737106123, 1927.372838292532, 2491.890370579488, 
111.84374244105156, 2366.190254586911, 2508.744163266645, 1534.5846951967485], 
"eval_len": [1000, 1000, 1000, 103, 782, 1000, 72, 1000, 1000, 651]}

 56%|█████▌    | 559997/1000000 [6:35:50<3:49:28, 31.96it/s]global step 560000, trans_decision ep_re 1435.4384519222044

{"global_step": 560000, "eval_re": [915.6032028821965, 848.9005333191865, 
1738.4324153052776, 1776.7335154606776, 1309.0060120655787, 2288.912105837492, 
444.08540523269255, 2262.6966864302876, 1196.1560450503382, 1573.8585976383151],
"eval_len": [1000, 326, 731, 1000, 548, 981, 200, 1000, 453, 624]}

 57%|█████▋    | 569997/1000000 [6:43:00<3:44:49, 31.88it/s]global step 570000, trans_decision ep_re 1103.8245762086738

{"global_step": 570000, "eval_re": [925.5083143307062, 1747.154124238543, 
2121.722554634644, 609.374677114848, 1253.9420365984513, 490.180636961211, 
350.6514316641057, 1870.1310680642266, 997.3126146573693, 672.2683038226326], 
"eval_len": [1000, 728, 958, 268, 539, 196, 203, 792, 1000, 291]}

 58%|█████▊    | 579997/1000000 [6:50:00<3:34:36, 32.62it/s]global step 580000, trans_decision ep_re 1940.523481007809

{"global_step": 580000, "eval_re": [1067.382489745607, 2522.187356789942, 
2399.684333562188, 1861.9995592406815, 2640.8148231072623, 2691.9129727268955, 
2612.2858428817794, 1262.4447601205545, 174.33241493478945, 2172.1902569683925],
"eval_len": [434, 1000, 1000, 767, 1000, 1000, 1000, 1000, 80, 829]}

 59%|█████▉    | 589997/1000000 [6:57:10<3:33:55, 31.94it/s]global step 590000, trans_decision ep_re 1019.4849583008912

{"global_step": 590000, "eval_re": [912.4388346838479, 2497.2231195812737, 
1383.0227579257821, 1714.3791987205914, 272.09787871615157, 1059.3426278930135, 
288.25462287832255, 470.74348513990367, 1182.4046726194624, 414.9423848505627], 
"eval_len": [1000, 1000, 634, 721, 134, 466, 125, 207, 448, 168]}

 60%|█████▉    | 599999/1000000 [7:04:10<3:28:41, 31.95it/s]global step 600000, trans_decision ep_re 837.8637565352622

{"global_step": 600000, "eval_re": [1982.400923239869, 513.8678881464367, 
1057.3847792849401, 1829.7642044221545, 183.36349670598813, 114.21088247923244, 
1937.9185022462834, 139.55799389498344, 438.1821543696818, 181.98674056304952], 
"eval_len": [1000, 274, 555, 1000, 108, 77, 1000, 90, 220, 110]}

 61%|██████    | 609997/1000000 [7:11:10<3:25:43, 31.60it/s]global step 610000, trans_decision ep_re 2007.4074049807684

{"global_step": 610000, "eval_re": [487.75220543550796, 2646.935810276688, 
2550.6638648386242, 2699.0758379885165, 2398.2145330676753, 924.4302259497305, 
2695.4912412726053, 628.1582077304488, 2507.4994909473403, 2535.8526323005485], 
"eval_len": [235, 1000, 1000, 1000, 1000, 428, 1000, 256, 1000, 1000]}

 62%|██████▏   | 619997/1000000 [7:18:20<3:20:08, 31.64it/s]global step 620000, trans_decision ep_re 1971.9613836821923

{"global_step": 620000, "eval_re": [2472.4905814921285, 592.8425945386921, 
390.3763530322039, 2489.9114740298014, 1762.945305400962, 1961.2430052052894, 
2529.819085651622, 2478.809017616282, 2460.889160044498, 2580.2872598104395], 
"eval_len": [1000, 274, 162, 1000, 1000, 781, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [7:25:30<3:10:46, 32.33it/s]global step 630000, trans_decision ep_re 1924.720511648084

{"global_step": 630000, "eval_re": [2207.369398071124, 2255.6692265018987, 
2127.01050368989, 2174.978757842909, 2236.8370289749173, 2341.7449013055234, 
289.30969948381096, 1744.0598171367553, 2239.748205380322, 1630.477578093689], 
"eval_len": [1000, 975, 1000, 1000, 1000, 1000, 152, 863, 1000, 713]}

 64%|██████▍   | 639997/1000000 [7:32:40<3:08:09, 31.89it/s]global step 640000, trans_decision ep_re 1881.736540304272

{"global_step": 640000, "eval_re": [2148.526538102519, 2125.8100030843825, 
2008.237067768423, 2173.8259997401597, 2302.4841656731573, 2296.6718318113867, 
2275.586983979561, 1486.7378955546963, 1858.333243762431, 141.1516735660044], 
"eval_len": [1000, 1000, 892, 1000, 1000, 1000, 1000, 1000, 887, 72]}

 65%|██████▍   | 649997/1000000 [7:39:50<3:02:09, 32.02it/s]global step 650000, trans_decision ep_re 1722.049832196397

{"global_step": 650000, "eval_re": [1747.3348846899344, 1625.294684949462, 
2383.195014880613, 638.8347335069035, 322.7483507868975, 2471.4115742327167, 
2518.920467166647, 2143.6977368481225, 993.6727080387908, 2375.3881668638837], 
"eval_len": [1000, 1000, 1000, 260, 124, 1000, 1000, 1000, 446, 1000]}

 66%|██████▌   | 659997/1000000 [7:47:00<2:56:42, 32.07it/s]global step 660000, trans_decision ep_re 1756.7970203520622

{"global_step": 660000, "eval_re": [1612.4779920974934, 2432.3836860227666, 
1422.6789128157923, 2360.5890150612904, -251.81523345899214, 2006.7013762893964,
707.9189347974672, 2635.584571617996, 2149.2457144665045, 2492.2052338109083], 
"eval_len": [1000, 1000, 572, 913, 1000, 837, 285, 1000, 1000, 1000]}

 67%|██████▋   | 669997/1000000 [7:54:10<2:54:00, 31.61it/s]global step 670000, trans_decision ep_re 523.3915816092928

{"global_step": 670000, "eval_re": [147.0596469750083, 201.95503459522888, 
1501.6093225433774, 874.7858827053946, 877.6590780057643, 131.6758354731987, 
1336.150534965975, -1045.1551778265487, 928.5509035639091, 279.6247550916193], 
"eval_len": [69, 94, 699, 380, 323, 77, 573, 1000, 390, 133]}

 68%|██████▊   | 679997/1000000 [8:01:10<2:47:12, 31.90it/s]global step 680000, trans_decision ep_re 1776.052974437409

{"global_step": 680000, "eval_re": [1192.237542955749, 1901.2391546328668, 
652.8206181159709, 503.5872339035702, 1607.5205635400225, 2020.277908698293, 
2563.6416529168328, 2451.241650974369, 2398.093649417845, 2469.869769218571], 
"eval_len": [450, 788, 242, 192, 649, 868, 982, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [8:08:20<2:40:13, 32.25it/s]global step 690000, trans_decision ep_re 1445.194631020337

{"global_step": 690000, "eval_re": [2209.158061566491, 1579.8491817859313, 
2525.9794002906447, 2477.488650910357, 2444.333327979079, 1218.423796997098, 
956.8560259560307, 548.1971203557672, 300.75462622034, 190.90611814163313], 
"eval_len": [1000, 680, 1000, 1000, 1000, 532, 1000, 252, 155, 86]}

 70%|██████▉   | 699997/1000000 [8:15:20<2:35:45, 32.10it/s]global step 700000, trans_decision ep_re 1491.6605142468443

{"global_step": 700000, "eval_re": [2362.0532438395458, 1228.5399611892783, 
2284.4990391517726, 132.83190962155743, 1158.520974651579, 2435.458934544611, 
2157.990153468481, 2372.778785448728, 360.08417494132374, 423.8479656115665], 
"eval_len": [1000, 604, 1000, 70, 1000, 1000, 1000, 1000, 174, 224]}

 71%|███████   | 709997/1000000 [8:22:20<2:30:47, 32.05it/s]global step 710000, trans_decision ep_re 1376.5910635043715

{"global_step": 710000, "eval_re": [1461.0706659149773, 1498.4327999177515, 
2453.79713205456, 1734.2416035140272, 194.74936069994615, 2413.387838017558, 
2474.3812621472525, 233.9634091779541, 507.64920835576294, 794.237355243927], 
"eval_len": [636, 1000, 1000, 721, 94, 1000, 1000, 125, 213, 1000]}

 72%|███████▏  | 719997/1000000 [8:29:30<2:25:45, 32.02it/s]global step 720000, trans_decision ep_re 1712.7767627449418

{"global_step": 720000, "eval_re": [2456.6954249344694, 870.0139471650047, 
140.89809058562903, 764.5959313866397, 2173.2596205862696, 2542.3778691324424, 
1461.370499883306, 2548.805529175297, 1534.9770829044164, 2634.7736316959463], 
"eval_len": [1000, 321, 74, 321, 865, 1000, 630, 935, 529, 1000]}

 73%|███████▎  | 729997/1000000 [8:36:30<2:20:00, 32.14it/s]global step 730000, trans_decision ep_re 2107.338612465546

{"global_step": 730000, "eval_re": [926.0849996659417, 811.7553965140257, 
2558.3562831482704, 2565.0740334512275, 2506.145573967602, 2574.5718567616054, 
2493.8433234351, 2691.933825703631, 2494.9319681081497, 1450.688863899906], 
"eval_len": [342, 329, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 590]}

 74%|███████▍  | 739997/1000000 [8:43:50<2:16:26, 31.76it/s]global step 740000, trans_decision ep_re 1467.6431717948085

{"global_step": 740000, "eval_re": [1709.921571025343, 33.25785083998582, 
912.3385333184423, 1715.9755355909508, 1810.3616978198868, 162.68135433956994, 
2556.8430131377613, 2716.428422634427, 597.5040495387618, 2461.1196897029545], 
"eval_len": [684, 33, 1000, 718, 1000, 72, 1000, 1000, 213, 1000]}

 75%|███████▍  | 749997/1000000 [8:50:50<2:09:10, 32.26it/s]global step 750000, trans_decision ep_re 1666.3853530945526

{"global_step": 750000, "eval_re": [1185.2580979810336, 1179.6995025340327, 
1810.7669875798747, 1943.325263868353, 2650.1356605478295, 1506.5735962243398, 
1755.514419789606, 2340.175449615529, 864.938767678729, 1427.465785126198], 
"eval_len": [457, 437, 659, 764, 1000, 596, 684, 945, 358, 616]}

 76%|███████▌  | 759997/1000000 [8:57:50<2:01:41, 32.87it/s]global step 760000, trans_decision ep_re 1564.6900398010525

{"global_step": 760000, "eval_re": [1282.035892028512, 611.6858518013195, 
2162.760441814847, 2409.013763951592, 2439.3620849505014, 921.4972692005489, 
1798.4803111501528, 894.6089223498327, 836.0085122788229, 2291.4473484843993], 
"eval_len": [618, 270, 1000, 1000, 1000, 383, 668, 337, 368, 1000]}

 77%|███████▋  | 769997/1000000 [9:04:50<1:58:02, 32.47it/s]global step 770000, trans_decision ep_re 1966.176817583992

{"global_step": 770000, "eval_re": [2351.280053432997, 2624.4138322236263, 
998.2169882065353, 2200.9119725102773, 2467.308909393824, 2477.3910687447265, 
1830.4053442123284, 355.39807092598335, 2397.7799244121506, 1958.6620117774712],
"eval_len": [1000, 1000, 381, 1000, 1000, 1000, 1000, 162, 1000, 771]}

 78%|███████▊  | 779997/1000000 [9:12:00<1:54:29, 32.02it/s]global step 780000, trans_decision ep_re 1262.7768928328294

{"global_step": 780000, "eval_re": [731.2374245288405, 1912.9440681233818, 
357.49870920749254, 849.3279488526804, 1938.3579774739849, 180.97409133856104, 
2469.241512227604, 2630.76691825561, 1335.908604990056, 221.51167333008257], 
"eval_len": [271, 1000, 162, 371, 798, 81, 1000, 1000, 589, 110]}

 79%|███████▉  | 789997/1000000 [9:19:00<1:49:37, 31.93it/s]global step 790000, trans_decision ep_re 1382.4148164723777

{"global_step": 790000, "eval_re": [1788.594384546163, 966.0192454323873, 
2301.0992232068083, 661.2707327689208, 2322.7052022579883, 131.6391319994772, 
566.5756830392127, 579.4933252179608, 2150.7627296692813, 2355.988506585576], 
"eval_len": [1000, 426, 1000, 267, 1000, 72, 270, 289, 1000, 1000]}

 80%|███████▉  | 799997/1000000 [9:26:10<1:44:57, 31.76it/s]global step 800000, trans_decision ep_re 1569.621886983054

{"global_step": 800000, "eval_re": [2462.914833554412, 848.3195546663917, 
2262.6532901609394, 2365.7985969493275, 2425.9536008409277, 756.3146021756152, 
1861.0598448771914, 1075.6947083280834, 474.5636385133534, 1162.9461997642995], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 336, 787, 433, 190, 1000]}

 81%|████████  | 809997/1000000 [9:33:10<1:39:06, 31.95it/s]global step 810000, trans_decision ep_re 1528.6387130017984

{"global_step": 810000, "eval_re": [1285.7941742666735, 2362.5437607579884, 
995.2798300897219, 895.0460860624383, 1138.2931692827067, 1052.7126581325897, 
2317.823735090557, 350.6962845260276, 2303.6073517438176, 2584.590080065464], 
"eval_len": [520, 1000, 414, 1000, 1000, 1000, 1000, 172, 1000, 1000]}

 82%|████████▏ | 819997/1000000 [9:40:20<1:32:32, 32.42it/s]global step 820000, trans_decision ep_re 1546.7566002458732

{"global_step": 820000, "eval_re": [610.1522685856678, 2448.3770562729883, 
510.55602228585366, 1040.7548695504722, 748.4428760482238, 1238.8723999891643, 
2321.248249369106, 1551.1019968705023, 2584.6287827585343, 2413.431480728221], 
"eval_len": [248, 1000, 269, 430, 301, 532, 1000, 692, 1000, 1000]}

 83%|████████▎ | 829997/1000000 [9:47:20<1:26:16, 32.84it/s]global step 830000, trans_decision ep_re 1611.7977509047282

{"global_step": 830000, "eval_re": [1760.9289644304965, 688.5882062961169, 
1008.4859727062013, 925.6481276359133, 2268.249286292636, 2372.9264537892013, 
1049.100801448632, 2351.579470495457, 2078.0601392276185, 1614.410086725008], 
"eval_len": [1000, 303, 1000, 415, 1000, 1000, 444, 1000, 1000, 675]}

 84%|████████▍ | 839997/1000000 [9:54:20<1:21:29, 32.72it/s]global step 840000, trans_decision ep_re 1281.8834847062346

{"global_step": 840000, "eval_re": [2167.4303785756806, 31.163112543021047, 
2443.299347387994, 908.7791381201808, 240.61716381696075, 1648.9800315376642, 
251.72566071861849, 592.5282433765021, 2436.3004644700522, 2098.011306515671], 
"eval_len": [956, 33, 1000, 1000, 96, 681, 156, 225, 1000, 1000]}

 85%|████████▍ | 849997/1000000 [10:01:20<1:16:00, 32.89it/s]global step 850000, trans_decision ep_re 1674.9610549825943

{"global_step": 850000, "eval_re": [2304.329369397737, 750.1405840694844, 
1713.3524175116818, 1091.0167279636669, 2323.772010281342, 2254.156947188029, 
1702.45274778214, 1788.5855888307303, 425.87433386872874, 2395.929822932405], 
"eval_len": [1000, 301, 1000, 501, 1000, 1000, 721, 857, 229, 1000]}

 86%|████████▌ | 859997/1000000 [10:08:20<1:11:51, 32.47it/s]global step 860000, trans_decision ep_re 1372.3604803854248

{"global_step": 860000, "eval_re": [507.086053529229, 2170.8306590866146, 
1009.5985009220932, 2377.335697431733, 2407.664753141859, 774.2288837863874, 
946.810719337318, 281.08534699949263, 2415.947577933146, 833.016611686376], 
"eval_len": [219, 1000, 399, 1000, 1000, 306, 444, 1000, 1000, 353]}

 87%|████████▋ | 869997/1000000 [10:15:10<1:06:30, 32.58it/s]global step 870000, trans_decision ep_re 1439.1945176164204

{"global_step": 870000, "eval_re": [441.42007424823714, 266.886481703291, 
2466.7831671050226, 2335.860655078766, 1917.7705431625632, 2811.418199072305, 
1275.2515744611785, 176.78076110691038, 312.2901949965744, 2387.483525229354], 
"eval_len": [207, 104, 1000, 1000, 656, 1000, 531, 95, 128, 1000]}

 88%|████████▊ | 879997/1000000 [10:22:10<1:00:56, 32.82it/s]global step 880000, trans_decision ep_re 1534.2517540905142

{"global_step": 880000, "eval_re": [2712.5038411268456, 701.9189216472598, 
1264.2385729723965, 1721.495271934625, 880.9221976043342, 2618.3907374312184, 
2180.9217886863016, 1483.5473813426922, 688.7491921011047, 1089.8296360583618], 
"eval_len": [1000, 295, 524, 1000, 356, 1000, 1000, 672, 258, 1000]}

 89%|████████▉ | 889997/1000000 [10:29:10<55:47, 32.86it/s]global step 890000, trans_decision ep_re 1244.7745656884692

{"global_step": 890000, "eval_re": [2429.8112540515467, 843.989210052448, 
2179.388657982929, 959.9083042088168, 885.1423390437059, 2656.6536775292316, 
61.74992736724313, 1376.742566087782, 956.2154147245315, 98.144305836458], 
"eval_len": [1000, 302, 1000, 1000, 333, 1000, 42, 550, 367, 52]}

 90%|████████▉ | 899997/1000000 [10:36:00<50:36, 32.93it/s]global step 900000, trans_decision ep_re 1167.8219921178638

{"global_step": 900000, "eval_re": [702.3126258144026, 2471.56624360936, 
102.75226442156881, 1492.8120285027808, 1309.3289912794403, 496.63126842416625, 
587.5352568665589, 2666.7736959967197, 1471.9911720774048, 376.51637418623676], 
"eval_len": [295, 1000, 63, 669, 550, 196, 239, 1000, 652, 130]}

 91%|█████████ | 909997/1000000 [10:42:50<45:40, 32.85it/s]global step 910000, trans_decision ep_re 732.5613799451484

{"global_step": 910000, "eval_re": [47.84676633527091, 1361.9040110612723, 
125.68653026403614, 609.3086757969194, 563.4118533914431, 845.943342372081, 
733.7285433121968, 1663.8177932275125, 475.6683281140327, 898.2979555767182], 
"eval_len": [39, 1000, 58, 256, 242, 349, 331, 622, 201, 320]}

 92%|█████████▏| 919997/1000000 [10:49:40<40:58, 32.55it/s]global step 920000, trans_decision ep_re 1360.7970071123868

{"global_step": 920000, "eval_re": [1524.698219862594, 1519.672578576613, 
2523.1563150840693, 756.7480950246188, 689.157395713421, 88.69293551373771, 
929.6687834124375, 2641.9140224437747, 674.0099647762408, 2260.251760716364], 
"eval_len": [531, 592, 1000, 292, 278, 61, 394, 1000, 326, 1000]}

 93%|█████████▎| 929997/1000000 [10:56:30<35:21, 33.00it/s]global step 930000, trans_decision ep_re 1160.1135187517366

{"global_step": 930000, "eval_re": [149.3848407201902, 1450.3761748620577, 
1635.1119692735365, 555.3407223622422, 2078.696174262267, 1857.7721036557184, 
744.7147209545152, 98.67739427105661, 789.6912340413897, 2241.369853114393], 
"eval_len": [74, 664, 760, 270, 1000, 753, 347, 58, 1000, 939]}

 94%|█████████▍| 939997/1000000 [11:03:30<31:37, 31.62it/s]global step 940000, trans_decision ep_re 2193.719001729336

{"global_step": 940000, "eval_re": [2051.9529902867425, 1736.178022088351, 
2390.7502954012684, 2313.8331362174627, 2400.544318188909, 2579.9452172082338, 
1060.0661388824788, 2358.868760917125, 2563.2768431061563, 2481.774294996636], 
"eval_len": [835, 1000, 984, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949997/1000000 [11:10:40<25:30, 32.67it/s]global step 950000, trans_decision ep_re 947.466975154474

{"global_step": 950000, "eval_re": [2212.103533792875, 381.34064399761473, 
599.3750709235358, 1198.2503494427208, 989.6713541674255, 1192.2521727583662, 
382.9578456999548, 1191.4286393010582, 171.22470992905707, 1156.0654315321317], 
"eval_len": [1000, 204, 273, 499, 1000, 1000, 180, 493, 97, 1000]}

 96%|█████████▌| 959997/1000000 [11:17:30<20:29, 32.54it/s]global step 960000, trans_decision ep_re 1539.0691004010764

{"global_step": 960000, "eval_re": [941.678984500663, 76.41545095219094, 
1424.572327063048, 657.9029269062139, 186.50481631955267, 2406.8958475202926, 
2555.119639460514, 2602.955783968062, 2031.7832373744523, 2506.861989945775], 
"eval_len": [376, 48, 565, 298, 100, 929, 1000, 1000, 751, 1000]}

 97%|█████████▋| 969997/1000000 [11:24:30<15:36, 32.05it/s]global step 970000, trans_decision ep_re 1197.6234514973105

{"global_step": 970000, "eval_re": [1166.4319640545048, 1502.6371555562557, 
281.70384741740924, 261.35374095740485, 1451.0657483531143, 1044.7334673332089, 
1391.158082472809, 19.750724508840698, 2462.4620062946974, 2394.937778024859], 
"eval_len": [437, 752, 167, 1000, 512, 436, 590, 23, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [11:31:30<10:27, 31.90it/s]global step 980000, trans_decision ep_re -410.9050295613589

{"global_step": 980000, "eval_re": [-1016.7791984717967, -490.7960040400782, 
-813.3262135781239, -3.8738600107981926, 3.1364572315763777, 4.945168050389436, 
-724.0868506704869, -17.607952140177105, -1069.0110492539561, 
18.349207269861672], "eval_len": [1000, 1000, 1000, 35, 34, 29, 1000, 211, 1000,
1000]}

 99%|█████████▉| 989997/1000000 [11:38:40<05:11, 32.12it/s]global step 990000, trans_decision ep_re 1042.3817736296837

{"global_step": 990000, "eval_re": [1574.6811812952076, 276.02583694290814, 
131.23918497028689, 1469.1394034142243, 825.1663462318184, 1287.1148820621781, 
885.7621864691828, 2239.314512761991, 810.9232397865442, 924.450962362494], 
"eval_len": [729, 125, 63, 1000, 350, 526, 353, 1000, 1000, 1000]}

100%|█████████▉| 999997/1000000 [11:45:40<00:00, 32.62it/s]global step 1000000, trans_decision ep_re 1042.3430836759455

{"global_step": 1000000, "eval_re": [1648.8737120318906, 659.2049110284717, 
861.1756767055576, 1788.5334381528887, 2146.788467800407, 543.2795681511823, 
1716.118878868005, 213.26364744223577, 607.9662250350167, 238.22631154379914], 
"eval_len": [1000, 308, 1000, 1000, 1000, 246, 1000, 97, 311, 139]}

100%|██████████| 1000000/1000000 [11:45:51<00:00, 23.61it/s]
