
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'ExtremeSparseL4U32::markov(4, 32, [[249, 1], [1, 31]])'
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9996/1000000 [03:30<7:51:53, 34.97it/s]global step 10000, trans_decision ep_re 691.0774828251567

{"global_step": 10000, "eval_re": [694.7998938571726, 691.4783449696456, 
695.4084120625054, 706.8573410232847, 678.5178518578393, 693.9357456070286, 
693.7402008847963, 681.3790915529904, 684.0719160118042, 690.5860304245006], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19996/1000000 [10:40<7:47:37, 34.93it/s]global step 20000, trans_decision ep_re 662.9799958359561

{"global_step": 20000, "eval_re": [783.6593295421259, 354.60545346887636, 
37.561474843969414, 879.8640089409703, 764.2948657461711, 891.1639964792221, 
813.9573404828939, 822.1885341397335, 284.940661755252, 997.5642929603453], 
"eval_len": [1000, 442, 40, 1000, 1000, 1000, 1000, 1000, 322, 1000]}

  3%|▎         | 29996/1000000 [17:30<7:41:42, 35.01it/s]global step 30000, trans_decision ep_re 791.4666871670231

{"global_step": 30000, "eval_re": [120.39563833279297, 1034.8659973400343, 
689.8752332534232, 1133.3220616262124, 834.9235359038588, 1060.466128463485, 
800.87942888776, 1098.3367072435133, 1032.3246272311721, 109.27751338797908], 
"eval_len": [104, 1000, 658, 1000, 1000, 1000, 1000, 1000, 1000, 110]}

  4%|▍         | 39996/1000000 [24:30<7:32:44, 35.34it/s]global step 40000, trans_decision ep_re 1104.9982164630364

{"global_step": 40000, "eval_re": [1145.3246511582975, 1102.2842065014086, 
1055.6433879529704, 1151.6482698598845, 1104.3163426846356, 1126.8086244439257, 
1192.8631974472376, 911.9446745133915, 1129.0493845696353, 1130.0994254989778], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49996/1000000 [31:30<7:28:23, 35.31it/s]global step 50000, trans_decision ep_re 1014.9649171529654

{"global_step": 50000, "eval_re": [872.7801751017566, 900.0963179862208, 
1010.0815901616384, 1020.5108479440379, 1052.5791499581644, 1068.8549779199736, 
1111.70920245395, 1067.3916488198338, 1053.2281268291954, 992.417134354884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [38:20<7:24:41, 35.23it/s]global step 60000, trans_decision ep_re 732.6397821117127

{"global_step": 60000, "eval_re": [498.6832501127321, 645.6222512103876, 
1032.3204098418057, 1230.0117699461819, 383.3339849715572, 729.8361320783345, 
662.4476837689054, 899.4107587468134, 753.153649793361, 491.5779306470485], 
"eval_len": [380, 557, 867, 1000, 273, 547, 568, 1000, 619, 420]}

  7%|▋         | 69998/1000000 [45:10<7:17:15, 35.45it/s]global step 70000, trans_decision ep_re 703.1071883995052

{"global_step": 70000, "eval_re": [940.2502719896656, 566.499883157053, 
110.9248549126709, 1363.6694408264227, 1071.6550559046527, 1213.386004745813, 
998.6568590213087, 303.92262031280933, 242.58064464836025, 219.52624847629502], 
"eval_len": [740, 632, 90, 1000, 1000, 1000, 1000, 224, 221, 184]}

  8%|▊         | 79996/1000000 [52:00<7:22:51, 34.62it/s]global step 80000, trans_decision ep_re 1105.2132402509121

{"global_step": 80000, "eval_re": [1248.7949686615798, 140.70734948350838, 
1265.538100678735, 1189.3538951739706, 1100.3103503030343, 1191.05850296917, 
1203.1892237879636, 1167.5341769763506, 1215.9793852683988, 1329.6664492064106],
"eval_len": [1000, 142, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89996/1000000 [58:50<7:10:56, 35.19it/s]global step 90000, trans_decision ep_re 1319.1041151040135

{"global_step": 90000, "eval_re": [1075.2422206300737, 1461.3698966807153, 
1286.9933014151234, 1498.966118168536, 1407.7607704205725, 1427.0478520937913, 
903.9699390656078, 1346.0388195543724, 1386.6233112849227, 1397.0289217264208], 
"eval_len": [726, 1000, 1000, 1000, 1000, 1000, 596, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:06:01<7:06:05, 35.20it/s]global step 100000, trans_decision ep_re 489.7709612135326

{"global_step": 100000, "eval_re": [405.5892350055186, 90.52216511607818, 
66.97137198488348, 205.4649448154597, 101.49700638106825, 1546.7685304277284, 
261.66559888584464, 514.1146932523408, 373.017266886819, 1332.0987993795852], 
"eval_len": [237, 65, 66, 155, 71, 967, 224, 405, 269, 955]}

 11%|█         | 109996/1000000 [1:12:41<7:03:58, 34.99it/s]global step 110000, trans_decision ep_re 558.3793676823724

{"global_step": 110000, "eval_re": [613.9089265419324, 407.3376882005048, 
213.67050935095097, 1076.8239647487721, 186.14190780342653, 1301.547087531402, 
879.6541273937033, 193.01743700276225, 456.5324498185391, 255.15957843173038], 
"eval_len": [418, 244, 159, 803, 153, 956, 1000, 136, 339, 188]}

 12%|█▏        | 119996/1000000 [1:19:21<6:54:01, 35.42it/s]global step 120000, trans_decision ep_re 456.99901433519506

{"global_step": 120000, "eval_re": [194.4451644242843, 186.19138834649704, 
63.21281774201418, 840.8052626055543, 46.92265534604623, 417.2773194382942, 
683.1881656963254, 662.0692589959817, 634.32728783726, 841.5508229196929], 
"eval_len": [192, 132, 57, 650, 58, 427, 434, 1000, 422, 517]}

 13%|█▎        | 129996/1000000 [1:26:01<6:50:50, 35.29it/s]global step 130000, trans_decision ep_re 1425.6691335287228

{"global_step": 130000, "eval_re": [1410.0108230791623, 1675.7678635796842, 
1322.5971760335965, 1648.984129797073, 1446.1151572533615, 1562.1620275141297, 
1690.8623986253046, 1055.1979051407936, 1627.8389877934183, 817.1548664707018], 
"eval_len": [1000, 1000, 959, 1000, 974, 1000, 1000, 705, 982, 464]}

 14%|█▍        | 139996/1000000 [1:32:51<6:45:10, 35.38it/s]global step 140000, trans_decision ep_re 1522.6050003530636

{"global_step": 140000, "eval_re": [602.5497353557886, 1696.164580521344, 
1446.722972188092, 1622.257764049266, 1651.3154310975415, 1737.0075649605917, 
1525.6462999126209, 1745.6294500438094, 1564.421697316048, 1634.3345080855368], 
"eval_len": [378, 1000, 848, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149996/1000000 [1:39:51<6:41:17, 35.30it/s]global step 150000, trans_decision ep_re 1158.605121730817

{"global_step": 150000, "eval_re": [1479.6550637264918, 1259.8938869256958, 
1840.5324343113841, 1292.5393376600887, 1586.3561022540928, 1566.3635037875279, 
1680.510886227851, 103.07080305011563, 135.98371795127176, 641.1454814136489], 
"eval_len": [1000, 678, 1000, 845, 1000, 1000, 1000, 69, 100, 360]}

 16%|█▌        | 159996/1000000 [1:46:51<6:43:46, 34.67it/s]global step 160000, trans_decision ep_re 1066.2817545587236

{"global_step": 160000, "eval_re": [786.7110968488438, 697.6438110857816, 
1574.90405979944, 2158.9435719119638, 44.31915787203229, 829.3527237947096, 
1467.2334419671568, 1123.72836299009, 1782.6923216352443, 197.2889976819726], 
"eval_len": [370, 1000, 1000, 1000, 47, 470, 754, 610, 847, 117]}

 17%|█▋        | 169996/1000000 [1:53:31<6:35:39, 34.96it/s]global step 170000, trans_decision ep_re 1676.595357947878

{"global_step": 170000, "eval_re": [1669.1609753698604, 1850.8068525685119, 
337.5541386795747, 1817.716238320863, 1956.7604490144597, 1908.9983374976234, 
1729.2030340474896, 1779.0275264460092, 1826.7141577154202, 1890.011869818966], 
"eval_len": [1000, 1000, 197, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179996/1000000 [2:00:32<6:28:37, 35.17it/s]global step 180000, trans_decision ep_re 1425.0831963258943

{"global_step": 180000, "eval_re": [1427.0834313342652, 1828.193033014695, 
852.9773950126901, 1267.2360372176065, 1976.8645244203067, 759.7258389816936, 
1659.2910883524914, 678.2326681197322, 1798.8356450038298, 2002.392301801632], 
"eval_len": [761, 1000, 548, 598, 1000, 475, 809, 323, 1000, 1000]}

 19%|█▉        | 189996/1000000 [2:07:42<6:28:55, 34.71it/s]global step 190000, trans_decision ep_re 1549.813424022965

{"global_step": 190000, "eval_re": [1832.2334969912577, 1184.865811706337, 
1195.599068793398, 1413.8187766916185, 2036.7371491118226, 1789.111628656097, 
1808.078441567947, 1516.2932729100157, 628.6647339889057, 2092.7318598122497], 
"eval_len": [1000, 1000, 641, 722, 1000, 1000, 1000, 1000, 350, 1000]}

 20%|█▉        | 199996/1000000 [2:14:42<6:17:53, 35.28it/s]global step 200000, trans_decision ep_re 1604.4843967299662

{"global_step": 200000, "eval_re": [2034.3058799624985, 2089.944196914316, 
1954.884994640947, 678.978125277774, 2081.4255471156816, 203.49182655631682, 
2120.218100404831, 2196.9258268501158, 1949.9760368449859, 734.6934327321962], 
"eval_len": [1000, 1000, 1000, 354, 1000, 113, 1000, 1000, 1000, 379]}

 21%|██        | 209996/1000000 [2:21:33<6:14:14, 35.18it/s]global step 210000, trans_decision ep_re 2056.2011453964947

{"global_step": 210000, "eval_re": [2053.8170905373004, 1998.5150995441488, 
2048.7192414399274, 2069.22037759448, 2200.3464284232587, 2143.9328623280035, 
1972.9522290765701, 1950.2565384989307, 1947.0437013132475, 2177.2078852090813],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219996/1000000 [2:28:33<6:15:45, 34.60it/s]global step 220000, trans_decision ep_re 1961.1694014378932

{"global_step": 220000, "eval_re": [2045.6286941786675, 2086.4470880176445, 
2106.517342582521, 2092.4992373751556, 2065.5427380209244, 2133.4736623827466, 
1531.7283352356944, 1986.0547590084004, 2023.2586842697715, 1540.5434733074067],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 721, 1000, 1000, 694]}

 23%|██▎       | 229996/1000000 [2:35:23<6:06:25, 35.02it/s]global step 230000, trans_decision ep_re 1650.4329730647355

{"global_step": 230000, "eval_re": [1759.2780856418958, 1914.414982686314, 
1865.1167418118248, 1798.7576589319494, 443.69289324292885, 695.5083160807467, 
2019.5212773296446, 2087.004159525342, 2039.7544812150286, 1881.2811341816791], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 414, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239996/1000000 [2:42:13<6:05:25, 34.66it/s]global step 240000, trans_decision ep_re 1825.9102072954659

{"global_step": 240000, "eval_re": [2149.5528166004124, 1283.124584720134, 
2119.6880481496673, 1443.701389273683, 2255.3303693087305, 1410.9884058846205, 
2228.1440591647874, 1143.2345602732155, 2224.7144656714963, 2000.6233739079096],
"eval_len": [1000, 607, 1000, 595, 1000, 758, 1000, 1000, 1000, 880]}

 25%|██▍       | 249996/1000000 [2:49:13<5:52:28, 35.46it/s]global step 250000, trans_decision ep_re 1666.9437635170264

{"global_step": 250000, "eval_re": [1964.3692292501628, 2045.8210530279846, 
2014.4623653652272, 2069.010232218045, 1879.7719469476162, 714.8988908551619, 
1439.3658039348254, 571.232712846915, 2021.9748552789904, 1948.5305454453364], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 378, 759, 386, 1000, 1000]}

 26%|██▌       | 259996/1000000 [2:56:13<5:49:56, 35.24it/s]global step 260000, trans_decision ep_re 1207.3757795922436

{"global_step": 260000, "eval_re": [684.9433558835261, 1938.385948966602, 
169.66315853339106, 1970.7011385225496, 1720.7117684330262, 841.6117051389808, 
910.1715361446762, 1730.663865994061, 1269.1223260339243, 837.7829922716994], 
"eval_len": [360, 1000, 90, 1000, 1000, 404, 475, 1000, 647, 465]}

 27%|██▋       | 269996/1000000 [3:02:53<5:43:44, 35.40it/s]global step 270000, trans_decision ep_re 1700.5984477224924

{"global_step": 270000, "eval_re": [933.0999430095175, 1763.590462327468, 
1918.3080618605738, 1982.5915510886648, 1859.4496443593132, 2060.5472040184127, 
2121.1933947155007, 1191.1648981992419, 1995.0583554959571, 1180.9809621502745],
"eval_len": [491, 1000, 1000, 1000, 1000, 1000, 1000, 571, 1000, 599]}

 28%|██▊       | 279996/1000000 [3:09:53<5:38:01, 35.50it/s]global step 280000, trans_decision ep_re 1376.0255290573166

{"global_step": 280000, "eval_re": [770.8862754018646, 267.6479073597686, 
1769.2752728509793, 1787.1313796368427, 2050.153406902006, 1059.7780156076128, 
2002.6338951488024, 1800.1846007544427, 254.32771442336556, 1998.236822487483], 
"eval_len": [386, 211, 907, 1000, 1000, 570, 1000, 1000, 130, 1000]}

 29%|██▉       | 289996/1000000 [3:16:43<5:35:57, 35.22it/s]global step 290000, trans_decision ep_re 1977.6960833988942

{"global_step": 290000, "eval_re": [1885.2545495960173, 1835.5129026503953, 
1883.2653542777482, 1926.218575503191, 2062.7292507269763, 1987.3152427957598, 
1964.9463721360112, 2025.3052634902842, 2170.254861694718, 2036.1584611178391], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299996/1000000 [3:23:43<5:30:32, 35.30it/s]global step 300000, trans_decision ep_re 1646.7578827008608

{"global_step": 300000, "eval_re": [2135.2079431127054, 1969.4747664916245, 
1800.2663041793712, 171.86807871187713, 1884.1956618878066, 2137.861825315692, 
1927.593857724712, 462.49776697774513, 2000.0661249269267, 1978.546497680147], 
"eval_len": [1000, 1000, 1000, 112, 1000, 1000, 890, 234, 1000, 1000]}

 31%|███       | 309996/1000000 [3:30:33<5:25:05, 35.38it/s]global step 310000, trans_decision ep_re 1924.080104605447

{"global_step": 310000, "eval_re": [621.2463250431838, 2429.16988182634, 
2161.9762786690026, 2257.7681932134856, 2240.203734435561, 2236.354352474265, 
2155.1991344365647, 1994.3209294060252, 2341.3569765649563, 803.2052399850874], 
"eval_len": [244, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 384]}

 32%|███▏      | 319996/1000000 [3:37:23<5:20:24, 35.37it/s]global step 320000, trans_decision ep_re 1217.571005625116

{"global_step": 320000, "eval_re": [372.04799224397357, 307.54521101333756, 
314.69100662911785, 2045.9712338116597, 1041.0558314561536, 1233.0424304266683, 
1508.8698164214786, 2213.689804844666, 2140.1264900574347, 998.6702393466679], 
"eval_len": [178, 166, 141, 1000, 532, 518, 618, 965, 1000, 448]}

 33%|███▎      | 329996/1000000 [3:44:13<5:16:53, 35.24it/s]global step 330000, trans_decision ep_re 1271.5281286873883

{"global_step": 330000, "eval_re": [713.1822392296436, 2284.8192625310007, 
2124.4298114325825, 1153.7841776235018, 83.2746938409001, 1553.9826919737397, 
221.79188958671904, 873.8300888257827, 1649.0191505387427, 2057.167281291271], 
"eval_len": [425, 1000, 1000, 1000, 87, 684, 120, 370, 801, 1000]}

 34%|███▍      | 339996/1000000 [3:50:53<5:10:54, 35.38it/s]global step 340000, trans_decision ep_re 1161.9398108265236

{"global_step": 340000, "eval_re": [1652.4058389553684, 401.65648428695937, 
667.5382949067243, 1140.3550607237223, 1337.4318664742818, 396.8825864263604, 
2111.5034977874975, 2241.133512133969, 814.8200840480495, 855.6708825223051], 
"eval_len": [786, 195, 343, 460, 603, 242, 1000, 1000, 395, 406]}

 35%|███▍      | 349996/1000000 [3:57:43<5:06:00, 35.40it/s]global step 350000, trans_decision ep_re 875.5354206144935

{"global_step": 350000, "eval_re": [2068.276937125905, 1885.851036910952, 
669.9188410391719, 843.727899522977, 132.28112309496677, 760.5910495058133, 
209.43578044931635, 205.81884053445395, 114.33281077591903, 1865.1198871854601],
"eval_len": [1000, 1000, 336, 332, 77, 372, 108, 127, 69, 956]}

 36%|███▌      | 359996/1000000 [4:04:23<5:01:56, 35.33it/s]global step 360000, trans_decision ep_re 1262.7920591768525

{"global_step": 360000, "eval_re": [376.6614312769133, 2045.1365774306046, 
595.5471861006978, 480.7841399658349, 1941.9868654598406, 2004.014242997775, 
2104.0796305214776, 1462.0559912542683, 145.14134176945305, 1472.5131849916588],
"eval_len": [223, 1000, 283, 218, 1000, 1000, 1000, 719, 108, 742]}

 37%|███▋      | 369996/1000000 [4:11:03<4:57:20, 35.31it/s]global step 370000, trans_decision ep_re 1489.7150345009964

{"global_step": 370000, "eval_re": [1450.086674364406, 758.5593793123302, 
1933.5607195045138, 1883.758564987004, 1795.446343151325, 2104.609975686233, 
1063.805304847079, 1438.5821886400306, 1988.2302987183382, 480.51089579870325], 
"eval_len": [1000, 346, 1000, 1000, 881, 1000, 490, 745, 1000, 248]}

 38%|███▊      | 379999/1000000 [4:17:53<4:52:47, 35.29it/s]global step 380000, trans_decision ep_re 2266.987021386034

{"global_step": 380000, "eval_re": [2289.7732798267543, 2374.840386263574, 
2265.9780631293374, 2155.735138569013, 2237.652878289552, 2469.71823180992, 
2164.6989642549647, 2208.5622457895647, 2281.129220111857, 2221.7818058158064], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 986, 1000, 1000]}

 39%|███▉      | 389996/1000000 [4:24:53<4:49:12, 35.15it/s]global step 390000, trans_decision ep_re 1775.8764050378468

{"global_step": 390000, "eval_re": [1472.044003088732, 2222.513248881193, 
2089.79466893689, 1876.5160429261234, 1077.0140602264546, 2142.920732289828, 
1740.8924381869988, 1117.6128456504664, 2064.5879256792045, 1954.8680845125778],
"eval_len": [690, 1000, 1000, 1000, 621, 1000, 901, 509, 1000, 1000]}

 40%|███▉      | 399996/1000000 [4:31:53<4:41:32, 35.52it/s]global step 400000, trans_decision ep_re 259.46084615236543

{"global_step": 400000, "eval_re": [493.11821414214864, 550.8663163998124, 
-464.39406718844305, 576.3536504598752, -106.53875071254907, 
-246.76996264324683, 580.0441976574482, 686.4914769724489, 435.27184172400194, 
90.16554471215787], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
1000, 89]}

 41%|████      | 409996/1000000 [4:38:43<4:38:03, 35.37it/s]global step 410000, trans_decision ep_re 1293.4650100803185

{"global_step": 410000, "eval_re": [1605.496024441028, 203.65149065254164, 
2006.156116418467, 1346.185838615122, 383.50545291006773, 161.08309773584594, 
1408.1784789288688, 1959.1178066979896, 2101.4034938202367, 1759.8723005830163],
"eval_len": [1000, 187, 1000, 1000, 194, 94, 673, 1000, 1000, 858]}

 42%|████▏     | 419996/1000000 [4:45:33<4:34:14, 35.25it/s]global step 420000, trans_decision ep_re 2063.3603825052915

{"global_step": 420000, "eval_re": [2221.7772362195133, 2033.1526605963356, 
1092.702107278518, 2231.877469532227, 1958.9793317545136, 2229.6152321964246, 
2253.4455351384004, 2211.160716333438, 2050.7816247151186, 2350.111911288421], 
"eval_len": [996, 1000, 505, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429996/1000000 [4:52:33<4:34:02, 34.67it/s]global step 430000, trans_decision ep_re 1516.0626583677217

{"global_step": 430000, "eval_re": [1044.467483071124, 2129.390235602077, 
2190.754950665644, 1962.9416442339132, 818.4441138697159, 2255.259687509815, 
512.6773946051713, 1841.410591467796, 644.1285363503197, 1761.151946301642], 
"eval_len": [508, 1000, 1000, 1000, 484, 1000, 1000, 1000, 339, 902]}

 44%|████▍     | 439996/1000000 [4:59:23<4:24:33, 35.28it/s]global step 440000, trans_decision ep_re 1812.623304597968

{"global_step": 440000, "eval_re": [2287.1806598154735, 1407.2553347297808, 
2201.2327485520673, 2168.8294585707463, 661.8069490276163, 614.3281382918498, 
2392.4137726572344, 2226.5707015047037, 2053.38537528438, 2113.2299075458254], 
"eval_len": [1000, 622, 1000, 1000, 327, 275, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449996/1000000 [5:06:13<4:22:53, 34.87it/s]global step 450000, trans_decision ep_re 1197.8783961275667

{"global_step": 450000, "eval_re": [570.3181344891334, 937.2360108011384, 
1936.9001098582924, 410.578536370637, 2139.860901966222, 2444.191324486263, 
330.24164826298033, 2322.7191519577823, 133.6150205202143, 753.1231225630057], 
"eval_len": [276, 1000, 1000, 180, 1000, 1000, 180, 1000, 83, 330]}

 46%|████▌     | 459996/1000000 [5:13:03<4:14:52, 35.31it/s]global step 460000, trans_decision ep_re 1683.4973482371418

{"global_step": 460000, "eval_re": [1946.4829693973784, 754.1402875224941, 
2344.14596049362, 2107.22879164555, 2219.9870144368115, 2239.2239999477597, 
745.7483650928648, 2376.264201912611, 1204.4886735233995, 897.2632183989293], 
"eval_len": [1000, 419, 1000, 1000, 1000, 1000, 320, 1000, 540, 430]}

 47%|████▋     | 469996/1000000 [5:19:53<4:14:59, 34.64it/s]global step 470000, trans_decision ep_re 1782.8131573751111

{"global_step": 470000, "eval_re": [760.9614291112786, 2331.96553812614, 
2087.868877199805, 1339.4670895870634, 1754.3507250317814, 806.8616806066639, 
2282.4441340480557, 2190.048346866785, 2171.975409872533, 2102.1883433010053], 
"eval_len": [389, 1000, 1000, 609, 1000, 365, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479996/1000000 [5:26:43<4:01:39, 35.86it/s]global step 480000, trans_decision ep_re 1882.4986233830064

{"global_step": 480000, "eval_re": [1838.3671891187787, 2199.816348675355, 
1980.7030748524749, 2196.3983836343514, 1920.8406808899574, 590.7000674773518, 
1957.0806263159052, 2077.98986273586, 2025.0208246840757, 2038.0691754459517], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 346, 1000, 1000, 950, 1000]}

 49%|████▉     | 489999/1000000 [5:33:33<3:58:20, 35.66it/s]global step 490000, trans_decision ep_re 1729.692161288281

{"global_step": 490000, "eval_re": [2359.3128314517653, 2247.190522159433, 
1969.1751876885498, 432.0510493496389, 2102.4961903779454, 2128.894479811807, 
330.6821735584266, 1710.9085679992238, 2218.164334146301, 1798.0462763397202], 
"eval_len": [1000, 1000, 1000, 247, 1000, 1000, 171, 854, 1000, 880]}

 50%|████▉     | 499996/1000000 [5:40:13<3:51:52, 35.94it/s]global step 500000, trans_decision ep_re 1177.495637050942

{"global_step": 500000, "eval_re": [202.5348555271076, 470.8021269784771, 
350.89493990749185, 2134.8246399913746, 1477.861227838769, 436.4400088356534, 
2015.3001772446219, 291.13085163387734, 2143.9321812343046, 2251.235361317742], 
"eval_len": [112, 247, 159, 1000, 623, 1000, 1000, 145, 966, 1000]}

 51%|█████     | 509996/1000000 [5:46:53<3:47:39, 35.87it/s]global step 510000, trans_decision ep_re 1594.5798712533326

{"global_step": 510000, "eval_re": [2133.5521287946176, 1037.4661261310277, 
2376.902701070878, 2072.797845895177, 1339.55495277253, 139.8990189954099, 
2342.2191868326636, 2272.132971469874, 228.55950864007053, 2002.714271931079], 
"eval_len": [1000, 432, 1000, 1000, 617, 88, 1000, 1000, 142, 1000]}

 52%|█████▏    | 519996/1000000 [5:53:33<3:47:20, 35.19it/s]global step 520000, trans_decision ep_re 1049.6080148934004

{"global_step": 520000, "eval_re": [1605.1322970139192, 329.46194140351054, 
267.133389864515, 1105.0762946838045, 592.783072497442, 426.4804464528906, 
1918.9047589092966, 1687.1998938145343, 649.8674618401518, 1914.0405924539416], 
"eval_len": [795, 162, 132, 632, 317, 196, 1000, 1000, 374, 1000]}

 53%|█████▎    | 529996/1000000 [6:00:23<3:41:34, 35.35it/s]global step 530000, trans_decision ep_re 1859.2278202747766

{"global_step": 530000, "eval_re": [1878.609313995958, 1766.320350173575, 
1234.1419914577486, 2023.87348651778, 1919.4134919576225, 1682.3425468654716, 
1850.9184348308181, 2170.639433344827, 1795.7054535220664, 2270.3137000819006], 
"eval_len": [1000, 1000, 590, 1000, 1000, 1000, 1000, 1000, 948, 1000]}

 54%|█████▍    | 539996/1000000 [6:07:23<3:37:27, 35.26it/s]global step 540000, trans_decision ep_re 1807.7487997952753

{"global_step": 540000, "eval_re": [1953.0198211302954, 2200.3094219179384, 
1913.7249890917658, 2181.5590980478164, 2167.8225531390117, 962.5066012936555, 
1929.5635403741403, 1737.9336520972524, 2267.466000821956, 763.582320038921], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 424, 1000, 788, 1000, 387]}

 55%|█████▍    | 549996/1000000 [6:14:13<3:31:44, 35.42it/s]global step 550000, trans_decision ep_re 1905.383483408847

{"global_step": 550000, "eval_re": [2294.548926290057, 2055.7316970231113, 
2215.0750755271324, 2247.8651974583104, 2567.470678391544, 1235.7675906944105, 
1312.8973538810585, 2397.3261358952636, 461.86425685692495, 2265.2879220706554],
"eval_len": [1000, 1000, 1000, 1000, 1000, 622, 1000, 1000, 255, 1000]}

 56%|█████▌    | 559996/1000000 [6:21:03<3:28:30, 35.17it/s]global step 560000, trans_decision ep_re 1441.1379377751243

{"global_step": 560000, "eval_re": [1872.0754508432092, 309.70930515316155, 
1920.4083271755567, 102.75222845543726, 1804.7133800998733, 2074.726433874766, 
2638.617507907101, 1913.7063915568187, 740.2594846965079, 1034.4108679888125], 
"eval_len": [1000, 158, 1000, 81, 1000, 1000, 1000, 856, 371, 1000]}

 57%|█████▋    | 569996/1000000 [6:28:03<3:23:37, 35.20it/s]global step 570000, trans_decision ep_re 1814.7421766066789

{"global_step": 570000, "eval_re": [2179.6330977502184, 2238.311521288987, 
2287.6856098757144, 1399.7307054889707, 356.1177436311863, 2248.8498226996467, 
2286.759986365715, 1967.874445234181, 1051.928904279496, 2130.5299294526744], 
"eval_len": [1000, 1000, 1000, 1000, 176, 1000, 1000, 1000, 519, 1000]}

 58%|█████▊    | 579999/1000000 [6:34:53<3:19:02, 35.17it/s]global step 580000, trans_decision ep_re 1848.3083020756662

{"global_step": 580000, "eval_re": [2056.907500215968, 1753.7800352950544, 
2019.7380034028624, 2304.0689021581793, 1409.3504313105425, 2400.303821622852, 
2115.128829092511, 158.08922070390682, 2130.5795868209843, 2135.136690133801], 
"eval_len": [1000, 1000, 1000, 1000, 674, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589996/1000000 [6:41:53<3:14:13, 35.18it/s]global step 590000, trans_decision ep_re 1708.6972388361894

{"global_step": 590000, "eval_re": [2159.7110722465254, 227.07710369655842, 
824.1271184275935, 2205.729686865386, 2237.532712005451, 2121.4842597805487, 
2083.7470236307836, 2198.2841366115695, 974.8494622792593, 2054.429812818217], 
"eval_len": [1000, 124, 450, 1000, 1000, 1000, 1000, 1000, 439, 1000]}

 60%|█████▉    | 599996/1000000 [6:48:43<3:08:49, 35.31it/s]global step 600000, trans_decision ep_re 1568.9480387375656

{"global_step": 600000, "eval_re": [2006.7763588326748, 2067.27304096731, 
2470.276473763309, 817.8368888160885, 840.4837958706723, 2122.6468861155204, 
185.80955357223613, 2270.4325203823755, 2217.6662490646813, 690.2786199907887], 
"eval_len": [877, 1000, 1000, 1000, 397, 1000, 112, 1000, 1000, 343]}

 61%|██████    | 609996/1000000 [6:55:33<3:03:58, 35.33it/s]global step 610000, trans_decision ep_re 1704.110116874865

{"global_step": 610000, "eval_re": [1922.7001172147454, 452.8720456403337, 
2135.3986155258503, 1742.0312389019066, 645.2964851395847, 2188.5654861206754, 
2209.620331435195, 2220.7564618003976, 2129.571845566802, 1394.2885414031593], 
"eval_len": [1000, 213, 1000, 795, 310, 1000, 1000, 1000, 966, 652]}

 62%|██████▏   | 619996/1000000 [7:02:23<3:02:59, 34.61it/s]global step 620000, trans_decision ep_re 1569.4967888511112

{"global_step": 620000, "eval_re": [2137.67259991416, 2113.422859514645, 
857.7776092354662, 2128.45236745187, 2330.0832438304205, 2149.368222961431, 
1672.0505455174773, 265.17134665532024, 173.28999401961843, 1867.6790994107007],
"eval_len": [1000, 1000, 387, 1000, 1000, 1000, 769, 167, 94, 1000]}

 63%|██████▎   | 629996/1000000 [7:09:13<2:55:00, 35.24it/s]global step 630000, trans_decision ep_re 1585.7469906124054

{"global_step": 630000, "eval_re": [912.262185308205, 1951.1976227391933, 
1744.9039096875204, 2101.5736747057795, 1986.0878551472083, 2094.4652307063343, 
1966.6690622594456, 95.17284402635539, 1182.3784922720638, 1822.7590292719497], 
"eval_len": [1000, 1000, 1000, 999, 991, 1000, 1000, 81, 594, 965]}

 64%|██████▍   | 639996/1000000 [7:16:13<2:50:51, 35.12it/s]global step 640000, trans_decision ep_re 1513.0054205125812

{"global_step": 640000, "eval_re": [2187.47124949104, 2266.2709547862073, 
1589.4528863426822, 1870.2430843971817, 1987.96526626617, 2119.340891111632, 
526.8401183513073, 217.87843037104975, 440.6847643186127, 1923.9065596899284], 
"eval_len": [1000, 1000, 801, 829, 1000, 1000, 279, 114, 231, 895]}

 65%|██████▍   | 649996/1000000 [7:22:53<2:45:23, 35.27it/s]global step 650000, trans_decision ep_re 1909.1626413973693

{"global_step": 650000, "eval_re": [1765.4689233556778, 2207.3317263506847, 
1503.3621540228692, 2233.9702272328163, 1973.9483456060173, 2226.4513816267417, 
2270.8220643372615, 2341.4913942935473, 1751.8940754473817, 816.8861217006964], 
"eval_len": [804, 1000, 702, 1000, 868, 1000, 1000, 1000, 731, 385]}

 66%|██████▌   | 659996/1000000 [7:29:53<2:41:15, 35.14it/s]global step 660000, trans_decision ep_re 1380.7148855152025

{"global_step": 660000, "eval_re": [1868.4512584384915, 318.3545879063562, 
2044.5733852851527, 2043.0169029632625, 2106.9082185216826, 1525.4518483871022, 
644.5621320982518, 987.3466354299889, 1801.5026638234838, 466.98122229825515], 
"eval_len": [1000, 158, 1000, 968, 1000, 768, 325, 1000, 1000, 270]}

 67%|██████▋   | 669996/1000000 [7:36:43<2:35:45, 35.31it/s]global step 670000, trans_decision ep_re 1553.5664737623688

{"global_step": 670000, "eval_re": [663.335415105281, 1187.4193689420722, 
2165.9132422687976, 1811.3997018431185, 1236.5771868904358, 181.3884388128638, 
1945.7322347848867, 2261.3958129192097, 2053.6665271062307, 2028.836808950793], 
"eval_len": [302, 1000, 1000, 1000, 1000, 118, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679996/1000000 [7:43:33<2:31:42, 35.16it/s]global step 680000, trans_decision ep_re 1304.4715009980355

{"global_step": 680000, "eval_re": [145.56714342802195, 2108.2264389621714, 
1350.3449301230255, 2069.934571631671, 2126.569974287605, 497.56895400324834, 
170.2052458151471, 2197.9390953448647, 1812.1100174624244, 566.2486389221754], 
"eval_len": [94, 1000, 665, 1000, 1000, 204, 125, 1000, 1000, 274]}

 69%|██████▉   | 689996/1000000 [7:50:23<2:29:43, 34.51it/s]global step 690000, trans_decision ep_re 1563.4414361157403

{"global_step": 690000, "eval_re": [1795.9338516647563, 1343.651799394799, 
2006.1634659604329, 1597.5874341204922, 331.9254456924755, 2209.80538973949, 
1903.7223864965472, 211.98702340948336, 2210.8517052360257, 2022.7858594429024],
"eval_len": [823, 1000, 1000, 1000, 209, 1000, 943, 129, 1000, 1000]}

 70%|██████▉   | 699996/1000000 [7:57:13<2:21:32, 35.33it/s]global step 700000, trans_decision ep_re 1530.288457414666

{"global_step": 700000, "eval_re": [2045.3658521255688, 2119.099161366985, 
2072.1859233965256, 456.45883795129555, 2114.649809401066, 1024.9416618061605, 
1955.0407334570896, 435.5526939132512, 1945.542225711259, 1134.0476750174578], 
"eval_len": [1000, 1000, 1000, 234, 1000, 535, 1000, 214, 1000, 1000]}

 71%|███████   | 709996/1000000 [8:04:03<2:17:58, 35.03it/s]global step 710000, trans_decision ep_re 1054.4625438219546

{"global_step": 710000, "eval_re": [599.9797175512002, 1185.9375358067346, 
2042.5382305992862, 859.4733215249267, 2079.3169338513558, 142.32587605386493, 
834.3947788455239, 439.0223559913456, 2082.3554807388878, 279.28120725641867], 
"eval_len": [254, 1000, 1000, 1000, 1000, 93, 425, 199, 1000, 173]}

 72%|███████▏  | 719996/1000000 [8:10:53<2:14:22, 34.73it/s]global step 720000, trans_decision ep_re 818.4342697034252

{"global_step": 720000, "eval_re": [506.7396473538042, 184.74756671396233, 
171.3067634197907, 2163.132114019641, 1908.0801451510088, 1013.0435831343916, 
282.17288718107403, 657.4511661554596, 884.8077334128595, 412.86109049226104], 
"eval_len": [266, 89, 92, 1000, 1000, 520, 159, 362, 367, 195]}

 73%|███████▎  | 729996/1000000 [8:17:23<2:05:08, 35.96it/s]global step 730000, trans_decision ep_re 1383.8452720451796

{"global_step": 730000, "eval_re": [276.16824211975785, 1222.3310263979026, 
136.0138556833202, 1864.2810111669444, 1974.7848460772664, 1421.781138300178, 
2003.689981879468, 2179.535833748321, 1612.774865237186, 1147.091919841452], 
"eval_len": [186, 705, 93, 1000, 1000, 1000, 1000, 1000, 857, 609]}

 74%|███████▍  | 739997/1000000 [8:24:23<2:06:42, 34.20it/s]global step 740000, trans_decision ep_re 1632.1481696674473

{"global_step": 740000, "eval_re": [2008.6787891063714, 2077.478812829143, 
1273.1994820427037, 2196.001684937448, 1896.4069790424062, 2055.4489008633745, 
1091.4349458125198, 394.30809192473635, 1956.3234674188166, 1372.200542696951], 
"eval_len": [1000, 1000, 600, 1000, 1000, 1000, 549, 208, 871, 602]}

 75%|███████▍  | 749999/1000000 [8:31:43<2:00:59, 34.44it/s]global step 750000, trans_decision ep_re 1255.212119674623

{"global_step": 750000, "eval_re": [337.8885066393997, 711.9512407815472, 
2004.912791793988, 1976.747061620802, 1248.2998409999846, 1321.342440081901, 
1993.3346864599853, 711.4453273133738, 2058.2831894254527, 187.9161116297946], 
"eval_len": [154, 459, 1000, 1000, 1000, 798, 1000, 337, 967, 108]}

 76%|███████▌  | 759997/1000000 [8:38:53<1:53:35, 35.21it/s]global step 760000, trans_decision ep_re 1394.1563635434081

{"global_step": 760000, "eval_re": [2008.9193303763784, 1819.8516668132818, 
1508.835502055044, 204.797850238342, 1947.1614683259484, 175.7842882889976, 
1665.210239448135, 639.3483192916278, 2119.3523832477663, 1852.3025873485615], 
"eval_len": [1000, 830, 1000, 111, 1000, 102, 850, 485, 1000, 1000]}

 77%|███████▋  | 769997/1000000 [8:46:13<2:05:58, 30.43it/s]global step 770000, trans_decision ep_re 2107.5581812596197

{"global_step": 770000, "eval_re": [2250.3924002256995, 2082.3682807549076, 
2030.3164859218332, 2279.4327468522756, 2247.704398835696, 1695.4926795876972, 
2324.9284166327307, 1910.2106088945454, 2048.484957876426, 2206.250837014386], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 745, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779999/1000000 [8:53:23<1:47:10, 34.21it/s]global step 780000, trans_decision ep_re 1831.148697425019

{"global_step": 780000, "eval_re": [1932.00336968546, 1434.4925655312804, 
1950.806620976225, 2068.485312282824, 2078.8077000378644, 1864.4623908371734, 
1856.4120175905562, 1520.190624756679, 1842.6184173389995, 1763.207955213129], 
"eval_len": [1000, 653, 1000, 1000, 1000, 1000, 1000, 711, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [9:00:33<1:38:46, 35.44it/s]global step 790000, trans_decision ep_re 1486.4890822273117

{"global_step": 790000, "eval_re": [2108.918894106372, 1994.1654803428162, 
879.6654861342216, 813.2877848068922, 213.2558615029554, 2070.4193774742002, 
2214.1153433563513, 1242.2133834936947, 2162.3373338381143, 1166.5118772174992],
"eval_len": [1000, 1000, 406, 1000, 132, 1000, 1000, 1000, 1000, 597]}

 80%|███████▉  | 799996/1000000 [9:07:23<1:33:50, 35.52it/s]global step 800000, trans_decision ep_re 1799.2323976742955

{"global_step": 800000, "eval_re": [1966.58630461657, 879.2421898017366, 
1830.001068132987, 1813.6403443803315, 2105.3330671347703, 1745.6387883051093, 
2284.641514275111, 1947.2075157978807, 1916.8097988254556, 1503.2233854730052], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 879, 1000]}

 81%|████████  | 809997/1000000 [9:14:13<1:34:02, 33.67it/s]global step 810000, trans_decision ep_re 1858.8788924407268

{"global_step": 810000, "eval_re": [1958.9865254623262, 2094.763055264635, 
2004.360901649403, 2053.283407048842, 1755.4065638345826, 736.7123343136634, 
1916.293161443195, 2213.9951356144184, 1866.079893072169, 1988.9079467040335], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 353, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819997/1000000 [9:21:43<1:27:53, 34.13it/s]global step 820000, trans_decision ep_re 1577.900716146151

{"global_step": 820000, "eval_re": [1977.7826465936344, 362.18151659082787, 
1404.2905964947477, 1854.4587074732974, 294.86010598548705, 2092.588080819043, 
2099.3560788559676, 1957.4242527667425, 1865.515025039208, 1870.5501508425518], 
"eval_len": [1000, 212, 1000, 1000, 186, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829997/1000000 [9:28:53<1:23:29, 33.94it/s]global step 830000, trans_decision ep_re 1775.8910130844743

{"global_step": 830000, "eval_re": [334.0977753657272, 1994.0256008013557, 
2144.5828709987213, 981.3686485194212, 2142.17985565946, 2074.3677265703227, 
2237.309254060064, 1362.5174187894306, 2279.026202249355, 2209.4347778308825], 
"eval_len": [179, 1000, 1000, 468, 1000, 1000, 1000, 642, 1000, 1000]}

 84%|████████▍ | 839997/1000000 [9:36:13<1:18:06, 34.14it/s]global step 840000, trans_decision ep_re 1104.5161167552083

{"global_step": 840000, "eval_re": [858.1833855834902, 180.4280201673671, 
2130.1279831646566, 1653.8703428034403, 433.9350067880729, 492.1201275978834, 
1868.585725900393, 119.2157557638342, 2083.943264206822, 1224.751555576124], 
"eval_len": [380, 90, 1000, 744, 250, 236, 816, 76, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [9:43:23<1:09:39, 35.89it/s]global step 850000, trans_decision ep_re 1518.9079965159783

{"global_step": 850000, "eval_re": [134.96387793036482, 1741.4133770420349, 
83.45042162954849, 1514.172305945639, 1373.5048730657852, 2082.9089879418516, 
2127.3641414524877, 2098.9802320645076, 2206.7529108497824, 1825.5688372377808],
"eval_len": [81, 923, 59, 1000, 577, 1000, 1000, 940, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [9:50:43<1:08:48, 33.91it/s]global step 860000, trans_decision ep_re 1505.703183538222

{"global_step": 860000, "eval_re": [1140.8373338531708, 2101.4263604005305, 
1072.6092713097871, 2071.993666310735, 96.60788601202437, 1776.7567578553717, 
2279.975855826483, 1978.1481567490052, 355.86082158098003, 2182.815725484134], 
"eval_len": [526, 1000, 497, 1000, 68, 1000, 1000, 1000, 188, 1000]}

 87%|████████▋ | 869999/1000000 [9:57:53<1:04:26, 33.62it/s]global step 870000, trans_decision ep_re 1125.1062323695128

{"global_step": 870000, "eval_re": [1980.705059454822, 30.984009617772603, 
237.81891234169004, 1974.5286690722894, 152.00857399549014, 876.6723297904434, 
1801.3609208099058, 1584.2526086513699, 1772.3058715568766, 840.4253684044646], 
"eval_len": [1000, 42, 145, 1000, 110, 381, 1000, 861, 1000, 386]}

 88%|████████▊ | 879997/1000000 [10:04:53<55:47, 35.85it/s]global step 880000, trans_decision ep_re 1927.8836646003415

{"global_step": 880000, "eval_re": [1446.5792548569557, 2179.719504981843, 
2126.8383810720097, 2108.1133785601387, 2187.4721835166833, 2061.8665813907096, 
2045.173921837605, 2131.3711009729986, 2101.8896507695813, 889.8126880448893], 
"eval_len": [723, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 466]}

 89%|████████▉ | 889999/1000000 [10:12:13<1:03:04, 29.06it/s]global step 890000, trans_decision ep_re 1624.641158556501

{"global_step": 890000, "eval_re": [2115.9595823208097, 1919.2274984872715, 
2180.841541674214, 1941.5221707074525, 2038.510481535664, 1266.5418776160463, 
522.7210975103287, 50.42040152248767, 2161.6064516375764, 2049.060482553161], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 62, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [10:19:33<46:21, 35.95it/s]global step 900000, trans_decision ep_re 1860.5078055031493

{"global_step": 900000, "eval_re": [415.28888452590525, 2187.817708498547, 
2228.376039084363, 2198.9675443454803, 1696.2328129199138, 995.9846125130671, 
2135.56517052605, 2226.7281076214094, 2277.8776196508347, 2242.23955534592], 
"eval_len": [213, 1000, 1000, 1000, 777, 465, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [10:26:53<1:30:04, 16.65it/s]global step 910000, trans_decision ep_re 1135.542046350867

{"global_step": 910000, "eval_re": [649.8124853486603, 1925.5349726287043, 
1777.4558744935025, 1063.4297892660163, 1932.3621083610967, 296.2657526585312, 
490.4875811862443, 776.787526320108, 759.8066269176131, 1683.4777463281935], 
"eval_len": [347, 1000, 1000, 580, 1000, 198, 262, 1000, 384, 1000]}

 92%|█████████▏| 919999/1000000 [10:34:03<37:26, 35.62it/s]global step 920000, trans_decision ep_re 1506.1413520392325

{"global_step": 920000, "eval_re": [713.1168513478106, 727.4613120822197, 
2259.048293571711, 2159.186331642454, 1035.8430508094589, 1744.8240619466833, 
2278.1514068694437, 791.9588356918616, 1124.966878116454, 2226.856498314228], 
"eval_len": [398, 333, 1000, 1000, 501, 850, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929997/1000000 [10:41:13<32:40, 35.71it/s]global step 930000, trans_decision ep_re 1521.4993884955802

{"global_step": 930000, "eval_re": [1125.1445595146224, 520.2991762526627, 
2101.365507773849, 2155.0928254847354, 1280.84947271156, 936.6499750776699, 
2174.427725370063, 498.3242720868661, 2283.0211862552705, 2139.8191844285025], 
"eval_len": [1000, 298, 1000, 1000, 737, 1000, 1000, 261, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [10:52:46<33:51, 29.54it/s]global step 940000, trans_decision ep_re 2098.1196053327426

{"global_step": 940000, "eval_re": [2069.621960878404, 2058.556229076501, 
2092.2126346468262, 2133.4937676701397, 2190.8693659931123, 2089.7498266445314, 
1937.3283314916023, 2148.2249935445957, 2070.317653876804, 2190.821289504909], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949996/1000000 [10:59:36<23:10, 35.97it/s]global step 950000, trans_decision ep_re 1640.4194209653874

{"global_step": 950000, "eval_re": [2094.608941745754, 2129.0076710923727, 
1977.265964435681, 2012.819277755882, 822.5292053540859, 2136.257496737892, 
330.05922661251105, 1633.8939478562654, 1167.7987287700876, 2099.9537492933446],
"eval_len": [1000, 1000, 1000, 1000, 408, 1000, 184, 782, 1000, 1000]}

 96%|█████████▌| 959996/1000000 [11:06:16<18:34, 35.88it/s]global step 960000, trans_decision ep_re 1511.4845625436271

{"global_step": 960000, "eval_re": [108.36552564633698, 665.6324610171762, 
556.3056780613131, 2130.910073326483, 1553.076358133719, 2045.9115805150261, 
1752.9707174908012, 2088.2340749051127, 2116.070963545947, 2097.3681927943585], 
"eval_len": [65, 336, 302, 1000, 1000, 1000, 831, 1000, 1000, 1000]}

 97%|█████████▋| 969996/1000000 [11:12:56<13:52, 36.04it/s]global step 970000, trans_decision ep_re 1526.2630660818606

{"global_step": 970000, "eval_re": [1318.8433997276404, 1799.4793008728197, 
1835.7945237082758, 1305.6609562753692, 361.56196507373795, 1990.3814739035886, 
1179.56891912022, 1986.3718750549906, 1411.3103563129996, 2073.657890768966], 
"eval_len": [1000, 1000, 1000, 1000, 221, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979996/1000000 [11:19:46<09:16, 35.93it/s]global step 980000, trans_decision ep_re 1319.8478733442296

{"global_step": 980000, "eval_re": [1998.3454297150352, 217.30582124617686, 
2003.248672807249, 1100.7623989755414, 138.21272720393483, 1851.8036446637154, 
2013.3250536228265, 2064.0615638274135, 1374.106712478568, 437.30670890183734], 
"eval_len": [1000, 111, 1000, 1000, 85, 1000, 1000, 1000, 775, 203]}

 99%|█████████▉| 989996/1000000 [11:26:26<04:37, 36.09it/s]global step 990000, trans_decision ep_re 1681.2689901082529

{"global_step": 990000, "eval_re": [843.8152232026417, 1480.1779690198837, 
2132.606870731653, 1953.5643440962447, 2044.807538015062, 2044.805855384168, 
2191.929102627525, 2245.6453105662986, 904.7281906774048, 970.6094967616474], 
"eval_len": [1000, 1000, 1000, 938, 1000, 1000, 1000, 1000, 507, 442]}

100%|█████████▉| 999996/1000000 [11:33:06<00:00, 35.98it/s]global step 1000000, trans_decision ep_re 1023.8510458244427

{"global_step": 1000000, "eval_re": [2008.0669525308151, 799.7757012742429, 
1865.9420072542218, 389.1895893907383, 1970.6665997360074, 245.96377681998348, 
286.9461230936414, 1167.8418409382944, 69.98767384725102, 1434.1301933592324], 
"eval_len": [1000, 430, 1000, 224, 1000, 139, 182, 1000, 58, 1000]}

100%|██████████| 1000000/1000000 [11:33:23<00:00, 24.04it/s]
