
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [03:20<7:07:56, 38.56it/s]global step 10000, trans_decision ep_re 523.1862503590484

{"global_step": 10000, "eval_re": [505.1436445146354, 507.8704121379906, 
533.8314572746932, 477.30533989703264, 512.6668195160788, 551.8270973759392, 
508.83623927331706, 531.1199786534247, 539.6114517109029, 563.6500632364698], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [09:50<7:02:21, 38.67it/s]global step 20000, trans_decision ep_re 459.3426538413325

{"global_step": 20000, "eval_re": [257.03400813727234, 67.60443730650967, 
658.6276887180143, 474.2865812894482, 623.0696067861178, 589.5126812047155, 
528.8651696087112, 243.56924925598665, 675.6427928766575, 475.2143232298909], 
"eval_len": [326, 113, 886, 1000, 1000, 1000, 921, 368, 1000, 817]}

  3%|▎         | 29996/1000000 [16:00<6:57:08, 38.76it/s]global step 30000, trans_decision ep_re 694.8155691802116

{"global_step": 30000, "eval_re": [881.2306672956381, 858.8238940271742, 
822.7219058309528, 825.1225157914126, 93.85399899080552, 893.3315963859429, 
917.8123516556656, 798.6039771367506, 806.406593402628, 50.2481912851457], 
"eval_len": [1000, 1000, 1000, 1000, 102, 1000, 1000, 1000, 1000, 56]}

  4%|▍         | 39999/1000000 [22:20<6:53:50, 38.66it/s]global step 40000, trans_decision ep_re 894.54413232417

{"global_step": 40000, "eval_re": [1015.4175747184534, 726.0405684992069, 
762.2623844586399, 773.2602771534044, 939.8190227436083, 913.2972020682528, 
579.5633484510038, 1013.4430310990251, 1139.8804096502652, 1082.4575043998407], 
"eval_len": [1000, 624, 714, 1000, 811, 1000, 596, 1000, 1000, 1000]}

  5%|▍         | 49996/1000000 [28:40<6:48:40, 38.74it/s]global step 50000, trans_decision ep_re 837.6893525621975

{"global_step": 50000, "eval_re": [976.5926840932581, 1268.2383775335825, 
139.48207923301115, 490.79204267822007, 777.167911689822, 1198.7622843353172, 
1024.429921309704, 1236.6319160750772, 1058.718896792476, 206.07741188150763], 
"eval_len": [1000, 1000, 121, 441, 1000, 1000, 1000, 1000, 1000, 172]}

  6%|▌         | 59999/1000000 [35:00<6:41:29, 39.02it/s]global step 60000, trans_decision ep_re 565.5630156320867

{"global_step": 60000, "eval_re": [812.086571636142, 317.1040553933599, 
283.45692657319563, 963.2209643070661, 845.4382409910252, 1092.1204648621294, 
98.45074398487604, 394.3312511092277, 181.4501907099581, 667.9707467538865], 
"eval_len": [1000, 258, 205, 806, 1000, 859, 108, 286, 196, 496]}

  7%|▋         | 69996/1000000 [41:10<6:39:13, 38.83it/s]global step 70000, trans_decision ep_re 1203.3343339949904

{"global_step": 70000, "eval_re": [1242.6125505098635, 1336.0028171054269, 
1302.3852374290564, 1405.123525783884, 92.93029775779465, 1340.0977218695862, 
1353.9404764574936, 1307.4663662294258, 1391.1395201604737, 1261.6448266468988],
"eval_len": [1000, 984, 1000, 1000, 82, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79998/1000000 [47:30<6:34:13, 38.89it/s]global step 80000, trans_decision ep_re 1264.4968273090976

{"global_step": 80000, "eval_re": [815.5167890174866, 1360.5073848845714, 
1253.6220545176293, 1400.2213774638865, 1390.0263615784422, 1510.3132398014266, 
1304.3628954024014, 1489.8400510002314, 1435.6116908823815, 684.9464285425198], 
"eval_len": [1000, 1000, 878, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [53:50<6:28:54, 39.00it/s]global step 90000, trans_decision ep_re 951.3731013225035

{"global_step": 90000, "eval_re": [1252.2504371280393, 963.2881177864211, 
586.9275040395524, 1195.5893279025256, 1018.0201315372899, 1251.696980385758, 
335.4045487913965, 1297.5453330776295, 292.91128011593725, 1320.0973524604847], 
"eval_len": [894, 851, 609, 1000, 884, 1000, 218, 1000, 243, 1000]}

 10%|▉         | 99998/1000000 [1:00:00<6:24:45, 38.99it/s]global step 100000, trans_decision ep_re 989.9881980198273

{"global_step": 100000, "eval_re": [1448.3298642708899, 1208.10244844521, 
119.54281567514418, 1255.7812763447819, 1084.402587125528, 1457.5168748064364, 
1405.6750921374958, 1030.1588457742257, 766.5730663162, 123.79910930236275], 
"eval_len": [983, 1000, 88, 922, 1000, 1000, 1000, 1000, 505, 87]}

 11%|█         | 109999/1000000 [1:06:20<6:20:14, 39.01it/s]global step 110000, trans_decision ep_re 991.4334540108575

{"global_step": 110000, "eval_re": [639.2730541696354, 870.4241237164478, 
972.0298247581484, 40.05712067544404, 1574.7334175192714, 850.8298929992176, 
1380.7843058814904, 1665.2046082708123, 217.85442218176198, 1703.1437699363446],
"eval_len": [433, 537, 840, 43, 1000, 1000, 1000, 1000, 125, 1000]}

 12%|█▏        | 119998/1000000 [1:12:30<6:16:31, 38.95it/s]global step 120000, trans_decision ep_re 439.82273756836094

{"global_step": 120000, "eval_re": [1048.2568101053014, 218.0897434748798, 
283.16312174386184, 208.32197766315414, 322.20803533119835, 326.8568666811761, 
284.11689821138526, 859.6884729765158, 695.3385816869888, 152.18686780914814], 
"eval_len": [653, 126, 272, 134, 252, 236, 260, 1000, 1000, 153]}

 13%|█▎        | 129998/1000000 [1:18:31<6:15:05, 38.66it/s]global step 130000, trans_decision ep_re 1380.3827878253912

{"global_step": 130000, "eval_re": [1715.8172312690529, 1455.029108250035, 
1010.0706689738367, 947.2410871578292, 1293.026806169343, 1183.9358308810224, 
1170.1239353776807, 1650.3485200004493, 1633.992237112099, 1744.242453062564], 
"eval_len": [1000, 1000, 1000, 640, 1000, 1000, 711, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:24:51<6:09:25, 38.80it/s]global step 140000, trans_decision ep_re 1135.6195149987907

{"global_step": 140000, "eval_re": [906.5057841553613, 1502.6268378647494, 
462.2697172395547, 1578.0732331141116, 1690.4368943344905, 1146.9185907043047, 
773.9653285776662, 1577.1914305578712, 439.280048737138, 1278.9272847026593], 
"eval_len": [1000, 905, 286, 1000, 1000, 650, 460, 1000, 330, 1000]}

 15%|█▍        | 149997/1000000 [1:31:01<6:02:43, 39.06it/s]global step 150000, trans_decision ep_re 864.9198098720765

{"global_step": 150000, "eval_re": [942.1849994722486, 1672.6963020531553, 
1460.1816282183504, 1619.3106571100948, 466.17698394905835, 426.6960959116834, 
268.1057970463101, 973.9218347778949, 245.68057154332124, 574.2432286386467], 
"eval_len": [1000, 1000, 1000, 967, 221, 237, 161, 1000, 116, 299]}

 16%|█▌        | 159996/1000000 [1:37:11<6:11:10, 37.72it/s]global step 160000, trans_decision ep_re 1736.5297108027266

{"global_step": 160000, "eval_re": [1873.5033552634675, 1810.9642773454625, 
1502.7074007447013, 1802.6393181204553, 2141.0676233495524, 1910.664705680934, 
1889.2991475020185, 2011.0174048979352, 556.7303921595194, 1866.703482963219], 
"eval_len": [1000, 1000, 804, 1000, 1000, 1000, 1000, 1000, 286, 1000]}

 17%|█▋        | 169999/1000000 [1:43:31<5:56:21, 38.82it/s]global step 170000, trans_decision ep_re 1597.5464906367715

{"global_step": 170000, "eval_re": [104.61416949857609, 1999.0533627251486, 
1910.3150159368079, 2124.6890145926545, 2077.622648283694, 2211.181972056702, 
1073.2808895043604, 396.7792146360414, 1977.25887118093, 2100.669747952803], 
"eval_len": [72, 1000, 1000, 985, 1000, 1000, 1000, 238, 1000, 1000]}

 18%|█▊        | 179999/1000000 [1:49:41<5:52:51, 38.73it/s]global step 180000, trans_decision ep_re 1581.8047952762822

{"global_step": 180000, "eval_re": [33.05821462350683, 1950.984759579382, 
1886.6980192844446, 1297.5422834256879, 1822.0567386189375, 1889.1978677934308, 
1102.5387187852514, 2003.6997736478831, 1828.7760044149873, 2003.4955725893099],
"eval_len": [43, 1000, 1000, 1000, 856, 1000, 568, 1000, 1000, 1000]}

 19%|█▉        | 189996/1000000 [1:56:01<5:45:45, 39.05it/s]global step 190000, trans_decision ep_re 1757.543302576079

{"global_step": 190000, "eval_re": [1703.1012713592168, 1697.5273966347363, 
1829.9811162913686, 1706.9571361528735, 1795.0119186693125, 1791.4288563897426, 
1763.0179012254114, 1829.0119461658853, 1748.4569723224301, 1710.9385105498118],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [2:02:21<5:40:57, 39.11it/s]global step 200000, trans_decision ep_re 1756.1797276116977

{"global_step": 200000, "eval_re": [2109.130840075002, 1882.2336545705605, 
1964.7689833201168, 1853.5532969397075, 2056.7086124898337, 1010.2642248495797, 
1744.554266555232, 2003.3264729125976, 836.0482136092719, 2101.2087107950747], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 411, 1000]}

 21%|██        | 209998/1000000 [2:08:41<5:39:20, 38.80it/s]global step 210000, trans_decision ep_re 1898.3020495752612

{"global_step": 210000, "eval_re": [1843.0346254994674, 1967.5654662518464, 
1895.2048777649452, 1837.7284521447564, 1921.7602374665255, 1825.606081961773, 
1948.3211359891116, 1927.5356023766888, 1993.627168037496, 1822.6368482599994], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219998/1000000 [2:15:01<5:33:50, 38.94it/s]global step 220000, trans_decision ep_re 1504.0438977605093

{"global_step": 220000, "eval_re": [52.82342786617289, 2062.0146231580165, 
2097.530312246863, 2004.1246874404915, 2013.341118020286, 1986.6697225340213, 
2141.7036722435955, 805.0767287533399, 40.96087309820067, 1836.1938122441063], 
"eval_len": [47, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 49, 1000]}

 23%|██▎       | 229998/1000000 [2:21:11<5:27:29, 39.19it/s]global step 230000, trans_decision ep_re 1983.1638269685395

{"global_step": 230000, "eval_re": [2212.052232561586, 1727.5588427020014, 
2310.537622988683, 2066.3446846945385, 2415.3105512701845, 1799.4386421306592, 
687.0607339597229, 2154.003030994012, 2146.560990981911, 2312.7709374020965], 
"eval_len": [1000, 693, 1000, 1000, 1000, 844, 316, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [2:27:31<5:25:44, 38.89it/s]global step 240000, trans_decision ep_re 1764.5010739706352

{"global_step": 240000, "eval_re": [2084.747851131037, 183.4620903998087, 
1821.4042068830402, 1741.5138537214248, 1809.1236906332567, 2077.4280165176333, 
1841.4567288502838, 2144.391452578898, 2020.0784482867846, 1921.4044007041837], 
"eval_len": [1000, 99, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [2:33:51<5:20:58, 38.94it/s]global step 250000, trans_decision ep_re 1807.728763861808

{"global_step": 250000, "eval_re": [1521.681745028409, 2076.5324380169864, 
1876.7803486814528, 2183.07313481237, 2146.7288796916255, 1951.1106291111014, 
2086.498968731441, 525.4049031862414, 1990.9011035561953, 1718.575487802258], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 339, 1000, 1000]}

 26%|██▌       | 259999/1000000 [2:40:01<5:16:12, 39.00it/s]global step 260000, trans_decision ep_re 1646.182918036499

{"global_step": 260000, "eval_re": [1723.6982173306249, 962.5875914931864, 
1581.812668440694, 1646.2486134063104, 1781.1869761540997, 1661.258732319311, 
1787.2802112427216, 1732.236077675705, 1886.63512737824, 1698.884964924094], 
"eval_len": [1000, 523, 1000, 907, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269998/1000000 [2:46:21<5:16:45, 38.41it/s]global step 270000, trans_decision ep_re 2012.6043348907788

{"global_step": 270000, "eval_re": [2022.0202157030026, 2129.8336196612768, 
1660.5455266684498, 2182.6682071003997, 1998.8737596923088, 2012.2343245964787, 
2107.760013310219, 2067.8997224929167, 2008.491623400548, 1935.716336282189], 
"eval_len": [1000, 1000, 797, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279996/1000000 [2:52:41<5:11:08, 38.57it/s]global step 280000, trans_decision ep_re 1813.7728344462248

{"global_step": 280000, "eval_re": [1704.5262358333264, 2378.815368489035, 
241.98791842463078, 1440.3437751954386, 1314.2886313548297, 2340.5342937387413, 
2212.784709174433, 1980.4791382043086, 2259.0807559653063, 2264.8875180821983], 
"eval_len": [791, 1000, 141, 641, 653, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [2:59:01<5:06:38, 38.59it/s]global step 290000, trans_decision ep_re 1575.396958052393

{"global_step": 290000, "eval_re": [2148.7767827627963, 2073.9049210146845, 
2127.668553735203, 1560.2612698518506, 1388.477913445704, 343.4396777247308, 
2224.203712525573, 398.2498540373566, 1309.1823506793828, 2179.8045447466484], 
"eval_len": [1000, 1000, 1000, 716, 687, 178, 1000, 210, 597, 1000]}

 30%|██▉       | 299999/1000000 [3:05:11<4:59:25, 38.96it/s]global step 300000, trans_decision ep_re 1579.5462105573242

{"global_step": 300000, "eval_re": [2133.6329741186905, 2338.7607505203146, 
2166.878024756541, 2225.387885454766, 324.44757265828684, 1866.3596070415902, 
2012.182761437775, 978.7215307869537, 1275.6695446746496, 473.42145412367614], 
"eval_len": [1000, 1000, 1000, 942, 206, 1000, 1000, 457, 575, 258]}

 31%|███       | 309997/1000000 [3:11:21<4:58:54, 38.47it/s]global step 310000, trans_decision ep_re 1228.18011209838

{"global_step": 310000, "eval_re": [268.8286902015663, 1499.8874280899918, 
1782.347660146937, 1211.1494593572581, 1140.2888805624077, 896.421925518508, 
1385.4761561441464, 2160.5492088941487, 1315.1613353175949, 621.6903767512424], 
"eval_len": [157, 660, 862, 579, 569, 1000, 721, 1000, 673, 300]}

 32%|███▏      | 319997/1000000 [3:17:31<4:54:34, 38.47it/s]global step 320000, trans_decision ep_re 1568.1486668936925

{"global_step": 320000, "eval_re": [1033.4859689274851, 2072.110431624596, 
2214.159717464541, 2156.3642851759173, 1934.42918392465, 1987.5583928813535, 
1412.737707651466, 1700.0261444413723, 605.5737960877243, 565.041040757819], 
"eval_len": [508, 1000, 1000, 1000, 1000, 1000, 645, 1000, 243, 232]}

 33%|███▎      | 329996/1000000 [3:23:51<4:46:46, 38.94it/s]global step 330000, trans_decision ep_re 1125.0706104108776

{"global_step": 330000, "eval_re": [1443.4404772838298, 268.173981556837, 
1620.8254670778192, 2138.660775160751, 53.21785474453176, 860.6764144030755, 
1447.1992293395097, 1902.1844054403991, 384.45735574125337, 1131.8701433607687],
"eval_len": [747, 126, 872, 1000, 43, 381, 1000, 1000, 211, 1000]}

 34%|███▍      | 339999/1000000 [3:30:01<4:43:17, 38.83it/s]global step 340000, trans_decision ep_re 1097.3905466011086

{"global_step": 340000, "eval_re": [280.4211458577913, 2299.154873842155, 
1375.1940772907171, 826.9458551052434, 878.5483505960703, 699.240519740701, 
2116.980814370829, 1405.2434926943097, 280.3508182897269, 811.8255182235414], 
"eval_len": [154, 1000, 679, 420, 365, 339, 1000, 695, 160, 410]}

 35%|███▍      | 349999/1000000 [3:36:01<4:36:22, 39.20it/s]global step 350000, trans_decision ep_re 1237.911812416447

{"global_step": 350000, "eval_re": [522.1807457102641, 130.19862863974325, 
875.0250399659967, 1195.7848078734432, 792.9552262859921, 2276.164642291955, 
2426.439235953559, 1930.491829686452, 1129.912556497754, 1099.9654112593132], 
"eval_len": [222, 74, 354, 552, 327, 1000, 1000, 1000, 550, 459]}

 36%|███▌      | 359998/1000000 [3:42:01<4:35:25, 38.73it/s]global step 360000, trans_decision ep_re 1624.2667226379983

{"global_step": 360000, "eval_re": [1964.7550175399517, 1867.9517263137077, 
1921.3914796039005, 1951.223187969636, 1424.099506544029, 71.78288749542422, 
1990.0471748368197, 2102.699739964135, 1107.3275611449867, 1841.3889449673932], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 60, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [3:48:21<4:28:36, 39.09it/s]global step 370000, trans_decision ep_re 1978.0388332401567

{"global_step": 370000, "eval_re": [1694.3689757701652, 2074.5115022875343, 
2195.3578317719166, 2176.1391704685793, 2115.2913185898133, 2238.3922720182845, 
2007.6540449787017, 1365.825950725274, 1953.0009791146824, 1959.8462866766126], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 646, 1000, 1000]}

 38%|███▊      | 379997/1000000 [3:54:41<4:25:32, 38.91it/s]global step 380000, trans_decision ep_re 1750.3341500505026

{"global_step": 380000, "eval_re": [1899.4635205876898, 2104.87386446394, 
1526.4289863189244, 1988.6512405428898, 2083.3938556919447, 1987.0408574187086, 
1812.5614006567596, 1833.6061119369972, 287.08632849949777, 1980.235334387674], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 148, 1000]}

 39%|███▉      | 389998/1000000 [4:01:01<4:19:09, 39.23it/s]global step 390000, trans_decision ep_re 1329.6164230404956

{"global_step": 390000, "eval_re": [281.0066333067529, 2077.2770854037535, 
1122.1572155427277, 101.13515826580127, 2047.9323479574373, 1885.5272692894487, 
2095.968528227491, 2202.172155061451, 1099.6942281516665, 383.293609198427], 
"eval_len": [130, 1000, 1000, 60, 1000, 1000, 1000, 1000, 1000, 193]}

 40%|███▉      | 399998/1000000 [4:07:11<4:16:32, 38.98it/s]global step 400000, trans_decision ep_re 1710.8091549783803

{"global_step": 400000, "eval_re": [1057.2466571919317, 1284.9103456751013, 
1682.1660543124156, 2245.109354902595, 2021.4128785386229, 1900.932222346555, 
2055.04966720309, 409.3805220487807, 2129.054955370209, 2322.8288921944995], 
"eval_len": [458, 1000, 710, 1000, 1000, 1000, 1000, 263, 966, 1000]}

 41%|████      | 409998/1000000 [4:13:21<4:11:53, 39.04it/s]global step 410000, trans_decision ep_re 1508.6118292788456

{"global_step": 410000, "eval_re": [2261.8664787008684, 2256.547502758572, 
2120.0340858276563, 2238.5562453673024, 2035.2736064951457, 887.598108820636, 
664.7397229481829, 67.29337009940805, 418.54996749557955, 2135.6592042751045], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 396, 1000, 52, 204, 1000]}

 42%|████▏     | 419996/1000000 [4:19:31<4:06:04, 39.28it/s]global step 420000, trans_decision ep_re 1472.5165149059771

{"global_step": 420000, "eval_re": [1875.305353083547, 2243.878301556589, 
2112.9379266587903, 1865.3072538696515, 209.8394585000446, 743.0964163914249, 
776.8652620516038, 544.5769710285128, 2183.423389520232, 2169.9348163993745], 
"eval_len": [1000, 1000, 1000, 901, 102, 379, 398, 228, 1000, 989]}

 43%|████▎     | 429997/1000000 [4:25:41<4:03:02, 39.09it/s]global step 430000, trans_decision ep_re 1793.3769753625206

{"global_step": 430000, "eval_re": [1268.9887119893833, 2292.723580168829, 
2059.5619985207823, 2075.677784571153, 2283.4727029868072, 2053.623861292967, 
1264.9624991507997, 2102.345718517651, 369.7646751082024, 2162.6482213186277], 
"eval_len": [621, 1000, 1000, 898, 1000, 1000, 600, 1000, 177, 1000]}

 44%|████▍     | 439999/1000000 [4:31:51<3:59:06, 39.03it/s]global step 440000, trans_decision ep_re 1553.723552013922

{"global_step": 440000, "eval_re": [950.7076248666646, 2129.765583546377, 
1090.5025009677722, 367.6359411592126, 2204.9291377953273, 2249.4501285856, 
2129.562304077321, 1060.7318476587716, 2297.517930404793, 1056.4325210773823], 
"eval_len": [436, 1000, 584, 198, 1000, 1000, 1000, 519, 995, 502]}

 45%|████▍     | 449997/1000000 [4:38:01<3:55:01, 39.00it/s]global step 450000, trans_decision ep_re 1532.864895162764

{"global_step": 450000, "eval_re": [901.4999341393285, 2056.6166070844756, 
1657.619540216961, 353.34134660963434, 1538.9522282126536, 2060.9384482263235, 
2117.2218041897236, 1680.8822519527534, 2192.0969717557637, 769.4798192400226], 
"eval_len": [398, 1000, 1000, 178, 1000, 1000, 1000, 890, 1000, 1000]}

 46%|████▌     | 459998/1000000 [4:44:21<3:49:45, 39.17it/s]global step 460000, trans_decision ep_re 1944.503384119509

{"global_step": 460000, "eval_re": [1998.6192859442183, 2343.2502483787, 
2169.5954647294034, 2122.927373055383, 2195.6247478086034, 510.0073574701491, 
1936.6144552296764, 2122.971317062944, 2113.370066828761, 1932.0535246872516], 
"eval_len": [1000, 1000, 1000, 962, 1000, 240, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469996/1000000 [4:50:31<3:46:00, 39.08it/s]global step 470000, trans_decision ep_re 1064.7205874403094

{"global_step": 470000, "eval_re": [353.22478953231774, 471.7164396316639, 
932.3098815920782, 340.37827592804064, 1926.078023387743, 1284.4844600419358, 
1725.9545082319084, 850.1432482101345, 2004.5918795259117, 758.3243683213611], 
"eval_len": [173, 236, 429, 179, 824, 1000, 805, 391, 1000, 393]}

 48%|████▊     | 479997/1000000 [4:56:42<3:41:57, 39.05it/s]global step 480000, trans_decision ep_re 1851.7672101967066

{"global_step": 480000, "eval_re": [799.8739242418704, 2034.586939020819, 
2091.3709799930107, 2184.848491110315, 2162.8148289851592, 2134.080886567934, 
2255.677695621903, 2320.5552474539045, 2124.3283024667453, 409.534806505405], 
"eval_len": [461, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 195]}

 49%|████▉     | 489998/1000000 [5:02:52<3:36:56, 39.18it/s]global step 490000, trans_decision ep_re 1022.4352193944351

{"global_step": 490000, "eval_re": [1555.296809228162, 983.6612846920704, 
351.30345799237875, 2108.22802295838, 1472.861437685489, 459.560013222387, 
918.1001996753516, 359.81718280677575, 477.5530439221578, 1537.9707417611996], 
"eval_len": [699, 1000, 206, 1000, 1000, 246, 399, 1000, 262, 748]}

 50%|████▉     | 499998/1000000 [5:09:02<3:38:34, 38.13it/s]global step 500000, trans_decision ep_re 1527.9020333805156

{"global_step": 500000, "eval_re": [1588.3909268653356, 895.6502010908724, 
2239.1632274026897, 1803.4482332451116, 1135.5540113793043, 1295.955975069576, 
2169.834589368899, 1692.7407817209241, 2183.9342862716594, 274.3481013907855], 
"eval_len": [1000, 424, 1000, 1000, 481, 1000, 1000, 783, 1000, 137]}

 51%|█████     | 509998/1000000 [5:15:12<3:34:01, 38.16it/s]global step 510000, trans_decision ep_re 1565.240530732745

{"global_step": 510000, "eval_re": [2034.1541026441216, 2182.3769753732945, 
812.6251392287312, 386.64366297732477, 841.3941599286655, 1964.0181544068862, 
1533.3472767769085, 1717.7615552236637, 2032.2909251784513, 2147.7933555894033],
"eval_len": [1000, 1000, 1000, 190, 1000, 1000, 760, 873, 1000, 1000]}

 52%|█████▏    | 519997/1000000 [5:21:32<3:28:51, 38.30it/s]global step 520000, trans_decision ep_re 1612.3181243356369

{"global_step": 520000, "eval_re": [1430.609501889395, 2296.8559925563986, 
1643.5030109395727, 1177.340061045756, 2334.457716188541, 2050.1917299274637, 
740.4662002321675, 204.23009379438608, 1973.9351043048116, 2271.5918324778763], 
"eval_len": [667, 1000, 706, 539, 1000, 874, 1000, 115, 897, 1000]}

 53%|█████▎    | 529997/1000000 [5:27:42<3:27:05, 37.83it/s]global step 530000, trans_decision ep_re 1624.4464851587786

{"global_step": 530000, "eval_re": [457.17232110578266, 2108.5202926854004, 
219.88120155990188, 2399.9690021258034, 2207.5674193486248, 2338.345505089453, 
2186.636286772583, 2239.6812411592273, 1784.4739738234787, 302.21760791753127], 
"eval_len": [198, 1000, 107, 1000, 1000, 1000, 1000, 1000, 832, 148]}

 54%|█████▍    | 539996/1000000 [5:33:52<3:21:17, 38.09it/s]global step 540000, trans_decision ep_re 2163.000358578195

{"global_step": 540000, "eval_re": [2153.5797263060767, 2123.644569905103, 
2189.2168111207143, 2080.3816435018975, 2218.5122222179634, 2274.968949370816, 
2144.4309216727524, 2141.0387504409864, 2185.9951806889717, 2118.2348105566666],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549998/1000000 [5:40:12<3:11:00, 39.26it/s]global step 550000, trans_decision ep_re 1503.13900969023

{"global_step": 550000, "eval_re": [954.3619385225238, 2066.7569511443676, 
2428.2313737031577, 2201.8013884053307, 1761.5395992148026, 2148.419939494498, 
946.1811686688919, 1804.8988210125735, 339.809080482257, 379.38983625389903], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 559, 848, 155, 169]}

 56%|█████▌    | 559999/1000000 [5:46:22<3:07:17, 39.15it/s]global step 560000, trans_decision ep_re 1656.2032192782058

{"global_step": 560000, "eval_re": [584.9479925102976, 1290.5211706202372, 
2199.071753570413, 2223.3685136260383, 2102.906022508852, 2154.094995281184, 
1332.8423203450818, 2009.5852428396568, 449.50819496950345, 2215.185986510795], 
"eval_len": [244, 589, 1000, 1000, 1000, 1000, 621, 877, 241, 1000]}

 57%|█████▋    | 569998/1000000 [5:52:32<3:02:59, 39.16it/s]global step 570000, trans_decision ep_re 1452.6397363864858

{"global_step": 570000, "eval_re": [1043.2345295671391, 1946.0214333902275, 
2109.8527175352556, 1267.98895841886, 1309.9489953441055, 545.1126474644077, 
1300.0088695581487, 1812.7437967785152, 1077.9475680552052, 2113.537847752992], 
"eval_len": [1000, 1000, 1000, 1000, 626, 249, 613, 1000, 1000, 1000]}

 58%|█████▊    | 579996/1000000 [5:58:52<2:58:59, 39.11it/s]global step 580000, trans_decision ep_re 1046.0306618426735

{"global_step": 580000, "eval_re": [2192.7626962633653, 51.49484211225517, 
587.2939083602892, 2124.669792477, 570.2705955287074, 1276.8827420357427, 
2148.250580270661, 439.89084640821636, 284.2597539167119, 784.5308610537866], 
"eval_len": [1000, 44, 274, 1000, 265, 644, 1000, 197, 193, 346]}

 59%|█████▉    | 589996/1000000 [6:04:52<2:54:56, 39.06it/s]global step 590000, trans_decision ep_re 1478.5220685927652

{"global_step": 590000, "eval_re": [912.7911307743182, 743.7134348725541, 
2194.237653722429, 2066.6665566332554, 1044.1753469655612, 2156.459696835242, 
1612.5288767159554, 1027.8750976331326, 947.9775140799919, 2078.7953776952127], 
"eval_len": [410, 347, 1000, 1000, 456, 1000, 1000, 467, 368, 1000]}

 60%|█████▉    | 599996/1000000 [6:11:02<2:52:10, 38.72it/s]global step 600000, trans_decision ep_re 1227.8713123239897

{"global_step": 600000, "eval_re": [198.81858461341744, 1242.676633321686, 
1340.507481376899, 1132.4489636315857, 1263.413331841831, 1184.2812356826237, 
1161.678663124188, 1513.7411795258404, 2220.3470855438527, 1020.7999645779747], 
"eval_len": [121, 1000, 653, 1000, 1000, 571, 611, 666, 1000, 1000]}

 61%|██████    | 609997/1000000 [6:17:22<2:51:54, 37.81it/s]global step 610000, trans_decision ep_re 1038.97486487589

{"global_step": 610000, "eval_re": [796.8245678217182, 2304.212937214254, 
2207.400420955907, 2205.9504039514104, -545.6109706460363, 53.916409176413076, 
143.4855042320446, 1999.69549557582, 271.02533416227345, 952.8485463150975], 
"eval_len": [1000, 1000, 1000, 890, 1000, 44, 85, 1000, 167, 430]}

 62%|██████▏   | 619999/1000000 [6:23:22<2:47:07, 37.90it/s]global step 620000, trans_decision ep_re 1446.4992878071332

{"global_step": 620000, "eval_re": [999.0981820966805, 2013.4160171004066, 
1829.0882283261956, 1850.7600374172714, 1800.8957519648802, 1295.7997106191258, 
1890.854108058272, 1260.5263997077232, 1363.3531828339528, 161.20125994682502], 
"eval_len": [1000, 1000, 872, 1000, 1000, 1000, 1000, 741, 614, 103]}

 63%|██████▎   | 629998/1000000 [6:29:42<2:42:29, 37.95it/s]global step 630000, trans_decision ep_re 1006.528917770696

{"global_step": 630000, "eval_re": [658.4328319796164, 508.8903508021288, 
2116.956096902784, 604.5058736191586, 1021.3607160354239, 59.93251773090756, 
146.01169126122392, 1371.7135337224213, 1288.0020608404611, 2289.4835048128357],
"eval_len": [302, 237, 1000, 291, 406, 1000, 72, 741, 1000, 1000]}

 64%|██████▍   | 639996/1000000 [6:35:42<2:36:08, 38.43it/s]global step 640000, trans_decision ep_re 1866.5664189650329

{"global_step": 640000, "eval_re": [1244.603496950859, 1835.8703542854996, 
2099.888834579441, 2220.826531171189, 1941.8101257755743, 2027.0951631230919, 
2187.3668024271647, 2043.4746377485264, 1883.5515547743898, 1181.1766888145937],
"eval_len": [646, 1000, 1000, 1000, 1000, 1000, 1000, 973, 1000, 1000]}

 65%|██████▍   | 649998/1000000 [6:42:02<2:33:32, 37.99it/s]global step 650000, trans_decision ep_re 1493.6735975281185

{"global_step": 650000, "eval_re": [675.6028887462204, 1350.740103852652, 
293.9378481919786, 1110.7288283274138, 2072.1866670426366, 1843.0035241897722, 
2060.595103064908, 1886.9953202298436, 1647.640983424129, 1995.3047082116282], 
"eval_len": [400, 665, 141, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [6:48:12<2:30:13, 37.72it/s]global step 660000, trans_decision ep_re 1510.8446525492045

{"global_step": 660000, "eval_re": [2039.6562077959363, 1546.1004144552164, 
-128.89674508526392, 1352.9520167081391, 2151.9863122177812, 43.95950147630537, 
2133.7133621609, 1574.8568756267946, 2068.794387737305, 2325.3241923989303], 
"eval_len": [1000, 1000, 1000, 625, 1000, 46, 1000, 652, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [6:54:32<2:25:32, 37.79it/s]global step 670000, trans_decision ep_re 1591.263461653767

{"global_step": 670000, "eval_re": [2156.1838804053073, 867.7258879237319, 
1999.8822576506152, 1624.9912472659562, 1088.644209978583, 1987.2573111874824, 
1984.533384600327, 2064.83908585638, 2052.085933318269, 86.49141835101906], 
"eval_len": [1000, 1000, 1000, 820, 1000, 1000, 1000, 1000, 1000, 75]}

 68%|██████▊   | 679996/1000000 [7:00:42<2:16:18, 39.13it/s]global step 680000, trans_decision ep_re 1921.093442339135

{"global_step": 680000, "eval_re": [2135.270286677061, 1766.772123557124, 
2059.18294070585, 1781.2456505876949, 1932.2851405256126, 1853.8587245553038, 
1978.1595486311544, 1830.0428189529828, 1932.1616744956418, 1941.9555147029268],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689996/1000000 [7:07:02<2:12:06, 39.11it/s]global step 690000, trans_decision ep_re 1908.7165332650889

{"global_step": 690000, "eval_re": [1200.466552011758, 2319.8192439198297, 
428.3873425754862, 2189.334728219138, 2261.9127103600254, 2128.8421107337517, 
2297.7114027957077, 2153.4405839123283, 2015.5826417020285, 2091.6680164208333],
"eval_len": [604, 1000, 218, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699996/1000000 [7:13:22<2:07:33, 39.20it/s]global step 700000, trans_decision ep_re 1931.3234951982172

{"global_step": 700000, "eval_re": [1765.2653476443588, 2176.0181224509192, 
2122.3401333782194, 2244.3891813611085, 2359.256054681018, 1285.470107840409, 
1153.5696168645172, 1954.8530271543827, 2087.3295746334607, 2164.743785973779], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 699, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [7:19:32<2:02:58, 39.31it/s]global step 710000, trans_decision ep_re 1571.6443043043835

{"global_step": 710000, "eval_re": [1839.0339998404684, 786.6790032210517, 
2203.9765931387824, 558.971636663533, 1931.4740946311463, 1984.2628202726858, 
1108.8459227684486, 1977.6940454557116, 2160.8861009035704, 1164.6188261484385],
"eval_len": [876, 388, 1000, 310, 1000, 1000, 1000, 897, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [7:25:52<1:59:21, 39.10it/s]global step 720000, trans_decision ep_re 1299.824568637609

{"global_step": 720000, "eval_re": [1360.1812724560223, 2187.3856000401734, 
1115.5315861703095, 1228.1759428022397, 2173.8555780731162, 143.09580595836383, 
401.30371172690303, 1633.7357920746306, 1902.047988499877, 852.9324085744562], 
"eval_len": [1000, 1000, 594, 1000, 1000, 77, 173, 1000, 1000, 477]}

 73%|███████▎  | 729997/1000000 [7:32:02<1:54:45, 39.21it/s]global step 730000, trans_decision ep_re 1561.7795217066841

{"global_step": 730000, "eval_re": [553.4175052888553, 865.7086821143756, 
2103.9705355121782, 2298.0917395904553, 2146.777954807316, 2145.3577365148517, 
2220.5477515394896, 1194.2643356160413, 709.864557024455, 1379.7944190588216], 
"eval_len": [274, 1000, 1000, 1000, 1000, 1000, 1000, 594, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [7:38:12<1:51:38, 38.81it/s]global step 740000, trans_decision ep_re 1567.406442963393

{"global_step": 740000, "eval_re": [1561.5173073963335, 1092.793929138871, 
2145.2952535489753, 2146.5994637896674, 1265.5833890530382, 608.590539133548, 
2332.666857647213, 2151.86614001732, 1657.6786666068353, 711.4728833021276], 
"eval_len": [711, 502, 1000, 1000, 584, 279, 1000, 1000, 1000, 323]}

 75%|███████▍  | 749997/1000000 [7:44:22<1:46:24, 39.16it/s]global step 750000, trans_decision ep_re 2090.6145907242058

{"global_step": 750000, "eval_re": [1964.0816919502172, 2247.5457574306533, 
2135.040489606748, 2167.9277866488205, 1941.0253206582863, 2103.20865761254, 
1805.3363663925431, 2040.9097731553047, 2276.2602465550312, 2224.8098172319187],
"eval_len": [947, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759996/1000000 [7:50:42<1:42:20, 39.09it/s]global step 760000, trans_decision ep_re 621.9747161409416

{"global_step": 760000, "eval_re": [183.5953394907531, 225.30785983234648, 
224.16083854669714, 579.7482763857024, 610.6908340864155, 1099.3123991233992, 
125.15864392283079, 1620.6395656671527, 670.5709600369114, 880.5624443172062], 
"eval_len": [104, 108, 119, 378, 1000, 569, 95, 1000, 353, 514]}

 77%|███████▋  | 769998/1000000 [7:56:42<1:37:56, 39.14it/s]global step 770000, trans_decision ep_re 1552.2275163484073

{"global_step": 770000, "eval_re": [1924.375671469302, 2074.8461587694756, 
423.6967940879347, 547.2223720235631, 1755.8091598338967, 1934.1355778195948, 
1853.2194784271069, 1187.519987324788, 1856.0394889469658, 1965.4104747814458], 
"eval_len": [1000, 1000, 253, 306, 1000, 1000, 1000, 716, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [8:02:52<1:33:33, 39.19it/s]global step 780000, trans_decision ep_re 1681.1689399896277

{"global_step": 780000, "eval_re": [2337.953613225234, 2188.777970325015, 
2289.4400890497914, 1625.7272621747663, 776.91922900462, 1143.46016494001, 
1248.551489127161, 2013.8166731869642, 2089.9595354068088, 1097.0833734559053], 
"eval_len": [1000, 1000, 1000, 1000, 382, 490, 585, 1000, 1000, 503]}

 79%|███████▉  | 789999/1000000 [8:09:02<1:29:22, 39.16it/s]global step 790000, trans_decision ep_re 1315.1245881210123

{"global_step": 790000, "eval_re": [330.684577880838, 344.9884828878708, 
283.87174065233853, 2151.551997556405, 2156.9015610412544, 1781.19804635868, 
2370.772388496951, 1303.6947407518435, 392.7208693744525, 2034.8614762094915], 
"eval_len": [1000, 1000, 127, 1000, 1000, 853, 1000, 601, 171, 1000]}

 80%|███████▉  | 799999/1000000 [8:15:12<1:25:34, 38.95it/s]global step 800000, trans_decision ep_re 1547.4335915389242

{"global_step": 800000, "eval_re": [1618.900181848338, 1951.2009396719027, 
1075.6897832572206, 2275.5887956460147, 2022.973737546601, 537.7311275395303, 
1391.4258519511002, 1549.347834910055, 2384.618953222707, 666.8587097957721], 
"eval_len": [696, 852, 513, 1000, 1000, 271, 1000, 683, 1000, 312]}

 81%|████████  | 809996/1000000 [8:21:22<1:20:48, 39.19it/s]global step 810000, trans_decision ep_re 1348.2558188050166

{"global_step": 810000, "eval_re": [55.36371222519241, 1055.8538510052538, 
2120.500526547401, 1334.514664487598, 2124.1456015588083, 1069.1528621984176, 
2044.5943569248068, 1162.5329622135396, 1828.40448006327, 687.4951708258772], 
"eval_len": [45, 512, 1000, 677, 1000, 530, 1000, 547, 790, 335]}

 82%|████████▏ | 819996/1000000 [8:27:32<1:17:06, 38.91it/s]global step 820000, trans_decision ep_re 1143.6503731336513

{"global_step": 820000, "eval_re": [922.7037226421869, 743.6392689787898, 
1274.4971046593046, 2121.0648172397705, 267.01532166077556, 2277.333729365413, 
2302.3370329648756, 543.414208004969, 366.8458927722443, 617.652633048184], 
"eval_len": [1000, 375, 626, 1000, 144, 1000, 1000, 267, 211, 282]}

 83%|████████▎ | 829996/1000000 [8:33:32<1:13:36, 38.49it/s]global step 830000, trans_decision ep_re 1920.4334413761571

{"global_step": 830000, "eval_re": [1970.5590608937612, 2245.705026019926, 
2204.0433930220547, 1885.2866703083516, 2070.4015457998535, 357.53904484549423, 
1989.1808367755068, 2083.652417434315, 2169.2583501614376, 2228.708068500869], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 180, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [8:39:52<1:10:20, 37.91it/s]global step 840000, trans_decision ep_re 1392.355448677006

{"global_step": 840000, "eval_re": [2070.718430180927, 2061.4764437053896, 
2182.123248208579, 219.22837208426822, 914.0309723777813, 1890.2997439618357, 
1378.8551652745814, 414.86994336399124, 2139.714764645687, 652.2374029670215], 
"eval_len": [1000, 1000, 1000, 133, 354, 1000, 1000, 245, 1000, 335]}

 85%|████████▍ | 849996/1000000 [8:46:02<1:06:16, 37.72it/s]global step 850000, trans_decision ep_re 1618.3819377102839

{"global_step": 850000, "eval_re": [2148.381547506514, 1429.81228572915, 
2164.219623219216, 2153.4648614720536, 1381.114993195583, 1782.8826195494655, 
61.675797993481744, 2057.4306818013515, 819.0198190217528, 2185.81714761427], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 818, 45, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [8:52:12<1:02:12, 37.51it/s]global step 860000, trans_decision ep_re 1482.407033191717

{"global_step": 860000, "eval_re": [1743.530034577495, 1629.2837984461464, 
114.39338851962117, 1713.9062199239315, 528.7712189389271, 1877.7036236119798, 
1566.3197949761582, 1910.7545368429583, 1853.1952711933416, 1886.2124448866127],
"eval_len": [1000, 984, 87, 1000, 1000, 1000, 874, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [8:58:32<57:34, 37.63it/s]global step 870000, trans_decision ep_re 1484.6760480271214

{"global_step": 870000, "eval_re": [2159.379500864678, 585.2582718914666, 
184.858098867894, 2237.7870891813477, 1741.1336750175315, 1117.6448928466143, 
2294.5160260992484, 163.4902259399978, 2106.9347853743307, 2255.7579141881033], 
"eval_len": [1000, 286, 127, 1000, 862, 511, 1000, 145, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [9:04:42<53:08, 37.64it/s]global step 880000, trans_decision ep_re 1792.8652911938618

{"global_step": 880000, "eval_re": [2153.9277094891722, 1999.7021719019403, 
674.3651174820639, 2063.4470138585207, 2139.1865948775176, 486.7676323220557, 
2081.8775951924927, 2185.310588506199, 2158.222072066925, 1985.846416241728], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 240, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889996/1000000 [9:11:02<48:27, 37.84it/s]global step 890000, trans_decision ep_re 1433.333959261597

{"global_step": 890000, "eval_re": [54.49551847867391, 2332.066562913418, 
2269.9213474214594, 1429.008725918922, 1243.4157701657293, 62.495547452025825, 
2229.9990569412303, 2266.091935722242, 2280.5474076618734, 165.29771994039643], 
"eval_len": [44, 1000, 1000, 1000, 588, 45, 1000, 1000, 1000, 94]}

 90%|████████▉ | 899998/1000000 [9:17:12<43:13, 38.56it/s]global step 900000, trans_decision ep_re 1740.2524990498623

{"global_step": 900000, "eval_re": [2217.8256180669932, 1321.1387817446744, 
2180.0840363597877, 2412.659980380895, 2346.518559396723, 2014.7325730022376, 
2391.6167696414936, 1155.2783811083866, 285.2154697995278, 1077.454820997903], 
"eval_len": [1000, 571, 1000, 1000, 1000, 1000, 1000, 490, 127, 558]}

 91%|█████████ | 909999/1000000 [9:23:22<38:55, 38.54it/s]global step 910000, trans_decision ep_re 1529.4408695352017

{"global_step": 910000, "eval_re": [1256.6134072616433, 1991.8002106814743, 
971.3553949230079, 2214.084455427414, 2155.2815260651996, 2274.75048766805, 
2237.4983335748407, 992.7177746853682, 960.2680878711047, 240.03901719391703], 
"eval_len": [539, 1000, 403, 1000, 1000, 1000, 979, 473, 381, 113]}

 92%|█████████▏| 919996/1000000 [9:29:32<34:17, 38.88it/s]global step 920000, trans_decision ep_re 1924.0439219654468

{"global_step": 920000, "eval_re": [2109.7992853013216, 2194.8450189917908, 
2095.6200493602773, 2066.411835877865, 2159.5359181198965, 2011.2290548823537, 
2150.8139304395777, 161.31016664593616, 2118.3735136616983, 2172.50044637375], 
"eval_len": [1000, 1000, 898, 1000, 1000, 1000, 1000, 94, 1000, 1000]}

 93%|█████████▎| 929996/1000000 [9:35:52<30:26, 38.33it/s]global step 930000, trans_decision ep_re 1453.1551289111262

{"global_step": 930000, "eval_re": [1967.7721900534962, 678.6628715516631, 
285.4343299263422, 1299.1065106531744, 2319.6397852854757, 2143.505034923353, 
1820.517764508068, 238.59078866174627, 1719.4608610529403, 2058.8611524950034], 
"eval_len": [1000, 347, 189, 717, 1000, 1000, 1000, 139, 836, 1000]}

 94%|█████████▍| 939998/1000000 [9:42:02<26:08, 38.25it/s]global step 940000, trans_decision ep_re 1933.4699057606344

{"global_step": 940000, "eval_re": [2153.85690975574, 2088.653210679284, 
2115.7908378428056, 2153.0048473993875, 2279.799790055681, 1964.3058654565482, 
245.42158896103723, 2025.7821950684117, 2186.450072459407, 2121.6337399280415], 
"eval_len": [1000, 965, 1000, 1000, 1000, 1000, 144, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [9:48:22<21:50, 38.16it/s]global step 950000, trans_decision ep_re 1754.0484718574603

{"global_step": 950000, "eval_re": [2277.2408480234194, 1971.6145920811903, 
2259.8247728154925, 1965.9793516720285, 130.71733681374582, 1706.913384968364, 
2038.4128729699244, 2088.679453013235, 878.5184127847623, 2222.5836934324398], 
"eval_len": [1000, 1000, 1000, 1000, 85, 1000, 1000, 1000, 408, 1000]}

 96%|█████████▌| 959997/1000000 [9:54:42<17:26, 38.24it/s]global step 960000, trans_decision ep_re 1110.5420901365408

{"global_step": 960000, "eval_re": [949.6198899370463, 1739.5339001212478, 
1752.6592669471306, 592.2101778030375, 1637.498804833652, 453.60083291104644, 
2196.0029549079572, 567.7267028645658, 773.3706219888542, 443.19774905086797], 
"eval_len": [443, 808, 842, 239, 746, 220, 1000, 267, 337, 214]}

 97%|█████████▋| 969999/1000000 [10:00:42<13:00, 38.44it/s]global step 970000, trans_decision ep_re 1780.0035908361879

{"global_step": 970000, "eval_re": [665.6270102584022, 2382.0647485912004, 
1254.237427507637, 2333.1043997228808, 1247.782696555531, 1416.0832747473942, 
2237.869001599849, 1685.9285504695465, 2236.5106058065703, 2340.828193102868], 
"eval_len": [306, 1000, 558, 1000, 1000, 641, 1000, 785, 1000, 1000]}

 98%|█████████▊| 979998/1000000 [10:07:02<08:35, 38.82it/s]global step 980000, trans_decision ep_re 1611.7882377410465

{"global_step": 980000, "eval_re": [949.7281313692056, 2107.2363522135997, 
424.84952066628733, 2138.450851153567, 1450.7158157699057, 1765.4251287360667, 
2012.436088908954, 1015.2392729456376, 2150.188912741016, 2103.612302906226], 
"eval_len": [527, 1000, 270, 1000, 757, 854, 1000, 479, 1000, 1000]}

 99%|█████████▉| 989997/1000000 [10:13:12<04:18, 38.63it/s]global step 990000, trans_decision ep_re 1733.6503218182497

{"global_step": 990000, "eval_re": [1740.3251782734026, 2179.3949229423556, 
1766.8305136997221, 2040.2711375722015, 1477.2759042249254, 2291.898237638301, 
843.5114258126739, 828.8578442857543, 2040.2958913995267, 2127.842162333633], 
"eval_len": [822, 1000, 779, 1000, 711, 1000, 1000, 400, 1000, 1000]}

100%|█████████▉| 999999/1000000 [10:19:32<00:00, 38.70it/s]global step 1000000, trans_decision ep_re 1417.017760175483

{"global_step": 1000000, "eval_re": [879.1048229967155, 1546.2131395724687, 
251.83051452694664, 382.4459513989882, 651.9230386842289, 2326.4563795657655, 
2092.7931717605325, 2389.174460802566, 1504.4557417786204, 2145.7803806679985], 
"eval_len": [430, 696, 141, 178, 310, 1000, 1000, 1000, 625, 1000]}

100%|██████████| 1000000/1000000 [10:19:42<00:00, 26.89it/s]
