
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9999/1000000 [05:30<13:20:53, 20.60it/s]global step 10000, trans_decision ep_re 569.2685374073862

{"global_step": 10000, "eval_re": [626.9488135204609, 702.5664059848066, 
681.6989254482622, 546.3616965075889, 634.9888860932375, 438.4924239510616, 
676.390699650747, 629.3984196942615, 720.3738268371264, 35.465276386309355], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 40]}

  2%|▏         | 19998/1000000 [16:50<12:55:33, 21.06it/s]global step 20000, trans_decision ep_re 373.69256496338926

{"global_step": 20000, "eval_re": [60.68389112563006, 648.8095878105572, 
765.0461700855989, 515.5467037554666, 585.8754142150932, 181.2116340461358, 
115.45555250719774, 323.1655555870715, 63.14812193338034, 477.9830185677611], 
"eval_len": [93, 1000, 1000, 730, 1000, 227, 135, 442, 98, 615]}

  3%|▎         | 29999/1000000 [27:50<12:48:01, 21.05it/s]global step 30000, trans_decision ep_re 573.5958217885899

{"global_step": 30000, "eval_re": [719.5976031938429, 30.125899184973086, 
711.9202908692315, 556.6334006885971, 511.44071903108284, 711.7382106218788, 
204.38103030172752, 821.7096831040432, 747.3870843025494, 721.0242965879728], 
"eval_len": [1000, 40, 1000, 1000, 640, 1000, 256, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [39:00<12:45:40, 20.90it/s]global step 40000, trans_decision ep_re 571.4483612856242

{"global_step": 40000, "eval_re": [38.57902841277593, 839.2520822769425, 
298.5225363256596, 809.101387017192, 937.596454591235, 892.2784784541086, 
791.6957960433664, 622.3369081709999, 292.0641661447767, 193.05677541918516], 
"eval_len": [42, 1000, 281, 803, 946, 1000, 1000, 645, 295, 191]}

  5%|▍         | 49999/1000000 [50:10<12:39:03, 20.86it/s]global step 50000, trans_decision ep_re 458.801430043288

{"global_step": 50000, "eval_re": [608.4552488106879, 427.1312836826324, 
963.1483455518743, 79.41006638932515, 709.6634965833164, 278.14609101955875, 
134.8558526430874, 526.057345840456, 186.277906467143, 674.8686634447988], 
"eval_len": [604, 306, 776, 71, 598, 264, 117, 434, 161, 1000]}

  6%|▌         | 59999/1000000 [1:01:00<12:16:49, 21.26it/s]global step 60000, trans_decision ep_re 849.8089015010822

{"global_step": 60000, "eval_re": [1133.372980046229, 1202.9773435324075, 
907.1783339596035, 1224.8979456235325, 145.93606038080782, 931.3636840930601, 
1125.1991927979907, 467.6739155199577, 1171.8772413182328, 187.6123177390002], 
"eval_len": [927, 1000, 1000, 1000, 108, 1000, 1000, 411, 1000, 162]}

  7%|▋         | 69999/1000000 [1:12:10<12:20:04, 20.94it/s]global step 70000, trans_decision ep_re 590.6016628107709

{"global_step": 70000, "eval_re": [237.9794394623984, 1198.0845454573998, 
710.7828735913963, 1252.8547972746965, 603.2429482164763, 182.4256480746396, 
1282.1246627433038, 241.91498909641214, 130.10608475516, 66.50063943582579], 
"eval_len": [299, 1000, 638, 1000, 399, 105, 1000, 261, 234, 58]}

  8%|▊         | 79999/1000000 [1:23:20<12:20:27, 20.71it/s]global step 80000, trans_decision ep_re 923.1661156878293

{"global_step": 80000, "eval_re": [921.6623215668029, 1548.6828792133965, 
1247.5208348834747, 769.6584732697727, 1349.2987091551772, 356.9566837153276, 
1076.726611517445, 26.737832569103997, 613.4186577279711, 1320.9981532598233], 
"eval_len": [681, 1000, 1000, 551, 1000, 358, 951, 58, 583, 1000]}

  9%|▉         | 89999/1000000 [1:34:30<12:01:38, 21.02it/s]global step 90000, trans_decision ep_re 704.2560256525105

{"global_step": 90000, "eval_re": [774.3933893666403, 859.5026601645602, 
218.68523700713638, 49.41305220832342, 1716.5888356066603, 837.541829470675, 
101.26641005888203, 869.0386834445225, 1246.033677801979, 370.0964813957256], 
"eval_len": [440, 1000, 187, 52, 1000, 602, 70, 551, 1000, 198]}

 10%|▉         | 99999/1000000 [1:45:30<11:51:47, 21.07it/s]global step 100000, trans_decision ep_re 1127.5345632113163

{"global_step": 100000, "eval_re": [987.3250023986282, 1372.642267020169, 
1296.3577540707263, 1849.75262905794, 1666.643533960561, 430.20107991089407, 
1555.6199144047998, 586.8413606383008, 1164.6437353224903, 365.31835532865523], 
"eval_len": [635, 1000, 739, 1000, 1000, 214, 879, 360, 1000, 257]}

 11%|█         | 109998/1000000 [1:56:30<11:39:17, 21.21it/s]global step 110000, trans_decision ep_re 1488.8840856355175

{"global_step": 110000, "eval_re": [1720.1036402910274, 710.7213643878804, 
1846.4538537290691, 1962.9257752583826, 1924.172469774762, 1818.466707304256, 
1902.9391197456, 1654.8397844030824, 898.5338491558429, 449.6842923052711], 
"eval_len": [1000, 354, 1000, 1000, 1000, 1000, 1000, 806, 582, 322]}

 12%|█▏        | 119999/1000000 [2:07:50<11:47:08, 20.74it/s]global step 120000, trans_decision ep_re 1636.4718307480325

{"global_step": 120000, "eval_re": [1301.161694729931, 2073.0515699311777, 
1947.7300484843129, 1310.6809194910024, 592.3138783621364, 1965.7328565484977, 
1853.2281248375632, 1725.5435295360558, 1678.3327324931195, 1916.9429530665288],
"eval_len": [663, 1000, 1000, 1000, 300, 1000, 1000, 933, 1000, 1000]}

 13%|█▎        | 129998/1000000 [2:19:00<11:25:30, 21.15it/s]global step 130000, trans_decision ep_re 1506.9743707694956

{"global_step": 130000, "eval_re": [1622.8444142126411, 680.7431031377279, 
2125.1589120318567, 1697.9632161533605, 255.0520678431267, 843.2507618997246, 
2108.3230498039684, 1919.2601098735527, 1898.9782537181122, 1918.1698190208842],
"eval_len": [896, 427, 1000, 1000, 134, 452, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139998/1000000 [2:30:10<11:14:49, 21.24it/s]global step 140000, trans_decision ep_re 1440.8110801690157

{"global_step": 140000, "eval_re": [2174.7995524021962, 2200.338904322117, 
561.9314152743398, 2129.326477877143, 426.5959993402127, 412.1704468997755, 
1496.7232245338416, 1413.3616883718937, 1770.0342471964386, 1822.8288454721994],
"eval_len": [1000, 1000, 282, 1000, 305, 310, 677, 715, 795, 1000]}

 15%|█▍        | 149999/1000000 [2:41:20<11:20:12, 20.83it/s]global step 150000, trans_decision ep_re 780.1373618810833

{"global_step": 150000, "eval_re": [682.1031380173785, 287.44696055084546, 
1415.8133820496307, 91.5110279323869, 927.1624539025412, 402.8229816678847, 
1083.6411691193018, 1731.545860010216, 956.5355116682409, 222.7911338924072], 
"eval_len": [426, 178, 1000, 128, 505, 235, 546, 1000, 489, 113]}

 16%|█▌        | 159999/1000000 [2:52:20<11:07:06, 20.99it/s]global step 160000, trans_decision ep_re 1708.4868036061391

{"global_step": 160000, "eval_re": [1781.5575362784111, 1849.3010503119754, 
2125.477836681866, 1880.8275680837414, 1928.6324132865539, 1316.4291843337753, 
2257.6565752539896, 143.15611534909263, 1971.3080564323632, 1830.5217000496225],
"eval_len": [1000, 1000, 1000, 1000, 1000, 760, 1000, 102, 1000, 1000]}

 17%|█▋        | 169999/1000000 [3:03:30<10:59:28, 20.98it/s]global step 170000, trans_decision ep_re 1194.5259866315066

{"global_step": 170000, "eval_re": [128.83403332926827, 1637.3434382832268, 
1325.4136636531036, 1818.5215131630157, 1555.536972696055, 1916.2728984587843, 
666.8574044295475, 663.6958799167012, 1540.609941480058, 692.1741209053071], 
"eval_len": [74, 1000, 703, 1000, 991, 1000, 391, 1000, 1000, 400]}

 18%|█▊        | 179999/1000000 [3:14:30<10:45:14, 21.18it/s]global step 180000, trans_decision ep_re 1931.3503475509835

{"global_step": 180000, "eval_re": [2294.0839536396525, 2319.9491859237964, 
2050.2732090703194, 1856.5613810910138, 2148.977345102075, 1416.5336396393386, 
718.2270620053856, 2189.6311533722755, 2105.6897141927, 2213.576831473277], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 598, 331, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [3:25:40<10:40:46, 21.07it/s]global step 190000, trans_decision ep_re 1283.3277059298339

{"global_step": 190000, "eval_re": [1147.0699771153832, 1196.14633133451, 
1612.1288952184238, 1252.818773292314, 587.2932537535556, 1721.6753038739475, 
2237.214064802757, 1536.0589584850288, 1272.1808788273938, 270.6906225950234], 
"eval_len": [575, 602, 1000, 1000, 263, 908, 1000, 742, 597, 142]}

 20%|█▉        | 199999/1000000 [3:36:50<10:27:57, 21.23it/s]global step 200000, trans_decision ep_re 2001.0089446804548

{"global_step": 200000, "eval_re": [2346.6944620761383, 1881.4225529505702, 
2469.1461043345457, 2166.039758388997, 1252.1577112294044, 754.4440678475313, 
2045.5902314555667, 2525.501479789659, 2284.5614360128466, 2284.531642719288], 
"eval_len": [1000, 810, 1000, 1000, 642, 350, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:48:00<10:16:44, 21.35it/s]global step 210000, trans_decision ep_re 1849.0641577593728

{"global_step": 210000, "eval_re": [1238.1304329869834, 2305.425369732567, 
453.10182269076165, 2248.1298573342056, 2004.8907490723798, 1995.174347506433, 
2315.1126431695493, 2144.52965801278, 2287.14612516345, 1499.0005719246171], 
"eval_len": [624, 1000, 207, 1000, 1000, 1000, 1000, 924, 1000, 816]}

 22%|██▏       | 219999/1000000 [3:59:00<10:09:38, 21.32it/s]global step 220000, trans_decision ep_re 1651.446075308652

{"global_step": 220000, "eval_re": [749.8827474385804, 2090.6836356745052, 
2245.239383557811, 2073.275937645001, 2156.3019225439893, 2168.6792362993883, 
1350.3137090279095, 383.4950555726325, 2018.6411751171686, 1277.947950209535], 
"eval_len": [370, 1000, 1000, 1000, 1000, 1000, 623, 202, 1000, 764]}

 23%|██▎       | 229999/1000000 [4:10:10<10:10:06, 21.03it/s]global step 230000, trans_decision ep_re 1147.6034094571617

{"global_step": 230000, "eval_re": [1218.5025463318564, 192.50404767981283, 
2037.360718765925, 669.5218009006181, 1524.8301997441501, 1663.5706291784243, 
589.3399764657146, 1220.1563674301608, 785.435086552156, 1574.812721522797], 
"eval_len": [744, 99, 1000, 382, 852, 1000, 367, 774, 469, 891]}

 24%|██▍       | 239998/1000000 [4:21:10<9:56:16, 21.24it/s]global step 240000, trans_decision ep_re 1415.7565452129743

{"global_step": 240000, "eval_re": [319.1616170319315, 1945.4999655538575, 
2014.2571135956189, 1764.2624455615796, 1878.6105971650009, 64.18293113950864, 
166.92151319180167, 2162.7187954460105, 1974.341722869348, 1867.6087505750882], 
"eval_len": [231, 1000, 1000, 889, 1000, 109, 108, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [4:32:10<9:51:54, 21.12it/s]global step 250000, trans_decision ep_re 1839.086997328533

{"global_step": 250000, "eval_re": [2422.7752058190113, 2102.4967517565774, 
2097.8707616227875, 2207.2416016711886, 1863.4747051768802, 1079.2068919643777, 
1872.5680232175418, 744.1399818337252, 2213.168460319409, 1787.927589903834], 
"eval_len": [1000, 1000, 1000, 1000, 811, 1000, 766, 337, 1000, 948]}

 26%|██▌       | 259998/1000000 [4:43:20<9:40:44, 21.24it/s]global step 260000, trans_decision ep_re 1827.1926884557295

{"global_step": 260000, "eval_re": [2235.5762264020045, 120.65487741178224, 
2234.273766174726, 1821.1787509904586, 1667.08671398366, 2387.2996149333812, 
2063.929244388025, 2367.2155407600217, 2173.417809809247, 1201.2943397039849], 
"eval_len": [1000, 104, 1000, 856, 750, 1000, 1000, 1000, 1000, 519]}

 27%|██▋       | 269999/1000000 [4:54:30<9:41:57, 20.91it/s]global step 270000, trans_decision ep_re 1357.7597265879554

{"global_step": 270000, "eval_re": [1816.326239256935, 1410.810394692022, 
1113.2671400690774, 1052.6006576576203, 585.3011357531562, 1713.8061982133088, 
1794.365186514124, 1717.352380040897, 814.6456446733533, 1559.1222890090617], 
"eval_len": [1000, 747, 1000, 617, 337, 1000, 1000, 1000, 375, 1000]}

 28%|██▊       | 279999/1000000 [5:05:40<9:27:06, 21.16it/s]global step 280000, trans_decision ep_re 1136.108949998005

{"global_step": 280000, "eval_re": [173.74995312047085, 339.7125796839104, 
2182.007748019324, 1823.6022795884426, 532.5515996819576, 1780.7958225768175, 
150.66660116466224, 2108.236566180576, 920.2544775522339, 1349.5118724116562], 
"eval_len": [106, 226, 1000, 1000, 266, 1000, 76, 1000, 434, 612]}

 29%|██▉       | 289999/1000000 [5:16:30<9:21:28, 21.08it/s]global step 290000, trans_decision ep_re 1459.385481543119

{"global_step": 290000, "eval_re": [333.23716237217593, 2184.8745947104167, 
1049.5668122111315, 1863.4776247759412, 2042.376516601014, 2402.7594116596874, 
492.24545970322964, 1600.3433928060351, 2274.278919781743, 350.6949208098166], 
"eval_len": [200, 1000, 439, 985, 1000, 1000, 289, 1000, 1000, 212]}

 30%|██▉       | 299999/1000000 [5:27:40<9:14:08, 21.05it/s]global step 300000, trans_decision ep_re 1695.964985806472

{"global_step": 300000, "eval_re": [1844.6678547483439, 2093.5900782792523, 
586.900553693621, 1127.8517337722224, 2211.999936922162, 2024.4570223385058, 
1782.2284436612938, 1330.9172179580876, 2012.2435194736124, 1944.7934972176206],
"eval_len": [1000, 1000, 370, 1000, 1000, 1000, 792, 768, 1000, 1000]}

 31%|███       | 309999/1000000 [5:38:40<9:01:26, 21.24it/s]global step 310000, trans_decision ep_re 1513.1517087884472

{"global_step": 310000, "eval_re": [168.25264868619152, 1254.3130027420145, 
2016.8843849474263, 1078.1573401655621, 1459.441898151317, 2318.7470776679993, 
96.5930683542342, 2229.420981863522, 2205.1955177100604, 2304.511167596143], 
"eval_len": [91, 582, 1000, 490, 707, 1000, 62, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [5:49:50<8:54:45, 21.19it/s]global step 320000, trans_decision ep_re 1148.0428919266178

{"global_step": 320000, "eval_re": [170.1057257438298, 476.3411488144481, 
1358.056221852944, 209.6351106378821, 575.6723179474388, 987.9919573840798, 
2364.958133148602, 1800.1158718841239, 1290.41051657269, 2247.1419152801395], 
"eval_len": [179, 255, 566, 159, 263, 472, 1000, 853, 628, 997]}

 33%|███▎      | 329999/1000000 [6:00:40<8:52:31, 20.97it/s]global step 330000, trans_decision ep_re 1945.6919543582633

{"global_step": 330000, "eval_re": [1744.460338329076, 2328.714479522647, 
2329.1571764585365, 1675.8574076500972, 1957.2792901048476, 2318.7507086019064, 
1889.6182559195504, 808.8813135619515, 2232.4854652424406, 2171.715108191579], 
"eval_len": [877, 1000, 1000, 784, 900, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [6:12:00<8:42:35, 21.05it/s]global step 340000, trans_decision ep_re 1271.1619143957694

{"global_step": 340000, "eval_re": [1031.8143592479087, 2125.466324280682, 
1068.4163243391342, 400.04829653949633, 387.354437195274, 1713.0137400007247, 
254.5397277118057, 2256.6650769746143, 1172.4413418964753, 2301.859515771578], 
"eval_len": [471, 928, 1000, 174, 243, 915, 134, 969, 1000, 1000]}

 35%|███▍      | 349999/1000000 [6:23:00<8:33:55, 21.08it/s]global step 350000, trans_decision ep_re 1393.8327607970436

{"global_step": 350000, "eval_re": [2243.8316756499653, 203.0659285186401, 
1275.1149495260559, 2098.791203935719, 1434.2004854219322, 2318.7854255077327, 
451.67965565434963, 182.43475251241622, 1492.5479018479616, 2237.875629395663], 
"eval_len": [1000, 104, 569, 1000, 696, 1000, 211, 101, 642, 1000]}

 36%|███▌      | 359999/1000000 [6:34:00<8:25:55, 21.08it/s]global step 360000, trans_decision ep_re 1573.7796507442413

{"global_step": 360000, "eval_re": [2157.092486026087, 2424.7320225565363, 
135.87902615487533, 874.288511461252, 82.35323654577967, 1184.8124146810883, 
2174.0359382742004, 2169.741781103509, 2135.4651052617983, 2399.3959853772876], 
"eval_len": [1000, 1000, 82, 471, 86, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [6:45:00<8:12:08, 21.34it/s]global step 370000, trans_decision ep_re 1760.4166893796057

{"global_step": 370000, "eval_re": [259.2426165776428, 2526.4026708495894, 
2077.1132975376886, 864.9042679651221, 1549.3441260067682, 2288.483599953405, 
2447.8205984153333, 2175.06222316502, 1189.3879735129115, 2226.4055198125766], 
"eval_len": [123, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 547, 1000]}

 38%|███▊      | 379998/1000000 [6:56:10<8:07:31, 21.20it/s]global step 380000, trans_decision ep_re 949.6911209248834

{"global_step": 380000, "eval_re": [391.4710830365971, 598.5212171469242, 
2026.895334658669, 360.24954328422774, 1690.3648318833116, 1933.994518394745, 
491.76034159828515, 1497.8493992801077, 77.31883519985378, 428.4861047661133], 
"eval_len": [216, 303, 1000, 212, 827, 1000, 289, 717, 104, 257]}

 39%|███▉      | 389999/1000000 [7:07:00<8:00:13, 21.17it/s]global step 390000, trans_decision ep_re 1900.533681733792

{"global_step": 390000, "eval_re": [2005.7435319250183, 1971.5821936963582, 
2187.797000074751, 864.3539581691931, 2143.9623295725187, 2005.5516728240896, 
2129.3541896499173, 1377.2503588541038, 2187.9226633791927, 2131.8189191927795],
"eval_len": [1000, 1000, 1000, 437, 1000, 1000, 1000, 664, 1000, 1000]}

 40%|███▉      | 399999/1000000 [7:18:10<7:53:58, 21.10it/s]global step 400000, trans_decision ep_re 1419.7461442835563

{"global_step": 400000, "eval_re": [1010.6763111692266, 1565.4988640214315, 
2081.1692287028727, 830.3312640338374, 1840.6232758122167, 1185.5929553760302, 
1121.8275236169434, 1327.3415247025653, 1373.4287683929156, 1860.971727007525], 
"eval_len": [708, 742, 1000, 1000, 1000, 660, 616, 672, 1000, 1000]}

 41%|████      | 409998/1000000 [7:29:20<7:40:08, 21.37it/s]global step 410000, trans_decision ep_re 1183.0167603334735

{"global_step": 410000, "eval_re": [467.86706176469755, 2017.2268962333162, 
132.57919494016616, 1907.657451314243, 1774.5168702640074, 499.1718986962039, 
915.4798084705915, 1768.8041304249584, 446.45580343359177, 1900.4084877929604], 
"eval_len": [270, 994, 91, 1000, 1000, 1000, 484, 1000, 281, 961]}

 42%|████▏     | 419998/1000000 [7:40:20<7:37:37, 21.12it/s]global step 420000, trans_decision ep_re 853.5207787497269

{"global_step": 420000, "eval_re": [465.2369415377229, 99.515343726989, 
1030.6368225510869, 765.5933041602112, 1133.293947445205, 621.3040089573569, 
180.77514999756357, 1350.2395108341416, 2059.8919125039693, 828.720845783023], 
"eval_len": [317, 69, 465, 1000, 551, 330, 96, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [7:51:20<7:27:08, 21.25it/s]global step 430000, trans_decision ep_re 1450.4367896971764

{"global_step": 430000, "eval_re": [2174.2678652195627, 452.48511934651543, 
688.6293675004049, 2219.7121502510327, 853.9210466830012, 1197.035055808766, 
1949.175336410043, 2114.551041930459, 853.3596885688753, 2001.231225253105], 
"eval_len": [876, 195, 1000, 1000, 1000, 529, 1000, 1000, 462, 1000]}

 44%|████▍     | 439999/1000000 [8:02:20<7:25:45, 20.94it/s]global step 440000, trans_decision ep_re 563.7588244485603

{"global_step": 440000, "eval_re": [1591.7747822773363, 215.18197102516297, 
673.2162537699157, 230.7894478538393, 252.71778459122692, 367.10975901818006, 
545.5885226482904, 152.41808203481, 163.75724172560422, 1445.0343995412356], 
"eval_len": [1000, 132, 321, 1000, 167, 219, 1000, 124, 132, 797]}

 45%|████▍     | 449998/1000000 [8:13:20<7:08:32, 21.39it/s]global step 450000, trans_decision ep_re 1434.826145598004

{"global_step": 450000, "eval_re": [712.2185873590792, 1823.1378416605128, 
1976.0249316892634, 1071.7027250612593, 541.0511052249308, 1813.3084161846816, 
1889.7960072742633, 1795.3413825196462, 1886.4275904337928, 839.2528685726102], 
"eval_len": [332, 1000, 1000, 559, 303, 1000, 1000, 1000, 1000, 457]}

 46%|████▌     | 459999/1000000 [8:24:20<7:02:22, 21.31it/s]global step 460000, trans_decision ep_re 1338.9908122007346

{"global_step": 460000, "eval_re": [893.2919957363771, 1972.7466848891613, 
1864.591771600764, 1980.78931399515, 879.4066590882517, 1392.701972198401, 
1662.9480074131768, 932.2476722424065, 414.96653347732325, 1396.2175113663347], 
"eval_len": [501, 1000, 1000, 1000, 384, 741, 873, 523, 194, 1000]}

 47%|████▋     | 469999/1000000 [8:35:30<7:00:42, 21.00it/s]global step 470000, trans_decision ep_re 1196.8855194718926

{"global_step": 470000, "eval_re": [729.4697953300357, 811.9432443524524, 
1087.0957298805756, 2246.306419390768, 2203.1081434672706, 479.14526897470336, 
2015.4399503339746, 625.650079841671, 283.7152265567457, 1486.9813365907305], 
"eval_len": [1000, 403, 1000, 1000, 1000, 215, 1000, 302, 127, 639]}

 48%|████▊     | 479999/1000000 [8:46:30<6:52:59, 20.99it/s]global step 480000, trans_decision ep_re 1186.9918132630312

{"global_step": 480000, "eval_re": [2128.4680131465375, 2092.465832602897, 
113.91253029059119, 77.84237511084766, 2065.5006205526556, -435.61761204298745, 
1482.6879334543303, 769.5310609613388, 1989.6136214574492, 1585.5137570966522], 
"eval_len": [1000, 1000, 77, 88, 1000, 1000, 755, 379, 1000, 1000]}

 49%|████▉     | 489998/1000000 [8:57:30<6:41:03, 21.19it/s]global step 490000, trans_decision ep_re 1262.8066974532342

{"global_step": 490000, "eval_re": [2030.686317068301, 300.64778156773707, 
702.2628637709636, 801.3092008654145, 2067.986083401881, 2111.8762814387374, 
493.536284493197, 1931.1172751362578, 261.40989951595867, 1927.2349872738937], 
"eval_len": [1000, 147, 366, 509, 1000, 1000, 240, 1000, 157, 1000]}

 50%|████▉     | 499998/1000000 [9:08:30<6:28:12, 21.47it/s]global step 500000, trans_decision ep_re 512.6521039443882

{"global_step": 500000, "eval_re": [317.66144655472544, 811.3162517232287, 
1228.519118311655, 731.3720016216826, 93.9164171089733, 244.876534061771, 
500.4415002195626, 147.2859174614395, 419.42254306570356, 631.7093093151409], 
"eval_len": [158, 1000, 749, 376, 92, 177, 366, 95, 271, 353]}

 51%|█████     | 509998/1000000 [9:19:20<6:24:52, 21.22it/s]global step 510000, trans_decision ep_re 858.1013599667419

{"global_step": 510000, "eval_re": [559.1746916120674, 592.3203576842053, 
2260.929664891617, 352.63470109419995, 1171.6136145348837, 216.26646851256942, 
457.956089768585, 636.7178905885572, 1284.4181014662202, 1048.9820195145148], 
"eval_len": [251, 253, 922, 192, 566, 138, 297, 315, 670, 589]}

 52%|█████▏    | 519999/1000000 [9:30:10<6:19:16, 21.09it/s]global step 520000, trans_decision ep_re 853.4214275868314

{"global_step": 520000, "eval_re": [1671.7080127817776, 240.87483058350907, 
1122.3011242234115, 163.3506976284106, 427.0244376769449, 394.2545344335477, 
186.0229259846, 1929.437011743263, 1037.5668138286278, 1361.673886984222], 
"eval_len": [1000, 167, 549, 122, 273, 272, 106, 1000, 1000, 740]}

 53%|█████▎    | 529999/1000000 [9:41:10<6:09:04, 21.22it/s]global step 530000, trans_decision ep_re 1600.2627728460886

{"global_step": 530000, "eval_re": [576.268927304906, 365.5910428191861, 
1615.114706546573, 1899.3375737706558, 2190.3519090421214, 968.5746280534843, 
2231.6399580265133, 2097.753489487985, 2107.6032135812934, 1950.3922798281687], 
"eval_len": [291, 216, 1000, 918, 1000, 437, 1000, 1000, 1000, 863]}

 54%|█████▍    | 539999/1000000 [9:52:10<5:58:56, 21.36it/s]global step 540000, trans_decision ep_re 937.125741378255

{"global_step": 540000, "eval_re": [1108.0187584767468, 882.1032601931994, 
2328.3566774010233, 224.70136056739594, 264.0225342935674, 1939.2511420610879, 
281.0552527644538, 1251.492330146394, 1006.273138250298, 85.98295962838547], 
"eval_len": [1000, 1000, 1000, 129, 126, 1000, 128, 618, 489, 72]}

 55%|█████▍    | 549998/1000000 [10:03:00<5:52:40, 21.27it/s]global step 550000, trans_decision ep_re 1621.119545568534

{"global_step": 550000, "eval_re": [1902.2111264457144, 1979.9879836458124, 
1634.6255971964074, 1719.5076392243645, 1819.8225300535084, 2062.0144955144306, 
2206.421565784841, 622.1687498851591, 257.8930677571019, 2006.542700178001], 
"eval_len": [1000, 1000, 868, 1000, 1000, 1000, 1000, 423, 165, 1000]}

 56%|█████▌    | 559999/1000000 [10:14:10<5:48:33, 21.04it/s]global step 560000, trans_decision ep_re 978.1422923183557

{"global_step": 560000, "eval_re": [651.6994562409594, 2058.937275633279, 
288.37821318577454, 1842.8137018681768, 887.8636951676466, 913.1956373864953, 
122.99854972315428, 666.5632085660592, 2010.1791856514062, 338.7939997606048], 
"eval_len": [1000, 958, 177, 999, 1000, 479, 94, 334, 1000, 163]}

 57%|█████▋    | 569999/1000000 [10:25:10<5:37:53, 21.21it/s]global step 570000, trans_decision ep_re 1458.5781197114281

{"global_step": 570000, "eval_re": [2043.666946035127, 671.06203302286, 
1883.2138053675953, 1159.789106046615, 1962.3605250341575, 1622.016858655311, 
559.8854055401014, 854.1097898174355, 1887.316917503796, 1942.3598100912825], 
"eval_len": [878, 334, 1000, 627, 1000, 807, 1000, 430, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [10:36:10<5:33:11, 21.01it/s]global step 580000, trans_decision ep_re 865.3570070297567

{"global_step": 580000, "eval_re": [2192.240106570735, 1366.7887755000825, 
166.30557311010432, 1299.1565113139002, 1204.0991866299898, -410.3429553623252, 
126.95446771579441, 1265.3912335500916, 968.9359613833034, 474.0412098858921], 
"eval_len": [1000, 543, 126, 736, 1000, 1000, 1000, 808, 1000, 253]}

 59%|█████▉    | 589999/1000000 [10:47:20<5:24:51, 21.04it/s]global step 590000, trans_decision ep_re 808.0562551486851

{"global_step": 590000, "eval_re": [1165.640281260884, 946.2588931985147, 
583.0640185847566, 386.7118345568105, 690.489578608385, 496.5111716046995, 
1040.8389630653257, 1066.5481363992487, 1201.1944497228185, 503.30522448540773],
"eval_len": [1000, 1000, 1000, 328, 508, 310, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599998/1000000 [10:58:20<5:12:46, 21.31it/s]global step 600000, trans_decision ep_re 1866.0356011061674

{"global_step": 600000, "eval_re": [2155.5686517266536, 1906.5573725538627, 
1817.3773448702827, 1840.9402740482321, 1926.1039057024711, 1978.6651147911891, 
1735.0061760488213, 1761.4295861616167, 1847.5930654429922, 1691.114519715549], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [11:09:30<5:07:29, 21.14it/s]global step 610000, trans_decision ep_re 1467.3103104698152

{"global_step": 610000, "eval_re": [484.3089598863413, 551.7213857679483, 
1197.0446735994835, 2145.2533605441604, 870.6217018176503, 834.147919604702, 
2104.2585384467593, 2204.3139657249885, 2264.7453801864795, 2016.68721911964], 
"eval_len": [1000, 272, 503, 1000, 1000, 429, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619998/1000000 [11:20:40<4:55:39, 21.42it/s]global step 620000, trans_decision ep_re 1267.5958372338314

{"global_step": 620000, "eval_re": [461.47320825747994, 1657.4385693589563, 
1922.8387879568559, 2072.9107735240505, 2097.820365656709, 698.7325503121173, 
2156.540775159662, 976.1600462444945, 490.76791332697917, 141.2753825410096], 
"eval_len": [245, 1000, 1000, 1000, 1000, 1000, 1000, 524, 1000, 134]}

 63%|██████▎   | 629999/1000000 [11:31:40<4:50:42, 21.21it/s]global step 630000, trans_decision ep_re 1748.4015584021531

{"global_step": 630000, "eval_re": [2231.1014776109887, 2371.4874730669603, 
1076.368506374552, 2288.652595112688, 1142.2589107633692, 2122.0967409372697, 
2156.337087521495, -215.5797096153273, 1886.9999554982883, 2424.2925467512455], 
"eval_len": [1000, 1000, 532, 1000, 524, 1000, 1000, 1000, 812, 1000]}

 64%|██████▍   | 639999/1000000 [11:42:50<4:43:30, 21.16it/s]global step 640000, trans_decision ep_re 1191.2085854093502

{"global_step": 640000, "eval_re": [1450.7092700306603, 1542.1738019753375, 
109.75251178029045, 1777.4031581234392, 1682.212945651888, 204.5366856218352, 
1664.0823815926308, 1503.4781742806258, 1187.2029159763752, 790.534009060418], 
"eval_len": [1000, 1000, 69, 1000, 1000, 152, 1000, 1000, 638, 1000]}

 65%|██████▍   | 649999/1000000 [11:54:00<4:36:37, 21.09it/s]global step 650000, trans_decision ep_re 720.2284640423652

{"global_step": 650000, "eval_re": [1378.1154404752044, 240.72641750143254, 
131.9797500004738, 857.3265455257837, 750.6304468128159, 605.7099671836958, 
558.9813670627268, 397.38352604025334, 1543.7297667989853, 737.7014130222803], 
"eval_len": [552, 163, 86, 1000, 1000, 273, 245, 276, 1000, 320]}

 66%|██████▌   | 659998/1000000 [12:04:50<4:25:46, 21.32it/s]global step 660000, trans_decision ep_re 246.57418829750662

{"global_step": 660000, "eval_re": [56.12223336348951, 107.6782734918973, 
203.03212638416073, 120.72077726819312, 341.87461385205376, 769.6950910164243, 
249.15067810480483, 417.59964920780874, 119.06394762761963, 80.80449265861436], 
"eval_len": [53, 72, 154, 79, 713, 1000, 228, 353, 80, 92]}

 67%|██████▋   | 669998/1000000 [12:15:40<4:18:06, 21.31it/s]global step 670000, trans_decision ep_re 405.0396523315706

{"global_step": 670000, "eval_re": [73.07395043380131, 644.9845707094812, 
112.99238029423405, 93.16398773311558, 218.0919025828746, 760.9755885317306, 
128.85810068029787, 841.5781266369887, 343.68443396013134, 832.9934817530512], 
"eval_len": [82, 349, 90, 77, 195, 1000, 85, 1000, 193, 1000]}

 68%|██████▊   | 679998/1000000 [12:26:30<4:12:06, 21.16it/s]global step 680000, trans_decision ep_re 1227.2194171087826

{"global_step": 680000, "eval_re": [1946.286283484396, 1701.1951769460204, 
1670.254270661975, 102.55144949632195, 1059.5685265388531, 1585.8465293278161, 
203.21056965298067, 1870.9741744995736, 695.0823409998625, 1437.2248494800258], 
"eval_len": [1000, 1000, 1000, 109, 1000, 1000, 118, 1000, 1000, 784]}

 69%|██████▉   | 689999/1000000 [12:37:40<4:04:37, 21.12it/s]global step 690000, trans_decision ep_re 317.6308276155074

{"global_step": 690000, "eval_re": [228.72573630078648, 539.5360329336227, 
235.17814996469767, 120.86865648118967, 801.4127555247231, 121.43926876881551, 
153.6283565456973, 716.3500505144799, 72.70785650914424, 186.46141261191713], 
"eval_len": [138, 1000, 299, 74, 1000, 73, 96, 1000, 74, 148]}

 70%|██████▉   | 699999/1000000 [12:48:30<3:57:19, 21.07it/s]global step 700000, trans_decision ep_re 329.7416397475375

{"global_step": 700000, "eval_re": [427.98360036101553, 146.5875673442048, 
103.27560797246804, 556.3975216319682, 122.38582263413394, 308.02357470465597, 
452.58206810291125, 554.8643705992931, 146.38554944330212, 478.9307146814218], 
"eval_len": [1000, 101, 95, 275, 91, 174, 1000, 307, 87, 277]}

 71%|███████   | 709999/1000000 [12:59:10<3:50:08, 21.00it/s]global step 710000, trans_decision ep_re 172.71750936340868

{"global_step": 710000, "eval_re": [75.19529025456126, 143.73046525170412, 
108.38832019668665, 86.09964571714904, -31.63341411752137, 89.51436198792378, 
629.7112661060635, 128.21555395152788, 440.44440839114907, 57.509195894842826], 
"eval_len": [89, 98, 82, 80, 1000, 78, 1000, 80, 1000, 54]}

 72%|███████▏  | 719999/1000000 [13:10:00<3:41:09, 21.10it/s]global step 720000, trans_decision ep_re 1109.2302680089504

{"global_step": 720000, "eval_re": [1049.560913380473, 1219.6441379150758, 
523.7208149460827, 1112.313329860862, 1120.8063959305302, 624.7854287271124, 
1484.5206721147856, 1601.6488823592608, 846.7229138505812, 1508.5791910047403], 
"eval_len": [774, 1000, 1000, 815, 1000, 594, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [13:21:10<3:34:20, 20.99it/s]global step 730000, trans_decision ep_re 1004.1874187954538

{"global_step": 730000, "eval_re": [581.2635002136467, 543.9310803832982, 
1675.8306493700723, 188.83433207363097, 1504.1379016768633, 445.5801559809983, 
1169.108807004281, 1481.3462855559776, 761.0090256713629, 1690.8324500244062], 
"eval_len": [311, 342, 1000, 129, 928, 1000, 629, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [13:32:20<3:26:23, 21.00it/s]global step 740000, trans_decision ep_re 763.7432780460498

{"global_step": 740000, "eval_re": [834.056626202712, 1417.3208254064027, 
365.34250433408766, 761.9846609006138, 1399.2840620362515, 1513.3429927924096, 
1090.971056119112, 454.6844110770208, 274.07922812400136, -473.63358653211327], 
"eval_len": [1000, 944, 1000, 1000, 1000, 1000, 1000, 320, 170, 1000]}

 75%|███████▍  | 749999/1000000 [13:43:20<3:16:10, 21.24it/s]global step 750000, trans_decision ep_re 1034.9540121627692

{"global_step": 750000, "eval_re": [1693.302505812943, 77.00911733665839, 
206.33211780942167, 1887.3190512344677, 1664.2235810208442, 578.1698425629224, 
1916.4848852978357, 339.03548663008075, 185.94984804541812, 1801.7136858771005],
"eval_len": [900, 72, 136, 1000, 1000, 381, 1000, 1000, 114, 1000]}

 76%|███████▌  | 759998/1000000 [13:54:20<3:08:29, 21.22it/s]global step 760000, trans_decision ep_re 1329.3092723759805

{"global_step": 760000, "eval_re": [1919.6976971859726, 1033.1327395108954, 
1168.7378339621785, 189.15214823262056, 1005.5314113589193, 851.8658729092897, 
757.2449125385889, 2102.537386972849, 2095.979574218936, 2169.2131468695525], 
"eval_len": [1000, 1000, 598, 140, 514, 465, 391, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [14:05:20<3:00:11, 21.27it/s]global step 770000, trans_decision ep_re 1165.2175140030981

{"global_step": 770000, "eval_re": [1253.5502013638131, 966.5697488868843, 
1741.6493435417563, 1376.0421037840767, 1480.116513794175, 1429.9928458055608, 
1700.66188839605, 729.6667555760516, 273.2691972761233, 700.6565416064888], 
"eval_len": [746, 741, 1000, 1000, 1000, 1000, 1000, 433, 188, 410]}

 78%|███████▊  | 779999/1000000 [14:16:30<2:52:31, 21.25it/s]global step 780000, trans_decision ep_re 1524.4125688486233

{"global_step": 780000, "eval_re": [1648.2265554540809, 1794.7641548170984, 
1873.4703035936348, 945.5430490427826, 161.6305059920386, 2316.9511234604734, 
2220.522900968669, 1617.6732848523518, 1977.7338419471473, 687.6099683579567], 
"eval_len": [1000, 963, 1000, 487, 121, 1000, 1000, 802, 1000, 375]}

 79%|███████▉  | 789998/1000000 [14:27:30<2:45:03, 21.21it/s]global step 790000, trans_decision ep_re 862.2816905293397

{"global_step": 790000, "eval_re": [976.2501133913274, 917.8029595633456, 
1425.9729506597016, 163.90685179398423, 628.7028242095978, 249.5022052214889, 
896.1729130807429, 179.78446501633505, 1533.3568577908327, 1651.3647645660405], 
"eval_len": [1000, 577, 1000, 187, 323, 208, 1000, 145, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [14:38:30<2:34:51, 21.52it/s]global step 800000, trans_decision ep_re 1010.0906349584084

{"global_step": 800000, "eval_re": [1735.5671421278746, 22.038898495762048, 
2085.24980527458, 697.8359503207266, 1036.199143020672, 1915.371709539529, 
341.23157231320033, 1579.9690727858306, 318.58617389115824, 368.8568818147489], 
"eval_len": [1000, 1000, 1000, 394, 1000, 1000, 191, 758, 186, 210]}

 81%|████████  | 809998/1000000 [14:49:30<2:27:55, 21.41it/s]global step 810000, trans_decision ep_re 1651.0269745591681

{"global_step": 810000, "eval_re": [1213.3963720662805, 841.382450273195, 
2198.6862999459536, 2176.0973255839963, 1389.0775530070864, 1947.6726956817852, 
2166.6474850310665, 1288.5889722393629, 1913.5583794329962, 1375.162212329959], 
"eval_len": [566, 410, 1000, 1000, 1000, 1000, 1000, 676, 1000, 602]}

 82%|████████▏ | 819999/1000000 [15:00:41<2:22:52, 21.00it/s]global step 820000, trans_decision ep_re 674.8453530059542

{"global_step": 820000, "eval_re": [141.44714232372476, 1839.7851812939193, 
113.95224302626161, 1044.4344305568993, 948.0184752148136, 459.36573462045715, 
603.8581384586446, 850.7226283725074, 613.6951599083212, 133.1743962839928], 
"eval_len": [98, 1000, 73, 711, 1000, 187, 366, 1000, 322, 120]}

 83%|████████▎ | 829998/1000000 [15:11:31<2:12:55, 21.32it/s]global step 830000, trans_decision ep_re 848.359056002512

{"global_step": 830000, "eval_re": [1170.0311230036837, 145.35709368194156, 
401.84233648568824, 95.47602708502079, 1572.404368040826, 97.70630005103462, 
1904.1210624103064, 659.5802221122207, 905.5206163950594, 1531.55141075934], 
"eval_len": [654, 103, 227, 62, 1000, 72, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [15:22:31<2:05:11, 21.30it/s]global step 840000, trans_decision ep_re 1086.220944929289

{"global_step": 840000, "eval_re": [164.09325963974493, 1404.2995100290923, 
1519.6763888422124, 290.49756668561764, 967.6774575684856, 463.21306431771364, 
1977.1092446350576, 1727.3385912116978, 1921.0346775813719, 427.2696887818983], 
"eval_len": [92, 828, 706, 140, 562, 276, 1000, 874, 1000, 257]}

 85%|████████▍ | 849999/1000000 [15:33:31<1:58:01, 21.18it/s]global step 850000, trans_decision ep_re 1716.34555262157

{"global_step": 850000, "eval_re": [2106.846876553287, 1257.4237701133752, 
2032.167801271089, 1328.4931277176263, 2129.2410430923783, 1553.139010049709, 
944.8035679176234, 2125.3342870451875, 2183.5505846386804, 1502.4554578167445], 
"eval_len": [1000, 570, 1000, 728, 1000, 695, 469, 1000, 1000, 747]}

 86%|████████▌ | 859999/1000000 [15:44:31<1:49:57, 21.22it/s]global step 860000, trans_decision ep_re 1631.666295570782

{"global_step": 860000, "eval_re": [2247.86551501744, 2140.872332869441, 
334.90542125961156, 2422.323023789948, 2225.5074052987866, 2256.0648517374425, 
1153.8810637571282, 1078.129912582497, 1819.8237416402078, 637.2896877553181], 
"eval_len": [1000, 1000, 164, 1000, 1000, 1000, 425, 459, 1000, 317]}

 87%|████████▋ | 869999/1000000 [15:55:31<1:43:02, 21.03it/s]global step 870000, trans_decision ep_re 1708.7139867513138

{"global_step": 870000, "eval_re": [1995.9432232844886, 2076.70569443685, 
1371.3735802167462, 684.3738778514235, 1834.7012847803526, 2069.029029053769, 
2073.0731152764793, 2044.724654973123, 2015.1668931844188, 922.0485144554895], 
"eval_len": [1000, 1000, 1000, 1000, 891, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [16:06:51<1:34:32, 21.16it/s]global step 880000, trans_decision ep_re 813.9560549328683

{"global_step": 880000, "eval_re": [815.6828644270487, 1804.004256071131, 
379.4333495709055, 915.082926295025, 224.7478172622874, 195.03094158998238, 
476.15999178408435, 1312.8273894252236, 174.6728567714237, 1841.9181561315718], 
"eval_len": [1000, 1000, 205, 433, 156, 131, 224, 832, 95, 989]}

 89%|████████▉ | 889998/1000000 [16:17:41<1:26:26, 21.21it/s]global step 890000, trans_decision ep_re 1409.4376995617558

{"global_step": 890000, "eval_re": [413.84666009175317, 1215.3599962134222, 
373.53282465166166, 1833.139806452695, 827.977100029115, 1808.5602847649097, 
1936.386905800085, 1988.9746020871992, 1663.12302852733, 2033.4757869993857], 
"eval_len": [285, 607, 192, 1000, 454, 1000, 1000, 973, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [16:28:41<1:18:54, 21.12it/s]global step 900000, trans_decision ep_re 1247.849894035568

{"global_step": 900000, "eval_re": [1179.1555744262948, 2199.4440585417774, 
714.4898727388563, 113.89984578476674, 2345.7425992550175, 1259.7086279910104, 
1078.2152593931798, 893.1481286828648, 2178.3219449534877, 516.3730285884237], 
"eval_len": [632, 1000, 342, 72, 1000, 859, 486, 452, 1000, 244]}

 91%|█████████ | 909998/1000000 [16:39:41<1:10:04, 21.41it/s]global step 910000, trans_decision ep_re 1450.8475669774868

{"global_step": 910000, "eval_re": [966.4059826281941, 498.52171586679174, 
2330.2867220625803, 2208.973151783117, 225.63133321280102, 892.1129663776582, 
1784.6454445524928, 852.6917642440335, 2234.1106071939653, 2515.095981853235], 
"eval_len": [449, 210, 1000, 1000, 101, 418, 748, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [16:50:41<1:03:12, 21.09it/s]global step 920000, trans_decision ep_re 1594.6864565873043

{"global_step": 920000, "eval_re": [580.3727761892641, 217.44736091441908, 
2242.3272087429023, 2062.1387866225905, 1911.7489791310031, 1849.2374241305336, 
2096.7241951907017, 2021.6829518381903, 2113.7935592167864, 851.3913238966508], 
"eval_len": [331, 130, 1000, 1000, 984, 1000, 1000, 1000, 1000, 498]}

 93%|█████████▎| 929999/1000000 [17:01:51<55:28, 21.03it/s]global step 930000, trans_decision ep_re 1244.7807752864287

{"global_step": 930000, "eval_re": [1135.8533987869646, 131.8523880411678, 
2229.2577289691353, 2231.04892660896, 253.34553071236874, 875.4463731016075, 
2107.608214275343, 2080.109744821609, 1288.2680669252518, 115.01738062188053], 
"eval_len": [1000, 85, 1000, 1000, 188, 1000, 1000, 1000, 622, 74]}

 94%|█████████▍| 939999/1000000 [17:12:51<47:49, 20.91it/s]global step 940000, trans_decision ep_re 1724.2752021261726

{"global_step": 940000, "eval_re": [2078.325031393821, 1259.0944419768855, 
2336.973443212551, 994.4416637276883, 1889.0503967373204, 517.941728793124, 
1803.3707839960857, 2114.1010018118864, 2226.2278177082453, 2023.225711904121], 
"eval_len": [1000, 590, 1000, 632, 1000, 261, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [17:23:51<39:28, 21.11it/s]global step 950000, trans_decision ep_re 918.6444225127258

{"global_step": 950000, "eval_re": [511.8865908142456, 2143.499858532019, 
229.26830805174637, 1567.1522885033032, 1234.3948804493523, 1130.6937921981707, 
175.43498983859487, 454.71236609396516, 1191.1096077933562, 548.2915428525055], 
"eval_len": [320, 1000, 113, 1000, 581, 520, 93, 203, 1000, 332]}

 96%|█████████▌| 959999/1000000 [17:34:51<31:48, 20.96it/s]global step 960000, trans_decision ep_re 1391.2542216579186

{"global_step": 960000, "eval_re": [2298.6221861706053, 414.54689003889155, 
882.8055013303123, 1116.591903474127, 1654.5297860359785, 2223.6874228699003, 
1974.2329575729325, 780.5059529553885, 2021.4231335188986, 545.5964826121524], 
"eval_len": [1000, 188, 504, 491, 787, 1000, 1000, 437, 1000, 251]}

 97%|█████████▋| 969998/1000000 [17:45:51<23:35, 21.19it/s]global step 970000, trans_decision ep_re 983.3351066018502

{"global_step": 970000, "eval_re": [2544.651508321782, 309.36229074916855, 
152.5714470359321, 657.9215476146471, 1034.6701966982382, 83.74938868998802, 
1960.365553159783, 1278.150138134263, 638.9804175958908, 1172.9285780188086], 
"eval_len": [1000, 163, 84, 333, 565, 83, 1000, 528, 283, 1000]}

 98%|█████████▊| 979999/1000000 [17:56:51<15:48, 21.10it/s]global step 980000, trans_decision ep_re 1040.919748500914

{"global_step": 980000, "eval_re": [1478.6360090207072, 531.4937830431688, 
913.804181947592, 1164.0835542528703, 471.73027892075567, 2068.512948315017, 
629.4683239015453, 709.2184989421092, 183.2236990064793, 2259.026207658894], 
"eval_len": [731, 259, 421, 538, 284, 1000, 370, 311, 111, 1000]}

 99%|█████████▉| 989999/1000000 [18:07:51<07:59, 20.88it/s]global step 990000, trans_decision ep_re 1518.5864912062848

{"global_step": 990000, "eval_re": [1199.5534721509885, 2324.6924630694366, 
1245.80652499455, 2110.1284901785075, 841.632079781944, 2148.026191260672, 
93.0065841372223, 2170.0902365123275, 875.501825364284, 2177.427044612913], 
"eval_len": [558, 1000, 646, 950, 410, 1000, 68, 894, 388, 1000]}

100%|█████████▉| 999998/1000000 [18:18:51<00:00, 21.34it/s]global step 1000000, trans_decision ep_re 802.8694269691318

{"global_step": 1000000, "eval_re": [1606.9880321488338, 81.47670128817742, 
360.54124278554565, 752.7665258369524, 552.8157665074383, 1995.4297077777255, 
2080.5038696815423, 72.11366842799382, 96.98910920390482, 429.0696460332048], 
"eval_len": [919, 92, 182, 1000, 235, 1000, 1000, 80, 82, 253]}

100%|██████████| 1000000/1000000 [18:19:09<00:00, 15.16it/s]
