
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [04:40<11:06:39, 24.75it/s]global step 10000, trans_decision ep_re 573.2195609757316

{"global_step": 10000, "eval_re": [516.0706108882501, 577.4998253953705, 
686.1892692792937, 533.2695288282777, 572.6755584825489, 526.5977882416721, 
580.9083964500509, 599.6884203839896, 581.714996215095, 557.5812155927681], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [14:10<10:58:38, 24.80it/s]global step 20000, trans_decision ep_re 789.4346685401715

{"global_step": 20000, "eval_re": [735.2882510995361, 839.4956654017005, 
730.2798300090334, 774.499349988396, 842.119666466543, 772.5831217264941, 
803.7424655598338, 807.5214052087533, 805.106690155897, 783.7102397855281], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29998/1000000 [23:30<10:39:38, 25.27it/s]global step 30000, trans_decision ep_re 708.0542674232968

{"global_step": 30000, "eval_re": [85.61507634442, 1106.5508954532836, 
726.5924371955908, 948.7243198356724, 766.1262318971317, 948.6379506685417, 
706.6815503661544, 443.7120870237201, 663.0028861580446, 684.8992392904074], 
"eval_len": [71, 1000, 1000, 1000, 1000, 1000, 1000, 329, 1000, 1000]}

  4%|▍         | 39997/1000000 [32:50<10:38:03, 25.08it/s]global step 40000, trans_decision ep_re 426.67882795359594

{"global_step": 40000, "eval_re": [456.9344349239477, 88.05479109302959, 
35.62290143749068, 1174.7369798072639, 264.7439365008537, 273.80254767304757, 
377.16375044658605, 38.96632263190581, 699.2192563703129, 857.5433586515209], 
"eval_len": [387, 57, 49, 1000, 190, 220, 327, 40, 539, 1000]}

  5%|▍         | 49998/1000000 [42:00<10:40:36, 24.72it/s]global step 50000, trans_decision ep_re 1014.4305524567613

{"global_step": 50000, "eval_re": [587.8705595036813, 377.2634746514603, 
1381.6173789294742, 35.254097652586246, 1135.8246635474725, 1390.4155042938119, 
1183.4258411641497, 1321.4963202915085, 1336.2877959126859, 1394.8498886207833],
"eval_len": [469, 254, 1000, 38, 835, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [51:20<10:33:19, 24.74it/s]global step 60000, trans_decision ep_re 1088.8407413737218

{"global_step": 60000, "eval_re": [1524.2364884317267, 915.8358344855812, 
801.8995678947235, 1269.9790615468053, 201.81233710713747, 801.6945915469761, 
1462.3997892347504, 1350.9901753371619, 1083.3633142118988, 1476.1962539404558],
"eval_len": [1000, 660, 1000, 1000, 125, 599, 1000, 1000, 818, 1000]}

  7%|▋         | 69998/1000000 [1:00:40<10:25:06, 24.80it/s]global step 70000, trans_decision ep_re 813.1613240660419

{"global_step": 70000, "eval_re": [88.51086976533942, 865.242764894384, 
63.14086851591147, 1163.2729119974344, 894.9951067156571, 1026.9082066369738, 
1166.46964238825, 1201.8419026721122, 1356.6721543580065, 304.55881271634917], 
"eval_len": [57, 723, 59, 1000, 756, 823, 1000, 1000, 1000, 268]}

  8%|▊         | 79999/1000000 [1:09:50<10:13:42, 24.98it/s]global step 80000, trans_decision ep_re 1454.8213852362594

{"global_step": 80000, "eval_re": [1606.1452734737964, 1492.2234428417146, 
1529.3142300387842, 1591.0222969936876, 1658.7241485919326, 1738.6208997977526, 
1447.8521822145588, 396.2929637785791, 1585.9984521049187, 1502.0199625268694], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 261, 1000, 1000]}

  9%|▉         | 89998/1000000 [1:19:20<10:11:33, 24.80it/s]global step 90000, trans_decision ep_re 1448.6316016381093

{"global_step": 90000, "eval_re": [1850.6640002102092, 511.65764522300196, 
1793.3815316079028, 1912.775347500464, 1889.2048308615213, 1917.1327077092083, 
591.4535710623178, 1737.6915696864162, 383.0249066023158, 1899.3299059177332], 
"eval_len": [1000, 289, 1000, 1000, 1000, 1000, 319, 877, 215, 1000]}

 10%|▉         | 99999/1000000 [1:28:40<9:57:56, 25.09it/s]global step 100000, trans_decision ep_re 1455.8471845283973

{"global_step": 100000, "eval_re": [576.0384195487015, 2104.254553419724, 
1330.9017486336875, 2010.580305876, 513.3910554956401, 1641.1744061390325, 
2144.678330612595, 2150.485464864604, 156.4794893185458, 1930.4880713754435], 
"eval_len": [1000, 1000, 1000, 1000, 239, 760, 1000, 1000, 81, 1000]}

 11%|█         | 109999/1000000 [1:38:00<10:02:58, 24.60it/s]global step 110000, trans_decision ep_re 993.9424511897441

{"global_step": 110000, "eval_re": [1916.1360407690124, 1872.1473575364153, 
129.34446590048003, 552.4897638931367, 2121.9201099347783, 974.9618927579389, 
1602.6788282944206, 376.2354226701777, 309.9604492291347, 83.55018091194435], 
"eval_len": [1000, 1000, 88, 333, 1000, 540, 1000, 250, 149, 49]}

 12%|█▏        | 119999/1000000 [1:47:00<9:50:26, 24.84it/s]global step 120000, trans_decision ep_re 1836.7619957545612

{"global_step": 120000, "eval_re": [1974.0519566788325, 735.1370626959894, 
2054.5824983770717, 2097.309539078337, 930.5274783908205, 2183.1506881338696, 
2027.0472287390467, 2135.1268726525464, 2089.6459623638243, 2141.0406704352727],
"eval_len": [1000, 360, 1000, 1000, 475, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [1:56:30<9:43:21, 24.86it/s]global step 130000, trans_decision ep_re 1701.9526081262313

{"global_step": 130000, "eval_re": [1992.1432545987786, 1845.2618159406125, 
1165.5681074638292, 2086.326528125913, 1477.8550449916827, 1052.9856419443272, 
1824.949895432415, 1769.430506496504, 1996.9147207022431, 1808.0905655660074], 
"eval_len": [1000, 1000, 592, 1000, 837, 585, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:05:50<9:29:03, 25.19it/s]global step 140000, trans_decision ep_re 1597.918297824547

{"global_step": 140000, "eval_re": [1389.9235822485457, 1715.026020510893, 
1329.176503165781, 1832.549390241756, 1984.331302269052, 2079.2598275801142, 
1779.1551025354352, 113.96593705025494, 1879.5580933578942, 1876.2372192857422],
"eval_len": [771, 1000, 809, 1000, 1000, 1000, 1000, 145, 1000, 1000]}

 15%|█▍        | 149998/1000000 [2:15:10<9:30:40, 24.82it/s]global step 150000, trans_decision ep_re 1733.1859295812733

{"global_step": 150000, "eval_re": [1978.5766495742619, 1864.8375949229803, 
2106.342941424014, 202.2114676939044, 1547.8729382865886, 1957.7104001715143, 
1842.9850152526305, 1906.1067156771066, 1960.0010623585713, 1965.2145104511608],
"eval_len": [1000, 1000, 1000, 97, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [2:24:30<9:18:36, 25.06it/s]global step 160000, trans_decision ep_re 1077.5234308157449

{"global_step": 160000, "eval_re": [2234.374987916139, 186.74214295151526, 
2177.472245762482, 2052.601329316843, 214.40037177710622, 772.3897647527758, 
1111.7365751874422, 885.0069229373555, 921.3985942183838, 219.11137333740677], 
"eval_len": [1000, 94, 1000, 984, 113, 371, 514, 397, 426, 106]}

 17%|█▋        | 169998/1000000 [2:33:40<9:19:24, 24.73it/s]global step 170000, trans_decision ep_re 1030.493335771395

{"global_step": 170000, "eval_re": [263.7205095115948, 593.2636558374708, 
2144.8420420136836, 1705.5993247817441, 363.4124532398018, 859.4233092615667, 
1356.5380647440982, 1149.4199116733423, 425.861978016123, 1442.852108634527], 
"eval_len": [148, 326, 1000, 906, 171, 417, 713, 541, 249, 766]}

 18%|█▊        | 179998/1000000 [2:42:50<8:59:47, 25.32it/s]global step 180000, trans_decision ep_re 735.5666309507054

{"global_step": 180000, "eval_re": [1109.1238144564245, 1421.4921469038065, 
114.05604823656368, 931.7179774165634, 367.0687834612354, 314.9788277733307, 
502.6606999101468, 1755.620568311888, 147.94267540676466, 691.0047676303315], 
"eval_len": [601, 733, 66, 579, 171, 155, 291, 847, 81, 359]}

 19%|█▉        | 189999/1000000 [2:52:00<8:58:48, 25.06it/s]global step 190000, trans_decision ep_re 1926.3324956696226

{"global_step": 190000, "eval_re": [2145.139617745905, 2107.7202185565866, 
1925.1178342098694, 1242.2373642188827, 2017.8439694752826, 2069.962813589126, 
2107.037761822956, 1645.7911646952305, 2012.3010510636004, 1990.1731613187821], 
"eval_len": [1000, 1000, 1000, 635, 1000, 1000, 1000, 754, 1000, 1000]}

 20%|█▉        | 199998/1000000 [3:01:20<8:49:07, 25.20it/s]global step 200000, trans_decision ep_re 2098.5900244488753

{"global_step": 200000, "eval_re": [2246.439371814813, 2253.0659762217147, 
1639.6628030986355, 2169.2752830849963, 2129.8024106949547, 1484.4064081360993, 
2047.699826926735, 2374.8293804341315, 2480.3409662566924, 2160.377817819978], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:10:50<8:46:14, 25.02it/s]global step 210000, trans_decision ep_re 1777.3606189768811

{"global_step": 210000, "eval_re": [2134.778952975403, 1898.1536326402131, 
2257.131357086878, 2305.6302726715803, 145.55235955983662, 2060.9614077337637, 
2183.990551277309, 2311.8450455910747, 2095.4808073336267, 380.08180289912565], 
"eval_len": [1000, 1000, 1000, 1000, 86, 963, 1000, 1000, 1000, 191]}

 22%|██▏       | 219999/1000000 [3:20:10<8:36:07, 25.19it/s]global step 220000, trans_decision ep_re 570.0678846742074

{"global_step": 220000, "eval_re": [1114.8300036437995, 422.61717708645847, 
97.07183768096736, 430.655151166351, 682.9276591544492, 219.24029639253666, 
112.95248675795294, 1026.395214838358, 768.1639240876274, 825.8250959335737], 
"eval_len": [531, 165, 65, 222, 334, 109, 79, 470, 382, 1000]}

 23%|██▎       | 229998/1000000 [3:29:10<8:31:44, 25.08it/s]global step 230000, trans_decision ep_re 1512.0648997225142

{"global_step": 230000, "eval_re": [1977.296169052544, 247.03115529657165, 
2161.87518050873, 827.7012375603464, 2199.131855387237, 1825.2885538834323, 
2046.546170807679, 845.8859140970802, 677.4021935815582, 2312.490567049963], 
"eval_len": [1000, 130, 1000, 1000, 1000, 1000, 1000, 414, 1000, 1000]}

 24%|██▍       | 239997/1000000 [3:38:30<8:29:46, 24.85it/s]global step 240000, trans_decision ep_re 1890.236392742801

{"global_step": 240000, "eval_re": [2185.3142834395276, 917.2069135897137, 
2285.352099172485, 1954.9517356582073, 2373.830441183641, 387.82091182104824, 
2382.247581938558, 2179.826737843886, 2230.0930567897176, 2005.7201659912278], 
"eval_len": [976, 1000, 1000, 1000, 1000, 195, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [3:48:00<8:28:35, 24.58it/s]global step 250000, trans_decision ep_re 1237.5852804877327

{"global_step": 250000, "eval_re": [122.8927055575103, 2151.3223813061168, 
935.0907373407061, 1623.0819405512939, 296.642421908627, 2089.22444406987, 
1644.7752575103254, 2262.263173100465, 369.08695026360067, 881.472793268811], 
"eval_len": [93, 1000, 1000, 681, 142, 848, 734, 1000, 187, 1000]}

 26%|██▌       | 259997/1000000 [3:57:10<8:16:49, 24.82it/s]global step 260000, trans_decision ep_re 1132.9320104655744

{"global_step": 260000, "eval_re": [1645.5802121561803, 2153.9974793099695, 
129.83840193224523, 1782.1651934609167, 825.1344044446649, 1661.9399834604496, 
684.6513793726712, 416.1836544929036, 1151.7729837566876, 878.0564122690565], 
"eval_len": [729, 1000, 72, 1000, 351, 1000, 368, 183, 583, 1000]}

 27%|██▋       | 269999/1000000 [4:06:30<8:11:09, 24.77it/s]global step 270000, trans_decision ep_re 1875.2666163055314

{"global_step": 270000, "eval_re": [2279.8776186835544, 2250.717201505762, 
365.443524894927, 2120.92957343808, 1781.6412162453346, 1634.606986791605, 
2194.391630530974, 2278.237644871181, 2317.049782159784, 1529.770983934109], 
"eval_len": [1000, 1000, 192, 1000, 846, 793, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [4:15:50<7:52:29, 25.40it/s]global step 280000, trans_decision ep_re 1867.3813805769107

{"global_step": 280000, "eval_re": [2329.884000990853, 2277.000880999486, 
193.29269726949914, 2295.6039381719893, 2392.467235782432, 2325.130085224064, 
2264.460189609183, 1781.7275312912, 2214.8776853695636, 599.3695610608385], 
"eval_len": [1000, 1000, 95, 1000, 1000, 1000, 1000, 1000, 1000, 304]}

 29%|██▉       | 289998/1000000 [4:25:10<7:48:43, 25.25it/s]global step 290000, trans_decision ep_re 1640.1742793822862

{"global_step": 290000, "eval_re": [2124.196237095887, 2215.5657216241816, 
2096.715562854333, 2221.281128661694, 2050.846626013723, 787.7629838135382, 
1966.1320950317008, 1340.3972421242531, 854.8631383407248, 743.9820582628255], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 380, 889, 579, 1000, 332]}

 30%|██▉       | 299998/1000000 [4:34:30<7:43:19, 25.18it/s]global step 300000, trans_decision ep_re 1817.0051293233978

{"global_step": 300000, "eval_re": [2231.058462352973, 2012.1548352380344, 
1341.662239717074, 2030.4669522409165, 2135.385780325979, 2130.0096228727075, 
2301.380754040032, 2312.2474719519264, 1549.6506536244965, 126.03452086983943], 
"eval_len": [1000, 1000, 511, 1000, 1000, 1000, 1000, 1000, 690, 84]}

 31%|███       | 309998/1000000 [4:43:50<7:39:20, 25.04it/s]global step 310000, trans_decision ep_re 1469.1997191719977

{"global_step": 310000, "eval_re": [687.1086522803274, 2078.4457034660936, 
2277.481160115616, 2200.4938894279653, 267.10557622160195, 2032.2397308544178, 
87.32548990317942, 2217.6523372642555, 910.5481884188792, 1933.5964637676382], 
"eval_len": [301, 1000, 1000, 1000, 170, 1000, 61, 1000, 1000, 1000]}

 32%|███▏      | 319998/1000000 [4:53:10<7:31:26, 25.10it/s]global step 320000, trans_decision ep_re 2206.401516769276

{"global_step": 320000, "eval_re": [2288.1507671551813, 2346.3579992114364, 
2072.250825087156, 2355.679673536399, 2253.0719100768174, 1723.6683408513843, 
2187.798348564506, 2298.726670713509, 2215.068441958677, 2323.2421905376946], 
"eval_len": [1000, 1000, 1000, 1000, 971, 800, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329997/1000000 [5:02:30<7:27:21, 24.96it/s]global step 330000, trans_decision ep_re 1775.6811008901313

{"global_step": 330000, "eval_re": [2543.244292623227, 100.56065562230194, 
201.6286764940497, 2218.1972410439935, 2377.3397060104153, 1118.6423778908454, 
2604.192688527944, 2415.0873128025933, 2424.2574994524402, 1753.6605584335014], 
"eval_len": [1000, 61, 110, 1000, 1000, 488, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [5:11:50<7:24:37, 24.74it/s]global step 340000, trans_decision ep_re 2185.0086258712845

{"global_step": 340000, "eval_re": [2513.0230701498044, 2425.0854759082645, 
2334.217145531861, 2367.2378617407294, 646.2200415302908, 2394.487709655664, 
2239.212948227194, 2349.70738777408, 1920.4042894056397, 2660.490328789313], 
"eval_len": [1000, 1000, 1000, 1000, 287, 1000, 1000, 1000, 820, 1000]}

 35%|███▍      | 349999/1000000 [5:21:20<7:22:13, 24.50it/s]global step 350000, trans_decision ep_re 1992.3138975330075

{"global_step": 350000, "eval_re": [2499.324567631847, 2048.9908109105227, 
2106.816577059597, 2146.9655703231047, 2160.334523958971, 2400.4222221901987, 
2124.3193089085157, 2072.4786923710353, 590.0939023619999, 1773.392799614283], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 245, 798]}

 36%|███▌      | 359998/1000000 [5:30:40<7:04:05, 25.15it/s]global step 360000, trans_decision ep_re 1228.4952784621923

{"global_step": 360000, "eval_re": [321.7545938770175, 928.1945382418972, 
980.5745973143065, 1485.352835380505, 1661.3977492756158, 2356.764303088555, 
2273.6646814302303, 87.86912669961997, 1088.6934823501688, 1100.6868769640082], 
"eval_len": [173, 488, 438, 653, 1000, 1000, 1000, 61, 596, 1000]}

 37%|███▋      | 369997/1000000 [5:39:50<7:08:21, 24.51it/s]global step 370000, trans_decision ep_re 1662.857792269023

{"global_step": 370000, "eval_re": [2458.136704403247, 2138.4685229815154, 
952.4229761118298, 2315.1185298127907, 2335.4201123436064, -642.0021850808739, 
2343.293012540199, 2258.944952081296, 2198.620680898338, 270.1546165982819], 
"eval_len": [1000, 1000, 399, 1000, 1000, 1000, 1000, 1000, 1000, 109]}

 38%|███▊      | 379999/1000000 [5:49:10<6:58:20, 24.70it/s]global step 380000, trans_decision ep_re 1180.3394992584435

{"global_step": 380000, "eval_re": [2223.345421474917, 590.642937670786, 
956.2326382994113, 1685.167924695929, 2357.5292382818316, 116.46080795054105, 
409.70119454868814, 1893.714213170292, 835.3176871389691, 735.2829293530699], 
"eval_len": [1000, 242, 437, 1000, 1000, 82, 195, 1000, 338, 328]}

 39%|███▉      | 389999/1000000 [5:58:30<6:54:20, 24.54it/s]global step 390000, trans_decision ep_re 1593.6092279339998

{"global_step": 390000, "eval_re": [1336.2504881632376, 2186.9731073484822, 
2573.770530684785, 865.2156504249393, 1094.1165845768523, 2346.7977936603424, 
899.2386544988684, 791.3877172129852, 2283.88352632367, 1558.4582264458359], 
"eval_len": [586, 1000, 1000, 339, 1000, 1000, 466, 327, 1000, 682]}

 40%|███▉      | 399997/1000000 [6:07:40<6:41:44, 24.89it/s]global step 400000, trans_decision ep_re 1576.9866500118114

{"global_step": 400000, "eval_re": [1133.7698041060983, 2398.000049990044, 
1000.7301246958862, 2533.410383963664, 1960.012671546987, 1619.881558904147, 
576.532989668169, 1693.6305204217426, 2195.905910234081, 657.9924865872936], 
"eval_len": [575, 1000, 440, 1000, 866, 1000, 304, 730, 1000, 322]}

 41%|████      | 409998/1000000 [6:17:00<6:27:28, 25.38it/s]global step 410000, trans_decision ep_re 1566.471023875245

{"global_step": 410000, "eval_re": [217.35293473103562, 2138.3505195940543, 
957.9962016577279, 2341.902375130448, 2448.6290338984754, 169.05271158835183, 
244.31654464214625, 2413.390144057719, 2362.0540606182835, 2371.665712834208], 
"eval_len": [97, 922, 1000, 1000, 1000, 89, 143, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [6:26:20<6:26:35, 25.00it/s]global step 420000, trans_decision ep_re 1393.6680013410823

{"global_step": 420000, "eval_re": [367.02947217622904, 872.6887380691234, 
1405.4291789866809, 2031.0186634293525, 1887.0276562982096, 841.9870906025401, 
1702.8132820626115, 377.70007491976577, 2260.6731413024713, 2190.3127155638385],
"eval_len": [179, 361, 655, 913, 1000, 419, 752, 172, 1000, 1000]}

 43%|████▎     | 429997/1000000 [6:35:30<6:25:53, 24.62it/s]global step 430000, trans_decision ep_re 473.03692139058313

{"global_step": 430000, "eval_re": [369.29120047025503, 872.8520639324782, 
280.85418710569394, 166.51580332368258, 300.85600788108945, 715.6891863887118, 
736.7696280803282, 514.7011523048218, 306.24384760681795, 466.5961368119524], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439998/1000000 [6:45:00<6:10:51, 25.17it/s]global step 440000, trans_decision ep_re 1690.620735877167

{"global_step": 440000, "eval_re": [2183.663398826308, 1431.4312583367528, 
2016.0192724403373, 2066.116367082491, 2058.199311305817, 30.361148080219493, 
509.3325044450818, 2304.8060249254536, 2185.154217345603, 2121.123855983606], 
"eval_len": [1000, 673, 1000, 1000, 1000, 35, 294, 1000, 1000, 1000]}

 45%|████▍     | 449998/1000000 [6:54:20<6:05:13, 25.10it/s]global step 450000, trans_decision ep_re 1458.8704930348422

{"global_step": 450000, "eval_re": [1359.335155871465, 27.200634765860045, 
2101.5946262319044, 2478.35330291304, 1019.1979324391244, 2243.032387753535, 
69.62210025180575, 2168.574014189648, 2504.314305821377, 617.4804701106615], 
"eval_len": [651, 34, 1000, 1000, 1000, 1000, 87, 1000, 1000, 306]}

 46%|████▌     | 459999/1000000 [7:03:40<6:02:39, 24.82it/s]global step 460000, trans_decision ep_re 1093.760508709257

{"global_step": 460000, "eval_re": [1109.3572669316463, 823.1008720773797, 
346.3316492116515, 114.40480145139671, 2489.1034649968224, 457.591450546762, 
1231.9361227626468, 2141.8298687407487, 548.2603929881692, 1675.6891973853471], 
"eval_len": [487, 437, 153, 64, 1000, 262, 579, 1000, 247, 707]}

 47%|████▋     | 469999/1000000 [7:12:50<5:52:38, 25.05it/s]global step 470000, trans_decision ep_re 1614.8982623378017

{"global_step": 470000, "eval_re": [1226.1906837628628, 2099.867145627712, 
1514.0323489175817, 992.19507656014, 2121.3312348381032, 697.7135385506924, 
2051.6514517385112, 2147.462981596604, 1694.0011302780786, 1604.5370315077305], 
"eval_len": [530, 1000, 691, 494, 1000, 1000, 1000, 936, 1000, 751]}

 48%|████▊     | 479998/1000000 [7:22:10<5:44:47, 25.14it/s]global step 480000, trans_decision ep_re 1844.0575454756195

{"global_step": 480000, "eval_re": [2133.093998429486, 1977.6971776488308, 
1162.066137268487, 2203.2160586897935, 1033.311772286471, 2270.730214652903, 
2299.8003449674975, 816.6478118697271, 2236.722425447973, 2307.2895134950245], 
"eval_len": [1000, 845, 1000, 980, 438, 1000, 1000, 379, 1000, 1000]}

 49%|████▉     | 489998/1000000 [7:31:30<5:38:38, 25.10it/s]global step 490000, trans_decision ep_re 1249.7352071576956

{"global_step": 490000, "eval_re": [1865.6099579976556, 542.9303489080681, 
1418.3907267476598, 1683.0479036090037, 841.4153113257113, 2355.5902860180727, 
2230.5151072798853, 30.15241555837305, 206.87626272987202, 1322.8237514026548], 
"eval_len": [811, 307, 664, 825, 429, 1000, 1000, 34, 99, 604]}

 50%|████▉     | 499997/1000000 [7:40:40<5:37:03, 24.72it/s]global step 500000, trans_decision ep_re 2093.3483959412306

{"global_step": 500000, "eval_re": [2300.2275727507526, 1912.2462212232938, 
2225.477551519864, 1233.8857701638826, 2386.009494477874, 2202.4179351881476, 
2142.047448496756, 2038.09536445699, 2237.313280668162, 2255.7633204665854], 
"eval_len": [1000, 1000, 1000, 508, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509998/1000000 [7:50:10<5:22:31, 25.32it/s]global step 510000, trans_decision ep_re 1763.6690897785834

{"global_step": 510000, "eval_re": [2055.1380166378804, 2172.6030272178473, 
2167.923849360617, 667.6573965041607, 159.9383172535426, 2449.3209105171477, 
2495.0851641266268, 1409.4023098811494, 2477.1605834663906, 1582.461322820471], 
"eval_len": [878, 1000, 1000, 273, 89, 1000, 1000, 612, 1000, 721]}

 52%|█████▏    | 519998/1000000 [7:59:30<5:15:05, 25.39it/s]global step 520000, trans_decision ep_re 1191.6984132703733

{"global_step": 520000, "eval_re": [1185.8232608999044, 695.8259754201912, 
2157.695365829121, 657.8478029361637, 2347.5892205575187, 2040.5226071408686, 
187.55690218205913, 1741.5044310796054, 226.53378777633569, 676.0847788819656], 
"eval_len": [1000, 340, 1000, 1000, 1000, 1000, 109, 765, 115, 279]}

 53%|█████▎    | 529997/1000000 [8:08:40<5:15:04, 24.86it/s]global step 530000, trans_decision ep_re 1044.2796028157484

{"global_step": 530000, "eval_re": [337.26808618082373, 683.2165470669821, 
2261.44355674293, 1012.5217216126546, 2089.864883536817, 567.9587942849935, 
840.6474514327227, 187.65085808264638, 365.1480773299765, 2097.076051886935], 
"eval_len": [145, 305, 1000, 1000, 1000, 277, 412, 99, 218, 1000]}

 54%|█████▍    | 539999/1000000 [8:18:00<5:12:51, 24.50it/s]global step 540000, trans_decision ep_re 1485.0250690146438

{"global_step": 540000, "eval_re": [2465.7853004016665, 2143.4747936138215, 
2117.6416924789087, 540.6324845604187, 808.3115002153746, 209.48282367532255, 
756.0022681310385, 1312.2400950269855, 2257.3958670246, 2239.2838650183025], 
"eval_len": [1000, 1000, 1000, 301, 1000, 110, 1000, 580, 901, 1000]}

 55%|█████▍    | 549999/1000000 [8:27:20<5:01:45, 24.85it/s]global step 550000, trans_decision ep_re 1172.1761139441874

{"global_step": 550000, "eval_re": [786.108514262576, 1066.0792316635345, 
2207.498780339148, 2225.425309663478, 1315.987336961113, 798.2650152011923, 
118.38882728151452, 144.38292654920267, 2490.353986174108, 569.2712113460059], 
"eval_len": [313, 528, 1000, 1000, 556, 389, 134, 120, 1000, 290]}

 56%|█████▌    | 559999/1000000 [8:36:30<4:54:08, 24.93it/s]global step 560000, trans_decision ep_re 1170.9159962675974

{"global_step": 560000, "eval_re": [1246.2335664470418, 1147.1934512149376, 
183.43189913366737, 658.3394519359762, 1734.7170991730454, 1548.475489932733, 
2179.3794639426274, 2179.5591512088317, 332.3297701243783, 499.5006195627348], 
"eval_len": [1000, 532, 133, 305, 843, 684, 1000, 1000, 178, 309]}

 57%|█████▋    | 569998/1000000 [8:45:40<4:44:54, 25.15it/s]global step 570000, trans_decision ep_re 1640.352856515007

{"global_step": 570000, "eval_re": [1266.325708053081, 2070.5418079915316, 
2355.358512967001, 2208.4631884059104, 607.648447167418, 170.37963441854032, 
2208.4647730431116, 2145.513831795386, 1129.4628002323113, 2241.3698610757788], 
"eval_len": [529, 1000, 1000, 1000, 278, 92, 1000, 838, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [8:55:00<4:42:13, 24.80it/s]global step 580000, trans_decision ep_re 2041.546389950336

{"global_step": 580000, "eval_re": [2277.601464134596, 312.7389252504343, 
2470.8264756423955, 2422.048228569144, 2485.151718049223, 2437.5385760387776, 
2603.0526198830376, 2680.921416598925, 392.45688833921434, 2333.127586997618], 
"eval_len": [1000, 135, 1000, 1000, 1000, 1000, 1000, 998, 187, 1000]}

 59%|█████▉    | 589997/1000000 [9:04:20<4:35:12, 24.83it/s]global step 590000, trans_decision ep_re 1517.556081341782

{"global_step": 590000, "eval_re": [2192.970486631484, 2226.9774995049875, 
1548.314928992045, 1485.2087123582694, 1463.1662366360238, 931.9043595585418, 
839.9671939305591, 2073.804736967594, 208.11170234769202, 2205.134956490623], 
"eval_len": [1000, 1000, 1000, 1000, 751, 421, 404, 1000, 144, 1000]}

 60%|█████▉    | 599998/1000000 [9:13:40<4:29:02, 24.78it/s]global step 600000, trans_decision ep_re 1240.047457477802

{"global_step": 600000, "eval_re": [413.83434066800197, 697.7521020438022, 
2431.974557994752, 1137.273784911321, 257.74514651571394, 2040.9985186661638, 
678.9553094548371, 125.23294350105616, 2261.6473601315565, 2355.060510890815], 
"eval_len": [202, 310, 1000, 517, 160, 934, 273, 83, 1000, 1000]}

 61%|██████    | 609997/1000000 [9:22:50<4:23:31, 24.67it/s]global step 610000, trans_decision ep_re 1343.0368734688539

{"global_step": 610000, "eval_re": [2108.0683874761507, -761.5092058843168, 
613.2068382732181, 2394.539664433323, -633.8653052681751, 2271.657965496469, 
2123.255639406984, 1192.3405583215265, 2122.442574296334, 2000.231618137025], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 905, 1000, 1000, 1000]}

 62%|██████▏   | 619998/1000000 [9:32:20<4:16:04, 24.73it/s]global step 620000, trans_decision ep_re 1384.1029855451861

{"global_step": 620000, "eval_re": [1067.8260767428856, 1869.3340555698596, 
103.19249794923036, 645.0833140052135, 2348.3003361465267, 2261.39336404479, 
2333.969481525473, 1429.1547418854454, 718.5188417624712, 1064.2571458199652], 
"eval_len": [441, 802, 70, 317, 1000, 1000, 1000, 584, 373, 507]}

 63%|██████▎   | 629997/1000000 [9:41:30<4:08:22, 24.83it/s]global step 630000, trans_decision ep_re 1901.620500837266

{"global_step": 630000, "eval_re": [699.8742457948101, 2150.7084951265674, 
2373.6083386276327, 2349.7817778216945, 2164.2556143972606, 2169.8310308437412, 
2222.9948263110737, 2151.988521797326, 2093.447577379396, 639.7145802731562], 
"eval_len": [312, 1000, 1000, 1000, 1000, 1000, 1000, 957, 1000, 302]}

 64%|██████▍   | 639999/1000000 [9:51:00<4:05:19, 24.46it/s]global step 640000, trans_decision ep_re 1320.1169335632117

{"global_step": 640000, "eval_re": [568.1955566466382, 405.48517824743345, 
2236.6214340706742, 1084.1220516456879, 828.8841495637264, 2406.325038192469, 
980.3547837481823, 2462.619688865181, 2189.6628470841356, 38.89860756798873], 
"eval_len": [279, 175, 1000, 431, 398, 979, 514, 1000, 1000, 36]}

 65%|██████▍   | 649998/1000000 [10:00:10<3:52:42, 25.07it/s]global step 650000, trans_decision ep_re 837.6776954348561

{"global_step": 650000, "eval_re": [16.33163483321338, 503.3027597495237, 
634.6798185623348, 2230.03313315289, 440.2223557574494, 639.5894480759463, 
1743.0255488420867, 1549.5231286637636, 126.47065535763238, 493.5984713537218], 
"eval_len": [1000, 244, 1000, 996, 193, 1000, 792, 710, 86, 201]}

 66%|██████▌   | 659997/1000000 [10:09:20<3:49:31, 24.69it/s]global step 660000, trans_decision ep_re 1356.0153109708613

{"global_step": 660000, "eval_re": [465.8613058253567, 2042.3535285232172, 
196.86267100418436, 2288.676620170127, 1951.2281181440917, 2052.6390257060434, 
403.27180502930435, 937.5416736574672, 1128.7201382151902, 2092.9982234336317], 
"eval_len": [202, 1000, 97, 1000, 1000, 1000, 278, 428, 1000, 944]}

 67%|██████▋   | 669998/1000000 [10:18:40<3:38:44, 25.14it/s]global step 670000, trans_decision ep_re 1715.1615101123668

{"global_step": 670000, "eval_re": [2060.832497976676, 2102.7353491728527, 
2246.6745352975036, 196.15667080387868, 2176.2524645480585, 2058.0158272241847, 
597.2503321870927, 1499.0938609364355, 2071.7307841783304, 2142.872778798658], 
"eval_len": [1000, 1000, 1000, 105, 1000, 1000, 303, 767, 1000, 1000]}

 68%|██████▊   | 679998/1000000 [10:28:00<3:32:35, 25.09it/s]global step 680000, trans_decision ep_re 1944.2570709035003

{"global_step": 680000, "eval_re": [1817.2218849385183, 2006.339158776414, 
2015.0802922582004, 1930.2418095148957, 2033.0429766329373, 1985.6701671337207, 
1917.9258123307877, 2058.3773531703114, 1711.7020424207178, 1966.9692118585017],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [10:37:30<3:27:44, 24.87it/s]global step 690000, trans_decision ep_re 846.6834765782372

{"global_step": 690000, "eval_re": [134.9691868804302, 1416.7523374997004, 
1165.7367807022144, 359.4800440076682, 41.61631103853758, 2250.880098194582, 
274.51540811262885, 563.6252919383135, 80.11086957677762, 2179.1484378315195], 
"eval_len": [68, 1000, 1000, 187, 1000, 1000, 1000, 458, 51, 1000]}

 70%|██████▉   | 699997/1000000 [10:46:40<3:20:16, 24.97it/s]global step 700000, trans_decision ep_re 1662.10020747537

{"global_step": 700000, "eval_re": [2129.977948596413, 891.7997554273913, 
2052.9912577912896, 370.18662453493505, 975.074789693311, 2147.190578170148, 
2075.94640300138, 2007.4101465005142, 1905.1871547165317, 2065.237416321786], 
"eval_len": [1000, 507, 1000, 199, 499, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [10:56:10<3:15:42, 24.70it/s]global step 710000, trans_decision ep_re 1552.2247113428455

{"global_step": 710000, "eval_re": [1921.4257743975577, 2225.419016136701, 
878.4754723667157, 955.8920694529555, 2278.1590184694605, 1569.5991425618304, 
2176.870573223321, 2202.111067958931, 194.54211371283725, 1119.752865148146], 
"eval_len": [847, 1000, 408, 1000, 1000, 749, 1000, 1000, 108, 536]}

 72%|███████▏  | 719997/1000000 [11:05:20<3:07:06, 24.94it/s]global step 720000, trans_decision ep_re 1577.9165195241492

{"global_step": 720000, "eval_re": [633.533171738108, 602.8467050985321, 
2290.64482424498, 1936.1748609459487, 1204.541590948395, 1481.2541866441397, 
1570.8736989888546, 1693.2350042898954, 2157.203980078207, 2208.8571722644324], 
"eval_len": [320, 264, 1000, 815, 598, 684, 704, 874, 1000, 1000]}

 73%|███████▎  | 729997/1000000 [11:14:40<2:59:43, 25.04it/s]global step 730000, trans_decision ep_re 1725.8416611220632

{"global_step": 730000, "eval_re": [2260.487814361787, 2150.6994890226074, 
1575.2990398689858, 2125.1166272513465, 926.7903727924282, 344.34126060569105, 
2199.0541945451564, 1323.6375752090157, 2184.5219234967817, 2168.4683140668326],
"eval_len": [1000, 1000, 670, 1000, 437, 153, 1000, 547, 1000, 1000]}

 74%|███████▍  | 739997/1000000 [11:24:00<2:52:58, 25.05it/s]global step 740000, trans_decision ep_re 1836.3015093249383

{"global_step": 740000, "eval_re": [2348.2651237665295, 2353.453956215385, 
573.0110607857388, 187.7825536536029, 2289.538895496862, 1983.4501398608672, 
2238.6297209085856, 1985.734762706875, 2291.879280601072, 2111.269599253864], 
"eval_len": [1000, 1000, 280, 110, 1000, 1000, 1000, 852, 1000, 982]}

 75%|███████▍  | 749997/1000000 [11:33:20<2:45:24, 25.19it/s]global step 750000, trans_decision ep_re 1515.3280410326993

{"global_step": 750000, "eval_re": [1848.8637138513307, 838.4054110122647, 
1847.045649476484, 1964.5167655655232, 1437.210476201082, 757.7734747255271, 
1728.382900589283, 1317.0204906380498, 1609.5311611525892, 1804.5303671148592], 
"eval_len": [1000, 1000, 1000, 1000, 731, 1000, 1000, 698, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [11:42:50<2:43:08, 24.52it/s]global step 760000, trans_decision ep_re 1914.0005902378853

{"global_step": 760000, "eval_re": [403.338174205792, 2367.3300927350338, 
2367.335900528797, 2039.1745649918278, 2119.066033651944, 915.1144941197886, 
2310.1717512333216, 2386.1947453393805, 2167.5128163222917, 2064.7673292506774],
"eval_len": [185, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [11:52:10<2:33:07, 25.04it/s]global step 770000, trans_decision ep_re 1736.7045134131884

{"global_step": 770000, "eval_re": [690.7409671919236, 1105.5714871132066, 
2143.108707803091, 2069.323759980406, 126.20874150129407, 2326.9263086096016, 
2316.158943706348, 2053.3161132446035, 2348.874439060164, 2186.815665921247], 
"eval_len": [344, 549, 1000, 1000, 76, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [12:01:20<2:27:05, 24.93it/s]global step 780000, trans_decision ep_re 1788.337801898772

{"global_step": 780000, "eval_re": [716.5388563908914, 2213.7659137102955, 
2178.6311968132327, 388.8246019478889, 1438.6581383785842, 2262.5834906675277, 
2197.9582228412087, 2033.3825484590109, 2086.4132359979244, 2366.6218137811557],
"eval_len": [350, 1000, 962, 181, 710, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789998/1000000 [12:10:40<2:18:26, 25.28it/s]global step 790000, trans_decision ep_re 1271.4955413301946

{"global_step": 790000, "eval_re": [77.74678117766457, 2339.860393592217, 
-838.9854662967488, 68.52596441247137, 1080.7793793020446, 2247.0410209464617, 
2222.8092787589176, 2422.4837000632942, 2307.1558329662216, 787.5385283794005], 
"eval_len": [53, 1000, 1000, 51, 511, 1000, 1000, 1000, 1000, 378]}

 80%|███████▉  | 799997/1000000 [12:19:50<2:12:35, 25.14it/s]global step 800000, trans_decision ep_re 1869.69486946793

{"global_step": 800000, "eval_re": [482.1586448570176, 2292.4378630982173, 
2022.5463276247137, 570.5270614376175, 2349.1373280768516, 2322.5883733245355, 
2073.6572922029723, 2201.7176683369785, 2191.678843376548, 2190.4992923438467], 
"eval_len": [234, 1000, 1000, 282, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809998/1000000 [12:29:10<2:04:21, 25.46it/s]global step 810000, trans_decision ep_re 1268.8451131591955

{"global_step": 810000, "eval_re": [1243.2738246798815, 1772.9191892401373, 
1781.2542091683147, 401.8237113559631, 2268.4528310743117, 408.2464515298768, 
696.6088347216937, 482.76214675985017, 2468.588712465528, 1164.5212205963971], 
"eval_len": [539, 796, 687, 198, 1000, 209, 308, 199, 1000, 494]}

 82%|████████▏ | 819999/1000000 [12:38:20<2:00:22, 24.92it/s]global step 820000, trans_decision ep_re 2088.0325718904164

{"global_step": 820000, "eval_re": [2361.631996620918, 2152.3303943392375, 
2252.6438527902715, 2305.7264788394623, 2100.8555622024583, 489.4591816074106, 
2231.0802718089362, 2391.1954675053275, 2331.751739211353, 2263.6507739787876], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 311, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [12:47:40<1:54:06, 24.83it/s]global step 830000, trans_decision ep_re 2063.9026128350056

{"global_step": 830000, "eval_re": [2290.4256770419634, 1668.142025969715, 
2360.1006795184417, 2290.6027585039838, 2228.120165164802, 920.8298862233477, 
2150.7931151921744, 2230.1634792720915, 2241.8134158438206, 2258.034925619715], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [12:57:00<1:44:26, 25.53it/s]global step 840000, trans_decision ep_re -1206.2481805188077

{"global_step": 840000, "eval_re": [-1114.964460008558, -1625.0311878681305, 
-1699.7737448186274, 40.70100934356034, -1698.084103432458, 10.817704574449053, 
-1583.1530844303404, -1620.5028432349352, -1249.2227567653279, 
-1523.2683385477076], "eval_len": [1000, 1000, 1000, 80, 1000, 57, 1000, 1000, 
1000, 1000]}

 85%|████████▍ | 849999/1000000 [13:06:20<1:41:06, 24.73it/s]global step 850000, trans_decision ep_re 1776.4540444726601

{"global_step": 850000, "eval_re": [1049.4087358266968, 1129.4760700705506, 
2493.914219222277, 2285.2033234415276, 1312.4678167652266, 2366.7065006252114, 
2231.034342563926, 455.4655170074, 2298.019272318944, 2142.844646884841], 
"eval_len": [477, 537, 1000, 1000, 544, 1000, 1000, 237, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [13:15:30<1:33:01, 25.08it/s]global step 860000, trans_decision ep_re 1755.5012730989613

{"global_step": 860000, "eval_re": [300.1657431296476, 2192.4334541318954, 
2542.6243935961143, 1497.2365925403635, 1964.6945954502337, 1940.0587787380452, 
2337.9965709932694, 2239.3458917399084, 112.55712008421878, 2427.8995905859188],
"eval_len": [154, 1000, 1000, 688, 1000, 1000, 1000, 1000, 77, 1000]}

 87%|████████▋ | 869998/1000000 [13:24:50<1:26:38, 25.01it/s]global step 870000, trans_decision ep_re 1535.6976037621728

{"global_step": 870000, "eval_re": [512.2959624766145, 1985.9420917145233, 
2262.888645185079, 665.7853783772649, 1390.51720450512, 830.0379246522212, 
2214.396215209119, 1973.9036938767579, 1361.4726314313486, 2159.736290193679], 
"eval_len": [251, 883, 1000, 344, 596, 368, 1000, 1000, 640, 1000]}

 88%|████████▊ | 879997/1000000 [13:34:00<1:19:29, 25.16it/s]global step 880000, trans_decision ep_re 1489.4263139439313

{"global_step": 880000, "eval_re": [1008.918247827553, 2138.0189071864397, 
2137.4136264462295, 2117.661124537741, 2070.8878583282867, 2172.0900203126776, 
639.1938764899003, 395.93712218728695, 1958.49824296763, 255.6441131555669], 
"eval_len": [534, 1000, 1000, 1000, 1000, 1000, 352, 171, 1000, 142]}

 89%|████████▉ | 889999/1000000 [13:43:10<1:13:18, 25.01it/s]global step 890000, trans_decision ep_re 1168.7977298381034

{"global_step": 890000, "eval_re": [180.6715277822629, 2251.740145262139, 
405.9085243603516, 135.38368407908158, 2101.262997375106, 224.93870041264347, 
209.64598890300584, 2091.5312529985217, 2143.4164206116966, 1943.478056596225], 
"eval_len": [120, 1000, 192, 86, 1000, 133, 112, 1000, 1000, 868]}

 90%|████████▉ | 899997/1000000 [13:52:20<1:06:55, 24.90it/s]global step 900000, trans_decision ep_re 1554.3397969551825

{"global_step": 900000, "eval_re": [2013.6621721198082, 2355.372866228285, 
1916.191394567452, 529.5044837610001, 2224.469067342814, 861.2619000421319, 
170.08290467137167, 2125.96349379958, 2322.50407587696, 1024.3856111424213], 
"eval_len": [1000, 1000, 916, 309, 1000, 391, 99, 1000, 1000, 457]}

 91%|█████████ | 909999/1000000 [14:01:30<59:26, 25.24it/s]global step 910000, trans_decision ep_re 1196.7479992733097

{"global_step": 910000, "eval_re": [1196.026474378382, 1925.95120869022, 
158.6313266701973, 1055.796150499687, 1293.2427402523685, 2222.000172499631, 
1772.7289948921314, 186.53018922059093, 609.2836687935207, 1547.289066836368], 
"eval_len": [600, 1000, 110, 469, 697, 1000, 872, 104, 295, 754]}

 92%|█████████▏| 919997/1000000 [14:10:40<53:53, 24.74it/s]global step 920000, trans_decision ep_re 1853.1463348303162

{"global_step": 920000, "eval_re": [1535.803794422971, 2140.118576924323, 
2325.3232322786016, 1101.1437298422302, 1034.805023851238, 2429.480967890264, 
2248.0318155109603, 1114.6314486692283, 2352.8238828843464, 2249.3008760289995],
"eval_len": [667, 1000, 1000, 528, 512, 1000, 1000, 486, 1000, 1000]}

 93%|█████████▎| 929998/1000000 [14:20:00<45:48, 25.47it/s]global step 930000, trans_decision ep_re 2101.8190371125356

{"global_step": 930000, "eval_re": [2130.3420994977555, 2440.889626309114, 
2263.7627308945303, 2146.9223526739615, 1732.4881799225668, 2512.2232011985793, 
2137.2882790130843, 1089.8310135413685, 2262.952137171948, 2301.490750902447], 
"eval_len": [1000, 1000, 1000, 1000, 762, 1000, 1000, 510, 1000, 1000]}

 94%|█████████▍| 939998/1000000 [14:29:20<39:23, 25.39it/s]global step 940000, trans_decision ep_re 2199.383182609844

{"global_step": 940000, "eval_re": [2261.2207937931826, 2289.153165255613, 
2078.737811199855, 2346.237481738643, 2335.15207238343, 2453.158433345234, 
2365.766668239735, 1770.5902243145736, 2383.5839772799786, 1710.2311985482013], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 831, 1000, 1000]}

 95%|█████████▍| 949998/1000000 [14:38:40<33:03, 25.21it/s]global step 950000, trans_decision ep_re 2223.800986417264

{"global_step": 950000, "eval_re": [1622.7410592223362, 2146.3783177933683, 
2429.32129525568, 2438.6663523877155, 1404.506523672152, 2477.805994502593, 
2280.897500308712, 2460.3578700164608, 2386.7121227234175, 2590.622828290206], 
"eval_len": [1000, 1000, 1000, 1000, 709, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959997/1000000 [14:48:00<26:51, 24.82it/s]global step 960000, trans_decision ep_re 1244.6582744247653

{"global_step": 960000, "eval_re": [1201.8100947976393, 473.75520738701755, 
95.60431553067788, 147.82072454101845, 893.9129640216905, 1098.1602693956734, 
1860.3072631880987, 2370.5435919744837, 2229.899498186703, 2074.7688152246506], 
"eval_len": [1000, 265, 65, 99, 1000, 547, 829, 1000, 1000, 1000]}

 97%|█████████▋| 969997/1000000 [14:57:10<19:51, 25.18it/s]global step 970000, trans_decision ep_re 56.80074672803884

{"global_step": 970000, "eval_re": [67.77204767209977, 316.5982841547432, 
-337.77123399070194, 85.34861404070593, 66.14518792058114, 69.07375357173919, 
99.88600180022807, 80.70274871318495, 74.25099999615696, 46.001063401651095], 
"eval_len": [73, 1000, 1000, 66, 56, 49, 76, 92, 67, 80]}

 98%|█████████▊| 979997/1000000 [15:06:10<13:14, 25.16it/s]global step 980000, trans_decision ep_re 1983.2711778168875

{"global_step": 980000, "eval_re": [2257.368221252866, 2258.1556015616807, 
2273.6621339213225, 2272.684594650399, 1337.976727489129, 2150.9790810705863, 
2471.6696468869736, 377.371507166937, 2224.8351673892594, 2208.0090967797223], 
"eval_len": [1000, 1000, 1000, 1000, 516, 1000, 1000, 185, 1000, 1000]}

 99%|█████████▉| 989998/1000000 [15:15:30<06:34, 25.37it/s]global step 990000, trans_decision ep_re 1777.555789065535

{"global_step": 990000, "eval_re": [2202.833256481254, 1818.3692355468509, 
2070.8563372117665, 2157.72083664318, 1384.934516399454, 1884.726783793013, 
1877.9583064593469, 325.2177643000958, 2095.7837990361263, 1957.1570547842634], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 165, 1000, 1000]}

100%|█████████▉| 999998/1000000 [15:24:50<00:00, 25.01it/s]global step 1000000, trans_decision ep_re 1070.5737095895897

{"global_step": 1000000, "eval_re": [1175.2209886163612, 616.0430949708456, 
1041.282754197007, 2222.9938022117285, 971.7876120234018, 190.16091101924238, 
1875.990175717433, 219.92007864523666, 1238.477056411615, 1153.8606220830254], 
"eval_len": [531, 269, 1000, 1000, 367, 98, 794, 104, 535, 509]}

100%|██████████| 1000000/1000000 [15:25:07<00:00, 18.02it/s]
