
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.1
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9999/1000000 [03:30<8:23:38, 32.76it/s]global step 10000, trans_decision ep_re -364.83432907887436

{"global_step": 10000, "eval_re": [-401.81404205602286, -246.978039178816, 
-494.8282013472294, -242.5218174534023, -210.92480470259562, -556.0677671724495,
-294.2618004928231, -365.03206902209143, -584.713583936863, -251.2011654264505],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [10:40<8:15:43, 32.95it/s]global step 20000, trans_decision ep_re 682.9058091061031

{"global_step": 20000, "eval_re": [72.77092708117532, 404.8888517351136, 
960.6633000722711, 7.60985055279685, 1043.659419526286, 1397.7435871093105, 
1419.8540061880637, 394.86964242346255, 1178.3550296308224, 
-51.356523258270464], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  3%|▎         | 29997/1000000 [17:40<8:10:24, 32.97it/s]global step 30000, trans_decision ep_re 1135.1597624943029

{"global_step": 30000, "eval_re": [47.05796023759655, 2165.9697948502176, 
537.674388208794, 591.9941921677995, 2711.0052252725714, 3082.462771352512, 
837.2064115841525, 152.48061869427056, 125.31047840203793, 1100.435784173078], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [24:50<8:09:11, 32.71it/s]global step 40000, trans_decision ep_re 1307.3651593903312

{"global_step": 40000, "eval_re": [2463.630134611959, 667.5071059488405, 
329.1326481580574, 1262.1059928015525, 3743.7815033706647, 1097.3793988490072, 
1389.6488299455305, 233.3775838487416, 1239.689320661244, 647.3990757077145], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [31:50<8:03:24, 32.75it/s]global step 50000, trans_decision ep_re 635.1810434315627

{"global_step": 50000, "eval_re": [685.8767511829703, 547.6081554129197, 
848.6184148831346, 457.94907895169746, 450.6815082149835, 640.3227012745606, 
843.94226055817, 488.4492672932056, 695.0033149881662, 693.3589815558184], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [38:50<7:57:31, 32.81it/s]global step 60000, trans_decision ep_re 2065.963631976173

{"global_step": 60000, "eval_re": [849.6542952921924, 2352.970968378084, 
2519.0562368688306, 2559.1823066373304, 2353.9740543635808, 2374.820591145747, 
1988.1933986905763, 2624.6958813600627, 1152.1515700918367, 1884.9370169334936],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [46:00<7:53:43, 32.72it/s]global step 70000, trans_decision ep_re 1406.8060763693143

{"global_step": 70000, "eval_re": [969.6994032271078, 1080.6481699981225, 
1029.904787723675, 1154.6152975070745, 937.2727648953255, 1306.0947669109316, 
1215.2410289438726, 2353.8836477376494, 1903.8746582483495, 2116.826238501035], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79997/1000000 [53:00<7:46:03, 32.90it/s]global step 80000, trans_decision ep_re 1046.9087148491533

{"global_step": 80000, "eval_re": [971.9424935118204, 980.9651082888013, 
1012.1564056734873, 1009.8958996437882, 1457.0512933980149, 1169.2173431859753, 
989.9050996466682, 1053.213215012599, 882.2076558880573, 942.5326342423206], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:00:00<7:42:48, 32.77it/s]global step 90000, trans_decision ep_re 1172.8883851125042

{"global_step": 90000, "eval_re": [1230.7744820780058, 1265.5301669594667, 
1140.215094629841, 1032.5135003354997, 1074.028806039096, 1955.6985114892323, 
1152.4797469272294, 958.3126024976802, 901.4752523408479, 1017.8556878281418], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [1:07:10<7:38:40, 32.70it/s]global step 100000, trans_decision ep_re 1516.966470735799

{"global_step": 100000, "eval_re": [1629.7689568914502, 1070.12389189935, 
1753.6391430975425, 1082.1767569438193, 1214.9037733410185, 1297.382389826869, 
1143.4087801696926, 1203.336378486899, 2720.191871417856, 2054.7327652834942], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109997/1000000 [1:14:10<7:29:07, 33.03it/s]global step 110000, trans_decision ep_re 1745.9661275857936

{"global_step": 110000, "eval_re": [2831.6244905925605, 1487.4634539135213, 
70.9509338298516, 2142.3518262251678, 1507.372898197662, 1276.1312103238533, 
1871.3358685006765, 1595.998632487306, 3066.814665070533, 1609.6172967168043], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:21:20<7:26:28, 32.85it/s]global step 120000, trans_decision ep_re 1499.8089194888998

{"global_step": 120000, "eval_re": [1284.0393720535637, 1304.1909174539094, 
1521.7428225665442, 1767.7187969638526, 1384.6274567337423, 1932.2580438044984, 
1437.8773563500392, 937.2539378007242, 1745.894412928754, 1682.4860782333697], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:28:20<7:22:41, 32.75it/s]global step 130000, trans_decision ep_re 1684.539008442243

{"global_step": 130000, "eval_re": [1555.8432427888363, 1723.045699412733, 
2553.9762298481924, 1715.7218796768152, 1405.012629153374, 1574.3032981130862, 
2449.3559404339685, 1596.6380571241784, 1141.736996420711, 1129.7561114505377], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:35:20<7:15:54, 32.88it/s]global step 140000, trans_decision ep_re 2145.90598932822

{"global_step": 140000, "eval_re": [3093.8370264404266, 2195.6707239813813, 
3137.594065723874, 2034.2502404777013, 1291.4016994679478, 2224.816932224019, 
1799.4540335284353, 2795.0565554487534, 1409.5769326580305, 1477.4016833316336],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [1:42:20<7:11:24, 32.84it/s]global step 150000, trans_decision ep_re 1525.796619242225

{"global_step": 150000, "eval_re": [1421.4646706118856, 1141.829793272854, 
1287.0189270783512, 1310.8812221528635, 1071.8163248739195, 1227.8445915644184, 
2729.98822160771, 1228.2487195761253, 2739.4697517971704, 1099.403969886952], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:49:30<7:07:59, 32.71it/s]global step 160000, trans_decision ep_re 1547.668231022128

{"global_step": 160000, "eval_re": [1248.4133730066883, 1331.2519437250737, 
1386.128177927898, 1128.5762513191974, 3499.552805355342, 1085.9621935077644, 
1416.356832085367, 1792.2907671974826, 1048.842862069094, 1539.3071040273715], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [1:56:30<6:59:14, 33.00it/s]global step 170000, trans_decision ep_re 1517.5983850779157

{"global_step": 170000, "eval_re": [1962.3126965478616, 1148.1942793587511, 
1083.852592028699, 1056.84771662599, 1565.3665038053027, 2119.364529592234, 
1680.306384468217, 1282.9560586258483, 1738.9849058195437, 1537.7981839067115], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:03:40<6:55:58, 32.85it/s]global step 180000, trans_decision ep_re 1547.07434722953

{"global_step": 180000, "eval_re": [1082.9171484890644, 3053.9092731868686, 
1006.9802508939633, 1674.412322872735, 2127.6470747021303, 1282.732247983105, 
1162.420642273828, 1333.3966138921496, 1204.4366870482875, 1541.891210953166], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:10:40<6:53:11, 32.67it/s]global step 190000, trans_decision ep_re 1530.465624276928

{"global_step": 190000, "eval_re": [1239.0407766163312, 1977.8266555379435, 
1336.817041717973, 1980.147789526547, 1384.0059643851719, 1105.1162689408784, 
1182.3255620985924, 1344.356216828481, 1636.5706113775577, 2118.449355739803], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [2:17:50<6:46:13, 32.82it/s]global step 200000, trans_decision ep_re 1688.1096760987325

{"global_step": 200000, "eval_re": [2269.5515739492153, 2015.013782727113, 
1249.1726430373785, 1701.4246069408543, 1056.5338881552643, 1179.2306429119644, 
1290.9622413591526, 2634.8702050856573, 1081.4730183684635, 2402.8641584522597],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209997/1000000 [2:24:50<6:38:27, 33.04it/s]global step 210000, trans_decision ep_re 2079.07519203782

{"global_step": 210000, "eval_re": [3351.331624421894, 2416.945199085284, 
3362.967180265127, 1569.677075513531, 1771.5837607891376, 1629.2190243793054, 
1229.4085814075893, 1436.7947765327974, 2398.433887828525, 1624.3908101550146], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [2:31:50<6:33:12, 33.06it/s]global step 220000, trans_decision ep_re 1676.6311453935236

{"global_step": 220000, "eval_re": [1609.9520096860085, 3257.132768447522, 
2139.232577161141, 1313.252051852328, 1404.1581756195603, 1379.6600476012181, 
1497.1974730577126, 1280.0760091261143, 1183.1676942834476, 1702.4826471001845],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [2:38:50<6:30:10, 32.89it/s]global step 230000, trans_decision ep_re 1470.5205319174916

{"global_step": 230000, "eval_re": [1213.2618995029088, 2112.971552766284, 
1370.2098401474377, 1450.7430628932073, 1657.563811479427, 1477.8037095284953, 
1333.974280708198, 1208.7805512046866, 1389.635659914768, 1490.2609510295033], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239997/1000000 [2:45:50<6:24:37, 32.93it/s]global step 240000, trans_decision ep_re 1742.36294808955

{"global_step": 240000, "eval_re": [1505.1022065428567, 1808.8609149830038, 
2068.6529824349614, 1932.5365589944934, 1140.1291914763578, 2227.850722260146, 
1915.212142615584, 1324.9705662935264, 1922.2142585811157, 1578.0999367134534], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [2:52:50<6:18:58, 32.98it/s]global step 250000, trans_decision ep_re 1942.1115686968478

{"global_step": 250000, "eval_re": [2945.249715378763, 1192.658026948574, 
1538.6458246667084, 2743.043358555116, 1699.2859661757243, 1805.0472284745565, 
1408.3511764132775, 1684.4197986504391, 1700.7304797121324, 2703.684111993184], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [2:59:50<6:15:19, 32.86it/s]global step 260000, trans_decision ep_re 1550.9398106332842

{"global_step": 260000, "eval_re": [2081.2626188810923, 1140.889226897182, 
1235.5105372968158, 1652.1025241991206, 1444.518227011628, 1116.7206691847475, 
1724.891805224065, 1304.4810000506423, 1174.8845484126634, 2634.136949174885], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269997/1000000 [3:07:00<6:08:24, 33.03it/s]global step 270000, trans_decision ep_re 1768.5504415690193

{"global_step": 270000, "eval_re": [1550.5508087705118, 1573.299474625342, 
1292.1364004371467, 1633.5297231171746, 2927.7359248422376, 2688.396407256049, 
1298.174329263463, 1506.0983571481079, 1596.974537638293, 1618.608452591869], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [3:14:00<6:05:25, 32.84it/s]global step 280000, trans_decision ep_re 2454.032922071238

{"global_step": 280000, "eval_re": [2640.916093784479, 2812.0816906135524, 
2793.64318157396, 2858.8034272663244, 2710.5484038475506, 1864.097433920805, 
1552.8724805718825, 2435.5079315469293, 2266.1503007992574, 2605.708276787637], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289997/1000000 [3:21:00<5:58:52, 32.97it/s]global step 290000, trans_decision ep_re 1817.171379749872

{"global_step": 290000, "eval_re": [2549.011238200875, 1386.7370642370279, 
2964.810033334682, 1682.0311606250343, 1904.3305482707913, 1233.8025837534667, 
1268.8407979488866, 1291.6710141387944, 1975.9977229611982, 1914.4816340279656],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [3:28:00<5:53:11, 33.03it/s]global step 300000, trans_decision ep_re 1681.2807148652312

{"global_step": 300000, "eval_re": [1634.8405568494736, 1130.30547277303, 
2221.5613538637062, 1269.3989092835686, 1767.4063143441792, 1270.8754248183373, 
1489.017172849477, 1965.1152305901303, 1292.4612959943802, 2771.825417286028], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [3:35:00<5:41:00, 33.72it/s]global step 310000, trans_decision ep_re 1789.251861401023

{"global_step": 310000, "eval_re": [2634.7514686944924, 2672.516160139991, 
1461.1138998979516, 1418.7645967589285, 1758.0251387836981, 1536.6203283549892, 
1308.3247317123207, 1794.3574553179462, 1319.1057060527999, 1988.939128297112], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [3:41:50<5:37:22, 33.59it/s]global step 320000, trans_decision ep_re 1636.1746093896295

{"global_step": 320000, "eval_re": [1362.1741836222477, 1433.0063936256556, 
1523.7957848814722, 1721.2932683825557, 2565.5736976275575, 2415.4201458082684, 
1301.1115266474937, 1314.0978792807032, 1242.7045905948205, 1482.5686234255204],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [3:48:50<5:33:45, 33.46it/s]global step 330000, trans_decision ep_re 2137.8202412341234

{"global_step": 330000, "eval_re": [2505.1276084666715, 1156.6799379519232, 
2346.7138427338878, 2567.706957670729, 2799.699075151221, 2616.993751211144, 
1827.8155073468042, 2587.203360089247, 1320.7585855343352, 1649.5037861852695], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [3:55:50<5:27:52, 33.55it/s]global step 340000, trans_decision ep_re 1906.7947805889955

{"global_step": 340000, "eval_re": [1738.8851773557835, 1331.9966098822558, 
2233.4937550752675, 1803.8892794756105, 2465.9201549744803, 2422.5664364254512, 
2043.0882387788884, 2089.2225862409755, 1451.3719209552564, 1487.513646725986], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [4:02:40<5:24:46, 33.36it/s]global step 350000, trans_decision ep_re 2009.4409037888818

{"global_step": 350000, "eval_re": [1693.8022506537425, 1653.3929720557364, 
2590.8407643093105, 1422.7943976134475, 2755.7751162467384, 1705.8321569752552, 
2015.3636083011736, 1347.613837346025, 2319.6542155824623, 2589.339718804927], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [4:09:40<5:21:02, 33.23it/s]global step 360000, trans_decision ep_re 1716.1289520447147

{"global_step": 360000, "eval_re": [1589.6042044404662, 1547.3330541439373, 
1395.9604111013002, 1357.8044392207385, 2527.696736229488, 2183.608041657491, 
1350.9228223481414, 2315.625837064133, 1290.8169891069786, 1601.9169851344711], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [4:16:40<5:14:16, 33.41it/s]global step 370000, trans_decision ep_re 1705.3841677233086

{"global_step": 370000, "eval_re": [1729.8911026493843, 1949.4564382116594, 
1094.5356160635097, 1771.9983127611163, 1648.3704683930785, 1666.179998191495, 
2564.232232517836, 1497.6456595768923, 1272.0544564338675, 1859.477392434247], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [4:23:30<5:09:34, 33.38it/s]global step 380000, trans_decision ep_re 2280.6052484975526

{"global_step": 380000, "eval_re": [2321.2933649234733, 2398.992702068616, 
2374.000593732564, 2181.5897889737876, 2383.668573154088, 2482.6449813457507, 
1384.1148627829493, 2444.649074047583, 2414.9964866328028, 2420.102057313914], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389999/1000000 [4:30:30<5:05:11, 33.31it/s]global step 390000, trans_decision ep_re 1713.072530082784

{"global_step": 390000, "eval_re": [1608.033649497381, 1506.608146228909, 
1402.8109630212437, 2169.2130180410268, 2081.457249283461, 1283.970321368159, 
1792.6556117792043, 2617.455036379331, 1249.5240104096872, 1418.9972948194365], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [4:37:30<4:59:14, 33.42it/s]global step 400000, trans_decision ep_re 2050.3425011503705

{"global_step": 400000, "eval_re": [1230.9040364197128, 2358.972765733447, 
2863.467546766575, 2033.640444177979, 1179.2420092384464, 2594.456999215926, 
1470.0321477218356, 2077.228555646675, 2728.16556901671, 1967.3149375663995], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [4:44:20<4:55:42, 33.25it/s]global step 410000, trans_decision ep_re 2205.4035535659746

{"global_step": 410000, "eval_re": [2563.454345170326, 2589.6385633424775, 
2554.115080505188, 1772.5641516277, 1600.979484947513, 1590.7524562463352, 
2471.2670668806486, 2401.544519016959, 2704.216907230605, 1805.5029606919977], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419999/1000000 [4:51:20<4:50:03, 33.33it/s]global step 420000, trans_decision ep_re 1961.1798223878911

{"global_step": 420000, "eval_re": [2572.1202710803277, 2174.3588164005378, 
1704.520404521896, 2519.7071936503294, 1712.105756060957, 2208.444024379823, 
1206.99160500463, 2134.291712539463, 2101.5826154892643, 1277.6758247516834], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [4:58:20<4:47:02, 33.10it/s]global step 430000, trans_decision ep_re 1977.9334919268836

{"global_step": 430000, "eval_re": [2204.655849949578, 2003.928806210664, 
2150.2846749166342, 2249.4129891026882, 2338.337324817342, 1462.1263324015604, 
1356.8307996294764, 2463.3725850471533, 2043.112629210801, 1507.2729279829357], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [5:05:10<4:41:14, 33.19it/s]global step 440000, trans_decision ep_re 1982.330620536226

{"global_step": 440000, "eval_re": [1721.5975067822856, 1686.7573727025565, 
1629.5775053208056, 2688.9582823877295, 1346.2944320448257, 2233.0130101742193, 
2728.081073672882, 1765.8358168199602, 2227.388420863134, 1795.8027845938593], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [5:12:10<4:32:53, 33.59it/s]global step 450000, trans_decision ep_re 1870.9854322981203

{"global_step": 450000, "eval_re": [1221.5592556062363, 1567.3635260390688, 
2711.091370434296, 2749.9555874833204, 1863.1994057149902, 1417.1644854390465, 
2484.37587431859, 2142.8504853210325, 1181.981820475078, 1370.3125121495445], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [5:19:10<4:28:52, 33.47it/s]global step 460000, trans_decision ep_re 1737.0239818057903

{"global_step": 460000, "eval_re": [1976.8878813327003, 1212.5957307526403, 
2088.340405523956, 1575.1463514657019, 2132.3557595929224, 1600.0567010982513, 
1928.5405545064996, 1636.600329150376, 1991.1278830335575, 1228.5882216012992], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [5:26:00<4:25:38, 33.25it/s]global step 470000, trans_decision ep_re 2014.90736545579

{"global_step": 470000, "eval_re": [2286.522948076206, 1400.5194904705857, 
2365.186062661173, 1643.850222914636, 2038.1665047697986, 2409.3779411891833, 
2462.8878944722865, 1861.534641176057, 1911.6463664881364, 1769.3815823398365], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [5:33:00<4:20:35, 33.26it/s]global step 480000, trans_decision ep_re 1642.1946332603366

{"global_step": 480000, "eval_re": [1636.4803583193932, 1485.8457457173802, 
1954.7491372097456, 2212.4735832925244, 1232.3262541739416, 1301.4235660864767, 
1823.610423748267, 1232.9351788419333, 1497.2295421216359, 2044.8725430920674], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [5:40:00<4:13:08, 33.58it/s]global step 490000, trans_decision ep_re 1692.2017450496587

{"global_step": 490000, "eval_re": [1508.2474153001233, 1186.6685238912742, 
1180.5814378969635, 2093.3034567408354, 2273.949189900558, 1383.8262810146146, 
2696.152918923147, 1254.5722142259588, 1212.7406354911618, 2131.97537711195], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [5:46:50<4:10:42, 33.24it/s]global step 500000, trans_decision ep_re 1794.6054303336052

{"global_step": 500000, "eval_re": [2333.1627428549145, 1365.6708763548402, 
1084.5560035884766, 1827.6988324801123, 1827.9454882025254, 2049.2227304878234, 
2017.1906525446186, 2042.5740982537063, 1908.300055313562, 1489.7328232554728], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [5:53:50<4:05:24, 33.28it/s]global step 510000, trans_decision ep_re 1768.4258195705097

{"global_step": 510000, "eval_re": [2018.9435845828484, 1445.4697480692798, 
1321.5508856327085, 2324.572462727004, 1272.3542049465138, 1458.6235430710205, 
2104.3640986423125, 1883.9811056018923, 1828.0635655416102, 2026.334996889909], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [6:00:50<3:58:28, 33.55it/s]global step 520000, trans_decision ep_re 1828.9530671377536

{"global_step": 520000, "eval_re": [1199.6993220846734, 2405.554017641791, 
2105.951075685354, 1630.3437840416577, 2028.9063760208035, 1224.493901826814, 
2086.367163786634, 1873.280766773108, 1342.0347415534034, 2392.899521963297], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [6:07:40<3:56:07, 33.17it/s]global step 530000, trans_decision ep_re 1876.7236501163243

{"global_step": 530000, "eval_re": [1558.6635817090576, 2242.5046725204716, 
2540.527001835391, 1824.940874041114, 2450.148368134855, 1232.1545002923249, 
1933.0562819433014, 1287.6907041572993, 2111.0382227524356, 1586.5122937769902],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [6:14:40<3:51:19, 33.14it/s]global step 540000, trans_decision ep_re 1633.9167132287887

{"global_step": 540000, "eval_re": [1470.155111545225, 1861.7917235070508, 
2588.045377242481, 1331.8089290890287, 2009.920942013383, 1341.284296739369, 
1443.6501611927613, 1219.9311924617768, 1615.8877252713858, 1456.6916732254247],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [6:21:40<3:45:37, 33.24it/s]global step 550000, trans_decision ep_re 1910.8979670177719

{"global_step": 550000, "eval_re": [1371.3091127801122, 2532.046164406448, 
1365.8577095139906, 2809.042184245987, 1244.5176430662468, 1715.4102433032251, 
1945.6501776971768, 1790.8932730395936, 2576.4012063975715, 1757.8519557273692],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [6:28:40<3:39:44, 33.37it/s]global step 560000, trans_decision ep_re 1817.0092660014227

{"global_step": 560000, "eval_re": [1327.518572778297, 1601.762806676019, 
1776.3733153590779, 2156.4015588374905, 2487.0891189591302, 1409.3499912662014, 
1983.3490195010406, 1773.1315076918022, 2332.732600535452, 1322.3841684097176], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569999/1000000 [6:35:30<3:33:34, 33.56it/s]global step 570000, trans_decision ep_re 1611.6438079683444

{"global_step": 570000, "eval_re": [1271.9097766584603, 1269.624643500415, 
1500.4725106443332, 1882.6789016248645, 1756.2675024770542, 1512.3090271986605, 
1966.5819275154145, 1986.5037894942222, 1710.9110194671644, 1259.1789811028534],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [6:42:30<3:30:26, 33.26it/s]global step 580000, trans_decision ep_re 1990.20898958613

{"global_step": 580000, "eval_re": [1418.0254335857182, 2056.306225264273, 
2228.3097573830823, 2354.1583376469175, 2229.5031493594915, 2098.931170802785, 
1891.3300503590447, 2256.6307202194, 1567.3320062835803, 1801.5630449570097], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [6:49:30<3:25:09, 33.31it/s]global step 590000, trans_decision ep_re 1716.8218052510292

{"global_step": 590000, "eval_re": [1828.7652026701126, 1694.876640238019, 
1543.8096550659636, 1667.7354900149783, 1600.442074563583, 2563.622785023107, 
1356.3541343790619, 1279.4308996502712, 2501.0174405212033, 1132.163730383993], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [6:56:20<3:18:30, 33.58it/s]global step 600000, trans_decision ep_re 1861.5232173073564

{"global_step": 600000, "eval_re": [1716.0477284992076, 1861.5986282263036, 
1622.7052929134545, 1933.2091692551194, 1555.3550731304958, 2337.2762940135044, 
2227.172430520331, 1784.5011235469856, 1984.2990145146518, 1593.067418453511], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [7:03:20<3:13:33, 33.58it/s]global step 610000, trans_decision ep_re 1825.8370612150034

{"global_step": 610000, "eval_re": [2185.611352288167, 1581.6995447236343, 
1865.4857393089815, 1195.0671912701348, 1932.8755758659545, 1872.2171873576028, 
1235.929089751723, 2406.7819888047134, 2077.951523125679, 1904.7514196534435], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [7:10:20<3:09:27, 33.43it/s]global step 620000, trans_decision ep_re 1723.2123276599134

{"global_step": 620000, "eval_re": [1724.2767410506863, 1315.097388639486, 
2313.914167871115, 1498.32871294246, 1909.059594778357, 1565.736691197414, 
1335.8408455716328, 2134.536649027746, 2122.8299016562314, 1312.5025838640072], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [7:17:10<3:05:27, 33.25it/s]global step 630000, trans_decision ep_re 1887.262176428907

{"global_step": 630000, "eval_re": [1688.8152626982849, 1752.395203267375, 
1652.7256078352857, 1310.017204431781, 2116.881041218525, 2118.461860537628, 
1505.4871492440582, 2270.672951934555, 2142.636637713731, 2314.528845407846], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [7:24:10<2:59:02, 33.51it/s]global step 640000, trans_decision ep_re 2077.548697275138

{"global_step": 640000, "eval_re": [1373.763223084646, 2471.905864777727, 
2458.4997618247967, 2310.4822079740516, 1328.4255655809036, 1928.5625368252256, 
1914.5530939224154, 2579.13265087383, 1985.2975606952987, 2424.864507192487], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [7:31:00<2:55:43, 33.19it/s]global step 650000, trans_decision ep_re 1644.8339531562883

{"global_step": 650000, "eval_re": [1585.1900523167972, 2361.0560542730545, 
1293.7774972093548, 1432.256004716969, 1580.6265653578855, 1896.280712130987, 
1622.350511848423, 1640.9553868372893, 1609.8307077842778, 1426.0160390878475], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [7:38:00<2:52:01, 32.94it/s]global step 660000, trans_decision ep_re 2003.9773117866614

{"global_step": 660000, "eval_re": [1930.1344878844413, 1417.0658372501582, 
2341.6728536043092, 2040.5467965396422, 2192.9429234798918, 2442.7876864007235, 
1871.9123143597021, 2369.0654702431816, 2009.6916508656443, 1423.9530972389207],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [7:45:00<2:43:57, 33.55it/s]global step 670000, trans_decision ep_re 1966.3566983080232

{"global_step": 670000, "eval_re": [1889.4229298452913, 1624.1775722098096, 
2180.4981388674814, 1363.2516174598652, 1684.8464913525904, 2707.7874726792706, 
2267.1861920081187, 2073.734070071301, 1311.2593141599605, 2561.4031844265405], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [7:51:50<2:38:54, 33.56it/s]global step 680000, trans_decision ep_re 2227.9630189088884

{"global_step": 680000, "eval_re": [2347.2371353544104, 2426.468021008757, 
2023.869476651336, 2669.4714869907434, 2690.577692514351, 2567.842817304594, 
1486.5152070070421, 2109.2856389911217, 1452.9989730887016, 2505.3637401778255],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [7:58:50<2:35:29, 33.23it/s]global step 690000, trans_decision ep_re 1851.23968314206

{"global_step": 690000, "eval_re": [1997.2888855476413, 2417.595703901381, 
1617.7963440374508, 2038.8104195769204, 1980.9703735493124, 1831.2392328886108, 
2313.336895498085, 1277.8961827542132, 1526.869425905549, 1510.5933677614387], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [8:05:40<2:31:52, 32.92it/s]global step 700000, trans_decision ep_re 1895.448664350915

{"global_step": 700000, "eval_re": [2308.8172598556553, 1445.950243106699, 
1844.234356196961, 1692.2014231092555, 2378.239593073243, 1262.904394010833, 
2322.5279728745427, 1805.1253804759456, 1483.2590974263778, 2411.2269233796387],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709997/1000000 [8:12:40<2:28:05, 32.64it/s]global step 710000, trans_decision ep_re 1970.9381209929675

{"global_step": 710000, "eval_re": [1894.55633753952, 2030.8586570267803, 
2106.150074379528, 2420.2644968598343, 2002.6123758195317, 1704.8951250250902, 
1313.075058214384, 2002.8698026529764, 2338.8088912881517, 1895.2903911238802], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719997/1000000 [8:19:50<2:21:55, 32.88it/s]global step 720000, trans_decision ep_re 1873.444485054873

{"global_step": 720000, "eval_re": [1911.3557935312929, 2367.1251042824592, 
1728.5071852830956, 1818.6704617400364, 1773.2394800053405, 1434.9395563010962, 
2164.7939090826135, 1380.927314983753, 2333.653751995976, 1821.2322933430708], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [8:26:50<2:16:30, 32.97it/s]global step 730000, trans_decision ep_re 1769.2207437448037

{"global_step": 730000, "eval_re": [1563.2085590020156, 1818.5745664493345, 
1942.7976223963087, 1872.7988970269457, 1533.9449456062953, 2114.263061662851, 
1247.5627670493138, 2206.110516770625, 2057.977613016544, 1334.968888467802], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [8:33:50<2:11:29, 32.96it/s]global step 740000, trans_decision ep_re 2119.232353171618

{"global_step": 740000, "eval_re": [2301.8578378945977, 1948.2987155090034, 
2309.1249813498125, 1445.8528223040098, 2110.6086419239355, 2092.9146531237398, 
2202.8717774745805, 2140.003502438732, 2367.185682922268, 2273.6049167755023], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749997/1000000 [8:40:50<2:06:51, 32.85it/s]global step 750000, trans_decision ep_re 1900.6660273783855

{"global_step": 750000, "eval_re": [2332.0788155192595, 1343.5863894573029, 
1283.1124803217078, 2797.512622596838, 1351.6910668820371, 1324.5942888522352, 
2538.3833417952847, 2302.9688093010755, 2365.223011234244, 1367.5094478238675], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759997/1000000 [8:47:50<2:01:32, 32.91it/s]global step 760000, trans_decision ep_re 1682.6274226375801

{"global_step": 760000, "eval_re": [1371.6156231174903, 1626.11824868923, 
1783.2794250007703, 1940.011605819315, 1814.9732257499634, 2117.1463761926984, 
1738.8351371970716, 2131.638817987155, 2348.837103529152, -46.18133690704395], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [8:54:50<1:56:34, 32.88it/s]global step 770000, trans_decision ep_re 1832.8310373745342

{"global_step": 770000, "eval_re": [1761.8864279409663, 1896.7403439970788, 
2005.3520118852305, 1971.2977535793339, 1492.4046912591666, 1868.2047735648023, 
2014.1609116995305, 1614.356505346115, 1783.2027378325383, 1920.7042166405822], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [9:02:00<1:51:01, 33.03it/s]global step 780000, trans_decision ep_re 1901.6539708310852

{"global_step": 780000, "eval_re": [2060.913303109928, 1592.5229611581374, 
2065.420707000492, 2007.4042278006107, 1397.8723881190667, 1795.0888333926528, 
1983.7069222499588, 2322.82321757823, 1490.8348875131483, 2299.9522603886257], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [9:09:00<1:46:28, 32.87it/s]global step 790000, trans_decision ep_re 1910.0443218146181

{"global_step": 790000, "eval_re": [1933.7017437457644, 2027.3223756530806, 
1765.4580388741624, 2102.264391144764, 1915.9498993787768, 1932.712980011673, 
1659.0633633947102, 1922.9641573795197, 1823.4918703786868, 2017.514398185047], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [9:16:00<1:41:22, 32.88it/s]global step 800000, trans_decision ep_re 1759.972343747652

{"global_step": 800000, "eval_re": [1739.8216818885578, 1816.6676005925372, 
1865.1329593876187, 1407.1945130468182, 2145.2724813794603, 1635.7539503731398, 
2121.091551789715, 1399.3976115193489, 1446.0381513570458, 2023.3529361422798], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [9:23:00<1:36:04, 32.96it/s]global step 810000, trans_decision ep_re 1803.2409032336207

{"global_step": 810000, "eval_re": [1779.2192108742188, 1967.70696174986, 
1991.252765359776, 1570.3540415379669, 2145.2881751740783, 1902.9718509922807, 
1839.6544087293082, 1682.2172099366123, 1340.737363214131, 1813.0070447679732], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819997/1000000 [9:30:00<1:30:52, 33.01it/s]global step 820000, trans_decision ep_re 1674.4126225787095

{"global_step": 820000, "eval_re": [1397.849471584849, 1327.5076131616413, 
1846.789697996379, 1859.9267025630934, 1261.9268743334028, 2134.0836093088615, 
1889.719742816747, 1771.577938537912, 2074.7701862481404, 1179.9743892360677], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829997/1000000 [9:37:00<1:25:58, 32.96it/s]global step 830000, trans_decision ep_re 1951.5888432039199

{"global_step": 830000, "eval_re": [1867.9711566241301, 1662.8007986597263, 
1821.1439501835553, 2213.3503678163597, 2100.0553004550543, 2270.6180480624766, 
1911.337127397949, 1861.0861537816456, 1872.2311637897824, 1935.2943652685208], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839997/1000000 [9:44:10<1:21:16, 32.81it/s]global step 840000, trans_decision ep_re 1866.8986459838811

{"global_step": 840000, "eval_re": [1905.5447999681317, 1999.352183626731, 
2236.464983151311, 1960.3774132095045, 1403.0161359554033, 1605.4211756091156, 
1767.8021651160427, 1845.793614429691, 2098.8617679529048, 1846.352220819975], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849997/1000000 [9:51:10<1:16:02, 32.88it/s]global step 850000, trans_decision ep_re 1871.907239992819

{"global_step": 850000, "eval_re": [2117.9728230066835, 1820.4319151108964, 
1890.971745126286, 2041.1220688865758, 1251.2729685391587, 1854.123753631508, 
1980.8118448537477, 1804.1645023057536, 1884.1427571232653, 2074.0580213443154],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [9:58:10<1:10:44, 32.98it/s]global step 860000, trans_decision ep_re 1789.8466021676777

{"global_step": 860000, "eval_re": [1547.6523954402876, 1760.3053299936016, 
1999.2062200959972, 2125.384005129618, 1456.1080527388485, 1952.2717309956583, 
1605.2661543498912, 1984.9375273065457, 1714.379656551833, 1752.9549490744955], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869997/1000000 [10:05:20<1:05:51, 32.90it/s]global step 870000, trans_decision ep_re 1996.4584787935848

{"global_step": 870000, "eval_re": [1984.6180828753322, 2284.049411304212, 
1783.245533674059, 1677.9822568007364, 2242.0955627224525, 2111.4550132717463, 
1306.709220327047, 2121.479848613743, 2279.1362445330883, 2173.8136138134314], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879999/1000000 [10:12:20<1:00:56, 32.82it/s]global step 880000, trans_decision ep_re 1907.3123643569575

{"global_step": 880000, "eval_re": [1819.3707742409024, 1667.1333960503234, 
2148.336665575499, 1652.9208366190348, 1915.7921526989683, 2144.8322239308604, 
1883.6245882977055, 1887.3360860205787, 1859.7006539523468, 2094.076266183359], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889997/1000000 [10:19:20<55:49, 32.84it/s]global step 890000, trans_decision ep_re 1777.3546959487187

{"global_step": 890000, "eval_re": [1690.474198316222, 1743.1138387917442, 
1969.1042239692772, 1814.8314851997131, 1536.3208751639513, 1764.081930150027, 
1699.8714784336855, 1940.9409456283727, 1730.8720095788615, 1883.9359742553322],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [10:26:20<50:39, 32.90it/s]global step 900000, trans_decision ep_re 1930.7231179861744

{"global_step": 900000, "eval_re": [2299.853625588833, 2075.5923341241014, 
2112.7319706455096, 1570.7782539468183, 1865.5254820549105, 2251.6686371583028, 
1590.8623654072712, 1435.24168724081, 2072.2817080436375, 2032.6951156515483], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [10:33:30<45:42, 32.82it/s]global step 910000, trans_decision ep_re 1688.2010002819795

{"global_step": 910000, "eval_re": [1801.2507109130743, 1197.3528775125405, 
1878.6043541034207, 1568.952859454444, 1635.3883907683728, 1772.6580883615209, 
1815.9090187642664, 2206.0268921303036, 1343.6817799411822, 1662.185030870669], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [10:40:30<40:39, 32.79it/s]global step 920000, trans_decision ep_re 1746.7621065048595

{"global_step": 920000, "eval_re": [1809.7859264382384, 1489.3395464968937, 
1624.6499158875406, 2032.3096225267025, 1220.0196600880215, 1828.590647013739, 
2068.003650064024, 2027.928716375389, 1554.0731877815183, 1812.9201923765268], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929997/1000000 [10:47:30<35:28, 32.88it/s]global step 930000, trans_decision ep_re 1983.1808365909608

{"global_step": 930000, "eval_re": [2036.646191783533, 1561.7171522056708, 
2019.996688715293, 1878.7801486092426, 2462.779451409509, 2381.3514356179635, 
1855.240994338846, 2137.773987212187, 1482.3853871267972, 2015.1369288905678], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939997/1000000 [10:54:40<30:20, 32.96it/s]global step 940000, trans_decision ep_re 2010.6917848234193

{"global_step": 940000, "eval_re": [1592.1132853626411, 1974.3214358639023, 
2242.1307031171555, 2210.274279618806, 1985.927231332545, 2016.9862995145506, 
1933.2243322752136, 2182.829568935056, 1639.153381060332, 2329.957331153989], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949997/1000000 [11:01:40<25:21, 32.86it/s]global step 950000, trans_decision ep_re 1812.9738486216334

{"global_step": 950000, "eval_re": [1930.3821402474318, 1903.5787320609559, 
1477.6963309756627, 1883.5465802694362, 1865.241083334246, 1925.568283815285, 
1911.1348515372335, 1679.8599740600155, 1850.5472437989586, 1702.183266117109], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [11:08:50<20:16, 32.87it/s]global step 960000, trans_decision ep_re 1883.5380493492878

{"global_step": 960000, "eval_re": [1371.4237953677334, 1960.8591462951815, 
2300.653311828131, 1964.5320822595909, 1511.2390779488646, 2025.3728221607944, 
1688.6218700938302, 1821.2822060930039, 1940.3977450263008, 2250.9984364194465],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969997/1000000 [11:15:50<15:08, 33.02it/s]global step 970000, trans_decision ep_re 2036.5742125654263

{"global_step": 970000, "eval_re": [1436.172935948281, 1996.5455512795768, 
2686.5229881576756, 2336.224483692123, 1921.736563915908, 2035.6582331615643, 
1595.1325568165316, 1921.9386960796237, 1930.0166241392806, 2505.793492463696], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [11:22:50<10:08, 32.90it/s]global step 980000, trans_decision ep_re 1784.7036931692808

{"global_step": 980000, "eval_re": [1528.1249874056355, 1919.458448975538, 
1894.3183314570688, 2027.53205585397, 1982.9814980439621, 1552.930100037015, 
1499.3416078271923, 1424.3989931911856, 1852.0879801756287, 2165.8629287256117],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989998/1000000 [11:29:50<05:04, 32.82it/s]global step 990000, trans_decision ep_re 1910.6590522096146

{"global_step": 990000, "eval_re": [2178.0320409825185, 2028.472251297073, 
1929.6270758394526, 1978.368826145218, 2055.137066626747, 1722.0139327829834, 
1832.2184794927978, 1803.2009939135094, 2096.218826787968, 1483.3010282278826], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [11:36:50<00:00, 32.98it/s]global step 1000000, trans_decision ep_re 1550.8939250877722

{"global_step": 1000000, "eval_re": [1402.9636887857246, 1567.6952642527776, 
1348.0492355615045, 1421.1691723940282, 1703.9817279491253, 1961.6608196632972, 
1521.6719085710677, 1421.2631435616593, 1738.3100521736253, 1422.1742379649127],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [11:37:17<00:00, 23.90it/s]
