
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.05
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9996/1000000 [03:40<8:32:30, 32.19it/s]global step 10000, trans_decision ep_re 736.8189722526855

{"global_step": 10000, "eval_re": [1407.8294021602107, 770.549566917508, 
943.4629861360407, -133.43173379629036, 1522.135020145311, 248.5679577512325, 
-157.85686656537106, 1128.006483341956, 0.01415718566552937, 
1638.9127492505909], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [10:50<8:19:09, 32.72it/s]global step 20000, trans_decision ep_re 511.3595270096718

{"global_step": 20000, "eval_re": [1025.3381574413079, 750.2317263884692, 
171.41640406046457, 900.688792266864, -617.3131858404689, 726.7459751244265, 
154.16387369777286, 1118.426868704575, -332.41222366684065, 1216.3088819201475],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29997/1000000 [18:00<8:21:14, 32.25it/s]global step 30000, trans_decision ep_re 1700.1758447434574

{"global_step": 30000, "eval_re": [1588.8612639319167, 1731.5671279777189, 
1901.7319626855165, 1235.7443113161462, 1970.7431277984144, 1935.4639784275635, 
1919.760923761596, 1005.5900640674977, 1985.964896430884, 1726.330791037324], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [25:00<8:05:03, 32.99it/s]global step 40000, trans_decision ep_re 1370.138911688194

{"global_step": 40000, "eval_re": [995.6409473063457, 1219.6276593312862, 
2230.24198949212, 1621.501889901444, 993.0857463827274, 985.2208550503, 
1069.676926098492, 1338.158163817915, 2182.8311283585977, 1065.4038111427133], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49999/1000000 [32:10<8:08:36, 32.40it/s]global step 50000, trans_decision ep_re 1735.8953077107622

{"global_step": 50000, "eval_re": [1175.5457660464347, 1107.3026871701736, 
1793.3675781964, 2685.581134236543, 1347.2382636595405, 1160.780851312157, 
2770.5335247353196, 1082.8441549953573, 1192.590980248164, 3043.168136507531], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [39:20<8:02:40, 32.46it/s]global step 60000, trans_decision ep_re 2449.393695189111

{"global_step": 60000, "eval_re": [2155.77870378136, 1296.731019210368, 
3359.3199382569996, 2186.924573669863, 1990.412449014434, 3989.6768877430727, 
2175.698337111349, 4255.210156856543, 1572.3453970488324, 1511.8394891982875], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69997/1000000 [46:20<7:56:17, 32.54it/s]global step 70000, trans_decision ep_re 1997.146868499764

{"global_step": 70000, "eval_re": [1827.6222433636428, 1396.6152743737612, 
2401.4425346117937, 4057.8677156901313, 1472.3444224990249, 1277.3622870736203, 
1161.375926914108, 1153.6992942958032, 2372.551056239619, 2850.5879299361345], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79997/1000000 [53:30<7:49:40, 32.65it/s]global step 80000, trans_decision ep_re 2510.0594860647425

{"global_step": 80000, "eval_re": [1613.23450813553, 1743.1858504133213, 
3093.4497771438987, 3334.6518552265256, 3105.801523901696, 3521.176028060888, 
2965.852190833722, 1223.1707241498352, 3313.386741554302, 1186.6856612277081], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89997/1000000 [1:00:40<7:46:37, 32.50it/s]global step 90000, trans_decision ep_re 2224.5093243419424

{"global_step": 90000, "eval_re": [4098.1946146173395, 1162.7108349786527, 
1154.84781378814, 1169.0272876552697, 1374.5809152539148, 1292.226483376377, 
1414.069430953573, 3040.4103521308257, 3621.1530889424307, 3917.8724217228983], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99997/1000000 [1:07:40<7:37:27, 32.79it/s]global step 100000, trans_decision ep_re 2000.6219777828833

{"global_step": 100000, "eval_re": [1811.0033620105535, 991.8310033290089, 
1555.9284195228947, 2207.7882360819503, 1007.1234255357559, 3412.3209773849526, 
322.36575557953495, 1914.5951965296583, 4491.331732033819, 2291.931669820704], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109997/1000000 [1:14:50<7:38:24, 32.36it/s]global step 110000, trans_decision ep_re 1879.1082421950355

{"global_step": 110000, "eval_re": [2646.092012812072, 1125.082465323243, 
2128.8506048896106, 1275.8022840647552, 2811.0093272212634, 1541.4940828768404, 
1403.7065933517592, 1076.1562098600336, 3006.193759373421, 1776.695082177358], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [1:22:00<7:32:42, 32.40it/s]global step 120000, trans_decision ep_re 2817.3985692041633

{"global_step": 120000, "eval_re": [3993.3722321439773, 2912.289762069915, 
3136.2996008429486, 1139.101330836734, 3377.141341162422, 2078.4089779549313, 
2748.848688794025, 3068.9947937573593, 1968.973679878591, 3750.5552846007276], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:29:00<7:21:57, 32.81it/s]global step 130000, trans_decision ep_re 1366.469860653097

{"global_step": 130000, "eval_re": [1058.5410568338225, 1317.6363635612445, 
1567.7428503522817, 374.6191884591677, 1134.4295891999172, 1077.8711188002678, 
1793.4222876560657, 2545.9472097362054, 1672.4456436557653, 1122.0432982762318],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:36:10<7:17:19, 32.78it/s]global step 140000, trans_decision ep_re 2289.2466414815235

{"global_step": 140000, "eval_re": [4104.270024388793, 2004.6203591142532, 
224.60730454591567, 2799.876826139126, 1483.8585550390358, 3257.519025682671, 
987.7225728651423, 2707.4291894764033, 1453.6126663690693, 3868.949891194825], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149997/1000000 [1:43:20<7:17:08, 32.41it/s]global step 150000, trans_decision ep_re 1335.9454294521472

{"global_step": 150000, "eval_re": [1216.3293632787593, 1681.2693843999598, 
1408.759902590135, 1361.0487946952476, 1549.4632180963447, 1068.6391191580917, 
440.7896935374215, 1277.6848940138218, 1575.0336107632777, 1780.4363139884156], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:50:20<7:07:23, 32.76it/s]global step 160000, trans_decision ep_re 1993.853876896605

{"global_step": 160000, "eval_re": [1775.1342904867274, 3116.3615013830936, 
166.4917027134547, 1268.596297376522, 2247.8606432397914, 1347.8143689559977, 
1087.153902562166, 1478.2558613883723, 4185.508704309924, 3265.3614965500014], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [1:57:30<7:01:50, 32.79it/s]global step 170000, trans_decision ep_re 2473.487295772443

{"global_step": 170000, "eval_re": [1170.1476274399395, 1775.2889868309394, 
3488.712178014638, 3367.606854637061, 4654.171601470813, 2138.7122928616477, 
2775.5729141574084, 3084.078763544702, 1173.5610385245266, 1107.02070024275], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:04:30<6:54:59, 32.93it/s]global step 180000, trans_decision ep_re 2958.4218877815247

{"global_step": 180000, "eval_re": [4230.93531467776, 2792.3422059678473, 
4507.094110565921, 3753.036595927836, 935.950142985093, 3548.5298072677724, 
3243.7689294027577, 1208.9390416219683, 2245.5204689365, 3118.1022604617897], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:11:40<6:49:29, 32.97it/s]global step 190000, trans_decision ep_re 2491.070738713376

{"global_step": 190000, "eval_re": [2469.3422352680777, 3810.1045635390014, 
4129.761318273611, 2886.1462273489574, 1199.0919925505682, 4375.0013995690515, 
1677.7078487996368, 2010.639038658292, 1206.6177129283049, 1146.2950501982575], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [2:18:40<6:44:08, 32.99it/s]global step 200000, trans_decision ep_re 1730.791759935785

{"global_step": 200000, "eval_re": [2102.8309093886746, 2254.4359814538943, 
2213.762454484103, 19.111370167082782, 1202.0710590829553, 1138.0611405440834, 
2286.787150398015, 1282.5220909766417, 1207.4055436222447, 3600.929899240154], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209997/1000000 [2:25:40<6:39:58, 32.92it/s]global step 210000, trans_decision ep_re 2040.1825332833184

{"global_step": 210000, "eval_re": [1245.804814693272, 3191.0497581299264, 
1654.6663972043223, 2249.7425266751557, 2272.31555839429, 1489.8961951330568, 
2273.6078194226316, 1309.9833942537462, 1814.4884463223968, 2900.270422604388], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [2:32:50<6:35:07, 32.90it/s]global step 220000, trans_decision ep_re 1804.7536388675012

{"global_step": 220000, "eval_re": [3009.01455982501, 1999.9747994004686, 
1356.5685041783508, 1466.910048657762, 2986.3733276146886, 2074.195652407144, 
-9.754926160322977, 2465.4748693290812, 1280.6716676680226, 1418.1078857548089],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [2:39:50<6:28:33, 33.03it/s]global step 230000, trans_decision ep_re 2642.8464725647877

{"global_step": 230000, "eval_re": [1900.7064614100864, 3955.4593659631305, 
4457.757302299884, 1543.3452609690914, 1562.4989298085684, 2240.4649045841106, 
3798.8714455560516, 1613.3586817076932, 3034.7200350697035, 2321.2823382795623],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [2:46:50<6:23:56, 32.99it/s]global step 240000, trans_decision ep_re 1770.3379664138556

{"global_step": 240000, "eval_re": [510.6336584303237, 3194.777575128115, 
2152.1171672213363, 46.11377954903417, 2681.0555567023384, 1596.068144856983, 
2583.6195329503635, 1576.3843982095411, 1875.0073733619372, 1487.6024777285836],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [2:54:00<6:20:26, 32.86it/s]global step 250000, trans_decision ep_re 1464.6854857289177

{"global_step": 250000, "eval_re": [2134.2642449506675, 1407.6834259846146, 
1357.8380425697371, 1480.312038818338, 1296.507861974319, 1239.4303744432457, 
1444.0027784743932, 1344.6712262133012, 1135.4221011925174, 1806.722762668044], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [3:01:00<6:14:36, 32.92it/s]global step 260000, trans_decision ep_re 2050.477712250242

{"global_step": 260000, "eval_re": [1227.6013988989073, 1647.8416944398996, 
2124.926426553462, 1803.2986733282162, 2659.221355062205, 1320.5882817501313, 
1403.3272153065157, 1600.7686186506928, 3137.797830744767, 3579.405627767622], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269998/1000000 [3:08:00<6:10:33, 32.83it/s]global step 270000, trans_decision ep_re 1032.390106881582

{"global_step": 270000, "eval_re": [65.08101990778759, 1254.5008473401292, 
1366.0454936346237, 61.44954196474158, 1579.4904377139258, 2012.2816145145623, 
48.45367320811773, 3565.0812639820374, -384.17499747931265, 755.6921740292076], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279997/1000000 [3:15:10<6:03:11, 33.04it/s]global step 280000, trans_decision ep_re 1734.5350491014033

{"global_step": 280000, "eval_re": [1190.5688897162788, 1943.9493456819914, 
1932.155962700547, 1304.3019555144044, 1625.8576291151314, 2565.818536201084, 
1250.8157796282046, 2588.0827894080207, 1312.0345338569264, 1631.7650691914425],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [3:22:10<5:58:28, 33.01it/s]global step 290000, trans_decision ep_re 2013.5291704441868

{"global_step": 290000, "eval_re": [2958.535593317819, 1476.1208938606626, 
3912.537130971438, 1205.822770676559, 1316.3427613015199, 1371.8767590110047, 
2447.1721889501796, 1210.0273291569893, 1900.429266640596, 2336.4270105550995], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299997/1000000 [3:29:10<5:52:59, 33.05it/s]global step 300000, trans_decision ep_re 1951.2260159465172

{"global_step": 300000, "eval_re": [1560.7796070997856, 1803.8900832703778, 
1617.5720076147827, 1678.8819233194697, 2641.6225913137323, 1741.9604424678291, 
1866.2539398886788, 2016.813204717378, 2081.135892363176, 2503.3504674099618], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [3:36:20<5:49:19, 32.92it/s]global step 310000, trans_decision ep_re 1930.179177901068

{"global_step": 310000, "eval_re": [1905.05129127163, 2213.21941289255, 
3447.4680754434717, 1306.7487784279033, 1514.1281586660293, 1331.491316692667, 
1298.8491409675016, 1631.8520742209962, 1458.1786754136651, 3194.804855014268], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319997/1000000 [3:43:20<5:43:11, 33.02it/s]global step 320000, trans_decision ep_re 2264.378727877137

{"global_step": 320000, "eval_re": [1207.3489239901392, 4419.454996356536, 
3126.293344199848, 1508.8827071577612, 1287.4598610679395, 3387.0367071931505, 
1549.4312484853153, 2221.9126328639018, 3647.2030610172073, 288.76379643957415],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [3:50:20<5:39:13, 32.92it/s]global step 330000, trans_decision ep_re 2231.442574942329

{"global_step": 330000, "eval_re": [3366.9652741473365, 1594.060607014732, 
1394.910472063359, 2760.991006623521, 4328.423930159244, 1303.3987018712783, 
1266.258477425683, 1997.2991660954053, 1799.6229316522827, 2502.4951823704423], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339997/1000000 [3:57:20<5:33:04, 33.03it/s]global step 340000, trans_decision ep_re 1657.3138171072999

{"global_step": 340000, "eval_re": [1275.7730149815986, 1778.010930533588, 
4080.6200184783233, 1070.6497614229422, 642.5061289862523, 1927.5154370124073, 
1373.1066096811924, 2762.1635155787917, 475.36692996997425, 1187.4258244279263],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [4:04:20<5:29:38, 32.86it/s]global step 350000, trans_decision ep_re 1957.5118367753025

{"global_step": 350000, "eval_re": [1369.8233295628581, 1245.836560193348, 
2236.2826314055246, 1621.694432088684, 1368.543255822798, 1791.3230132495298, 
4000.220201142139, 2887.3840723896787, 1735.3252188639176, 1318.6856530345472], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [4:11:20<5:24:00, 32.92it/s]global step 360000, trans_decision ep_re 2575.375634207001

{"global_step": 360000, "eval_re": [3406.341909603399, 1460.8855691716512, 
1214.8744827419696, 1926.4396322943496, 3640.8040170027975, 3464.317049333191, 
3966.811493771558, 3448.125582994685, 1500.660437281953, 1724.496167874456], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [4:18:20<5:19:18, 32.88it/s]global step 370000, trans_decision ep_re 2944.016866016109

{"global_step": 370000, "eval_re": [3511.611051590941, 1365.1978489283526, 
1695.4013648780422, 1571.9756355170953, 3539.6445995252902, 4270.556173587017, 
4698.511842580756, 2824.353964917061, 4706.7790851957425, 1256.137093440791], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [4:25:20<5:12:46, 33.04it/s]global step 380000, trans_decision ep_re 2388.6772582775106

{"global_step": 380000, "eval_re": [1800.3167020638966, 1467.3904031246025, 
1804.4560604184717, 1762.141548876994, 3827.5929550292876, 3396.6577948428644, 
3441.263310813883, 1598.544316884543, 3040.015661825695, 1748.3938288948677], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389999/1000000 [4:32:20<5:07:15, 33.09it/s]global step 390000, trans_decision ep_re 1921.241099815139

{"global_step": 390000, "eval_re": [1585.4957931644974, 1429.7838611454215, 
1783.257637645784, 1635.7852811461253, 1992.994971173705, 3530.1821762099803, 
1242.9143474941345, 1645.3348967527857, 1803.4043760074144, 2563.2576574115465],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [4:39:20<5:02:59, 33.00it/s]global step 400000, trans_decision ep_re 2242.628900927243

{"global_step": 400000, "eval_re": [3659.469664542552, 2171.89036854856, 
1904.6626646417835, 2981.703309124826, 1312.1881185545205, 1608.7458695163684, 
2945.552834938069, 2433.6507389135, 1262.3060064275114, 2146.1194340647426], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [4:46:20<4:58:20, 32.96it/s]global step 410000, trans_decision ep_re 1841.5082796206339

{"global_step": 410000, "eval_re": [1864.3046773432359, 1533.7619787742865, 
3550.7749570487385, 1684.3722449053064, 1905.1468144390578, 1610.7164560749227, 
1596.1683861372917, 1363.3005647147788, 1990.8865870753018, 1315.6501296934164],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [4:53:20<4:50:10, 33.31it/s]global step 420000, trans_decision ep_re 1443.2140993508913

{"global_step": 420000, "eval_re": [1427.7513603363925, 1483.966216060209, 
1386.5545941602804, 606.3727919527777, 1245.265077541676, 1531.24700852991, 
2558.8137274657297, 1265.7849285867035, 1305.3644345976024, 1621.0208542776313],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [5:00:20<4:48:42, 32.90it/s]global step 430000, trans_decision ep_re 1889.7922957211704

{"global_step": 430000, "eval_re": [2269.820651724664, 1826.1157780896249, 
1587.6615698122207, 1721.478006573889, 1826.45653152386, 1623.771766771011, 
1589.524366649077, 3635.524553897079, 1254.1393244080275, 1563.4304077622523], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439997/1000000 [5:07:20<4:40:54, 33.23it/s]global step 440000, trans_decision ep_re 2012.8778550081195

{"global_step": 440000, "eval_re": [1787.4023487652141, 1606.2053131225998, 
2699.610225434142, 2480.487023343239, 2858.3393062040154, 3062.9206279144696, 
1403.1660431748992, 1404.09634946637, 1442.8282150270254, 1383.723097629219], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [5:14:20<4:34:29, 33.39it/s]global step 450000, trans_decision ep_re 2633.938980399779

{"global_step": 450000, "eval_re": [3756.756664572288, 2292.7008379227173, 
1551.6468866818248, 2643.2579303914754, 3939.8584126357805, 2731.6281920710494, 
2366.434448929642, 2383.460812952648, 2953.7386755548055, 1719.9069422855614], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [5:21:20<4:33:37, 32.89it/s]global step 460000, trans_decision ep_re 1475.4283908148095

{"global_step": 460000, "eval_re": [447.7863982276367, 1444.2763426975878, 
1560.1614237958156, 2193.513840407751, 1708.1217975051661, 1539.3329160745643, 
1329.9929888832771, 1441.6283139448544, 1739.9516640094446, 1349.5182226019979],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [5:28:20<4:28:20, 32.92it/s]global step 470000, trans_decision ep_re 2541.5796407881035

{"global_step": 470000, "eval_re": [2889.126828050549, 2136.923789871161, 
2148.6465465608294, 3896.7682341361638, 1975.1848631725081, 3267.5467550231474, 
4322.889718041483, 1701.5727702424604, 2846.4698546594655, 230.66704812326597], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479997/1000000 [5:35:20<4:22:46, 32.98it/s]global step 480000, trans_decision ep_re 1848.3138538494175

{"global_step": 480000, "eval_re": [2204.1299736948836, 1208.6059445369874, 
1760.068143329958, 1500.738913305261, 2934.596300748843, 1981.733116269303, 
309.7891089081074, 1822.4668366086112, 2535.148594518814, 2225.8616065734054], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [5:42:20<4:18:21, 32.90it/s]global step 490000, trans_decision ep_re 2178.4429057741563

{"global_step": 490000, "eval_re": [1681.2420785887648, 2639.153320695603, 
2212.8872131760904, 2226.2777513255605, 1442.0974396413947, 4054.5890559743957, 
1356.5858928896018, 2083.2987315342702, 1913.3521064127174, 2174.9454675031657],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [5:49:30<4:11:21, 33.15it/s]global step 500000, trans_decision ep_re 2646.4762889224303

{"global_step": 500000, "eval_re": [2047.2799451435899, 3031.1504227806677, 
1882.234585116337, 3849.99722029865, 2198.871182188552, 1456.9555032555913, 
2216.9140086803895, 3223.0007142597933, 4213.327857085986, 2345.031450414746], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [5:56:30<4:04:56, 33.34it/s]global step 510000, trans_decision ep_re 1750.6165965377454

{"global_step": 510000, "eval_re": [-27.88063859138753, 921.5484453676927, 
2915.3157898347326, 1508.4923069807016, 3211.3066203207386, 1651.6898462747938, 
1819.652938869495, 1437.2857205797056, 1510.9515876337475, 2557.8033481072343], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [6:03:30<3:59:23, 33.42it/s]global step 520000, trans_decision ep_re 3245.1385856600177

{"global_step": 520000, "eval_re": [2055.6813778077217, 1737.7124194253097, 
3136.713050357538, 3547.613949102634, 1781.6457108914694, 4765.83471768252, 
4695.095227970377, 3198.5478577717604, 3623.933368709883, 3908.608176880964], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [6:10:30<3:57:18, 33.01it/s]global step 530000, trans_decision ep_re 2163.781870537929

{"global_step": 530000, "eval_re": [1818.4490329304197, 2403.650228477798, 
3555.8801763534316, 1368.6780223049218, 1950.1512162151441, 2383.9925298323865, 
2317.580666606318, 1489.3326616705851, 2824.7967299155316, 1525.3074410727536], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [6:17:30<3:48:57, 33.49it/s]global step 540000, trans_decision ep_re 2391.8788123576537

{"global_step": 540000, "eval_re": [1922.6765687487946, 2177.54203167265, 
3400.9679283983223, 3216.748064993158, 2541.524167310155, 1345.3894039167967, 
2053.315432408757, 1647.5552166450086, 2822.904567704394, 2790.164741778504], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [6:24:30<3:47:26, 32.98it/s]global step 550000, trans_decision ep_re 2252.7808039166102

{"global_step": 550000, "eval_re": [1222.395595331397, 2919.648660683188, 
880.975518339838, 1939.4808898344313, 2120.47155539186, 3780.9810376353867, 
3330.5945538681012, 1574.26151618627, 1213.984731927295, 3545.0139799683343], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559997/1000000 [6:31:30<3:42:27, 32.97it/s]global step 560000, trans_decision ep_re 2159.0163307473567

{"global_step": 560000, "eval_re": [1327.9847844718358, 2471.3272870637907, 
1643.2814519859744, 1817.7086761436183, 1575.3518968444268, 4163.406162982417, 
3625.8378959677534, 1704.849407776544, 1602.9386900076634, 1657.4770542295403], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569997/1000000 [6:38:30<3:36:05, 33.16it/s]global step 570000, trans_decision ep_re 2444.5221118928653

{"global_step": 570000, "eval_re": [2558.8739908327134, 2112.5896250055016, 
2580.956667178338, 1597.3884433849273, 2271.287109735665, 1756.7691436559048, 
4367.476732742397, 1786.2663461064842, 2954.7908588846826, 2458.822201402039], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [6:45:30<3:32:47, 32.90it/s]global step 580000, trans_decision ep_re 1548.0832664909626

{"global_step": 580000, "eval_re": [1736.1252401743275, 1531.1507164184968, 
2714.388301312679, 1607.5170481614794, 1450.4379420381126, 1539.2497985260677, 
1560.942456823488, 402.98531680707435, 1600.5471635197534, 1337.4886811281442], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589997/1000000 [6:52:30<3:28:07, 32.83it/s]global step 590000, trans_decision ep_re 1444.6691305345662

{"global_step": 590000, "eval_re": [1519.3058894980545, 1470.170853548105, 
1259.4168750773354, 1573.6317044909822, 1425.712902732734, 1533.5794280454863, 
1408.180647388079, 1992.4257088225293, 772.223623841852, 1492.0436719005052], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [6:59:30<3:21:35, 33.07it/s]global step 600000, trans_decision ep_re 2317.6767663215073

{"global_step": 600000, "eval_re": [3298.727703959574, 2490.6818924549098, 
1427.146984542057, 1525.0647561182057, 2296.7832703543204, 2381.085275459908, 
2805.7595852391646, 2033.0871133333778, 1451.439402420284, 3466.9916793332695], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [7:06:30<3:17:48, 32.86it/s]global step 610000, trans_decision ep_re 2009.7344335072587

{"global_step": 610000, "eval_re": [2057.365858613501, 1474.5568805278554, 
1325.9072342695154, 2058.4992148011765, 3768.6603535163767, 1391.8740184556639, 
1637.216106486087, 1937.4136525618926, 1768.642635960706, 2677.208379879812], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619997/1000000 [7:13:30<3:12:48, 32.85it/s]global step 620000, trans_decision ep_re 2172.843380999092

{"global_step": 620000, "eval_re": [4023.7275175117006, 1500.3377822192385, 
4011.356414651656, 180.3409298831554, 2273.0919295989083, 2061.518778812518, 
2274.593770293859, 2313.644731397692, 1461.4507446212306, 1628.3712110009674], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [7:20:40<3:07:18, 32.92it/s]global step 630000, trans_decision ep_re 1925.8400422776292

{"global_step": 630000, "eval_re": [1777.2426167310243, 1421.5732844839738, 
1405.1278422243802, 2958.549043720717, 2106.7837850819624, 1568.6093065590885, 
1533.2104584514568, 2111.291190002427, 1894.845554177136, 2481.1673413441276], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [7:27:40<3:02:40, 32.84it/s]global step 640000, trans_decision ep_re 2462.4095492928145

{"global_step": 640000, "eval_re": [3875.4336499272945, 3348.5508353164187, 
2718.8988471795883, 1373.7485662229262, 3374.8147923779607, 1879.855441207341, 
1424.6990151864177, 1843.558528085448, 2948.766586137241, 1835.769231287507], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649997/1000000 [7:34:40<2:57:39, 32.83it/s]global step 650000, trans_decision ep_re 2490.275022912771

{"global_step": 650000, "eval_re": [1370.9884985765984, 1554.9935446317324, 
2102.0049234761777, 3765.883844917196, 3257.9205037160723, 1452.3238718992518, 
1468.4493020929483, 3886.2588974322193, 3450.394413885554, 2593.5324284999597], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [7:41:40<2:49:51, 33.36it/s]global step 660000, trans_decision ep_re 1847.4660982117057

{"global_step": 660000, "eval_re": [1585.3605522537227, 1460.6317577747052, 
1733.1657146627629, 2732.6174957718067, 1546.7681597922574, 2325.2394189618853, 
1681.9313443372853, 1826.25778389881, 2062.134984609628, 1520.5537700541938], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [7:48:40<2:47:13, 32.89it/s]global step 670000, trans_decision ep_re 2089.978898855331

{"global_step": 670000, "eval_re": [1887.7976179273414, 2210.2073749409587, 
2916.3895486733445, 1336.5957542207943, 2470.7609041776036, 1730.756947690184, 
1482.88495717872, 2195.473117478746, 2686.439396287926, 1982.4833699776884], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679998/1000000 [7:55:40<2:41:43, 32.98it/s]global step 680000, trans_decision ep_re 2382.552761931226

{"global_step": 680000, "eval_re": [1872.7157132427974, 1427.4560817651925, 
3975.1710283252705, 1267.9963069001105, 2223.8443550887187, 3735.292845925123, 
1990.4374193618755, 2203.9824866447393, 2581.8030132934427, 2546.8283687649873],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [8:02:40<2:37:15, 32.85it/s]global step 690000, trans_decision ep_re 2192.4601529197294

{"global_step": 690000, "eval_re": [1321.5860371055608, 1503.564512293096, 
3074.3337140403987, 2117.5297391485815, 1519.95266745242, 3103.2332572627956, 
1578.983016919663, 3677.9709450916566, 1450.2866205059997, 2577.1610193771285], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [8:09:40<2:30:54, 33.13it/s]global step 700000, trans_decision ep_re 1686.236098399535

{"global_step": 700000, "eval_re": [1456.43959281632, 1578.0769448115288, 
1729.6151675632873, 1440.674635153325, 1399.4864573273833, 1454.5927310601587, 
1412.9999528047576, 1473.33659454655, 2332.158093519506, 2584.9808143925343], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [8:16:50<2:26:32, 32.98it/s]global step 710000, trans_decision ep_re 1363.6145430250251

{"global_step": 710000, "eval_re": [83.2482379383557, 1508.5370189642124, 
1587.3906101818843, 1379.751383913712, 1689.6932903891948, 1650.2541470210363, 
1583.5770272160848, 1732.6092588216534, 1477.8622316396254, 943.222224164494], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719997/1000000 [8:23:50<2:21:35, 32.96it/s]global step 720000, trans_decision ep_re 1849.5607243301085

{"global_step": 720000, "eval_re": [1344.984874319194, 1339.2266259183023, 
2250.2216519337626, 1953.4956705027246, 1977.8944224423499, 1949.6528780354438, 
2030.1629763337537, 1662.4119724578336, 1419.3779360118722, 2568.178235345851], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729997/1000000 [8:30:50<2:17:00, 32.84it/s]global step 730000, trans_decision ep_re 2192.6682193730167

{"global_step": 730000, "eval_re": [1740.9942004325442, 2589.3374215421863, 
1882.2089619820606, 2581.3910743146125, 2264.776237915883, 1461.9512104144044, 
1242.3609593891922, 3856.38364076147, 1455.2016808341855, 2852.0768061436293], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [8:38:00<2:11:32, 32.94it/s]global step 740000, trans_decision ep_re 1882.7579137561036

{"global_step": 740000, "eval_re": [1528.1909018474728, 1841.864882029855, 
1542.202530147968, 1555.119977842623, 2800.1710157102757, 1496.9895906101692, 
2078.884440908963, 1217.2704057139586, 2064.9284923825167, 2701.956900367235], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749997/1000000 [8:45:00<2:06:43, 32.88it/s]global step 750000, trans_decision ep_re 1810.0547831284416

{"global_step": 750000, "eval_re": [2652.1154874983736, 2137.8247853496205, 
2073.0247443043104, 1589.473740891941, 2483.7719297182925, 716.1703788805818, 
1621.090284503977, 1762.2777041320849, 2662.58098242565, 402.2177935795858], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759997/1000000 [8:52:10<2:01:33, 32.91it/s]global step 760000, trans_decision ep_re 1848.4396975870955

{"global_step": 760000, "eval_re": [2307.4165880693545, 1330.2566115841787, 
2384.848365883373, 1187.174916598149, 2190.2468405923164, 1738.2323339839352, 
1435.7531905366668, 2273.125609684066, 1770.7373468090937, 1866.605172129816], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [8:59:10<1:56:47, 32.82it/s]global step 770000, trans_decision ep_re 1961.1115399300575

{"global_step": 770000, "eval_re": [2487.578276822833, 3082.6748811428115, 
1629.1190051521726, 1355.783144314359, 2300.6503205615204, 1654.8885033119534, 
1639.4246360901466, 1569.386175013757, 1815.4266788111986, 2076.1837780798237], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [9:06:10<1:51:16, 32.95it/s]global step 780000, trans_decision ep_re 2272.9247383547777

{"global_step": 780000, "eval_re": [2026.8827242599893, 2262.759951541026, 
1691.129896389385, 1908.7608261177568, 1755.5531578822213, 3698.046722277131, 
2922.1932743698976, 1429.9309900982093, 1522.082186456856, 3511.9076541553013], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [9:13:20<1:46:14, 32.94it/s]global step 790000, trans_decision ep_re 2696.032685550812

{"global_step": 790000, "eval_re": [3648.9711453600285, 1755.2944446779584, 
2971.753276003204, 2609.4700304711946, 2890.0422131166997, 1928.1775744357537, 
3357.5997917614613, 3130.534868985109, 2537.4893925523343, 2130.9941181443764], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [9:20:20<1:41:10, 32.94it/s]global step 800000, trans_decision ep_re 1535.7420251295766

{"global_step": 800000, "eval_re": [996.1809228771078, 1275.2208997563052, 
1664.875073014773, 1292.7398024872186, 2433.895957558885, 1725.0915772086444, 
1460.2154536361531, 1432.4796013850062, 1499.2683957566164, 1577.4525676150563],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [9:27:20<1:35:55, 33.01it/s]global step 810000, trans_decision ep_re 1782.4506769763714

{"global_step": 810000, "eval_re": [1656.3740906937317, 1462.0069048270072, 
1699.1790966265062, 1953.4236502445046, 1329.0939797388312, 2165.446424399333, 
2239.9031941006156, 1484.0343589753602, 1997.9118869678168, 1837.1331831900065],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [9:34:30<1:31:21, 32.84it/s]global step 820000, trans_decision ep_re 1800.0087102966663

{"global_step": 820000, "eval_re": [1463.2645009209716, 1455.215841107566, 
3369.6572226587764, 2325.639577462672, 1880.3261087126068, 1333.2073754007965, 
2055.9057030429435, 1454.9672858229558, 1295.1175656525852, 1366.78592218479], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829998/1000000 [9:41:30<1:26:13, 32.86it/s]global step 830000, trans_decision ep_re 2097.2481388762767

{"global_step": 830000, "eval_re": [1682.4986457159123, 2112.1641057094594, 
2068.8251875796095, 1404.0982570538115, 1584.5561590877599, 2039.7750501989278, 
3550.3526067617504, 2714.843931045882, 1819.8317526513963, 1995.5356929582586], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839997/1000000 [9:48:40<1:20:40, 33.05it/s]global step 840000, trans_decision ep_re 1926.7376665274307

{"global_step": 840000, "eval_re": [2763.5661126512, 1696.2308277798934, 
1464.1289982497542, 1520.6898470410881, 2998.6820971594702, 2094.6574758253864, 
1581.1504836913532, 2311.822290480752, 1404.408961627224, 1432.0395707681841], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [9:55:40<1:15:49, 32.97it/s]global step 850000, trans_decision ep_re 1875.6045132276427

{"global_step": 850000, "eval_re": [2283.767066681031, 2296.506510511018, 
1193.771305630645, 1988.2134567269784, 1353.6422984514052, 1379.760646376838, 
2393.234218200052, 2624.3216605531247, 1318.611619619977, 1924.2163495253606], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [10:02:50<1:11:38, 32.57it/s]global step 860000, trans_decision ep_re 1920.8575580212414

{"global_step": 860000, "eval_re": [1644.757045466963, 1522.62860744735, 
1517.4174512888521, 3096.380557508898, 1738.2929598912401, 2007.9831489488536, 
2953.689419340756, 1351.0533917027526, 1716.0359440351513, 1660.337054581597], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869997/1000000 [10:10:00<1:07:20, 32.17it/s]global step 870000, trans_decision ep_re 1727.2308639339644

{"global_step": 870000, "eval_re": [1761.5331517895452, 1901.848201162007, 
1826.9049252852342, 1288.3818546731068, 1352.9730778577946, 1666.6004930356628, 
2452.1005367153193, 1484.8066805637338, 2257.4928251994984, 1279.6668930577407],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [10:17:00<1:00:48, 32.89it/s]global step 880000, trans_decision ep_re 2357.055535846707

{"global_step": 880000, "eval_re": [3078.3371270705984, 3000.5793803136353, 
2835.926777704345, 1531.6289588342615, 3273.355684756846, 1860.1700790055254, 
2653.9964653392244, 2461.9072134916937, 1460.200937138605, 1414.4527348123286], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [10:24:10<55:43, 32.90it/s]global step 890000, trans_decision ep_re 2227.088234967924

{"global_step": 890000, "eval_re": [1442.8720245140983, 3091.4428044394526, 
2178.4087377727114, 1736.1633563718078, 2759.530503992873, 2516.164161961756, 
1631.7150162269015, 1984.0566852707082, 2984.3710746342745, 1946.15798449466], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [10:31:10<50:53, 32.75it/s]global step 900000, trans_decision ep_re 2190.8132422142708

{"global_step": 900000, "eval_re": [1773.9096589948354, 1685.2708737002058, 
1671.4381476633016, 1438.1631509934969, 1417.0919073560826, 2061.828424708186, 
2954.4978461542637, 2621.999833409339, 3334.956367253901, 2948.9762119090956], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [10:38:10<45:37, 32.87it/s]global step 910000, trans_decision ep_re 1960.0389520002943

{"global_step": 910000, "eval_re": [3240.7279255408075, 2125.6616422448697, 
2247.1239374245347, 2291.971489674689, 1321.5968506537579, 1764.485001646761, 
1530.5077302134662, 1456.3629260467908, 1498.4409846103856, 2123.5110319468804],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [10:45:20<41:00, 32.52it/s]global step 920000, trans_decision ep_re 2014.2822277301143

{"global_step": 920000, "eval_re": [2340.7218566784295, 2203.690185606547, 
1430.8147646861162, 2334.767042937332, 2500.3667005076327, 1601.1432484439745, 
1196.9339648731861, 1463.7497658913032, 3354.46888116979, 1716.1658665068358], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929997/1000000 [10:52:30<35:34, 32.79it/s]global step 930000, trans_decision ep_re 2739.565421111452

{"global_step": 930000, "eval_re": [2267.976452126645, 3346.8229713205506, 
3401.954404955212, 3138.8986187678142, 3362.1695244062153, 2701.071811729968, 
3324.1107375395413, 1347.3752277272474, 2955.912884287704, 1549.3615782536247], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939997/1000000 [10:59:30<30:16, 33.03it/s]global step 940000, trans_decision ep_re 1752.6698331591037

{"global_step": 940000, "eval_re": [1283.9991144015908, 2282.2240279747216, 
1621.1003992109554, 1868.4390537033491, 1328.2998248371377, 3032.436097225056, 
1946.0783053579046, 1288.8821622365515, 1512.8338089603064, 1362.4055376834647],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949998/1000000 [11:06:40<25:17, 32.94it/s]global step 950000, trans_decision ep_re 1922.970006751255

{"global_step": 950000, "eval_re": [2411.0339049603, 1529.920092024403, 
1805.2025212441017, 1413.6773638014201, 1300.4898763930992, 2124.585567113577, 
1610.6802361913547, 2947.164107470587, 1909.9355103472578, 2177.0108879664463], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959997/1000000 [11:13:50<20:15, 32.92it/s]global step 960000, trans_decision ep_re 1853.3797707447422

{"global_step": 960000, "eval_re": [2731.7780106585064, 1601.3228896847095, 
2087.1602133184006, 1696.2980815440044, 2030.9265054586454, 1533.683381085554, 
1465.6756562852845, 1861.557114938226, 1573.3417397444885, 1952.0541147296012], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969997/1000000 [11:20:50<15:13, 32.84it/s]global step 970000, trans_decision ep_re 1798.9746504389245

{"global_step": 970000, "eval_re": [2497.4248183286186, 1492.3524501651725, 
2323.191493948831, 1764.8226499661127, 1567.250607834686, 2392.6413400597776, 
1362.082438827064, 1460.09291809018, 1758.8781373939003, 1371.0096497749016], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [11:28:00<10:08, 32.89it/s]global step 980000, trans_decision ep_re 2360.360936734929

{"global_step": 980000, "eval_re": [1888.9823525592353, 1932.1949618061683, 
1770.471702466386, 1436.7496546951743, 1763.345651449775, 1347.0412589184816, 
3299.8574900504136, 3575.107700618745, 3061.2347643843323, 3528.6238304005783], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [11:35:00<05:02, 33.09it/s]global step 990000, trans_decision ep_re 1766.4789660955655

{"global_step": 990000, "eval_re": [1465.637863132351, 1878.5952760907644, 
1311.4758333788618, 2018.7771725544474, 1552.0859778564345, 1595.1178143925504, 
1833.71909662899, 2390.8863754042145, 1913.298342557731, 1705.1959089593086], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [11:42:00<00:00, 32.84it/s]global step 1000000, trans_decision ep_re 1614.0893213360414

{"global_step": 1000000, "eval_re": [1551.121636346336, 1277.8815442459597, 
1837.191932432286, 2089.267655788205, 1583.3767952479218, 1681.6489182151913, 
1687.3728820038766, 1537.8943693862482, 1299.637469106208, 1595.5000105881807], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [11:42:29<00:00, 23.73it/s]
