
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'ExtremeClogL1U23::markov(ord(15,1), ord(3,5,3,shift=22), 
[[124, 1], [1, 19]])'
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9997/1000000 [03:10<7:03:02, 39.00it/s]global step 10000, trans_decision ep_re -117.66729348760859

{"global_step": 10000, "eval_re": [-164.20428506086324, -104.30110863703347, 
-118.64911641615738, -113.02814159926004, -105.26242515633334, 
-93.23376134990933, -104.4758705202093, -106.73474116653242, 
-104.21633004265898, -162.56715492712848], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [09:30<7:10:00, 37.98it/s]global step 20000, trans_decision ep_re 457.62973306882833

{"global_step": 20000, "eval_re": [359.20902247656574, 596.4783621041497, 
487.8197230316072, 370.69741856064155, 376.9689279776088, 303.1413891315289, 
416.6864756897665, 789.108509907866, 630.5047434583014, 245.6827583502468], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [15:50<7:02:12, 38.29it/s]global step 30000, trans_decision ep_re 911.8193280157587

{"global_step": 30000, "eval_re": [840.5874344553397, 990.1436850847107, 
923.8546540312032, 861.4380542400334, 963.2495226274623, 947.2032489108952, 
864.4360849670325, 851.3353698699697, 936.8669600109523, 939.0782659599894], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [22:00<6:57:04, 38.36it/s]global step 40000, trans_decision ep_re 1116.6565709714566

{"global_step": 40000, "eval_re": [1202.3309810028861, 1014.1635741518129, 
1309.8739101224373, 1085.5213157522242, 1104.711743493056, 945.7282325111686, 
1248.542674417975, 1121.9022682536213, 1118.5072227492385, 1015.2837872601461], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [28:20<6:55:57, 38.06it/s]global step 50000, trans_decision ep_re 1243.5256176103035

{"global_step": 50000, "eval_re": [1199.230053505767, 1200.0774110185307, 
1229.0517616438735, 1045.6902757252367, 1371.683914458953, 1064.4721760975135, 
1325.9057936333122, 1569.5321875960076, 1284.9778353768852, 1144.634767046957], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59998/1000000 [34:40<6:47:22, 38.46it/s]global step 60000, trans_decision ep_re 1426.0577914693822

{"global_step": 60000, "eval_re": [1220.4941617131944, 1480.4102781852307, 
1291.4868813693056, 1487.499736317284, 1114.3901577961649, 1476.776252138177, 
1852.0404935326144, 1239.3963192684316, 1501.8934177369424, 1596.1902166364762],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69996/1000000 [40:50<6:38:31, 38.89it/s]global step 70000, trans_decision ep_re 1715.905230834375

{"global_step": 70000, "eval_re": [1785.9595430877694, 1980.782338593195, 
980.7165163392064, 1007.2408068523756, 1066.0884602346366, 1855.8774016592145, 
3062.9780830076147, 2687.095420259042, 1385.8763086899744, 1346.4374296207243], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79997/1000000 [47:10<6:40:13, 38.31it/s]global step 80000, trans_decision ep_re 1531.3792227990477

{"global_step": 80000, "eval_re": [1208.708521770748, 1278.8159289476132, 
1540.0927793482301, 3236.751581748281, 952.3260529269418, 1585.4654388794092, 
1572.5152439756928, 1200.0556957477831, 1086.7308422867843, 1652.3301423589937],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [53:30<6:36:13, 38.28it/s]global step 90000, trans_decision ep_re 2553.5238353598834

{"global_step": 90000, "eval_re": [3478.6920190786664, 1635.0303868728777, 
3305.7018190386966, 1093.2600480315768, 2479.3280098938594, 2916.258173908638, 
2029.5101414990895, 3773.7043981760303, 1314.6744370425854, 3509.0789200568133],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99996/1000000 [59:40<6:31:27, 38.32it/s]global step 100000, trans_decision ep_re 1983.4990200048865

{"global_step": 100000, "eval_re": [1306.7801505555335, 3083.4016303701933, 
1548.9485909521572, 3291.371949844583, 2361.9040216129347, 2877.799541650914, 
333.4937161699818, 1382.1261889817417, 2534.7892221424336, 1114.3751877683944], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:06:00<6:27:57, 38.23it/s]global step 110000, trans_decision ep_re 1754.5530662669516

{"global_step": 110000, "eval_re": [2290.8093598448154, 1086.0335755108279, 
1298.6034109746065, 1380.0431740739514, 1142.8632392839759, 2542.004512234899, 
3723.4558073096487, 1353.692843589609, 1624.6425916912253, 1103.3821481559562], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:12:10<6:24:56, 38.10it/s]global step 120000, trans_decision ep_re 2344.5855439281886

{"global_step": 120000, "eval_re": [2446.0836808658873, 2023.1532832348885, 
1206.0441438299754, 1554.4176643842625, 3412.2399982093857, 3543.7681046787143, 
2030.2190107703705, 1902.6219076922064, 3399.398415273754, 1927.909230342441], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:18:30<6:16:02, 38.56it/s]global step 130000, trans_decision ep_re 1872.6790977272474

{"global_step": 130000, "eval_re": [2004.9575816689319, 1470.5204255744964, 
2573.481057291797, 2169.660356245673, 1083.908517670204, 1178.3923773818835, 
2089.2152831124768, 2106.338304391764, 2506.9511321195814, 1543.3659418156694], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139996/1000000 [1:24:40<6:05:53, 39.17it/s]global step 140000, trans_decision ep_re 1793.634704187261

{"global_step": 140000, "eval_re": [1110.4028051150385, 1182.6254908742847, 
1406.5687103377418, 1580.9241941332957, 1313.6955756813293, 1534.752364803318, 
2206.1595945128697, 3324.453050463997, 2679.1000910829566, 1597.6651648677794], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149996/1000000 [1:31:00<6:00:58, 39.25it/s]global step 150000, trans_decision ep_re 2105.708386281346

{"global_step": 150000, "eval_re": [1473.6398105436685, 4489.425135650601, 
2201.479749609495, 1447.6533885227457, 1390.1704664943802, 1724.3988435564268, 
3624.2664934739278, 2135.0508029089, 2281.434771709511, 289.5644003438079], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [1:37:10<5:55:25, 39.39it/s]global step 160000, trans_decision ep_re 2906.675006723962

{"global_step": 160000, "eval_re": [1787.0010783097666, 1799.255701218437, 
4245.583751000254, 3824.5239815899354, 1409.0557157450005, 4414.789976612322, 
4439.279454029026, 3291.3169597715505, 1507.0889425828425, 2348.8545063804863], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169996/1000000 [1:43:30<5:58:35, 38.58it/s]global step 170000, trans_decision ep_re 2103.7230881573564

{"global_step": 170000, "eval_re": [2792.069642858526, 2090.9329283087986, 
1410.3917926207346, 2672.9847907267813, 1537.4039831229468, 1516.3150540843324, 
2816.4201538797615, 1423.823883614276, 2325.5132868575065, 2451.375365499902], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [1:49:40<5:52:21, 38.79it/s]global step 180000, trans_decision ep_re 2351.1743353315114

{"global_step": 180000, "eval_re": [2229.9566116574724, 1661.6257077782595, 
3313.520016993432, 2537.4387564347344, 1986.0788326995707, 1181.0652159156905, 
2147.08339264393, 1950.368128396457, 3928.744432906134, 2575.862257889436], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189998/1000000 [1:56:00<5:45:28, 39.08it/s]global step 190000, trans_decision ep_re 1883.8652289101615

{"global_step": 190000, "eval_re": [1335.562029340684, 1156.8734369018216, 
2255.090403227254, 1822.3215920185405, 2459.1014768378755, 1405.7680204560859, 
2725.0023380484095, 2245.792137454694, 1242.9507103353803, 2190.190144480867], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199997/1000000 [2:02:10<5:38:21, 39.41it/s]global step 200000, trans_decision ep_re 1983.0234794906173

{"global_step": 200000, "eval_re": [1274.4988577357424, 1635.999515078068, 
2891.7647938135638, 3181.785620266162, 1622.179956601107, 2383.47371850799, 
879.2663220227344, 2761.096129875742, 1368.9868032934987, 1831.1830777115676], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [2:08:30<5:37:54, 38.96it/s]global step 210000, trans_decision ep_re 2504.458065192846

{"global_step": 210000, "eval_re": [3268.4431750943836, 1711.9584041126288, 
3391.6589627886183, 1203.0503375102983, 2424.2289805723626, 1432.140709802526, 
3004.1137517088823, 1993.259620800221, 3295.571458998029, 3320.155250540514], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219996/1000000 [2:14:40<5:30:14, 39.37it/s]global step 220000, trans_decision ep_re 2327.910147305012

{"global_step": 220000, "eval_re": [2336.2943397198865, 3129.478298470613, 
1734.446805807315, 1952.75579669236, 2064.3441798912304, 2241.4680471140973, 
1548.9092196600789, 2712.6168595312542, 1850.328129794766, 3708.459796368518], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [2:21:00<5:35:11, 38.29it/s]global step 230000, trans_decision ep_re 2221.7518534391174

{"global_step": 230000, "eval_re": [1093.0920974929386, 1659.8996849213863, 
2635.228603030421, 1319.3388194416525, 2851.693099399352, 1814.9410237819523, 
1945.9326060198946, 3328.6229061485674, 3238.4018917993217, 2330.3678023556886],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239996/1000000 [2:27:10<5:32:10, 38.13it/s]global step 240000, trans_decision ep_re 2187.9656284528965

{"global_step": 240000, "eval_re": [1713.005925658703, 2412.9100980533995, 
2783.5444424684842, 1830.7239276572852, 2297.3265079088715, 1850.4820758529588, 
1271.236613484634, 2369.13671218681, 2192.54377257707, 3158.7462086807473], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [2:33:20<5:25:09, 38.44it/s]global step 250000, trans_decision ep_re 1640.3927853675887

{"global_step": 250000, "eval_re": [1618.8576602631226, 2155.393720409052, 
1418.4636237644168, 1382.5793378228846, 2921.007605757951, 1127.0881134300726, 
1888.2037868688908, 1351.7871307605296, 1168.2792563624998, 1372.2676182364667],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [2:39:40<5:14:51, 39.17it/s]global step 260000, trans_decision ep_re 2012.811351270074

{"global_step": 260000, "eval_re": [2545.355711255835, 1204.173502704081, 
3207.7763411676274, 1282.1896875234231, 2056.883751581208, 1680.4097794273148, 
1363.8837456902147, 2233.6033557789947, 2045.3462092125194, 2508.4914283595203],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269997/1000000 [2:45:50<5:09:30, 39.31it/s]global step 270000, trans_decision ep_re 1941.5188681249697

{"global_step": 270000, "eval_re": [3266.544982952971, 1249.423906899032, 
1774.3724855485993, 1688.407716158391, 1330.6907276468687, 1851.716773828029, 
2969.016366458226, 2440.315356442184, 1497.790522286559, 1346.9098430288368], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [2:52:10<5:07:55, 38.97it/s]global step 280000, trans_decision ep_re 2708.5368220114683

{"global_step": 280000, "eval_re": [2114.507529999066, 3389.618645548806, 
2028.7447922478175, 3049.426647776814, 1622.0416449765855, 2339.175912704987, 
3910.733334409144, 3681.3534651598593, 2201.024064769583, 2748.74218252202], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289996/1000000 [2:58:20<5:02:07, 39.17it/s]global step 290000, trans_decision ep_re 1705.9688752524003

{"global_step": 290000, "eval_re": [1358.1038158429335, 2109.551026416738, 
1175.7922285194895, 1523.432290886265, 1364.2174569106874, 1448.9934962649195, 
1397.1407022185194, 1903.1844361877618, 2925.3246896344267, 1853.9486096422597],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299997/1000000 [3:04:30<5:00:04, 38.88it/s]global step 300000, trans_decision ep_re 2493.175601745225

{"global_step": 300000, "eval_re": [1320.117657942811, 3887.2294801641956, 
3952.377201283534, 1200.2842730765674, 2346.446788198457, 3432.604527345597, 
2594.050581015041, 2852.910397547991, 1137.610591238568, 2208.1245196394852], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [3:10:50<4:52:22, 39.33it/s]global step 310000, trans_decision ep_re 2575.039098280753

{"global_step": 310000, "eval_re": [3902.816864914649, 1172.390435101454, 
2327.8355953231303, 2980.5781160770325, 1669.19340741523, 3961.743282015678, 
1856.1768298345182, 3776.7115912301183, 1371.9509815442784, 2730.9938793514416],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [3:17:00<4:54:47, 38.44it/s]global step 320000, trans_decision ep_re 2943.539388113167

{"global_step": 320000, "eval_re": [3620.5302902152903, 1755.9536206201585, 
2534.331357130399, 3908.98893134432, 2972.901870121368, 3758.398050852457, 
2384.5700309379527, 3883.012480266193, 1202.1343406088422, 3414.5729090346913], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329996/1000000 [3:23:20<4:46:57, 38.91it/s]global step 330000, trans_decision ep_re 3144.2282881615224

{"global_step": 330000, "eval_re": [2867.2334952179767, 4033.732173832948, 
3819.710803436987, 1172.8470002840681, 4362.285340848538, 4093.5724521782863, 
3473.8150229429234, 1300.7386002678957, 2093.2404122376097, 4225.1075803679905],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339998/1000000 [3:29:30<4:40:36, 39.20it/s]global step 340000, trans_decision ep_re 1720.0476745091278

{"global_step": 340000, "eval_re": [2303.3569878882404, 1509.87226449346, 
1086.998181002978, 1171.6482575955263, 1763.1721101600504, 1641.8903018561725, 
2899.4563303491072, 2338.4802672569986, 1195.7145604604693, 1289.8874840282717],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [3:35:50<4:41:43, 38.45it/s]global step 350000, trans_decision ep_re 1546.0420258804431

{"global_step": 350000, "eval_re": [1646.1009628055087, 1985.790454776525, 
1411.760597793802, 1346.925308207758, 1228.0715787538409, 1304.15183416416, 
1830.390665184068, 1776.7940564328192, 1420.2946755571718, 1510.140125128779], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [3:42:00<4:31:35, 39.28it/s]global step 360000, trans_decision ep_re 2838.2329411365045

{"global_step": 360000, "eval_re": [3366.742171425343, 2162.4309926871465, 
2323.624149957101, 3329.406244276602, 2579.1982245959084, 3085.69520899194, 
3490.8746502210197, 3578.289447983005, 1877.0586858113895, 2589.0096354155853], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369997/1000000 [3:48:10<4:26:49, 39.35it/s]global step 370000, trans_decision ep_re 2085.554555911831

{"global_step": 370000, "eval_re": [1830.9398015133472, 1902.2623528603135, 
1898.5736712947844, 1596.0238121645202, 1401.44289679272, 2719.598283123515, 
2108.7454827361385, 3238.148627342986, 2908.932786487803, 1250.877844802182], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379998/1000000 [3:54:30<4:21:20, 39.54it/s]global step 380000, trans_decision ep_re 1680.3708996978755

{"global_step": 380000, "eval_re": [1789.9750640110738, 1178.3943581227652, 
1524.1893349946763, 1547.0222342050886, 1895.1629900291716, 2203.7244548555122, 
1811.7747547285835, 1325.4798785009518, 1980.8350713470954, 1547.1508561838361],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389996/1000000 [4:00:40<4:26:07, 38.20it/s]global step 390000, trans_decision ep_re 1929.967440563099

{"global_step": 390000, "eval_re": [1067.3870478133354, 3003.0077770343787, 
1263.5468912932997, 1468.6177105647137, 1659.316032758137, 1691.9045511873292, 
1540.5807490450563, 2222.391824545267, 2838.7231702280687, 2544.1986511614064], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399997/1000000 [4:06:50<4:22:02, 38.16it/s]global step 400000, trans_decision ep_re 1955.802629267974

{"global_step": 400000, "eval_re": [1813.0191664097529, 3155.8627170424293, 
2532.147525403982, 2399.7210795353662, 1873.7087632260636, 1414.1241698621436, 
1261.5429838809046, 1411.9193491886965, 1876.4209541505913, 1819.5595839798095],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409997/1000000 [4:13:10<4:10:55, 39.19it/s]global step 410000, trans_decision ep_re 1967.2330109779264

{"global_step": 410000, "eval_re": [1773.14931490107, 2405.1200140722485, 
2076.5540738683735, 1152.6484267316177, 2050.7149343534475, 3477.742398164867, 
1747.0782468011967, 1812.1770546868418, 1657.940784573236, 1519.2048616263687], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [4:19:20<4:06:49, 39.16it/s]global step 420000, trans_decision ep_re 1686.1879934136318

{"global_step": 420000, "eval_re": [1366.0779826832986, 1684.5057123173667, 
1468.2979682818284, 1820.5943176939372, 1932.9939860544916, 1917.548119150063, 
1769.0871008154768, 1518.0110131771878, 2171.480918920137, 1213.2828150425303], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [4:25:30<4:02:27, 39.18it/s]global step 430000, trans_decision ep_re 1493.974691309621

{"global_step": 430000, "eval_re": [2241.6279701180024, 1636.285344779465, 
1308.6925961298384, 2257.092547225161, 1250.864351461319, 1285.9287059693986, 
1243.4167033370331, 1238.7605161898075, 1266.8285452583748, 1210.2496326278092],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [4:31:50<3:58:39, 39.11it/s]global step 440000, trans_decision ep_re 1578.6590745093224

{"global_step": 440000, "eval_re": [1435.975497107635, 1245.6665369488717, 
1292.3927711004535, 1349.8735408003063, 1689.9111964303536, 1549.0361453687538, 
2114.2426019083687, 1279.4335817102144, 2515.1668176011294, 1314.8920561171356],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449997/1000000 [4:38:00<3:52:32, 39.42it/s]global step 450000, trans_decision ep_re 1655.3985062372674

{"global_step": 450000, "eval_re": [1073.8637267525385, 1160.3276308600098, 
1676.6929325384053, 1045.9815615860336, 2507.720287728674, 1478.9019017510204, 
1640.3170478375023, 1893.5416822813622, 1998.5291220297383, 2078.109169007391], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [4:44:10<3:48:09, 39.45it/s]global step 460000, trans_decision ep_re 1667.6197207934795

{"global_step": 460000, "eval_re": [3298.9300967717163, 1281.3087517971358, 
1472.2232321247861, 1162.872165861592, 1298.4833266186763, 1841.6949065036852, 
2053.279242334041, 1746.3677493187538, 1329.5536525571242, 1191.4840840472841], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469997/1000000 [4:50:20<3:45:41, 39.14it/s]global step 470000, trans_decision ep_re 1702.617626196145

{"global_step": 470000, "eval_re": [1723.4509683307156, 1604.6021987513848, 
1695.0675715804655, 1877.7834481861037, 1441.963810674675, 1778.1004049272224, 
2293.5444120411917, 2076.5431001161464, 1267.2878682241467, 1267.8324791293987],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479997/1000000 [4:56:40<3:43:07, 38.84it/s]global step 480000, trans_decision ep_re 1559.7489521697119

{"global_step": 480000, "eval_re": [1505.6789223376145, 1639.8657707998204, 
1707.8909027839627, 1427.2026581725604, 1324.936447152547, 1434.51579458219, 
1587.0774435975814, 1426.8387655038916, 2122.636716833808, 1420.8460999331414], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [5:02:50<3:35:13, 39.49it/s]global step 490000, trans_decision ep_re 1898.1781751624603

{"global_step": 490000, "eval_re": [1195.7076290538344, 2584.687222350166, 
1446.3900485037718, 1865.1685711074024, 1971.7234274947714, 1449.3112334808775, 
2011.5118945383285, 2859.3415664200743, 1426.4626010656755, 2171.477557609702], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499998/1000000 [5:09:10<3:33:08, 39.10it/s]global step 500000, trans_decision ep_re 1570.3616781489316

{"global_step": 500000, "eval_re": [1271.1363804632576, 1765.5786057218743, 
1392.5122684962134, 1370.1505037376398, 1030.4673038884125, 2222.7106693970186, 
1386.8445514428774, 2629.257513825569, 1381.317920090536, 1253.641064425916], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509998/1000000 [5:15:20<3:26:29, 39.55it/s]global step 510000, trans_decision ep_re 1524.45769873502

{"global_step": 510000, "eval_re": [1095.1411636932712, 1674.1050065471875, 
1305.571478124461, 1124.1542166731958, 1275.0328380264054, 1375.1389906848233, 
1690.3738635618083, 2155.5040251156433, 1646.1043571498153, 1903.4510477735869],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [5:21:30<3:25:51, 38.86it/s]global step 520000, trans_decision ep_re 2024.3005994725686

{"global_step": 520000, "eval_re": [1749.8514456847086, 2442.465593179274, 
1216.2524809063311, 3428.589865042899, 1630.8156175779463, 2355.9403433722364, 
2165.3753037175634, 2845.9535929620033, 1244.9312977665807, 1162.8304545161457],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [5:27:40<3:18:48, 39.40it/s]global step 530000, trans_decision ep_re 1867.5622129915089

{"global_step": 530000, "eval_re": [2878.3276661883706, 1315.2714918823551, 
1335.627785382227, 1401.7974146095999, 1600.8076846801587, 2668.5011124057783, 
1335.015054722562, 2465.317054575389, 1665.571040030866, 2009.3858254377822], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539997/1000000 [5:34:00<3:14:39, 39.39it/s]global step 540000, trans_decision ep_re 1403.8353543338585

{"global_step": 540000, "eval_re": [1736.1233507882282, 1394.0900432215171, 
1375.8611155288204, 1299.8492957821843, 1257.2411814150705, 1356.818599834482, 
1431.3629215606074, 1275.8125332272841, 1489.929296462848, 1421.2652055175429], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [5:40:10<3:10:28, 39.38it/s]global step 550000, trans_decision ep_re 1763.5308797352477

{"global_step": 550000, "eval_re": [1948.6989738805864, 1994.6639921550056, 
2058.055354953749, 1918.8836882521216, 1072.6466561032282, 1324.80252441383, 
2390.1169091546285, 1737.2391117195048, 1446.092733551256, 1744.108853168566], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [5:46:20<3:06:16, 39.37it/s]global step 560000, trans_decision ep_re 1514.6537874432913

{"global_step": 560000, "eval_re": [1306.2165837312723, 2763.819440541014, 
1674.8070314925246, 1401.644248343894, 1238.1036514518419, 1742.803167796547, 
1025.1037904317916, 1312.5936193922028, 1450.8755682014844, 1230.5707730503416],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569997/1000000 [5:52:30<3:02:37, 39.24it/s]global step 570000, trans_decision ep_re 1611.7354929976518

{"global_step": 570000, "eval_re": [2681.981450558653, 1231.1294432675827, 
1252.8170682088512, 2567.1605342714406, 1387.9803455629622, 1317.1521942711895, 
1182.682119770578, 1367.4392893398096, 1975.4825894503394, 1153.529895275112], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579997/1000000 [5:58:40<2:57:58, 39.33it/s]global step 580000, trans_decision ep_re 1860.9990125798672

{"global_step": 580000, "eval_re": [2417.9219170147126, 1844.017957494004, 
2112.9689717688902, 2767.561059524783, 1204.1889439895444, 948.9716191818729, 
1551.0222278793779, 2625.406322644894, 1652.4916526736165, 1485.4394536269763], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589998/1000000 [6:05:00<2:53:38, 39.35it/s]global step 590000, trans_decision ep_re 1849.2592763151024

{"global_step": 590000, "eval_re": [1690.1996749784785, 2782.575790499334, 
1742.237509574056, 2353.7621770516703, 1882.7276263406927, 1481.194209739557, 
1602.6034132781133, 1843.5643968321638, 1692.2006306910039, 1421.5273341659542],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [6:11:10<2:49:12, 39.40it/s]global step 600000, trans_decision ep_re 1675.268385432319

{"global_step": 600000, "eval_re": [2063.472218509459, 1517.7190727218406, 
1801.8024748825515, 1835.4748300983142, 1369.405406849433, 1140.8899661338958, 
1905.5751881743947, 1803.1422606267402, 1656.7837871797412, 1658.4186491468197],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609997/1000000 [6:17:20<2:44:15, 39.57it/s]global step 610000, trans_decision ep_re 1471.4866651643147

{"global_step": 610000, "eval_re": [1390.286702265904, 1453.2452792073866, 
1396.9749605039283, 1597.924092209662, 1495.9243105594503, 1466.1514344484551, 
1488.929590291616, 1284.175816779663, 1216.5545808860695, 1924.6998844910117], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619998/1000000 [6:23:30<2:41:24, 39.24it/s]global step 620000, trans_decision ep_re 1659.3684234470725

{"global_step": 620000, "eval_re": [1537.1576063015325, 1393.0754491227544, 
2650.319740709432, 1682.736464666848, 1532.9286988421884, 1573.837536314861, 
1554.0778311322867, 1909.5684742982085, 1392.3917904660007, 1367.5906426166139],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [6:29:40<2:37:25, 39.17it/s]global step 630000, trans_decision ep_re 1932.222433511292

{"global_step": 630000, "eval_re": [2220.876158410627, 1499.829826429079, 
1861.1833770025999, 1529.431153823685, 2073.146974439698, 2419.9444638125087, 
1819.8515465855012, 2295.1298857691804, 1438.1608609161817, 2164.670087923861], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [6:36:00<2:34:47, 38.76it/s]global step 640000, trans_decision ep_re 1600.029498122089

{"global_step": 640000, "eval_re": [1330.3062022401784, 1491.0118693589736, 
1368.838107683644, 1572.7681633233924, 1387.6483336709434, 2368.3027214505273, 
1396.6115484500501, 1638.2746531002408, 1328.8105154949758, 2117.722866447966], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [6:42:10<2:27:59, 39.42it/s]global step 650000, trans_decision ep_re 1559.1738951711889

{"global_step": 650000, "eval_re": [1692.5669460536387, 1370.850943791541, 
1478.9780007177176, 2060.027920104422, 1461.7807628210137, 1701.1324531045186, 
1532.024569967854, 1507.143384409724, 1504.7605385214085, 1282.4734322200504], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [6:48:20<2:23:51, 39.39it/s]global step 660000, trans_decision ep_re 1868.666736897859

{"global_step": 660000, "eval_re": [1748.795880414695, 2004.355100914279, 
1811.554307909065, 1374.5158954714, 1753.5104370845638, 1755.0490398395432, 
2836.666753200734, 1626.5551125107334, 1389.8541126994764, 2385.810728934099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669997/1000000 [6:54:30<2:19:25, 39.45it/s]global step 670000, trans_decision ep_re 1904.343713081406

{"global_step": 670000, "eval_re": [1141.4160650013566, 1296.4426159273094, 
1311.5817646063254, 3186.2634480303273, 1526.6619348969136, 3724.3191477477217, 
1912.7870453727428, 2044.5746441879676, 1739.2767610457727, 1160.1137039976213],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [7:00:40<2:15:17, 39.42it/s]global step 680000, trans_decision ep_re 1796.415446860412

{"global_step": 680000, "eval_re": [1762.0476399830113, 2206.356702200792, 
1755.8414533696823, 2287.9863447084813, 1679.4843923411127, 2048.312138158102, 
1794.6146920202584, 1780.0493547386322, 1416.0051000712635, 1233.4566510127838],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689996/1000000 [7:07:00<2:12:50, 38.89it/s]global step 690000, trans_decision ep_re 1813.1494695318263

{"global_step": 690000, "eval_re": [2238.0868935892104, 1531.3963540349146, 
1765.4267306149677, 2384.5059249815417, 1781.5624569114884, 1965.9914771261845, 
2233.6187767467445, 1465.4826532469372, 1329.6390828643061, 1435.7843452019686],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699997/1000000 [7:13:10<2:07:53, 39.10it/s]global step 700000, trans_decision ep_re 1665.2816251295378

{"global_step": 700000, "eval_re": [1453.4359926623558, 1998.418964712848, 
2211.024804250798, 1275.541665971141, 1243.980541199639, 1554.0205208119876, 
1224.9804091544177, 1410.7643560925437, 2135.102507067749, 2145.5464893719], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [7:19:20<2:03:26, 39.15it/s]global step 710000, trans_decision ep_re 1703.1599147930951

{"global_step": 710000, "eval_re": [2339.0129393121774, 1867.4653274952384, 
2209.2418754650253, 1400.1009492235978, 1656.6636175868969, 1978.0041533723954, 
1871.8407844465294, 1164.2008941644854, 1299.571332133188, 1245.497274731417], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719997/1000000 [7:25:30<2:01:19, 38.47it/s]global step 720000, trans_decision ep_re 1829.5194806067225

{"global_step": 720000, "eval_re": [1412.8512051323694, 2678.94611224363, 
1682.026396361039, 1325.5666948934766, 1434.9308639171777, 2105.796042115951, 
2498.3536628364395, 1863.1933844051907, 1514.8892704937734, 1778.641173668181], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [7:31:40<1:56:36, 38.59it/s]global step 730000, trans_decision ep_re 1529.6949620184178

{"global_step": 730000, "eval_re": [1297.1338471289682, 1466.9892030626756, 
1928.1715673111635, 1337.4296951735848, 1892.4632152335516, 1948.224900185811, 
1406.3632867714246, 1342.8685514707163, 1326.2934399573428, 1351.0119138889406],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [7:38:00<1:52:19, 38.58it/s]global step 740000, trans_decision ep_re 2001.6553752526331

{"global_step": 740000, "eval_re": [1557.2486537154425, 2935.7121009991, 
2143.924636031312, 1430.5656279636255, 1813.6793243679085, 1974.7697483577397, 
1387.1279529203978, 2194.907673532557, 3009.489186591327, 1569.1288480469218], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [7:44:10<1:47:49, 38.64it/s]global step 750000, trans_decision ep_re 2017.9437704246345

{"global_step": 750000, "eval_re": [2806.9805023455256, 1429.2296726070176, 
1577.0520881239536, 1529.7479745025555, 2187.0651065723423, 1553.8716396270186, 
2834.4643806590393, 1746.9618609738952, 2715.2029484667987, 1798.8615303682], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [7:50:20<1:43:26, 38.67it/s]global step 760000, trans_decision ep_re 2100.9155158273657

{"global_step": 760000, "eval_re": [3133.380599448148, 2387.8720338205694, 
3013.334497498172, 2768.218377350313, 1544.0265465659033, 1480.3760929102134, 
1782.8823697244923, 1575.7202620212724, 1320.877320314997, 2002.4670586195762], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [7:56:30<1:36:55, 39.55it/s]global step 770000, trans_decision ep_re 1422.1822112281604

{"global_step": 770000, "eval_re": [1345.3628064777665, 1086.4829396436667, 
1276.9867784227058, 1512.786455135634, 1352.1063808512304, 1490.9629047401506, 
1856.1827291418053, 1211.547562111399, 1392.6090031877916, 1696.7945525694565], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779996/1000000 [8:02:40<1:32:59, 39.43it/s]global step 780000, trans_decision ep_re 1919.1402617695378

{"global_step": 780000, "eval_re": [2151.630266362111, 1900.701723845306, 
1679.6116578168755, 1652.3165838905563, 1504.0422066062088, 1286.386872488341, 
2287.8019752725877, 2564.8633077726467, 2112.9194765466286, 2051.128547094113], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [8:09:00<1:28:28, 39.56it/s]global step 790000, trans_decision ep_re 1623.0584272515205

{"global_step": 790000, "eval_re": [1801.4567731456705, 997.3814729247509, 
1316.4101669315191, 1604.3231460805928, 1701.4044140180988, 1439.0203453736503, 
1694.9565193177862, 1751.8903658636084, 2426.0283096646017, 1497.712759194927], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [8:15:10<1:23:49, 39.77it/s]global step 800000, trans_decision ep_re 1839.798284569124

{"global_step": 800000, "eval_re": [2359.4593089649593, 2422.9310093496015, 
1934.9616938213164, 1458.6904951478375, 1152.3185176739842, 1303.8504246127754, 
1963.3451224258254, 1967.8368163393698, 1837.8195714199999, 1996.7698859355708],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809996/1000000 [8:21:20<1:20:22, 39.40it/s]global step 810000, trans_decision ep_re 1717.8178400929683

{"global_step": 810000, "eval_re": [1569.2048888634868, 1351.4368828620102, 
2130.349674816653, 2035.48619504323, 1902.9341815128957, 1596.5644945135577, 
2050.2092206876705, 1032.6117480978705, 1786.1318535960004, 1723.2492609363092],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [8:27:30<1:16:18, 39.32it/s]global step 820000, trans_decision ep_re 1324.172753571071

{"global_step": 820000, "eval_re": [2102.560616194597, 1293.27250875618, 
1439.4416955249883, 2287.329154908937, -86.99503722980992, 1241.4904657061295, 
917.7167220151824, 1363.8108370971013, 1480.9131023737255, 1202.1874703636809], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829996/1000000 [8:33:40<1:11:58, 39.36it/s]global step 830000, trans_decision ep_re 1770.8790544768167

{"global_step": 830000, "eval_re": [2329.7324255485278, 1435.758222496259, 
2704.4816880380213, 1552.0597210192109, 1901.5943100897562, 1379.2120526409171, 
1633.3699321080953, 1733.5279831998598, 1604.0391634225482, 1435.0150462049705],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [8:39:50<1:07:55, 39.26it/s]global step 840000, trans_decision ep_re 1847.2271472467394

{"global_step": 840000, "eval_re": [1181.8309324451031, 2285.178880910045, 
2495.2950920243907, 1530.1135555322428, 1202.4053669091088, 1632.800806185828, 
2529.6279340824776, 2737.7574481252745, 1497.805140499181, 1379.45631575374], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [8:46:10<1:03:32, 39.34it/s]global step 850000, trans_decision ep_re 1523.6656602105197

{"global_step": 850000, "eval_re": [1504.555643014417, 1225.4875942502279, 
1280.007524515558, 1876.1191008722274, 1349.574801366437, 1527.200503810408, 
2130.645336416866, 1162.2632790858688, 1821.5339827967298, 1359.268835976458], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [8:52:20<59:33, 39.18it/s]global step 860000, trans_decision ep_re 2062.677573052171

{"global_step": 860000, "eval_re": [2460.692046570467, 1285.7993593474005, 
2068.6591079027967, 2065.6048966603526, 1256.1781829667932, 3002.0311374401203, 
1991.778617605905, 2550.527075177314, 1614.7568540745067, 2330.748452776058], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [8:58:40<55:23, 39.11it/s]global step 870000, trans_decision ep_re 1760.1101860344802

{"global_step": 870000, "eval_re": [1885.7909748316292, 2355.2439964306104, 
1634.357848365676, 1533.1406513318357, 2090.1051527944082, 1514.2434748317255, 
1396.9164760337237, 1465.4560284781323, 1514.1999557039398, 2211.647301543123], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [9:04:50<50:52, 39.32it/s]global step 880000, trans_decision ep_re 1598.6923951588942

{"global_step": 880000, "eval_re": [1824.709704702045, 1274.4818120589669, 
1722.8044491409148, 1717.499067642474, 1265.4090016822736, 1719.8356147030754, 
1726.6125635186834, 1486.722927605973, 1180.5485851829367, 2068.3002253515997], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [9:11:10<46:47, 39.19it/s]global step 890000, trans_decision ep_re 1956.8691453376116

{"global_step": 890000, "eval_re": [1492.7851509077577, 1284.0319108575418, 
2798.424860443287, 1783.587757919053, 1310.0584980910687, 1565.4810316926098, 
1351.7477019419464, 2930.458581862506, 1824.133109572295, 3227.9828500880526], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899996/1000000 [9:17:20<42:23, 39.31it/s]global step 900000, trans_decision ep_re 1538.6597897249046

{"global_step": 900000, "eval_re": [1336.28776714845, 1202.5870307715775, 
1513.350197262049, 1329.2040410525185, 1256.2256121011453, 1692.3777488747419, 
2590.9122777175767, 1311.436113405965, 1221.0002682515162, 1933.216840663507], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909998/1000000 [9:23:40<39:12, 38.26it/s]global step 910000, trans_decision ep_re 1542.7217736613002

{"global_step": 910000, "eval_re": [1293.6700243315286, 1075.5379697360772, 
1339.5727101613556, 2481.7309155698567, 1352.8943887901826, 261.3238537629364, 
1864.8117268870787, 2185.122628397699, 1481.1256818685451, 2091.4278371077407], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [9:29:51<34:53, 38.21it/s]global step 920000, trans_decision ep_re 1715.5130029163836

{"global_step": 920000, "eval_re": [1213.8070380541812, 2318.4547632920876, 
1399.5430392138476, 1271.207120372373, 1634.8905960303723, 1624.1600793110379, 
1438.01683690578, 2279.1722083153404, 2500.1828999903205, 1475.6954476784942], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929998/1000000 [9:36:11<30:26, 38.32it/s]global step 930000, trans_decision ep_re 1445.8346107204804

{"global_step": 930000, "eval_re": [1429.3329039656767, 1252.2194549166618, 
1270.882323286401, 1715.457022707128, 1242.4506726960465, 1216.0274573518473, 
1301.400603024103, 1243.1529406851787, 1971.1366825835528, 1816.2860459882081], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939997/1000000 [9:42:21<26:19, 37.99it/s]global step 940000, trans_decision ep_re 2403.16983329405

{"global_step": 940000, "eval_re": [2010.8816543833836, 1610.2362235760104, 
1993.7330195258278, 2400.243744470877, 1386.9471812500274, 3186.905981727497, 
3125.8201980842314, 2970.7903511480736, 3306.0001154540423, 2040.1398633205265],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949997/1000000 [9:48:41<21:59, 37.90it/s]global step 950000, trans_decision ep_re 1944.3609348415368

{"global_step": 950000, "eval_re": [2690.9588503686878, 2328.605980836061, 
2660.4894018482023, 1407.294188547417, 1372.2947059289331, 1574.6178256165936, 
2151.2606596515857, 1633.6609382857487, 2200.6081761871046, 1423.8186211450375],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959998/1000000 [9:54:51<17:28, 38.17it/s]global step 960000, trans_decision ep_re 1689.8878919669035

{"global_step": 960000, "eval_re": [1297.5177636407786, 1615.6739082223319, 
699.3768085352541, 1367.2688989515423, 1604.974484665942, 2236.900720706511, 
2186.292132479466, 1340.152786151525, 1551.0241311594848, 2999.697285156196], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969998/1000000 [10:01:11<12:42, 39.33it/s]global step 970000, trans_decision ep_re 1855.0241831163578

{"global_step": 970000, "eval_re": [1904.9544680991623, 2245.720035467859, 
1582.7029143961975, 1330.6427610085461, 1428.6730607102852, 1476.3662834938057, 
2245.5969922562526, 2004.6503986092994, 2605.75462320781, 1725.1802939143618], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [10:07:21<08:32, 39.06it/s]global step 980000, trans_decision ep_re 2208.7288686797497

{"global_step": 980000, "eval_re": [1641.3110926187135, 1266.720490782456, 
1302.1244243193582, 2027.2262435804118, 2934.4305363149533, 2385.2310318377845, 
1704.6239183650716, 3196.806411390022, 2799.0981489773203, 2829.716388611406], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [10:13:41<04:14, 39.36it/s]global step 990000, trans_decision ep_re 1423.5686741660224

{"global_step": 990000, "eval_re": [1547.5962335350107, 1184.5343604214574, 
1387.0096132099873, 1169.2139174093131, 1220.4775782677716, 1151.723734250128, 
1631.5065887408557, 2233.3451481587936, 1102.2498912106144, 1608.0296764562943],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999996/1000000 [10:19:51<00:00, 39.55it/s]global step 1000000, trans_decision ep_re 1673.061655959996

{"global_step": 1000000, "eval_re": [1620.7172753678144, 1262.2320065433257, 
1501.256658930066, 2206.973732171676, 1617.8890825062306, 1577.4486507337708, 
1974.6137322905638, 1413.302572853507, 1694.8833708638963, 1861.2994773391076], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [10:20:19<00:00, 26.87it/s]
