
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.05
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [05:40<13:27:28, 20.43it/s]global step 10000, trans_decision ep_re -59.833716572208345

{"global_step": 10000, "eval_re": [-50.89624011169653, -62.051747186322544, 
-24.954389951500364, -62.48737705178874, -67.14899524567093, -46.2945482572581, 
-28.558196167462114, -121.28556527903136, -69.60392041625214, 
-65.05618605510061], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [17:10<13:12:49, 20.60it/s]global step 20000, trans_decision ep_re -421.9180083254179

{"global_step": 20000, "eval_re": [-325.93498589531566, -441.797710218628, 
-447.74480731667876, -340.2014270422906, -388.86366619362906, 
-445.14372120060114, -474.5986785714911, -445.00715182614425, 
-455.27730646785363, -454.6106285215464], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [28:40<13:03:15, 20.64it/s]global step 30000, trans_decision ep_re 1184.5515218493722

{"global_step": 30000, "eval_re": [1272.4559120300144, 1052.4114335056736, 
1593.8317082113851, 1107.568600757662, 1247.2104860119905, 1281.338593813348, 
1073.7173341816454, 1259.5385980773844, 947.7606565865273, 1009.6818953180923], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [40:00<12:45:08, 20.91it/s]global step 40000, trans_decision ep_re 1681.741150443882

{"global_step": 40000, "eval_re": [1715.0821066382546, 1631.9721713912734, 
1811.0219089976908, 1810.0243390980038, 1752.0371571019577, 1600.733231318909, 
1644.529009051578, 1560.0097294701998, 1714.9849064597267, 1577.016944911225], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49999/1000000 [51:30<12:45:26, 20.69it/s]global step 50000, trans_decision ep_re 1062.1170605232276

{"global_step": 50000, "eval_re": [1085.4852239267339, 1021.1558776489404, 
1007.9761844459259, 1188.3140734554681, 1024.61426715808, 958.5377565649629, 
1005.2098209279484, 996.7487742978, 1171.0818025897809, 1162.0468242166348], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [1:03:00<12:45:23, 20.47it/s]global step 60000, trans_decision ep_re 1334.6563455333862

{"global_step": 60000, "eval_re": [1152.3397362949313, 1131.1418216705506, 
1090.9009732508066, 1309.1628580645681, 1263.513100654797, 1071.1939642398795, 
1824.5848713306311, 1502.1751721117857, 1880.1429872594588, 1121.4079704564533],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [1:14:20<12:36:52, 20.48it/s]global step 70000, trans_decision ep_re 1369.0328462810726

{"global_step": 70000, "eval_re": [1214.083016871314, 1712.290024456792, 
746.740030670706, 1108.4859227339603, 1579.9111011590699, 1138.3200200090432, 
1407.4678614628797, 1223.5157525045042, 1937.192747306729, 1622.321985635724], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79998/1000000 [1:25:40<12:08:49, 21.04it/s]global step 80000, trans_decision ep_re 1271.5982781486175

{"global_step": 80000, "eval_re": [1173.4683759256, 1168.2588332325345, 
1217.5832030094152, 1464.0907886966504, 1294.7923414164477, 1230.8069674138383, 
1265.5413746551724, 1629.5309303539336, 1198.8200176356615, 1073.0899491469213],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:37:00<12:06:14, 20.88it/s]global step 90000, trans_decision ep_re 2905.822076113695

{"global_step": 90000, "eval_re": [3553.207491406026, 3285.193848418206, 
3607.995882478693, 1706.818296947544, 3358.227978193488, 1366.2561954653852, 
3472.9771279406705, 3430.173778340736, 2133.991070273986, 3143.3790916722146], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99998/1000000 [1:48:20<12:05:38, 20.67it/s]global step 100000, trans_decision ep_re 1852.1122780608002

{"global_step": 100000, "eval_re": [1575.0760124347796, 1871.6413222915655, 
2677.603202012527, 1977.74962683177, 2021.4353268914463, 1291.6599065750306, 
1947.811108124352, 1376.801684021459, 2399.507732919117, 1381.8368585059534], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:59:40<11:46:45, 20.99it/s]global step 110000, trans_decision ep_re 1718.342225359734

{"global_step": 110000, "eval_re": [1301.2207726826412, 1244.742436342292, 
1872.1940033145586, 2275.8533931052316, 2141.720392888962, 1602.012487106231, 
1621.6920660109886, 1291.211218378065, 2400.4261263541744, 1432.3493574141921], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119998/1000000 [2:11:00<11:39:23, 20.97it/s]global step 120000, trans_decision ep_re 1912.6616700360064

{"global_step": 120000, "eval_re": [1582.6163677346497, 1640.4255216009726, 
1537.481218525677, 2987.6919757832457, 1337.2475285481355, 1536.431218323933, 
1354.7793776299116, 1740.5929062726666, 3259.773860570352, 2149.576725370519], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [2:22:20<11:39:04, 20.74it/s]global step 130000, trans_decision ep_re 1187.3791245886619

{"global_step": 130000, "eval_re": [1519.1585405752708, 2246.4926272145913, 
1250.8396127726264, 1427.6265852925962, -0.5250072480903545, 1360.9261581215615,
-309.4966719247313, 1617.482482309723, 1228.581212377245, 1532.7057063958277], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139999/1000000 [2:33:40<11:33:47, 20.66it/s]global step 140000, trans_decision ep_re 1559.610899247774

{"global_step": 140000, "eval_re": [1725.3550615076997, 1104.026098325179, 
2129.8588802845734, 1180.4834507745263, 1439.3251885439706, 1779.0082278744442, 
1758.6891874356543, 1667.2931268893612, 1546.8033249182918, 1265.2664459240393],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149998/1000000 [2:45:00<11:00:35, 21.45it/s]global step 150000, trans_decision ep_re 1980.8300863839606

{"global_step": 150000, "eval_re": [1384.5331529779883, 1598.4562569179614, 
1702.7187254410026, 2101.1701581639322, 2217.3230142686325, 1937.5915868713082, 
1602.9735901300285, 2019.8029410026772, 2134.9573778808704, 3108.774060185206], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:56:20<11:12:10, 20.83it/s]global step 160000, trans_decision ep_re 2080.905882960292

{"global_step": 160000, "eval_re": [4014.7351039921523, 1353.4545891143064, 
3273.899520056295, 1773.565127042086, 1813.65159542293, 2120.2180254596383, 
2097.901911667491, 1603.881239516876, 1343.6322855389446, 1414.1194317922016], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [3:07:40<11:09:03, 20.68it/s]global step 170000, trans_decision ep_re 3213.1564069465294

{"global_step": 170000, "eval_re": [3628.571391276999, 3474.645906283339, 
3748.387538196016, 2129.3474202314296, 3674.458580436532, 2673.5214912043016, 
3806.7055586948054, 3948.8627693694693, 3672.3689087911084, 1374.6945049812925],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [3:19:00<10:48:27, 21.08it/s]global step 180000, trans_decision ep_re 1468.9020774370724

{"global_step": 180000, "eval_re": [1503.8073132112395, 1530.498924266115, 
1451.4602444866898, 1456.3912391160277, 1430.7105656545787, 1763.5505773640405, 
1786.6448917453642, 1574.921935328742, 799.5328557702683, 1391.5022274276598], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [3:30:20<10:54:08, 20.64it/s]global step 190000, trans_decision ep_re 1690.980838214773

{"global_step": 190000, "eval_re": [1471.2608761449414, 1391.5600849683672, 
2077.4184109525986, 1899.4639685840364, 1629.3432748117257, 1670.0096791281992, 
1443.324149283639, 1922.4616999912528, 1741.2491441556754, 1663.7170941272946], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199998/1000000 [3:41:40<10:36:00, 20.96it/s]global step 200000, trans_decision ep_re 2051.8364399491647

{"global_step": 200000, "eval_re": [1509.7512880317665, 2537.0215136903785, 
2667.9483223702996, 1954.3108209591792, 2078.986379267728, 1893.3892428184622, 
1589.693720980054, 1774.8671745892304, 2677.9126693619846, 1834.4832674225659], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209998/1000000 [3:53:00<10:30:41, 20.88it/s]global step 210000, trans_decision ep_re 2035.4628654388155

{"global_step": 210000, "eval_re": [2585.2409219227115, 1426.0042849483552, 
1541.0978450313644, 1822.5109522645548, 3061.864692174957, 2037.5630550228432, 
2307.437821886239, 1337.0187104266893, 2442.1886444258516, 1793.7017262845875], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [4:04:30<10:30:16, 20.63it/s]global step 220000, trans_decision ep_re 3365.0508383000692

{"global_step": 220000, "eval_re": [3587.60258628544, 3529.9566231789317, 
3098.6882969168278, 3526.362950982534, 3650.4683831052766, 3703.17794972795, 
2700.6511163191126, 2699.038200462852, 3641.5335604025286, 3513.0287156192394], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [4:15:40<10:32:10, 20.30it/s]global step 230000, trans_decision ep_re 2645.5937190902414

{"global_step": 230000, "eval_re": [2045.880816370673, 2958.7358573374154, 
1553.078577715577, 2990.217592882747, 1777.1775480560536, 3321.4730856756114, 
3548.4057203896414, 3640.3569461061275, 2348.7986238678636, 2271.812422500707], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [4:27:10<10:20:51, 20.40it/s]global step 240000, trans_decision ep_re 2534.826247331538

{"global_step": 240000, "eval_re": [1562.5930737019632, 3509.393296371972, 
1222.0060627097512, 2572.1950583239104, 3452.495732833337, 3395.2843264434346, 
3390.8186432407333, 1580.8062968444387, 3178.0242433440544, 1484.6457395017837],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [4:38:30<10:06:27, 20.61it/s]global step 250000, trans_decision ep_re 3291.8449322851584

{"global_step": 250000, "eval_re": [3610.064702473216, 3343.0654185824433, 
3323.430333978083, 3400.423716295889, 3470.5611330685333, 3576.765044733871, 
3264.522045314755, 3347.358908273945, 3599.0974517129466, 1983.1605684179071], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [4:50:00<9:55:53, 20.70it/s]global step 260000, trans_decision ep_re 2964.0509496363807

{"global_step": 260000, "eval_re": [2290.348226304332, 3567.6652499716374, 
3879.076544316901, 2564.3372626292416, 3339.1497696442243, 3946.0956394709547, 
1861.7760964234983, 3906.3639795795193, 2381.7703552408025, 1903.9263727826988],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [5:01:20<9:49:30, 20.64it/s]global step 270000, trans_decision ep_re 1720.0012844662353

{"global_step": 270000, "eval_re": [1347.5119877643629, 2029.1024347719328, 
1533.5995912541746, 1596.3708952534234, 1427.663346982103, 1643.9712351289131, 
3349.551338326619, 520.0992734236017, 2187.2299389713958, 1564.9128027858271], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279998/1000000 [5:12:40<9:33:10, 20.94it/s]global step 280000, trans_decision ep_re 2866.88907342801

{"global_step": 280000, "eval_re": [1513.646672607947, 1711.1832109346894, 
2782.959602778985, 2286.096694731328, 3401.5093236915022, 3639.860265568979, 
2553.1167159919387, 4111.780958362194, 4137.267459984315, 2531.4698296282236], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [5:24:00<9:25:33, 20.92it/s]global step 290000, trans_decision ep_re 2072.7429075570385

{"global_step": 290000, "eval_re": [2206.6752096230744, 1819.8182387696136, 
1622.2340290834609, 1776.9567386999177, 2743.2867797685835, 1511.934480139257, 
1949.9808209402177, 1797.619452569913, 2163.3487512107986, 3135.5745747655446], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [5:35:20<9:24:25, 20.67it/s]global step 300000, trans_decision ep_re 1999.121330559105

{"global_step": 300000, "eval_re": [2134.187938744841, 1579.4028141059184, 
964.0459461717711, 1779.0495491489228, 3839.276990200871, 1230.4139092905689, 
1472.0562449169054, 2913.5900832615216, 2526.3820983447454, 1552.8077314049842],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309998/1000000 [5:46:40<9:16:03, 20.68it/s]global step 310000, trans_decision ep_re 1931.6982651295864

{"global_step": 310000, "eval_re": [1418.7484785379263, 2249.0506227605247, 
1516.8952303306476, 1302.1388623164405, 4110.204244844973, 1444.7037098368014, 
1562.4442649388411, 1500.0655454802848, 1655.6200694427853, 2557.111622806638], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [5:58:00<9:12:57, 20.50it/s]global step 320000, trans_decision ep_re 2571.945369765361

{"global_step": 320000, "eval_re": [2765.122716762069, 1911.4605071437384, 
1650.0626648295138, 2663.532189446998, 3704.4618641878424, 3436.0126793504264, 
1477.7163132217777, 3509.114153000998, 2633.2466473023132, 1968.7239624079373], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329998/1000000 [6:09:20<8:45:28, 21.25it/s]global step 330000, trans_decision ep_re 2880.6829899101476

{"global_step": 330000, "eval_re": [2861.020842472795, 1528.93264339204, 
3848.056038315602, 2904.5236293178077, 636.4886076181369, 1890.147915079606, 
4213.738936930576, 3464.6310191175326, 3243.410876020177, 4215.879390837206], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [6:20:40<8:40:43, 21.12it/s]global step 340000, trans_decision ep_re 2333.235627597951

{"global_step": 340000, "eval_re": [2391.8197058166406, 2323.3399167264515, 
4143.842405550914, 2332.8422875097795, 2002.7184564871752, 3162.532298983973, 
1862.083346862431, 1481.2824756420182, 1739.6253810810929, 1892.270001319033], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349997/1000000 [6:32:00<8:44:50, 20.64it/s]global step 350000, trans_decision ep_re 2992.8474438132725

{"global_step": 350000, "eval_re": [1333.906110263008, 4196.430932367065, 
1577.0724299705705, 1996.5392509842818, 3919.359495112077, 2985.0363596273246, 
2898.1330203403286, 4283.898678805846, 4111.078006556579, 2627.0201541056454], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [6:43:30<8:35:45, 20.68it/s]global step 360000, trans_decision ep_re 2647.9949934949473

{"global_step": 360000, "eval_re": [3958.7064216874182, 2208.550500051328, 
3029.905694311234, 1448.117081246016, 1765.7685554017323, 1780.4830948444594, 
4102.4957057623005, 3941.9148161473454, 2127.504814084414, 2116.503251413226], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [6:54:40<8:14:28, 21.23it/s]global step 370000, trans_decision ep_re 2205.389508234614

{"global_step": 370000, "eval_re": [2407.364989259792, 3631.700711981664, 
1774.998719643109, 1661.2503912211607, 2699.2294646169103, 2554.236582181326, 
1494.3638198488231, 1876.8915541609683, 1500.5543159903395, 2453.304533442045], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [7:06:00<8:16:16, 20.82it/s]global step 380000, trans_decision ep_re 2671.9977430744625

{"global_step": 380000, "eval_re": [1398.4678198169465, 2953.1077773258244, 
4130.3898212539025, 2613.92295901045, 3087.4176399207354, 2628.5444972077685, 
1674.6491541248033, 1895.6084420014945, 4203.582462730386, 2134.2868573523124], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [7:17:20<8:02:35, 21.07it/s]global step 390000, trans_decision ep_re 2480.5471742237255

{"global_step": 390000, "eval_re": [2388.3098774338514, 3135.3603814283742, 
2633.681930446058, 2600.368047056855, 3072.274197627603, 2482.477046985838, 
1854.2651179537393, 1911.1727386168054, 1761.3327228122193, 2966.229681875915], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399998/1000000 [7:28:40<8:00:05, 20.83it/s]global step 400000, trans_decision ep_re 2040.4470040598821

{"global_step": 400000, "eval_re": [1886.4642626876098, 1544.2679154009513, 
1587.701634445912, 1505.5825836246577, 1438.089444550427, 2269.262488718444, 
2424.355173847105, 2171.908439570166, 3902.3046107570262, 1674.5334869965232], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409998/1000000 [7:40:00<7:51:21, 20.86it/s]global step 410000, trans_decision ep_re 1774.703548201509

{"global_step": 410000, "eval_re": [1832.1494262164526, 1642.829280386784, 
2111.2147567772886, 2140.8955594319227, 1962.7490830375266, 1984.3104726208426, 
1275.1442159892, 1295.4560416360987, 1750.2325806876565, 1752.0540652313157], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [7:51:20<7:45:26, 20.77it/s]global step 420000, trans_decision ep_re 1837.2963593002157

{"global_step": 420000, "eval_re": [1506.3712076550123, 2164.4047801769034, 
1451.3341823509174, 1537.914528705071, 1432.5076199294758, 1753.9852463208072, 
1411.7327213038284, 428.0787003757367, 3347.474491943448, 3339.160114240955], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429999/1000000 [8:02:50<7:40:30, 20.63it/s]global step 430000, trans_decision ep_re 1670.4325631495035

{"global_step": 430000, "eval_re": [2637.952514447557, 1964.8240585240344, 
1409.5852475166678, 1701.189376841708, 1499.2766023075046, 1709.4526619413216, 
1588.6269103156708, 1325.2813412934222, 1473.433835744239, 1394.7030825629095], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [8:14:10<7:36:10, 20.46it/s]global step 440000, trans_decision ep_re 1929.301730470585

{"global_step": 440000, "eval_re": [1411.7034098634822, 2571.9882095007665, 
1589.2105948496242, 1367.3899982139521, 2185.7878623832307, 1392.2017341947094, 
1936.8369679642935, 2831.986425104926, 2009.008825840049, 1996.9032767908152], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [8:25:30<7:18:24, 20.91it/s]global step 450000, trans_decision ep_re 2018.0307728317482

{"global_step": 450000, "eval_re": [1737.1232746181568, 1377.8664582461258, 
1380.2003117170666, 1620.6962531858817, 1917.5904722362466, 2762.1357910707898, 
1045.4147875215665, 1991.7373465953137, 4231.632382091963, 2115.910651034371], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [8:36:50<7:14:52, 20.70it/s]global step 460000, trans_decision ep_re 1443.412597025433

{"global_step": 460000, "eval_re": [1283.2483751004445, 1492.9428413741152, 
2.875002385106532, 1678.4129306394384, 1295.1918544744801, 1909.9782302007986, 
1810.9924037975184, 1882.7811068169856, 1699.9735317641803, 1377.7296937012604],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [8:48:00<7:03:02, 20.88it/s]global step 470000, trans_decision ep_re 1753.8183200399967

{"global_step": 470000, "eval_re": [1702.82461423097, 1420.5010945425263, 
1535.6453429817998, 1277.7931822616752, 1655.6853440380476, 1704.5054348771644, 
2628.0429178576915, 1915.183580550221, 1411.397855143066, 2286.603833916804], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479998/1000000 [8:59:20<6:51:27, 21.06it/s]global step 480000, trans_decision ep_re 2485.3444508275666

{"global_step": 480000, "eval_re": [2316.2295543929854, 1989.1130036298657, 
1961.756794366728, 4107.316002996651, 1698.9972245744314, 4025.257086164691, 
1808.1439250122462, 1452.8844808144197, 2001.2213740385755, 3492.525062285072], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489999/1000000 [9:10:40<6:53:06, 20.58it/s]global step 490000, trans_decision ep_re 1928.4265563915665

{"global_step": 490000, "eval_re": [1468.1422229857933, 1621.828342550446, 
1748.3582232771116, 1711.3405680251997, 2044.540231098419, 2091.587209415624, 
1631.152422243232, 2793.5586158061546, 1364.0782836451017, 2809.6794448685846], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499998/1000000 [9:22:00<6:39:23, 20.87it/s]global step 500000, trans_decision ep_re 1655.741842707713

{"global_step": 500000, "eval_re": [2354.4440000221553, 1367.0474672785879, 
1361.2505915478034, 1822.5113530399474, 1503.3697929754533, 1610.1322951630007, 
1774.9946344635898, 1807.6347803434053, 1581.7048531943278, 1374.3286590488583],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [9:33:30<6:34:54, 20.68it/s]global step 510000, trans_decision ep_re 1475.3251932611454

{"global_step": 510000, "eval_re": [2282.8508488079397, 1386.9372326921048, 
1351.2464585683229, 1477.190648489488, -24.724722640369663, 1713.62561821538, 
1405.0506763257004, 1743.2140530954546, 1769.0569909830099, 1648.8041280744235],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [9:44:50<6:22:28, 20.92it/s]global step 520000, trans_decision ep_re 1774.7554597329079

{"global_step": 520000, "eval_re": [1739.8852912011648, 1774.8590125980272, 
2268.158997840532, 2949.2552914777743, 1489.116434567084, 1491.0261764540414, 
1415.1364052450078, 1470.2608209990974, 1637.2622467979202, 1512.5939201484293],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529999/1000000 [9:56:10<6:18:23, 20.70it/s]global step 530000, trans_decision ep_re 1596.1449412454288

{"global_step": 530000, "eval_re": [1640.734459206925, 1765.307108439688, 
841.7418017043983, 1733.931107743159, 1626.3124705001303, 1555.1129608710073, 
1681.6193380102516, 1771.2483687311994, 1703.6933828993774, 1641.7484143481515],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [10:07:20<6:12:33, 20.58it/s]global step 540000, trans_decision ep_re 1645.7654969515122

{"global_step": 540000, "eval_re": [2436.2085255231987, 1507.1756446303068, 
1325.0072185395973, 1461.3766702887053, 1770.9258522442237, 2577.876697247122, 
1921.9380233866339, 605.7981579875963, 758.123271757966, 2093.224907909774], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549998/1000000 [10:18:40<5:56:48, 21.02it/s]global step 550000, trans_decision ep_re 1941.1092053096477

{"global_step": 550000, "eval_re": [2303.0835490411714, 2138.159095044073, 
2245.565063259952, 1342.5273308312712, 1668.9528058783353, 2383.6072993206326, 
2239.8619259882985, 1548.5274776531405, 1342.2837177083713, 2198.5237883712284],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [10:30:00<5:47:40, 21.09it/s]global step 560000, trans_decision ep_re 1533.553224029953

{"global_step": 560000, "eval_re": [1584.610372116456, 1437.336614833846, 
1422.2456372949612, 1403.0525212941511, 1606.3503691275669, 1494.478861866635, 
1400.8757845033062, 1499.494529578739, 1581.17490811279, 1905.9126415710791], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569998/1000000 [10:41:20<5:40:37, 21.04it/s]global step 570000, trans_decision ep_re 2180.7981645863915

{"global_step": 570000, "eval_re": [1739.3255469866551, 2586.1194685181054, 
2674.2526471221104, 2539.214150281085, 3206.7806267845585, 2363.8086196535155, 
3001.2467686481837, 93.25572308508764, 1792.5433007330178, 1811.434794051594], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [10:52:40<5:33:37, 20.98it/s]global step 580000, trans_decision ep_re 1936.683712226291

{"global_step": 580000, "eval_re": [1488.6972745357907, 1510.8799906557654, 
1671.7274536751288, 3404.017459085681, 2516.252461838909, 1383.0423102681737, 
1651.540354280975, 1364.9781187846243, 1759.332347702004, 2616.3693514358556], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [11:04:10<5:27:38, 20.86it/s]global step 590000, trans_decision ep_re 1836.222678414399

{"global_step": 590000, "eval_re": [1839.2673851782615, 2084.187626054933, 
1371.9029346254183, 1453.7744807129832, 1581.7374682672405, 1798.9369350739416, 
1580.7491325712938, 1955.948027922132, 3323.950103434174, 1371.7726903036125], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599999/1000000 [11:15:30<5:28:01, 20.32it/s]global step 600000, trans_decision ep_re 1770.0586610545513

{"global_step": 600000, "eval_re": [1845.3553803569562, 1625.2061759041076, 
1370.7958751377466, 1521.7618282922476, 2028.2937196634073, 2032.1015148475472, 
2278.58347562384, 1400.9285331863248, 1519.2686656877943, 2078.2914418455416], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609997/1000000 [11:26:40<5:14:58, 20.64it/s]global step 610000, trans_decision ep_re 1473.9602987227174

{"global_step": 610000, "eval_re": [1247.6273621722557, 1559.7430991727583, 
1380.4904681569826, 1366.507661169922, 1997.9921591185441, 1556.1096585433697, 
1549.6587199177047, 1345.2390715518495, 1398.2919404458737, 1337.942846977914], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619998/1000000 [11:38:00<5:00:18, 21.09it/s]global step 620000, trans_decision ep_re 1681.0518006154434

{"global_step": 620000, "eval_re": [1321.6844510591443, 1720.5023506647076, 
1940.7284359962969, 1641.8940262755725, 1497.1193209383937, 1770.9270154731814, 
1521.5971106823424, 1267.265361457484, 2206.5794353646497, 1922.2204982426613], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [11:49:20<4:54:24, 20.95it/s]global step 630000, trans_decision ep_re 1503.9875352637516

{"global_step": 630000, "eval_re": [1327.2118794012547, 1452.1027961380278, 
1348.7680396720534, 1400.3650313461414, 1510.9484142483893, 1367.7874016114672, 
1539.1041200850946, 1451.7812389315488, 1647.7220955752073, 1994.0843356283324],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [12:00:40<4:40:06, 21.42it/s]global step 640000, trans_decision ep_re 1700.8647675114305

{"global_step": 640000, "eval_re": [1802.4084359002343, 1624.6326025000615, 
1650.8214069909209, 2069.97996621723, 1485.0216180544626, 1480.6065847854602, 
2050.7444997304256, 1580.4858346453454, 2067.244419171442, 1196.702307118723], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [12:12:00<4:40:28, 20.80it/s]global step 650000, trans_decision ep_re 1898.786206734531

{"global_step": 650000, "eval_re": [2460.049928311978, 1687.4523937803624, 
2045.3699593211059, 1686.1525174494561, 2593.7824325239844, 1935.526872325499, 
1435.906462962736, 1469.2546016909603, 2209.922969949366, 1464.4439290298635], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [12:23:10<4:32:21, 20.81it/s]global step 660000, trans_decision ep_re 1775.4548454296444

{"global_step": 660000, "eval_re": [2269.0035145494617, 1409.5118679143625, 
1838.4522533280863, 1980.4591009657279, 1855.695579382458, 1573.654460554979, 
1781.3163911967863, 1454.0851193406768, 1397.8192193463556, 2194.5509477175506],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [12:34:30<4:24:22, 20.80it/s]global step 670000, trans_decision ep_re 1580.1404185824185

{"global_step": 670000, "eval_re": [1455.639299167366, 1639.3882036294117, 
1879.062859935553, 1593.1684320299546, 1552.9199040384092, 1666.4615753122357, 
1479.9853939297595, 1806.778376947425, 1327.566080409704, 1400.434060424366], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [12:45:50<4:18:28, 20.63it/s]global step 680000, trans_decision ep_re 1903.514919483945

{"global_step": 680000, "eval_re": [1631.571197495317, 1605.4201497906824, 
1950.9697141322742, 2130.8957904835556, 1439.1195739719578, 2350.2971876448505, 
1761.4136141219649, 2268.8766186096486, 1648.4399279346649, 2248.1454206545363],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [12:57:10<4:08:59, 20.75it/s]global step 690000, trans_decision ep_re 2088.1655267519054

{"global_step": 690000, "eval_re": [2001.6268507329237, 1456.9627493805185, 
2822.5863027972596, 1976.3059283109033, 2544.5883178581826, 2567.366816357497, 
1569.7295603584853, 1666.1718411730458, 1522.7394397386593, 2753.5774608115794],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [13:08:40<3:57:54, 21.02it/s]global step 700000, trans_decision ep_re 1461.6825102373473

{"global_step": 700000, "eval_re": [1179.866399772872, 1401.3898532669587, 
1486.1092456630192, 1510.4612332473182, 1431.3082718732032, 1533.2718534600651, 
1445.3568510581024, 1755.368385280914, 1491.3350177925129, 1382.3579909585064], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [13:19:50<3:54:34, 20.61it/s]global step 710000, trans_decision ep_re 1799.9313889729306

{"global_step": 710000, "eval_re": [1574.8986057158068, 1859.793863913953, 
1633.6135198418212, 1882.8107730511879, 1661.4323793878648, 1450.6037401390645, 
2758.159660518493, 1423.0140150446787, 2266.2360203344233, 1488.751311782017], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [13:31:10<3:43:23, 20.89it/s]global step 720000, trans_decision ep_re 1541.5812061844413

{"global_step": 720000, "eval_re": [374.9276605064906, 1440.8267724771415, 
1581.9486068559447, 1453.2621934417689, 1636.8990988716691, 1918.6649420486556, 
1575.4852158719343, 1753.9466023077146, 2303.7972391066246, 1376.0537303564683],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [13:42:30<3:37:23, 20.70it/s]global step 730000, trans_decision ep_re 1848.8350580315869

{"global_step": 730000, "eval_re": [2280.6508681182227, 1805.6306378887543, 
1188.9730088481092, 1580.5945951312463, 1695.058033019521, 2469.4997304195126, 
1981.2752237238492, 1577.2716607454342, 1852.9431518423787, 2056.4536705788405],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [13:53:50<3:23:42, 21.27it/s]global step 740000, trans_decision ep_re 1394.6847009940825

{"global_step": 740000, "eval_re": [1457.1343778160058, 307.7600430912867, 
1570.2858577713966, 1649.1797282334512, 1434.487895904545, 1843.2725941666215, 
1542.7745036146907, 1277.7636742259726, 1537.745541085991, 1326.4427940308642], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [14:05:20<3:20:59, 20.73it/s]global step 750000, trans_decision ep_re 1740.9186550069069

{"global_step": 750000, "eval_re": [2457.738551217679, 2508.124216719139, 
1847.7046285180083, 1594.9030425266874, 1689.8926967747293, 2103.1694453527584, 
1532.3946368010784, 87.43253719882651, 2025.0429043252732, 1562.7838906348868], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [14:16:30<3:12:20, 20.80it/s]global step 760000, trans_decision ep_re 1600.02675784559

{"global_step": 760000, "eval_re": [1490.9049925803345, 1839.6238326477887, 
1564.701303971516, 1568.3876301167163, 1494.1636062125083, 1781.2425470432895, 
1589.508890404153, 1398.4893843517036, 1818.867616060255, 1454.3777750676338], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [14:27:50<3:03:32, 20.89it/s]global step 770000, trans_decision ep_re 1601.200243108456

{"global_step": 770000, "eval_re": [2230.0317295669383, 1287.1423762404215, 
1580.7291241678106, 1416.4642464787464, 1677.560078630132, 1444.0299265325157, 
1852.137988997321, 1738.6584347837927, 1460.6077121139747, 1324.640813572906], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [14:39:10<2:53:30, 21.13it/s]global step 780000, trans_decision ep_re 1581.6181211958292

{"global_step": 780000, "eval_re": [1337.8575240634405, 1580.8777331679748, 
1385.7068987103096, 1270.227319913506, 1666.6980697945799, 2104.6649248373537, 
1554.554738480989, 1614.227084532722, 1409.011107610568, 1892.3558108468483], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [14:50:30<2:51:01, 20.47it/s]global step 790000, trans_decision ep_re 1851.6089433881014

{"global_step": 790000, "eval_re": [1605.5914208630436, 1923.2795028726862, 
1722.091721565369, 2030.8257066741141, 1465.757981599081, 1778.858706239769, 
1617.6899637015538, 2097.307076110452, 1536.620311211061, 2738.0670430438813], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [15:01:51<2:39:38, 20.88it/s]global step 800000, trans_decision ep_re 1801.1500260863274

{"global_step": 800000, "eval_re": [1737.1611685916362, 1704.490827712378, 
1560.784427745917, 1915.0453196187593, 2590.810328960732, 1900.734108179473, 
1655.8328650931983, 1587.015510403025, 1862.8377900856228, 1496.7879144725352], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [15:13:21<2:31:42, 20.87it/s]global step 810000, trans_decision ep_re 1973.278721956621

{"global_step": 810000, "eval_re": [2199.388272969243, 2088.9128266184835, 
1889.5891518095614, 1932.9274116461033, 1429.092465981594, 2125.160541018256, 
2102.354455423781, 1983.226332572972, 1679.0451398128741, 2303.0906217133384], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [15:24:41<2:23:45, 20.87it/s]global step 820000, trans_decision ep_re 1901.6928834543505

{"global_step": 820000, "eval_re": [1367.846301796284, 1737.1924429594353, 
2297.042052861157, 2483.817588724862, 1978.1089645087934, 2291.3846898296856, 
1797.6928663803963, 1330.112111351105, 1805.2936660348337, 1928.4381500969512], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829998/1000000 [15:36:01<2:15:42, 20.88it/s]global step 830000, trans_decision ep_re 1791.1260946781572

{"global_step": 830000, "eval_re": [1966.2378343838006, 1736.0757709295517, 
2022.2087334728758, 2111.1354503479583, 1571.817631034623, 1793.9243736220417, 
1882.4436147289862, 1838.9442624002227, 2034.2768805004296, 954.1963953610787], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [15:47:21<2:06:29, 21.08it/s]global step 840000, trans_decision ep_re 1942.2256794201414

{"global_step": 840000, "eval_re": [2204.5691512394656, 1660.6403882499521, 
2709.199247926576, 2111.66216242521, 1567.3439457121451, 1709.171992289693, 
1854.852341546099, 2094.918772623868, 1836.2020680730625, 1673.6967241153434], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [15:58:41<1:58:57, 21.02it/s]global step 850000, trans_decision ep_re 1857.212370449047

{"global_step": 850000, "eval_re": [1874.4624743452325, 1795.2518402299406, 
2902.2594410737693, 1981.7513534287104, 1602.1884105194636, 1485.3291600821526, 
1529.6738476487565, 2021.5117105188147, 1387.7943247265302, 1991.901141917099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859998/1000000 [16:10:01<1:51:50, 20.86it/s]global step 860000, trans_decision ep_re 1989.5533011345117

{"global_step": 860000, "eval_re": [1909.596526456934, 2103.479103784808, 
2502.0165223850713, 1565.0984493725027, 2392.4980501932428, 2265.78388160027, 
1460.2392253679423, 1594.2676024021487, 2408.39742439505, 1694.1562253871439], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [16:21:21<1:42:51, 21.07it/s]global step 870000, trans_decision ep_re 1349.4428850144789

{"global_step": 870000, "eval_re": [1648.6062651225275, 337.68881538336603, 
1508.0864273592254, 1514.1869120011122, 1461.5391253919413, 836.1674081631098, 
1421.4191663794882, 1414.3601556447247, 1769.4458869114062, 1582.9286877878867],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [16:32:41<1:36:05, 20.81it/s]global step 880000, trans_decision ep_re 1708.6229844313846

{"global_step": 880000, "eval_re": [1554.6890093363043, 1385.3550204150818, 
1417.6309816897876, 1383.5340603161312, 2295.5450830169484, 1969.9095270965536, 
1451.2986199299053, 2053.869890900767, 1372.9943927982686, 2201.4032588140985], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [16:44:01<1:28:12, 20.78it/s]global step 890000, trans_decision ep_re 2162.167242394251

{"global_step": 890000, "eval_re": [2651.383705114173, 2337.5021121216932, 
2129.6394354882095, 2231.8948431170497, 2648.235145687742, 2183.9064672995405, 
1470.4859689601703, 1981.5349808494295, 2022.3922231328718, 1964.6975421716306],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [16:55:21<1:19:31, 20.96it/s]global step 900000, trans_decision ep_re 1812.6085634528529

{"global_step": 900000, "eval_re": [1722.715805559092, 2416.3665728546102, 
1892.6651857856648, 1607.898533602685, 2270.485186032254, 1420.5878202916697, 
1529.2570040816122, 1384.2873379164216, 2301.069450648657, 1580.7527377558615], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [17:06:41<1:12:29, 20.69it/s]global step 910000, trans_decision ep_re 2131.373943462089

{"global_step": 910000, "eval_re": [2074.041841320281, 1694.3693304345506, 
2361.718653609272, 2667.8955394972295, 2289.4799984902274, 2121.885634387729, 
1570.624979094334, 2200.7771665955247, 1796.771218275561, 2536.1750729161818], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919999/1000000 [17:18:01<1:03:50, 20.88it/s]global step 920000, trans_decision ep_re 1769.9481383569673

{"global_step": 920000, "eval_re": [1907.6711944003312, 1361.6155155138297, 
1765.3405218822616, 1512.8424959603067, 1726.9555332818406, 2262.9482895150495, 
2421.432887094508, 1279.7741228989028, 1992.969636039951, 1467.9311869826934], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [17:29:21<55:59, 20.84it/s]global step 930000, trans_decision ep_re 1794.4942277811442

{"global_step": 930000, "eval_re": [1364.776182126189, 1878.94748948692, 
1837.7260473777694, 1570.2101311673682, 1743.1173359077488, 1531.6139079034078, 
1436.2387272681824, 2482.6049832677395, 1866.0643807446404, 2233.6430925614773],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [17:40:41<48:51, 20.46it/s]global step 940000, trans_decision ep_re 1769.050975986324

{"global_step": 940000, "eval_re": [1705.8166288507532, 1793.0345568638293, 
1724.3804245493209, 2100.8066270308695, 1788.2367360684439, 1878.2411885744464, 
1654.1219915111726, 1536.5544941005662, 1469.451868403328, 2039.8652439105092], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949998/1000000 [17:52:01<41:25, 20.12it/s]global step 950000, trans_decision ep_re 1645.2018948550976

{"global_step": 950000, "eval_re": [1624.3945587410633, 1475.3225144420248, 
1529.7846901089147, 2045.0936789072746, 1682.6175372954274, 1636.7211688106127, 
1360.8456078574247, 1435.2359406063401, 1564.1196669998217, 2097.8835847820724],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [18:03:41<32:20, 20.61it/s]global step 960000, trans_decision ep_re 1437.745601876692

{"global_step": 960000, "eval_re": [1609.8309040613892, -50.6546167300821, 
1922.9151883455993, 1595.7235198320236, 1519.9084655313989, 1570.5933662239747, 
1638.3045376038442, 1408.3827259256088, 1461.7211137551992, 1700.7308142179666],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [18:15:01<24:16, 20.60it/s]global step 970000, trans_decision ep_re 1897.5100035270302

{"global_step": 970000, "eval_re": [2481.115403665638, 1366.9300910577572, 
1736.1131397357476, 2462.109077848321, 2003.8281588221805, 2075.5504044663116, 
1519.1306024754285, 1748.3060677703727, 1838.9197179624073, 1743.0973714661393],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [18:26:21<16:01, 20.81it/s]global step 980000, trans_decision ep_re 1637.7608763916126

{"global_step": 980000, "eval_re": [1361.3287073573547, 1556.2211693788768, 
1243.373610236296, 2031.7131883426223, 1596.5109998188518, 1522.2038084039282, 
1569.8447195996907, 1729.5586310861784, 1753.9787853630046, 2012.875144329322], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [18:37:51<08:04, 20.64it/s]global step 990000, trans_decision ep_re 1837.1245768633933

{"global_step": 990000, "eval_re": [1839.8465323596768, 1633.144027244159, 
1674.5740336457932, 1671.4189299670838, 1486.0783959993946, 2103.0790299955584, 
2498.4837731227217, 1880.2779434468873, 1689.3422292752791, 1895.0008735773788],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [18:49:11<00:00, 20.71it/s]global step 1000000, trans_decision ep_re 1634.0808794015343

{"global_step": 1000000, "eval_re": [2006.2907590112104, 1361.6958272257864, 
1554.5485448149398, 1791.6398423395099, 1541.0360724462137, 1681.3753411262333, 
1692.9941284246204, 1588.8499975922127, 1488.6556015633491, 1633.7226794712674],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [18:49:44<00:00, 14.75it/s]
