
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 32,
    'delayspec': 'markov(4, 32, [[249, 1], [1, 31]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(ConstantDelay4, ConstantDelay32, [[0.996, 
0.004], [0.03125, 0.96875]])
  1%|          | 9998/1000000 [05:30<12:37:16, 21.79it/s]global step 10000, trans_decision ep_re -138.32217160462784

{"global_step": 10000, "eval_re": [-115.48394878248592, -113.45257918660707, 
-141.7301648336795, -145.31059849138254, -132.58754098289452, 
-141.57581120015791, -174.9883433623198, -96.09065150918397, -161.0857893718389,
-160.9162883257281], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [16:30<12:38:32, 21.53it/s]global step 20000, trans_decision ep_re 150.71898565776806

{"global_step": 20000, "eval_re": [192.64413559255578, 159.65745926768952, 
148.31856639255443, 143.16634841922067, 89.48549098599774, 172.8159581898157, 
170.34859356861108, 148.27097206693767, 141.60018498424185, 140.88214711005597],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [27:20<12:28:35, 21.60it/s]global step 30000, trans_decision ep_re 1151.7281261073194

{"global_step": 30000, "eval_re": [1027.9178949917434, 864.1790726398291, 
1213.631054362162, 1106.013744281572, 911.4136057472105, 1712.8173073344033, 
1254.714608719221, 1212.4115461401025, 1265.7412928215945, 948.4411340353556], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [38:20<12:24:30, 21.49it/s]global step 40000, trans_decision ep_re 1222.7754344718446

{"global_step": 40000, "eval_re": [1275.8351220542502, 1169.1437035467948, 
1107.9494169212753, 1287.0002291132412, 1128.1420802180685, 1250.1939406375443, 
1292.2979174802667, 942.1881785626475, 1619.4188936861653, 1155.5848624981925], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [49:20<12:17:22, 21.47it/s]global step 50000, trans_decision ep_re 1105.9226291316277

{"global_step": 50000, "eval_re": [943.103379029393, 922.4798601279979, 
1093.4151127844918, 836.0965777772295, 961.0114811950391, 869.2778357368577, 
1448.5334758015304, 1402.3344252918162, 1551.7081481066878, 1031.2659954652331],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59998/1000000 [1:00:20<12:02:16, 21.69it/s]global step 60000, trans_decision ep_re 1201.2426372082982

{"global_step": 60000, "eval_re": [1184.1191271838304, 1039.13975994692, 
1663.7561145845202, 1434.3561736745535, 19.334599052672623, 1101.9606901448003, 
1256.7055322179153, 1178.9080537498846, 1504.3800015743377, 1629.766319953547], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [1:11:20<11:57:52, 21.59it/s]global step 70000, trans_decision ep_re 1087.269719875418

{"global_step": 70000, "eval_re": [1074.1836342236654, 1101.1065575225657, 
1048.5992579908814, 1031.118526678615, 1169.2077527682816, 1224.5404258970077, 
921.6166953854387, 1139.804120981821, 929.7216881961973, 1232.7985391097072], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79998/1000000 [1:22:20<11:46:22, 21.71it/s]global step 80000, trans_decision ep_re 1281.833029385618

{"global_step": 80000, "eval_re": [1012.5945681458679, 1123.7846946791167, 
1339.0364751569819, 1416.5209755951303, 1605.111925648795, 1300.1840041013004, 
1276.7245736916539, 1184.2010418864488, 1433.4377698354915, 1126.7342651153929],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:33:20<11:44:38, 21.52it/s]global step 90000, trans_decision ep_re 1583.1822895880318

{"global_step": 90000, "eval_re": [1582.7171140475568, 1280.3223714192866, 
1254.273575524228, 1679.7289989822953, 1571.6612132840785, 1215.0928518086405, 
1555.561018185116, 2151.907634942479, 1858.0229827262833, 1682.5351349603568], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:44:10<11:42:11, 21.36it/s]global step 100000, trans_decision ep_re 1295.0099877759435

{"global_step": 100000, "eval_re": [1669.7256051516965, 1501.3311533751894, 
1134.7445598329978, 1131.6620966571534, 1163.7083853995634, 1325.3287380988215, 
1117.0131156273872, 1234.7590593220773, 1299.599632289644, 1372.2275320049043], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109999/1000000 [1:55:20<11:31:03, 21.46it/s]global step 110000, trans_decision ep_re 1459.1176104704105

{"global_step": 110000, "eval_re": [1435.0992905930852, 2221.1971201615447, 
965.8515944527827, 1072.2617840759744, 2066.149920427742, 1639.0727881790417, 
2092.942513190151, 1097.5523276521328, 896.0093300557711, 1105.0394359158806], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [2:06:10<11:25:49, 21.39it/s]global step 120000, trans_decision ep_re 1489.7516982767274

{"global_step": 120000, "eval_re": [1357.9753459363346, 1216.8768253988756, 
1158.7626230228504, 2390.501321460819, 1578.0914981021924, 793.8800925703019, 
1361.6678272752129, 993.346531023603, 2667.63237586614, 1378.782542110944], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [2:17:10<11:14:25, 21.50it/s]global step 130000, trans_decision ep_re 1702.1179936367728

{"global_step": 130000, "eval_re": [2263.1097764380556, 2218.5324358711828, 
1125.5547898491666, 1344.975996341879, 2575.144568304935, 1495.1490000202539, 
1685.487342453304, 1195.817084543503, 1951.706800144333, 1165.7021424011136], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [2:28:10<11:14:52, 21.24it/s]global step 140000, trans_decision ep_re 1243.416233802132

{"global_step": 140000, "eval_re": [1547.0255068516879, 1400.6732617258322, 
311.6036821772722, 1027.1255408945908, 1611.3890191631115, 1366.2388415712994, 
1205.5321637062093, 1187.5134837741252, 1118.4900124576338, 1658.5708256995567],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149998/1000000 [2:39:10<11:01:40, 21.41it/s]global step 150000, trans_decision ep_re 1649.647731356116

{"global_step": 150000, "eval_re": [2201.4419357094202, 1740.684083417912, 
1972.5667224945312, 2142.644429060493, 1577.0848235449919, 1990.7719357001295, 
1311.0693419044835, 1410.8087260068583, 1015.8041398293894, 1133.6011758929512],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:50:10<10:45:08, 21.70it/s]global step 160000, trans_decision ep_re 1320.8046288166097

{"global_step": 160000, "eval_re": [1103.595475665642, 1767.6067712556244, 
1168.8092670686037, 1315.0904976196405, 1186.067489688561, 1153.4371938984937, 
1166.8895073110489, 1478.2371038577257, 1365.9323361276345, 1502.3806456731236],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [3:01:10<10:35:29, 21.77it/s]global step 170000, trans_decision ep_re 1427.3683277151144

{"global_step": 170000, "eval_re": [1438.7629996367566, 1828.7024837604308, 
1136.0285712385441, 1194.916301872175, 2302.021587311008, 1184.304853220561, 
1325.8797768938982, 1193.998461859101, 1720.7245541748516, 948.3436871838192], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179999/1000000 [3:12:10<10:30:26, 21.68it/s]global step 180000, trans_decision ep_re 1583.8772893300206

{"global_step": 180000, "eval_re": [488.9002489486509, 1625.7363641753752, 
1643.188998739651, 1337.4081129557728, 1314.2032504461783, 2091.3965121905353, 
1656.5302261089982, 2081.980760044417, 1400.1611131349246, 2199.267306555706], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189999/1000000 [3:23:10<10:33:32, 21.31it/s]global step 190000, trans_decision ep_re 1557.1990160815774

{"global_step": 190000, "eval_re": [1687.701054527642, 2487.4991676191958, 
2198.4990517437636, 1122.374574622093, 1296.1546900047315, 1366.3626710024653, 
1405.7746645948405, 1493.7825587277055, 1351.2248941947532, 1162.6168337785844],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [3:34:00<10:17:19, 21.60it/s]global step 200000, trans_decision ep_re 1923.4960523633988

{"global_step": 200000, "eval_re": [1871.2089994174885, 2537.1526931613325, 
2484.124897089522, 1984.1617497174125, 1849.7548247777659, 1197.124464689247, 
2159.102662468071, 1606.8097205088438, 1716.3780611816615, 1829.1424506226444], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209998/1000000 [3:45:00<9:56:14, 22.08it/s]global step 210000, trans_decision ep_re 1795.5073639458813

{"global_step": 210000, "eval_re": [2639.7736542894854, 1954.6177506311603, 
1075.2409563956774, 1084.3091632652038, 1143.7326357728348, 2201.632035273632, 
1832.2411388851563, 1585.785659527568, 2879.759832281397, 1557.9808131366974], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [3:55:50<10:01:56, 21.60it/s]global step 220000, trans_decision ep_re 1454.8271185729702

{"global_step": 220000, "eval_re": [1493.5638335559638, 1371.8505967519257, 
1324.709523897517, 1799.94399387733, 1275.1450814692096, 1200.540595622859, 
1293.9864805100258, 1337.805598428285, 1643.4862433996807, 1807.2392382169062], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229999/1000000 [4:06:50<9:58:27, 21.44it/s]global step 230000, trans_decision ep_re 1509.6231635577205

{"global_step": 230000, "eval_re": [1866.0020372999525, 1750.8352264560276, 
1359.303396079265, 1337.3238857353874, 1761.2064573270814, 1443.6685355355185, 
1565.8144980182392, 1369.9119312483735, 1461.3943341415438, 1180.771333735818], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239997/1000000 [4:17:40<9:48:31, 21.52it/s]global step 240000, trans_decision ep_re 1515.2957393535173

{"global_step": 240000, "eval_re": [1272.194506002457, 2216.4724222351397, 
1213.2700652364815, 2145.3403733191262, 1057.348258323088, 1456.837039447157, 
1280.991519150877, 1452.3860865449528, 1136.764414272954, 1921.3527090029409], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249999/1000000 [4:28:30<9:36:15, 21.69it/s]global step 250000, trans_decision ep_re 1571.7334503701973

{"global_step": 250000, "eval_re": [2062.635108658866, 1369.7178110529126, 
1090.5427172944603, 2508.8836801635116, 1708.967489272451, 1680.3971812191717, 
576.1352743313251, 2049.4116771303593, 1652.3717834888569, 1018.2717810900576], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [4:39:30<9:35:05, 21.45it/s]global step 260000, trans_decision ep_re 1401.261356543321

{"global_step": 260000, "eval_re": [1104.2610045903423, 1603.1320126419841, 
1220.6026968189785, 991.8210358725155, 1302.348469327404, 1569.891618543234, 
2338.954848796467, 1381.1224737891946, 1329.5679102481984, 1170.9114948048896], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [4:50:20<9:24:43, 21.54it/s]global step 270000, trans_decision ep_re 1310.2492408685966

{"global_step": 270000, "eval_re": [782.0098107153505, 1287.8312519585436, 
1424.4751247227598, 1281.601049152104, 895.5543784961477, 839.6087459790664, 
1120.2055506825252, 2156.6933168540554, 1314.8838548258288, 1999.6293252995863],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [5:01:20<9:13:19, 21.69it/s]global step 280000, trans_decision ep_re 1189.999791294901

{"global_step": 280000, "eval_re": [1180.2661549212398, 1122.4889434100257, 
1516.0255758006062, 1404.9173724784441, 1068.0044820204507, 1109.0093345679136, 
1057.7997691191547, 1014.9186643877188, 1267.6043865319643, 1158.9632297114915],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289997/1000000 [5:12:10<9:02:02, 21.83it/s]global step 290000, trans_decision ep_re 1355.454548260726

{"global_step": 290000, "eval_re": [1246.6597029252016, 1855.2478244993713, 
1048.3747062186296, 1172.8958680693274, 2329.0903821351103, 1290.927056843755, 
1137.9099324017436, 1113.414133009031, 1116.1235207783425, 1243.9023557267467], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [5:23:00<9:03:21, 21.47it/s]global step 300000, trans_decision ep_re 1553.2367059113562

{"global_step": 300000, "eval_re": [267.4394478446968, 2720.1522711887374, 
1404.6872385247643, 1594.5880411014805, 1701.2484685236095, 1238.041530623865, 
1510.3011348356563, 1101.4395986389009, 2659.9359543951578, 1334.5333734366939],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309997/1000000 [5:34:00<8:56:28, 21.44it/s]global step 310000, trans_decision ep_re 1466.7150236953166

{"global_step": 310000, "eval_re": [1572.3388602478258, 1231.324407577993, 
2718.5764307581558, 2515.252512886303, 1472.9249691051004, 1262.3552525051641, 
1231.5235826154046, 296.85588461755447, 1284.788946636144, 1081.2093900035213], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [5:44:50<8:44:55, 21.59it/s]global step 320000, trans_decision ep_re 1343.1969342997504

{"global_step": 320000, "eval_re": [1495.6438670216787, 1626.0698733845618, 
1392.6764560663644, 1521.9970737769574, 1125.8652545367386, 1140.6007753762108, 
1194.1541875685175, 1379.8986287104844, 1105.5514685258333, 1449.5117580301585],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329998/1000000 [5:55:50<8:37:23, 21.58it/s]global step 330000, trans_decision ep_re 1710.231908541216

{"global_step": 330000, "eval_re": [1557.9301626414556, 1587.0723260724212, 
1761.6927624256782, 1230.055235045017, 2421.716804938268, 1475.2629939677681, 
2138.1877265696426, 1328.816620069835, 1921.7896725204675, 1679.7947811616057], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [6:06:40<8:30:16, 21.56it/s]global step 340000, trans_decision ep_re 1460.6821573994107

{"global_step": 340000, "eval_re": [1947.5084622587538, 2071.7619509248834, 
1477.6110461163203, 1123.8621118149363, 968.2724872461506, 1206.631832688278, 
1184.1780024619572, 1190.8569315578272, 1672.7764114972417, 1763.3623374277593],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349999/1000000 [6:17:30<8:19:37, 21.68it/s]global step 350000, trans_decision ep_re 1363.677213035384

{"global_step": 350000, "eval_re": [1319.6751461920855, 1024.4614118378872, 
1666.55409845551, 1149.0964868247017, 1851.8752669319529, 1740.6150117020202, 
1231.729728877871, 1147.0768995169058, 1294.6761361651781, 1211.0119438497284], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359997/1000000 [6:28:30<8:10:43, 21.74it/s]global step 360000, trans_decision ep_re 1423.9959925853123

{"global_step": 360000, "eval_re": [1073.7451271875498, 1035.1344425362458, 
2062.2044127303175, 1642.7480193471083, 1665.2664579742548, 1448.4542179752134, 
1403.1544212413717, 2047.5245355701045, 1404.2824854590979, 457.44580583185797],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [6:39:20<8:03:19, 21.72it/s]global step 370000, trans_decision ep_re 1389.9322274108076

{"global_step": 370000, "eval_re": [1086.48242398051, 1255.5797989445657, 
1683.8184657733639, 1849.206042529146, 1220.4114467079266, 1114.4265462053074, 
1155.7623110714624, 1171.8181860213374, 1534.795504746553, 1827.0215481279038], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [6:50:20<7:58:11, 21.61it/s]global step 380000, trans_decision ep_re 1501.1612123585007

{"global_step": 380000, "eval_re": [1279.9262183173344, 2639.8643507327747, 
1083.022750627094, 1153.2380054368627, 1321.8878267606476, 1447.38738641818, 
2103.773597440626, 1028.7763016543515, 1285.7256955830542, 1668.0099906140838], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [7:01:10<7:46:52, 21.78it/s]global step 390000, trans_decision ep_re 1353.558838199873

{"global_step": 390000, "eval_re": [2061.2071346410103, 1298.26494178343, 
1603.08352717478, 1293.1264042431085, 1108.3030607220849, 1130.5793272496962, 
1287.8366110072686, 1461.5485968289588, 1115.325531239317, 1176.3132471090755], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [7:12:00<7:39:51, 21.75it/s]global step 400000, trans_decision ep_re 1267.5761809832475

{"global_step": 400000, "eval_re": [1017.8181212337167, 1104.0598934391837, 
1149.4267533965783, 1260.5241052352974, 1231.7993281288125, 1429.8622829081376, 
1408.9790469215975, 1153.4637559416553, 1808.0104042904618, 1111.8181183370348],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [7:23:00<7:29:43, 21.87it/s]global step 410000, trans_decision ep_re 1507.2367333579225

{"global_step": 410000, "eval_re": [1466.5124625383312, 1259.8637918814882, 
1243.2224005841108, 2924.234408887611, 1105.0917092722061, 1179.8187894620307, 
1190.4443149619774, 1256.8486698214722, 1431.724889158718, 2014.605897011277], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [7:33:50<7:21:18, 21.90it/s]global step 420000, trans_decision ep_re 1375.7420109396915

{"global_step": 420000, "eval_re": [1386.23600403268, 1117.4524870703126, 
1524.6999086718295, 1102.6435824028856, 1908.873710505397, 1041.7985322817883, 
1431.7397235709348, 1272.5465143020901, 1626.1451150434616, 1345.2845315155344],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429997/1000000 [7:44:40<7:17:17, 21.72it/s]global step 430000, trans_decision ep_re 1813.0015031441337

{"global_step": 430000, "eval_re": [1479.6697465723778, 1171.0701156972093, 
1393.874688113796, 2943.089294343171, 1420.027931147083, 1474.4086650329757, 
2442.949741841057, 1442.201936405689, 2339.6567237916606, 2023.066188496316], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [7:55:40<7:13:03, 21.55it/s]global step 440000, trans_decision ep_re 1456.7822619754618

{"global_step": 440000, "eval_re": [1846.4135522214822, 2237.6349502484236, 
1149.3441659008117, 1575.3155826449843, 596.5678365483849, 1022.4272437655749, 
2419.901018620469, 1446.2481498194581, 1062.1643769358943, 1211.8057430491338], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [8:06:30<7:05:53, 21.52it/s]global step 450000, trans_decision ep_re 1119.1597825097185

{"global_step": 450000, "eval_re": [831.7508483387162, 1250.4871907893182, 
978.2051796459157, 1002.0048236391373, 1234.1169177590816, 1165.71371403357, 
1200.1784089799287, 1087.104181821996, 1381.92567860971, 1060.1108814798124], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [8:17:20<6:53:46, 21.75it/s]global step 460000, trans_decision ep_re 1378.6209916227974

{"global_step": 460000, "eval_re": [1204.66467499053, 1229.763321756592, 
1333.3145320875844, 1556.1000355267763, 1680.033354802955, 1146.305330547487, 
1762.3942127427676, 1324.3341812173596, 1344.4378500824234, 1204.8624224734972],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469999/1000000 [8:28:20<6:48:10, 21.64it/s]global step 470000, trans_decision ep_re 1335.5390151907245

{"global_step": 470000, "eval_re": [1285.61832286761, 1230.0972565765762, 
1135.7749341809138, 1363.124175453515, 1612.6830810927008, 1163.3040825536416, 
1247.797935974437, 1448.7069999057576, 1712.155992955103, 1156.1273703469915], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [8:39:10<6:42:44, 21.52it/s]global step 480000, trans_decision ep_re 1454.0288536531766

{"global_step": 480000, "eval_re": [1781.2652343521884, 1677.7159719870542, 
1210.0721532275136, 1086.738197615097, 1460.1686593414513, 1952.8016123258906, 
1017.3578984256652, 1467.8974547445976, 1416.6521648621144, 1469.619189650192], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489998/1000000 [8:50:10<6:26:13, 22.01it/s]global step 490000, trans_decision ep_re 1714.0691024449934

{"global_step": 490000, "eval_re": [1852.7002449688198, 1778.641146196404, 
1908.8223209716846, 2374.5197401933838, 2252.875828458726, 1934.2378958222987, 
1452.493751304165, 1482.206109652963, 961.1387384116532, 1143.0552484698371], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499997/1000000 [9:01:00<6:25:19, 21.63it/s]global step 500000, trans_decision ep_re 1243.8301046501285

{"global_step": 500000, "eval_re": [1116.4133472164008, 1310.1826233515715, 
1292.2561426691864, 1595.9579182883513, 1160.2049747336466, 1218.320930790771, 
1017.3082430280184, 1008.7320500155491, 1487.7636909943528, 1231.161125413437], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [9:12:00<6:21:32, 21.40it/s]global step 510000, trans_decision ep_re 1310.2000992261333

{"global_step": 510000, "eval_re": [1164.534817897119, 1481.0525333117214, 
1219.2203683831008, 1203.680429096698, 1558.4750780216564, 1557.5521613072074, 
1106.161818825405, 1224.5086544460476, 1504.72282255053, 1082.0923084218487], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [9:22:50<6:01:45, 22.11it/s]global step 520000, trans_decision ep_re 1393.2048585612026

{"global_step": 520000, "eval_re": [1147.6781796904443, 1095.2207799971384, 
1163.919392066048, 1191.5732171172879, 2131.01505494383, 1187.5765204086786, 
1486.5974001969612, 2125.565260507548, 1250.7721456813872, 1152.1306350027037], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529998/1000000 [9:33:40<5:56:56, 21.95it/s]global step 530000, trans_decision ep_re 1573.6398514025427

{"global_step": 530000, "eval_re": [1171.2706093015452, 1596.4501764634254, 
1578.578408782724, 2358.706750468608, 1386.8663869557442, 1078.0689295058214, 
1384.0115982748862, 1549.8090379489147, 1252.436154947886, 2380.2004613758745], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [9:44:30<5:47:37, 22.05it/s]global step 540000, trans_decision ep_re 1389.7438844556668

{"global_step": 540000, "eval_re": [1084.6722027568283, 1125.1374771403391, 
1412.0281194620438, 1560.0271422599617, 1707.3283641078021, 1112.1891890323725, 
1180.7819244341247, 1157.3136357282526, 1171.139750151896, 2386.821039483047], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [9:55:20<5:44:09, 21.79it/s]global step 550000, trans_decision ep_re 1406.472829713752

{"global_step": 550000, "eval_re": [1318.3079942705715, 1710.623931407405, 
1158.1172267727418, 1277.7819648641214, 981.8631751414671, 2622.30739889751, 
1575.1241882644351, 1245.0690683253551, 1076.325265538525, 1099.2080836553846], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559999/1000000 [10:06:20<5:39:17, 21.61it/s]global step 560000, trans_decision ep_re 1654.3084864618882

{"global_step": 560000, "eval_re": [2628.2261838180866, 1761.608071370664, 
1688.5359503559441, 1784.9745967383838, 1289.190743187814, 2359.3370457251667, 
1076.743221478213, 1559.5524479163578, 1193.9726971043708, 1200.9439069238786], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569998/1000000 [10:17:10<5:30:29, 21.69it/s]global step 570000, trans_decision ep_re 1305.7552802940154

{"global_step": 570000, "eval_re": [1104.2208224913284, 1279.36428659368, 
1233.3976537954156, 1142.3014784715676, 1786.5723549801078, 1237.9759841180073, 
1268.6539723982335, 1221.7379692957181, 1273.2688849522578, 1510.0593958438358],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [10:28:00<5:24:25, 21.58it/s]global step 580000, trans_decision ep_re 1663.6730671537698

{"global_step": 580000, "eval_re": [1194.1204324550192, 1592.4639839581973, 
1881.688703950203, 1197.249971218285, 1416.0413056718196, 1596.3675846275614, 
1596.3715940021561, 1579.727963851179, 2595.966724672331, 1986.7324071309458], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589999/1000000 [10:38:50<5:15:35, 21.65it/s]global step 590000, trans_decision ep_re 1496.2266593276258

{"global_step": 590000, "eval_re": [2313.2409732752103, 1773.628982250355, 
1189.7204649880798, 1668.0579486281895, 1101.5372304320874, 1284.7932635201967, 
1431.4127630049334, 1023.8171037764031, 1125.7195588971035, 2050.338304503699], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599998/1000000 [10:49:50<5:04:48, 21.87it/s]global step 600000, trans_decision ep_re 1560.724675903491

{"global_step": 600000, "eval_re": [1271.307994025968, 1771.5858865149603, 
1740.595723313152, 1212.8044735463945, 2743.1141407263603, 1729.4273576675062, 
1342.468272323151, 1193.1495246430447, 1129.1879310005183, 1473.6054552738547], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [11:00:40<5:00:15, 21.65it/s]global step 610000, trans_decision ep_re 1485.0789462378245

{"global_step": 610000, "eval_re": [1106.3481638579628, 1928.8397066030514, 
1471.2568958477727, 1044.273550128156, 2449.2504162996124, 1280.8013496196131, 
1732.8903385165486, 1563.3383909316806, 1145.8431997275788, 1127.9474508462674],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [11:11:30<4:52:08, 21.68it/s]global step 620000, trans_decision ep_re 1581.2768467741284

{"global_step": 620000, "eval_re": [1207.3252211659053, 1757.8122840364422, 
1120.7551073952263, 2402.995111410463, 1808.4580069952128, 1766.925204662707, 
1162.3782870495297, 1804.9529585973812, 1229.1108304846925, 1552.0554559437219],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629999/1000000 [11:22:30<4:43:31, 21.75it/s]global step 630000, trans_decision ep_re 1272.8086327784645

{"global_step": 630000, "eval_re": [897.2940292875357, 1631.1766444648988, 
1335.5973856491385, 938.2146887936856, 1212.5474144189552, 1175.244964809998, 
1588.8148605871916, 983.3911850556535, 1122.9638953358356, 1842.8412593817511], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639997/1000000 [11:33:20<4:36:37, 21.69it/s]global step 640000, trans_decision ep_re 1412.212351112097

{"global_step": 640000, "eval_re": [1477.9505290058191, 1580.838945080196, 
1233.8679336922464, 1183.3234545053153, 1531.3516526463713, 1975.2454983082357, 
991.0468273755708, 1146.7676089616168, 1554.8231898204838, 1446.9078717251155], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649999/1000000 [11:44:10<4:28:44, 21.71it/s]global step 650000, trans_decision ep_re 1662.7992236033253

{"global_step": 650000, "eval_re": [1687.3092906923653, 1134.8189206607046, 
2048.3594119601594, 2464.918340520651, 1703.1914340246772, 1193.7100531951946, 
2049.3170809406215, 1727.8341617269605, 1244.3349784186528, 1374.1985638932692],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [11:55:10<4:20:07, 21.78it/s]global step 660000, trans_decision ep_re 1305.7776447226572

{"global_step": 660000, "eval_re": [1262.764164530258, 1027.3017257809738, 
1865.1429863707124, 1247.9168381327174, 1446.1721431173014, 1334.9823427274723, 
1303.500923919091, 1179.8576624355217, 1286.373875989052, 1103.76378422347], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669998/1000000 [12:06:00<4:13:46, 21.67it/s]global step 670000, trans_decision ep_re 1317.1962463689556

{"global_step": 670000, "eval_re": [1091.2875592940818, 1235.6370653576116, 
1845.5546485378043, 1207.423148950251, 1128.6264989246977, 1224.5822581623668, 
1759.3404356938474, 1393.672466760303, 1093.3296083961905, 1192.5087736124017], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [12:16:50<4:05:00, 21.77it/s]global step 680000, trans_decision ep_re 1534.9559273746897

{"global_step": 680000, "eval_re": [1265.9762409769946, 1396.0675280025416, 
1252.1161159433952, 1051.263028053681, 1307.9955408028654, 2271.6362198667134, 
1508.6284686896768, 1359.1734482545198, 2119.089116605715, 1817.6135665507945], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [12:27:40<3:59:28, 21.57it/s]global step 690000, trans_decision ep_re 1628.8313234142017

{"global_step": 690000, "eval_re": [2154.3152064933915, 1442.5434421820837, 
1379.4694299527932, 1947.0938385936415, 1083.9115075515858, 1154.9771213100007, 
1334.0949266334312, 2088.839538573304, 2238.8764873332966, 1464.1917355184887], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [12:38:40<3:50:16, 21.71it/s]global step 700000, trans_decision ep_re 1435.0654159488008

{"global_step": 700000, "eval_re": [1070.3265861519678, 1644.057106963844, 
2008.7836600195335, 1364.2212707409415, 1189.4906494993022, 1485.798746745233, 
1696.4595772563569, 1313.1906846803383, 1261.726332286135, 1316.5995451443546], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709997/1000000 [12:49:30<3:42:30, 21.72it/s]global step 710000, trans_decision ep_re 1477.0528068586154

{"global_step": 710000, "eval_re": [1405.3251657349492, 2725.404008382925, 
1348.6595367176674, 1098.7017259632098, 1757.4848029244415, 1453.7326872618191, 
1164.0800811074225, 1206.0798487024656, 1349.4948374223868, 1261.5653743688672],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [13:00:20<3:37:20, 21.47it/s]global step 720000, trans_decision ep_re 1375.3772686535242

{"global_step": 720000, "eval_re": [2048.7719870284577, 1586.8969620563487, 
1398.57933226119, 1127.350827041855, 1197.5246595591339, 1229.86996252515, 
1230.0584698443918, 952.1892474989479, 1616.4666182046985, 1366.0646205150676], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [13:11:10<3:27:32, 21.68it/s]global step 730000, trans_decision ep_re 1329.8630873360376

{"global_step": 730000, "eval_re": [1237.586287412737, 1180.5691149946676, 
257.3561713025688, 2468.6226627139995, 1584.086546333303, 1396.40616802383, 
1719.8572656268466, 1205.922886561309, 1141.73271004427, 1106.4910603468416], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [13:22:10<3:18:49, 21.79it/s]global step 740000, trans_decision ep_re 1257.458873233532

{"global_step": 740000, "eval_re": [1218.0114835826662, 1210.7409239926433, 
1379.3660817888408, 1649.069014718928, 1116.0030512688388, 1064.2102836178806, 
1140.3292955247994, 1127.1038564734122, 1151.589427168189, 1518.1653141991228], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749997/1000000 [13:33:00<3:11:47, 21.73it/s]global step 750000, trans_decision ep_re 1631.8034974465502

{"global_step": 750000, "eval_re": [1042.7355640221072, 1401.2831193748182, 
1415.2748209301358, 1234.2401683665066, 1900.967171787288, 2288.7786290248873, 
1690.5188218011385, 2647.0120006666193, 1165.4529165233548, 1531.7717619686466],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759999/1000000 [13:43:50<3:05:00, 21.62it/s]global step 760000, trans_decision ep_re 1406.9951968922278

{"global_step": 760000, "eval_re": [1181.4129161593135, 1430.529929986651, 
1078.9935179414254, 1788.7732700042088, 1306.3223137256641, 1233.6669758260048, 
1443.890480436079, 1237.9584852428184, 1421.0953096699814, 1947.3087699301304], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [13:54:50<2:57:28, 21.60it/s]global step 770000, trans_decision ep_re 1250.0564886198724

{"global_step": 770000, "eval_re": [1176.2433322589231, 1692.6366965051611, 
1531.1740308254525, 672.5521735241116, 1093.4842619736876, 1420.2343122361024, 
1323.0817430101106, 1216.7219466227455, 1215.2820595320566, 1159.1543297103724],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779997/1000000 [14:05:40<2:48:47, 21.72it/s]global step 780000, trans_decision ep_re 1442.4150608792097

{"global_step": 780000, "eval_re": [1406.0800075367197, 1690.4172457519835, 
1089.7688969943554, 1508.8217959865428, 1060.983300412136, 1253.3282126623371, 
1345.7893383420692, 1863.9222087876385, 2079.2147767716738, 1125.8248255466403],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [14:16:30<2:42:11, 21.58it/s]global step 790000, trans_decision ep_re 1507.7091647327948

{"global_step": 790000, "eval_re": [1221.0165593302413, 2150.355429805241, 
2114.6427685253225, 1413.38949393283, 1224.9690895822628, 1701.429827596956, 
1214.3895766276046, 1154.7744190163603, 1805.3115256356875, 1076.812957275442], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [14:27:30<2:34:00, 21.64it/s]global step 800000, trans_decision ep_re 1405.4486184670968

{"global_step": 800000, "eval_re": [1532.1241464162006, 1588.4540480434891, 
1122.423277676902, 1127.1232318005336, 1138.380294645066, 1167.5620377198184, 
1224.158822618116, 1420.8922491461585, 2173.711683924883, 1559.6563926798028], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809998/1000000 [14:38:20<2:25:08, 21.82it/s]global step 810000, trans_decision ep_re 1276.2304315860024

{"global_step": 810000, "eval_re": [1025.842640570992, 1200.000975020756, 
1150.4019441397106, 1477.1427751692545, 1110.7042851104115, 1600.516794525323, 
1116.120228994067, 1575.859085771885, 1125.6652293926186, 1380.0503571650086], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [14:49:10<2:17:30, 21.82it/s]global step 820000, trans_decision ep_re 1521.8123151146942

{"global_step": 820000, "eval_re": [1978.4228009469205, 1500.4153969200052, 
1032.9596617036236, 1329.3821518008428, 1221.866488393384, 2335.346063515788, 
1544.6087225052972, 1134.7395864111404, 1385.6145227738875, 1754.7677561760527],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [15:00:01<2:10:47, 21.66it/s]global step 830000, trans_decision ep_re 1582.8530156751824

{"global_step": 830000, "eval_re": [1848.4503650168156, 1303.0929094398741, 
1415.969608258843, 1799.2367491055795, 1466.7153599131334, 1838.5260432086218, 
1319.0689234802242, 1555.7245747279112, 1766.5656895502602, 1515.1799340505609],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [15:11:01<2:02:34, 21.76it/s]global step 840000, trans_decision ep_re 1266.7175544275674

{"global_step": 840000, "eval_re": [1727.2906095687554, 1012.2907946720242, 
1212.5592581671503, 1164.719865966033, 1077.1671710578628, 1503.8272861610374, 
1030.018959108769, 1440.4424679213303, 1085.0822371098186, 1413.7768945428927], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849997/1000000 [15:21:51<1:55:20, 21.67it/s]global step 850000, trans_decision ep_re 1348.2759633226365

{"global_step": 850000, "eval_re": [1027.289757531782, 1377.9167609160284, 
1075.0706230120327, 1040.1711249966272, 1456.0416526338095, 1906.1833805965973, 
1327.3160365773172, 1214.8558346622572, 1126.9629923884922, 1930.9514699114186],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859999/1000000 [15:32:41<1:48:51, 21.43it/s]global step 860000, trans_decision ep_re 1455.925148023544

{"global_step": 860000, "eval_re": [1057.0705078710544, 1414.174825617048, 
1573.7507891504727, 1278.9690504295077, 1315.7745495938905, 1071.1776918318665, 
1338.9103558242573, 1878.2482321252103, 1746.9960958569716, 1884.1793819351578],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [15:43:41<1:39:57, 21.67it/s]global step 870000, trans_decision ep_re 1568.811398491207

{"global_step": 870000, "eval_re": [1120.650605146361, 2542.6458643275732, 
1779.6351410957493, 1951.0806805916693, 1997.44806795266, 1101.8118011688302, 
1510.8405383471747, 1204.1574735863267, 1145.8347068530293, 1334.009105842697], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879998/1000000 [15:54:31<1:31:05, 21.95it/s]global step 880000, trans_decision ep_re 1260.3639892711838

{"global_step": 880000, "eval_re": [1281.1527762146118, 1127.4160958667705, 
1862.6245107763589, 1232.0100825244435, 1358.0614135180324, 1176.504965247784, 
1225.206557612567, 1081.0969630211275, 1075.91016460108, 1183.6563633290605], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [16:05:21<1:24:27, 21.71it/s]global step 890000, trans_decision ep_re 1432.4846253268765

{"global_step": 890000, "eval_re": [1114.8555128387468, 1265.750911121209, 
1179.8721984435033, 1953.3097250043504, 1497.4398453677034, 2135.3276438436087, 
1306.0470047019862, 1102.2214682958968, 1700.6113195480725, 1069.4106241036902],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [16:16:11<1:16:51, 21.68it/s]global step 900000, trans_decision ep_re 1407.0715693372101

{"global_step": 900000, "eval_re": [1913.9924630484504, 1025.315295466736, 
1276.7394312396216, 1562.7949519946776, 937.6399704460856, 1246.675557895523, 
1112.9178826609893, 2395.390441927336, 1102.3716107231035, 1496.8780879695792], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909999/1000000 [16:27:11<1:09:36, 21.55it/s]global step 910000, trans_decision ep_re 1402.3091173732705

{"global_step": 910000, "eval_re": [1674.2518119529714, 1060.5662252776854, 
1160.7500415162756, 1060.9099320913563, 1198.902832778724, 1230.4995354775149, 
1930.7812453089712, 1322.2203222394446, 1880.5845538179094, 1503.624673271852], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [16:38:01<1:01:13, 21.78it/s]global step 920000, trans_decision ep_re 1517.0785102283207

{"global_step": 920000, "eval_re": [1382.1000686418051, 1382.1058495783338, 
1141.4915347557526, 1653.3042606447755, 1261.69344734389, 1493.4694821460378, 
1800.788090911018, 1486.4370680045774, 1989.5860017091718, 1579.8092985478443], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [16:48:51<53:55, 21.63it/s]global step 930000, trans_decision ep_re 1481.1339386091454

{"global_step": 930000, "eval_re": [1208.6032284223356, 1056.9094152022774, 
2391.9386274009603, 1118.4704372203457, 1221.632546941941, 1874.3647438246603, 
1079.4118232305016, 957.8066757680715, 1676.842813629806, 2225.3590744505545], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [16:59:41<46:04, 21.71it/s]global step 940000, trans_decision ep_re 1446.0494093593534

{"global_step": 940000, "eval_re": [1755.6157664764357, 1078.399261308173, 
1487.0812016313569, 1318.0404212863273, 1647.69860904504, 1786.2639372736212, 
1486.7073641405896, 1052.8706110764422, 1818.1227090549644, 1029.6942123005856],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949998/1000000 [17:10:41<38:10, 21.83it/s]global step 950000, trans_decision ep_re 1342.077575730572

{"global_step": 950000, "eval_re": [1045.833275158733, 1724.1771774685403, 
1021.3745190703542, 1654.793341452293, 1429.4077348127967, 1369.5872682267477, 
1443.0852743034827, 1424.82404523577, 974.5645052162856, 1333.1286163607156], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959997/1000000 [17:21:31<30:59, 21.51it/s]global step 960000, trans_decision ep_re 1458.5609646815847

{"global_step": 960000, "eval_re": [1323.8952839946874, 1389.2335141286362, 
1265.12259104531, 1808.9945677298717, 1734.1232457978492, 1237.3413284059486, 
1114.429524876175, 1671.7437067754875, 1608.6429412696416, 1432.0829427922395], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [17:32:31<23:14, 21.51it/s]global step 970000, trans_decision ep_re 1442.0292178846996

{"global_step": 970000, "eval_re": [1100.2399306381894, 1419.7510776684892, 
1179.9603967215105, 1905.3280052304062, 2010.4311425081737, 1226.1121846730405, 
1184.5929748391338, 2016.3964029473223, 1142.0448517729642, 1235.4352118477657],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [17:43:21<15:31, 21.48it/s]global step 980000, trans_decision ep_re 1202.2536654170312

{"global_step": 980000, "eval_re": [1114.1990526797392, 1128.4987599351145, 
1022.7537779562778, 1291.7824093685263, 1283.4576972491452, 1515.4699051555956, 
1024.8720225505765, 1160.3448083021049, 1047.0144092831106, 1434.1438116901202],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989998/1000000 [17:54:21<07:39, 21.79it/s]global step 990000, trans_decision ep_re 1620.4485972832524

{"global_step": 990000, "eval_re": [1349.717415086349, 1515.7305980251172, 
1732.696177171434, 2175.9149921078906, 1828.821992711536, 1114.987910691469, 
1801.4267027665844, 1220.245083339698, 2024.5745184808502, 1440.370582451597], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [18:05:11<00:00, 21.65it/s]global step 1000000, trans_decision ep_re 1405.5870082038814

{"global_step": 1000000, "eval_re": [1089.4277817195673, 1616.4706861577567, 
1649.5265462930602, 1403.7152153546915, 1071.9446090856623, 1360.3173487124357, 
1062.4912627170056, 1173.999296263664, 2091.8010949323416, 1536.1762408026311], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [18:05:49<00:00, 15.35it/s]
