
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.1
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:30<10:30:21, 26.18it/s]global step 10000, trans_decision ep_re -92.33512352947629

{"global_step": 10000, "eval_re": [-112.17276206959775, -73.25236810537778, 
-134.41302005889094, -135.34314562905647, -65.39995581269295, -49.9115290984125,
-139.24995681763556, -61.50360651607371, -31.200561780869442, 
-120.9043294061557], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19998/1000000 [13:30<10:14:40, 26.57it/s]global step 20000, trans_decision ep_re 456.6886316492658

{"global_step": 20000, "eval_re": [573.6073739740135, 385.1171973652454, 
714.451464040003, 426.78365071660767, 266.5577221085143, 335.0627126978817, 
377.9166407514152, 364.4513315566066, 516.6714768759481, 606.2667464064222], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29998/1000000 [22:30<10:08:49, 26.55it/s]global step 30000, trans_decision ep_re 792.027553851894

{"global_step": 30000, "eval_re": [749.3499124552276, 1096.1263665398494, 
513.541021634024, 1015.1422860473382, 930.982111569911, 580.677827711007, 
816.5122120247984, 822.296031002617, 657.0076127215547, 738.6401568126142], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [31:20<10:11:25, 26.17it/s]global step 40000, trans_decision ep_re 915.1468597076037

{"global_step": 40000, "eval_re": [767.3814738476349, 856.0962826637275, 
982.4557602430204, 746.5112260613355, 712.7033959751021, 1002.1504129085947, 
1500.03615580267, 955.0827164544718, 885.7703557000405, 743.2808174194404], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49998/1000000 [40:20<9:56:41, 26.53it/s]global step 50000, trans_decision ep_re 1199.911344686757

{"global_step": 50000, "eval_re": [1249.390663916054, 1436.121642348306, 
1290.939614327502, 1272.2954411570845, 1084.5773618031412, 985.6636260500018, 
1304.6559405899995, 1036.2019531947021, 1076.384591109812, 1262.8826123709664], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59998/1000000 [49:20<9:53:29, 26.40it/s]global step 60000, trans_decision ep_re 1499.9715961566694

{"global_step": 60000, "eval_re": [1413.3317310676873, 1980.9932227856643, 
1874.04652362716, 1068.3538139169777, 2184.9812465343007, 1591.630024874049, 
701.1660473960101, 1001.2760704466193, 1198.8799032246382, 1985.0573776935848], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69998/1000000 [58:20<9:45:39, 26.47it/s]global step 70000, trans_decision ep_re 1664.276989974785

{"global_step": 70000, "eval_re": [2117.201839265041, 1705.0489283020713, 
1113.7672925389293, 1429.7693772987461, 1215.6379495505212, 2680.244669372432, 
1491.530161866883, 1119.5194601033338, 1949.2694940174542, 1820.7807274324387], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79999/1000000 [1:07:10<9:44:51, 26.22it/s]global step 80000, trans_decision ep_re 1387.6585640091944

{"global_step": 80000, "eval_re": [1302.0215152716119, 1201.4827361224807, 
1417.6856065586148, 2479.389535898699, 1153.8493354761295, 1225.3928179832092, 
1283.589300773337, 980.2970364726445, 1207.0783303810647, 1625.7994251541531], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [1:16:10<9:31:28, 26.54it/s]global step 90000, trans_decision ep_re 1923.3280859948543

{"global_step": 90000, "eval_re": [1074.0948035390543, 2480.8891717416946, 
1069.8724792710514, 2389.5866998802067, 2725.099112049281, 1324.438935551516, 
1171.4523235225133, 2618.6060515732506, 1640.061520496776, 2739.1797623232014], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99998/1000000 [1:25:10<9:23:46, 26.61it/s]global step 100000, trans_decision ep_re 1814.8107759017723

{"global_step": 100000, "eval_re": [1237.2479806014871, 2083.6977356296125, 
2897.49368493465, 1480.1800804152842, 1030.1544576035158, 1059.7157640718378, 
1413.9566656342595, 2005.5577800083306, 2237.7326134394557, 2702.370996679291], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:34:10<9:18:43, 26.55it/s]global step 110000, trans_decision ep_re 1440.393302954188

{"global_step": 110000, "eval_re": [1294.4207209998895, 1869.0795959298664, 
1232.1171706281075, 1333.5895311645454, 960.1176271272968, 1902.3781215292036, 
1396.9403984528858, 1466.133875259772, 1387.0423815927943, 1562.113606857516], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119999/1000000 [1:43:00<9:20:46, 26.15it/s]global step 120000, trans_decision ep_re 1251.0276314340167

{"global_step": 120000, "eval_re": [1251.1708460896523, 1158.9948288482701, 
1114.0840454990682, 1040.4426847558275, 1272.498304752201, 1702.3977038929002, 
1255.4803315764157, 1061.7021364310747, 1371.8158397702157, 1281.6895927245425],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129998/1000000 [1:52:00<9:05:38, 26.57it/s]global step 130000, trans_decision ep_re 1884.7639982040448

{"global_step": 130000, "eval_re": [2610.3090756229135, 1015.0937391765755, 
2347.107218261935, 2548.9054433747424, 1035.230554024348, 1597.6672777396966, 
2507.6199601857115, 1874.0132249863768, 2273.1385588910757, 1038.5549297770724],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139998/1000000 [2:01:00<9:00:27, 26.52it/s]global step 140000, trans_decision ep_re 2240.858416970219

{"global_step": 140000, "eval_re": [1064.3284351464233, 2689.135917649954, 
2565.70111117569, 2630.5166419489306, 1218.5207805316265, 2612.810388817395, 
2011.129649821476, 2473.9308590303044, 2839.968339643382, 2302.542045937006], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149998/1000000 [2:10:00<8:56:22, 26.41it/s]global step 150000, trans_decision ep_re 1938.9419969322873

{"global_step": 150000, "eval_re": [2599.3865832015613, 2107.7310885325805, 
1483.9661826001777, 3374.8279786410003, 3651.669717196159, 1230.6162513872882, 
1245.8858300379889, 975.610921293386, 1128.181876502452, 1591.543539930281], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159999/1000000 [2:19:00<8:56:42, 26.09it/s]global step 160000, trans_decision ep_re 2091.1655505743347

{"global_step": 160000, "eval_re": [2097.90347653884, 2166.9213402903083, 
2152.7549842821613, 2161.8450040286657, 2021.4868928979336, 2063.701452680767, 
2039.5192983428078, 2163.0199445614435, 2022.3275122147395, 2022.1755999056802],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [2:27:50<8:39:49, 26.61it/s]global step 170000, trans_decision ep_re 1380.6052344979003

{"global_step": 170000, "eval_re": [1256.9275423469348, 1930.4680324203832, 
1684.0686087998563, 1315.770672019167, 1515.3265460161733, 1066.6606807849, 
1265.3863374949842, 1467.6078391113895, 973.5150142183306, 1330.321071766887], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179998/1000000 [2:36:50<8:36:35, 26.46it/s]global step 180000, trans_decision ep_re 1666.3256186985905

{"global_step": 180000, "eval_re": [1849.5988639457717, 1219.975549461196, 
1016.7239751644045, 1391.8299502535042, 1360.4107460227435, 1913.501051677695, 
1038.538903169461, 3215.3884785334944, 1158.800269489212, 2498.4883992684217], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:45:50<8:36:07, 26.16it/s]global step 190000, trans_decision ep_re 1217.4484647472343

{"global_step": 190000, "eval_re": [1254.5933686217227, 1202.1793589196682, 
1179.4373363072798, 1472.8299386557935, 1066.892031396275, 1166.4253516888587, 
1135.0184020552424, 1200.9728654024143, 1153.3271937512136, 1342.8088006738742],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199998/1000000 [2:54:50<8:23:08, 26.50it/s]global step 200000, trans_decision ep_re 1501.7112882939969

{"global_step": 200000, "eval_re": [1947.2487489522382, 1205.8965414843838, 
996.8633762511737, 2341.260952086193, 1100.4737163709078, 3612.4023980858137, 
136.11757810823315, 1014.9912885269276, 1254.1415221833042, 1407.7167608907916],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:03:40<8:23:05, 26.17it/s]global step 210000, trans_decision ep_re 1514.308625060247

{"global_step": 210000, "eval_re": [1152.0855885587616, 3097.9532286679096, 
1542.1473932091321, 1064.3254450782151, 1402.0908013092742, 1699.2409716376196, 
1155.0872108108783, 1325.9050081052317, 1576.3721930753247, 1127.878410150124], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219998/1000000 [3:12:40<8:09:30, 26.56it/s]global step 220000, trans_decision ep_re 1513.5976341388994

{"global_step": 220000, "eval_re": [985.4576703685061, 1160.2436641791226, 
1694.6281445091934, 1397.7376543042897, 1820.0051018529293, 1308.602010146097, 
1126.642401294171, 1142.160896435788, 1388.6394139156184, 3111.859384383279], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229998/1000000 [3:21:40<8:04:39, 26.48it/s]global step 230000, trans_decision ep_re 1354.3765490925866

{"global_step": 230000, "eval_re": [1248.0885145648067, 1566.3503027336076, 
1212.331968483808, 924.2428526188519, 1084.4856751578386, 1536.6246376074237, 
2184.450787949515, 1074.6931090590012, 1095.6293869295773, 1616.8682558214357], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [3:30:30<8:04:56, 26.12it/s]global step 240000, trans_decision ep_re 1577.9904123885415

{"global_step": 240000, "eval_re": [1611.172818020579, 1787.8177872903914, 
1455.9921499105521, 1364.3522076896695, 2397.7653041020694, 1376.1396080820168, 
1392.746170782957, 1431.6651394654102, 1437.0680469722133, 1525.184891569556], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [3:39:30<7:51:54, 26.49it/s]global step 250000, trans_decision ep_re 1726.9281308643629

{"global_step": 250000, "eval_re": [1292.8786910739711, 1253.794376059004, 
991.870927872928, 2503.972989027969, 1518.406983707694, 1928.6880151382366, 
2218.8451388710596, 3200.471071775655, 1157.0620346902429, 1203.2910804268702], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259998/1000000 [3:48:30<7:45:31, 26.49it/s]global step 260000, trans_decision ep_re 1549.215368190136

{"global_step": 260000, "eval_re": [999.156677536219, 1598.8165206522015, 
1583.4308397902448, 1103.1630689847698, 2315.197804796458, 1424.7676403822204, 
2749.428643462676, 1538.7923481300822, 1118.8005471132642, 1060.5995910532242], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269998/1000000 [3:57:30<7:39:19, 26.49it/s]global step 270000, trans_decision ep_re 1542.060322156752

{"global_step": 270000, "eval_re": [1678.0785807700868, 1174.6441436837233, 
1429.129118494215, 1944.1309651347233, 1308.867404043319, 978.4474122833773, 
1093.672912264555, 1276.1631943770724, 1512.7206945055943, 3024.748796010853], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [4:06:20<7:36:43, 26.27it/s]global step 280000, trans_decision ep_re 1633.3697353950968

{"global_step": 280000, "eval_re": [1515.3686830590057, 1131.5174595636574, 
2934.470080997891, 1657.5506790902984, 1270.6841431311818, 1605.8299996402395, 
1231.0987308144072, 2208.8724341205325, 1638.8209953480675, 1139.4841481856856],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [4:15:20<7:28:18, 26.40it/s]global step 290000, trans_decision ep_re 1522.379460942981

{"global_step": 290000, "eval_re": [1492.482021079993, 1263.8603741801282, 
2659.457700940123, 1190.023681133953, 1941.2104583057474, 1527.35736732368, 
1259.7688637118363, 1224.317672949294, 1349.3862245963048, 1315.930245208752], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299998/1000000 [4:24:20<7:17:36, 26.66it/s]global step 300000, trans_decision ep_re 1824.3140135105118

{"global_step": 300000, "eval_re": [2297.6779982370253, 1378.8889541990204, 
1161.948457063558, 2393.9458225305775, 1327.5356811929437, 1292.7488508731535, 
2469.835261825072, 1412.245613692787, 2105.4678456936913, 2402.845649797289], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [4:33:20<7:17:31, 26.28it/s]global step 310000, trans_decision ep_re 1437.1074996485809

{"global_step": 310000, "eval_re": [1847.6509346560697, 1077.4201217097063, 
1575.1695451901792, 1360.1906050149814, 1491.1860597638918, 1255.865358046931, 
1406.9232482793905, 1254.5014446300954, 1499.735686195337, 1602.4319929992257], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319998/1000000 [4:42:10<7:06:14, 26.59it/s]global step 320000, trans_decision ep_re 1905.594358932778

{"global_step": 320000, "eval_re": [1721.9405646782784, 2337.892296721721, 
1452.9764507071545, 1741.7818522840766, 1623.3722924356919, 2909.616111487734, 
1149.4767410095415, 1862.0442065322507, 2419.753686029834, 1837.089387441497], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [4:51:00<7:03:42, 26.35it/s]global step 330000, trans_decision ep_re 1671.879101586493

{"global_step": 330000, "eval_re": [1268.1858514746787, 1164.1869413663912, 
3057.2992843594066, 1433.840745222308, 1480.089717570865, 1384.585896656602, 
1237.744226625167, 1156.2111235014672, 3101.7023147999616, 1434.9449142880835], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339998/1000000 [5:00:00<6:51:23, 26.74it/s]global step 340000, trans_decision ep_re 1536.2604846217728

{"global_step": 340000, "eval_re": [1325.6780100027293, 1594.4451882807366, 
1115.776001854578, 1221.860620686935, 1172.3029523275945, 2007.057112424434, 
1186.1593850976933, 2084.4000892392587, 1530.3371947891762, 2124.5882915145917],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349998/1000000 [5:08:50<6:42:23, 26.92it/s]global step 350000, trans_decision ep_re 1596.8267638385826

{"global_step": 350000, "eval_re": [1618.428603801502, 1699.3245021384198, 
1349.8644758421838, 1796.4225611081451, 1223.1683567147597, 1107.500483236243, 
1227.4243324772706, 1528.284926083278, 2982.507452302701, 1435.3419446813202], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359998/1000000 [5:17:40<6:39:54, 26.67it/s]global step 360000, trans_decision ep_re 1534.4053386696753

{"global_step": 360000, "eval_re": [1024.0545603468497, 1188.9678385312386, 
1744.8086452769755, 1772.7267743797568, 1687.827161406468, 1837.9437711416783, 
1642.7294543367152, 1379.988794774146, 1321.8152023038574, 1743.191184199066], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369999/1000000 [5:26:30<6:36:46, 26.46it/s]global step 370000, trans_decision ep_re 1688.0674552973298

{"global_step": 370000, "eval_re": [2046.209854959469, 2076.487606725183, 
2001.3304093237648, 1194.3918224549996, 1426.2653262412189, 2019.4244153934148, 
1616.264024205453, 1273.643248344282, 2246.299222783237, 980.3586225422785], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379998/1000000 [5:35:30<6:26:12, 26.76it/s]global step 380000, trans_decision ep_re 1543.9130308666245

{"global_step": 380000, "eval_re": [1580.1094324058845, 2077.518096728128, 
2411.1035968350207, 1298.0143077314547, 1355.6601845264463, 1353.7685588998843, 
1627.5574476201102, 1184.0417732178403, 1010.7712666712999, 1540.585644030178], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [5:44:20<6:18:36, 26.85it/s]global step 390000, trans_decision ep_re 1501.3056772447785

{"global_step": 390000, "eval_re": [1614.8549972008705, 1547.1037293058214, 
1758.7702682666215, 1743.410986709915, 1561.8424408706906, 1071.2304669646937, 
1387.569182675834, 1194.9349039563942, 1712.280221823059, 1421.0595746738848], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [5:53:10<6:18:55, 26.39it/s]global step 400000, trans_decision ep_re 1542.7712738250202

{"global_step": 400000, "eval_re": [1263.8305859164007, 1287.9228721039024, 
1512.0882193377674, 1203.368972005023, 1304.4526454117965, 2390.394436408466, 
1229.6216322551668, 1799.9138471497806, 1954.3772524660385, 1481.7422751958627],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [6:02:10<6:12:43, 26.38it/s]global step 410000, trans_decision ep_re 2017.7123385999444

{"global_step": 410000, "eval_re": [2367.8298019991403, 1930.4749379291034, 
1245.4156893665481, 2425.207254034822, 1951.7532352430742, 1564.1778019157605, 
2210.4610577205653, 2556.5603435184585, 2361.347635381011, 1563.895628890958], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [6:11:00<6:01:20, 26.75it/s]global step 420000, trans_decision ep_re 1546.5361658213576

{"global_step": 420000, "eval_re": [1188.1249315758553, 1228.426183378912, 
1221.4025397829334, 1788.432163265718, 1386.6507803447469, 2005.8389520280625, 
2037.4115807896503, 2097.475807255126, 1269.8455304982526, 1241.7531892943189], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429998/1000000 [6:19:50<5:55:04, 26.75it/s]global step 430000, trans_decision ep_re 1749.6642799052418

{"global_step": 430000, "eval_re": [1672.8465684152154, 1455.87220462099, 
1422.3690583897594, 1652.824017478536, 2424.8460358717994, 2017.6904694432894, 
1506.3425485293253, 1430.1835352056632, 2236.4589083981514, 1677.209452699686], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439998/1000000 [6:28:40<5:49:53, 26.67it/s]global step 440000, trans_decision ep_re 1331.6736219955008

{"global_step": 440000, "eval_re": [1456.1644068823357, 1380.4647372634956, 
1368.816048124591, 1229.576274798576, 1209.555891840351, 1220.1061512077358, 
1273.7446647321367, 1258.5973375199583, 1397.3125739819998, 1522.3981336038264],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [6:37:40<5:46:04, 26.49it/s]global step 450000, trans_decision ep_re 1262.9746050815595

{"global_step": 450000, "eval_re": [1232.4436271099867, 1269.053856137009, 
1271.847886292377, 1358.002438108977, 1259.097428126922, 1201.968659090064, 
1456.5248805404283, 786.0173756822805, 1484.8851132291313, 1309.9047864984193], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459998/1000000 [6:46:30<5:39:42, 26.49it/s]global step 460000, trans_decision ep_re 1379.6292474205716

{"global_step": 460000, "eval_re": [1334.9116860221843, 1454.1660942370993, 
1791.907286781412, 1567.4256846162634, 1283.7488879045047, 1150.6968606692333, 
1463.5446320757774, 1320.7827964583062, 1199.8613440570489, 1229.2472013838872],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469998/1000000 [6:55:20<5:30:55, 26.69it/s]global step 470000, trans_decision ep_re 1616.4582293318067

{"global_step": 470000, "eval_re": [1453.3918959090881, 1875.706120074394, 
1320.9986992271704, 1430.959427126714, 1475.5205830415819, 1368.401538509248, 
1539.8716284154536, 1783.9611873631409, 1729.0535794677746, 2186.7176341835025],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [7:04:10<5:29:42, 26.29it/s]global step 480000, trans_decision ep_re 1372.6307188697142

{"global_step": 480000, "eval_re": [1464.4274943418, 1356.7779777648943, 
1143.6624245101068, 1687.1835278847095, 1369.6369362591709, 1306.2988793017792, 
1864.9505337789153, 560.7274932641857, 1634.9978214807065, 1337.6441001108726], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489998/1000000 [7:13:10<5:17:48, 26.75it/s]global step 490000, trans_decision ep_re 1997.8028426900012

{"global_step": 490000, "eval_re": [2310.6787237404174, 1560.9547935755513, 
2816.624909841797, 1220.8577726388655, 1548.6359974742893, 2262.71664064661, 
2390.267645794214, 2397.1591164318525, 1779.9296804606975, 1690.2031462957173], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499999/1000000 [7:22:00<5:13:07, 26.61it/s]global step 500000, trans_decision ep_re 1628.7741460701386

{"global_step": 500000, "eval_re": [1507.0925079002743, 1234.5170976651311, 
1365.3706906843615, 1446.8498048945848, 2091.892384955618, 2189.7407910109036, 
1490.7629815514033, 2088.553970930989, 1208.742423968356, 1664.2188071397607], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509999/1000000 [7:30:50<5:07:42, 26.54it/s]global step 510000, trans_decision ep_re 1378.145793408552

{"global_step": 510000, "eval_re": [1255.0593830548225, 1833.0231472560276, 
1147.0971621412702, 1542.5152429505913, 1533.3885525800488, 1279.4781518570983, 
1382.8226770835822, 1252.737760325511, 1190.245131640023, 1365.0907251965443], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519998/1000000 [7:39:50<4:59:59, 26.67it/s]global step 520000, trans_decision ep_re 1354.1767707603472

{"global_step": 520000, "eval_re": [1158.5453056960596, 1272.2290673424573, 
1458.965581980313, 1281.9868792259126, 1452.0185887594546, 1410.3539559515584, 
1260.6397475565368, 1387.1446676678534, 1535.2874423947583, 1324.5964710285702],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529998/1000000 [7:48:40<4:53:56, 26.65it/s]global step 530000, trans_decision ep_re 1349.5904188707668

{"global_step": 530000, "eval_re": [1534.165889039488, 2025.8690288518508, 
1409.6026947900252, 271.1132973000473, 1461.2209451788349, 1170.5388518419775, 
1466.4658382210343, 1323.048317667968, 1644.9821533105421, 1188.8971725058993], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539999/1000000 [7:57:30<4:50:37, 26.38it/s]global step 540000, trans_decision ep_re 1908.5545880025734

{"global_step": 540000, "eval_re": [1930.8313571069075, 2270.4727614650697, 
1887.38108387491, 1914.7625148618301, 1381.8835213469408, 2575.4815108871594, 
2627.579142214453, 1641.0760482939254, 1430.7049612345231, 1425.3729787400134], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [8:06:30<4:43:53, 26.42it/s]global step 550000, trans_decision ep_re 1422.6750718717453

{"global_step": 550000, "eval_re": [1321.4484418670124, 1734.0960782148688, 
1690.7573463636743, 1261.418121215356, 1161.0602902358823, 1252.37569350295, 
1150.0676624083617, 1296.4084751742105, 1549.8939652358965, 1809.2246444992397],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559998/1000000 [8:15:20<4:33:59, 26.77it/s]global step 560000, trans_decision ep_re 1777.4276665862617

{"global_step": 560000, "eval_re": [1350.1357842194466, 1896.7537430703321, 
1182.057219751637, 2212.903968117934, 1275.984719530954, 2378.3067205147727, 
1942.3655182263385, 2619.7758202751425, 1302.7739090235068, 1613.2192631325536],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569998/1000000 [8:24:10<4:29:06, 26.63it/s]global step 570000, trans_decision ep_re 1474.6597466069024

{"global_step": 570000, "eval_re": [1321.5753297312594, 1253.782059761817, 
1462.157628849399, 1203.7966316730228, 1568.0296904803847, 2271.9315111017872, 
1435.9008295981478, 1195.0202549770538, 1557.5682914303998, 1476.8352384657526],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579999/1000000 [8:33:00<4:24:09, 26.50it/s]global step 580000, trans_decision ep_re 1518.9780266163018

{"global_step": 580000, "eval_re": [1519.3091566329024, 1232.1701759139876, 
1162.3739029754909, 1449.7633129551518, 1218.151802413338, 1511.3086615620398, 
2598.0667725018307, 1958.614023765906, 1264.7397687787854, 1275.2826886635842], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589998/1000000 [8:42:00<4:14:27, 26.85it/s]global step 590000, trans_decision ep_re 1725.0777775809197

{"global_step": 590000, "eval_re": [1342.3795705842124, 1378.0243036979064, 
1504.0094722095002, 2126.8938988214554, 1808.9639239083142, 2299.020266080543, 
1284.4367745031202, 1358.691452424914, 2013.7183310725784, 2134.6397825066574], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599998/1000000 [8:50:50<4:07:40, 26.92it/s]global step 600000, trans_decision ep_re 1480.6980122871369

{"global_step": 600000, "eval_re": [1438.6110357872517, 1495.314902598501, 
1040.9875711424309, 1626.345226718466, 1322.203547626379, 1654.4624090456227, 
1585.0927773750595, 1133.532088512964, 1737.635875589314, 1772.7946884753796], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609998/1000000 [8:59:40<4:03:53, 26.65it/s]global step 610000, trans_decision ep_re 1547.0464530221484

{"global_step": 610000, "eval_re": [1491.2128954735692, 1806.7882626948012, 
1578.6121111158748, 1461.0694488973477, 1570.521602906613, 1291.467025612415, 
1509.064519189277, 1429.1161157957497, 1575.9592895531546, 1756.6532589826836], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619999/1000000 [9:08:40<4:00:02, 26.38it/s]global step 620000, trans_decision ep_re 1401.8744249099293

{"global_step": 620000, "eval_re": [1624.5625627249983, 1537.2099229266864, 
1309.5647005947621, 1456.6392539693945, 1239.4248280935933, 1790.4290658864886, 
1374.733768345275, 1191.2125076863756, 1276.323505089018, 1218.6441337826996], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629998/1000000 [9:17:30<3:49:13, 26.90it/s]global step 630000, trans_decision ep_re 1444.4364402971119

{"global_step": 630000, "eval_re": [1589.3221076535087, 1343.1051848297936, 
1404.6677098555574, 1316.8189542622374, 1737.5770174506085, 1363.0601650124183, 
1501.631802773602, 1181.4619774951643, 1393.3263100035394, 1613.3931736346867], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639998/1000000 [9:26:20<3:44:58, 26.67it/s]global step 640000, trans_decision ep_re 1539.674130823804

{"global_step": 640000, "eval_re": [1517.091268906875, 1814.2528846421299, 
1466.9172115338424, 1467.4727900602438, 1156.3744905737012, 1381.8663304017182, 
1358.0378132496453, 2372.530441922624, 1135.1715800805653, 1727.026496866697], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649998/1000000 [9:35:20<3:37:24, 26.83it/s]global step 650000, trans_decision ep_re 1535.066367650934

{"global_step": 650000, "eval_re": [1324.9604682093443, 1478.3329286754993, 
1812.3661924324301, 1209.4083700630772, 1587.844888794325, 1774.5719213577524, 
1516.5384880132433, 1983.9136352521814, 1513.4652288752166, 1149.2615548362708],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659998/1000000 [9:44:10<3:32:34, 26.66it/s]global step 660000, trans_decision ep_re 1325.182750409441

{"global_step": 660000, "eval_re": [1576.2957196079553, 1162.3626528135924, 
994.3216774583896, 2077.2699370449486, 1408.6385402798808, 1313.7498118900676, 
19.077544871961926, 1436.463535045482, 1955.6023336076244, 1308.045751474506], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [9:53:00<3:28:10, 26.42it/s]global step 670000, trans_decision ep_re 1628.0116479778428

{"global_step": 670000, "eval_re": [1424.3475118675328, 1797.8805174991576, 
1392.5500113817905, 2040.11489337107, 1563.6188161212528, 1579.9949158319837, 
1026.4279487208757, 1640.2650932126733, 2149.5204220621927, 1665.3963497099], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [10:01:50<3:21:40, 26.45it/s]global step 680000, trans_decision ep_re 1491.9209408811562

{"global_step": 680000, "eval_re": [1362.855629232471, 1194.853535718069, 
2103.6615617905954, 1613.1546053110237, 1565.324321147572, 1271.2079062545795, 
1459.2662015665608, 1395.2648317223293, 1762.2646541102476, 1191.3561619581121],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689998/1000000 [10:10:50<3:13:59, 26.63it/s]global step 690000, trans_decision ep_re 1590.9524196524358

{"global_step": 690000, "eval_re": [1453.526867128044, 2829.778994113171, 
1214.9145742369697, 1550.1300080372805, 1236.6004406524057, 1162.1111400741042, 
1369.1920135790238, 1171.2218138305773, 2407.116466876659, 1514.9318779961222], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699998/1000000 [10:19:40<3:07:56, 26.60it/s]global step 700000, trans_decision ep_re 1524.2625421458404

{"global_step": 700000, "eval_re": [1812.9890799006748, 1350.1342059379558, 
1821.7082916962686, 1369.5728322655493, 1430.0790620127434, 1614.0206467861246, 
1275.5841174669724, 1392.9383396397313, 1696.9724536608433, 1478.6263920915414],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [10:28:30<3:03:12, 26.38it/s]global step 710000, trans_decision ep_re 1562.039526187604

{"global_step": 710000, "eval_re": [1762.6485416490395, 1492.6067061827082, 
1268.9121920061225, 1209.2121326814276, 1478.564683929263, 1863.9889849439282, 
2029.0145142178544, 1512.1405025397253, 1197.711851247683, 1805.595152478289], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719998/1000000 [10:37:30<2:54:30, 26.74it/s]global step 720000, trans_decision ep_re 1437.5555491900775

{"global_step": 720000, "eval_re": [1386.0381561428878, 1451.431429060633, 
1572.6885868049349, 1375.4940283401384, 1841.2415239377078, 1518.081297061989, 
1238.4345398316402, 1168.1827813279672, 1402.1496957109916, 1421.8134536818857],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729998/1000000 [10:46:20<2:48:28, 26.71it/s]global step 730000, trans_decision ep_re 1719.4447178731552

{"global_step": 730000, "eval_re": [1642.7825257508869, 1706.1326008361993, 
1789.2714143170795, 1243.5809166854986, 2188.1200358087813, 1714.1851537569476, 
1964.2531674012362, 1483.499326896501, 1823.5843799341244, 1639.037657344298], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [10:55:10<2:44:52, 26.28it/s]global step 740000, trans_decision ep_re 1359.1451349648737

{"global_step": 740000, "eval_re": [1167.9195256592702, 1359.397996497909, 
1143.8531197382827, 1525.9210200725747, 1201.7267923320917, 2020.0425684581066, 
1504.7496787522327, 1216.8484803612275, 1134.2572135109099, 1316.7349542661336],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749998/1000000 [11:04:10<2:35:17, 26.83it/s]global step 750000, trans_decision ep_re 1483.1315832170803

{"global_step": 750000, "eval_re": [2073.3877025385623, 1871.975681639834, 
1150.215260161688, 1262.9088472788899, 1403.5503948734602, 2096.2260206652613, 
1209.4371574037832, 1352.4655993314411, 1277.3695558460815, 1133.7796124318018],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759998/1000000 [11:13:00<2:30:35, 26.56it/s]global step 760000, trans_decision ep_re 1623.6488174086408

{"global_step": 760000, "eval_re": [1597.9014512743731, 2280.5331334605994, 
1331.2493588725256, 1542.75626878088, 1811.3778165487809, 1260.41786183017, 
1190.8965943451815, 1496.739006595985, 2034.1215630449578, 1690.4951193329543], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [11:21:50<2:25:33, 26.34it/s]global step 770000, trans_decision ep_re 1229.7395505337506

{"global_step": 770000, "eval_re": [1247.174862105116, 538.2028441064937, 
1302.385720811449, 1282.3972112987562, 1308.6555517117229, 1440.8514356772253, 
1337.6563297455525, 1443.5915960728057, 1224.1701781046727, 1172.309775703714], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [11:30:50<2:17:00, 26.76it/s]global step 780000, trans_decision ep_re 1391.25678517632

{"global_step": 780000, "eval_re": [1399.5061799717273, 1394.0455149216243, 
1286.2217114012092, 1115.1327954011217, 1297.7588591181955, 1335.182141617082, 
1431.7350149380047, 1483.0786354313059, 1964.1915254915116, 1205.7154734714172],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789998/1000000 [11:39:40<2:10:53, 26.74it/s]global step 790000, trans_decision ep_re 1463.3519203616675

{"global_step": 790000, "eval_re": [1301.954438357657, 1338.4784246580075, 
1847.2691657295652, 1483.8716652087119, 1971.7768597420506, 1230.4422394280189, 
1575.7748335615534, 1290.3130422235088, 1439.6866427414875, 1153.9518919661111],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799999/1000000 [11:48:30<2:06:24, 26.37it/s]global step 800000, trans_decision ep_re 1464.5235616359282

{"global_step": 800000, "eval_re": [1342.6364839409964, 2192.526397360083, 
1216.0260331984011, 1242.1921955552396, 1498.2217340919442, 1374.842569117003, 
1246.9917429604648, 1161.8304399993838, 1773.7591488042992, 1596.2088713314665],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809999/1000000 [11:57:20<1:59:54, 26.41it/s]global step 810000, trans_decision ep_re 1673.71436707496

{"global_step": 810000, "eval_re": [1512.0041495919515, 1302.2973217978545, 
2525.655072443152, 1513.2975256282473, 1629.0086867307732, 1563.049034472846, 
1811.159149149422, 1343.499611058737, 1705.172199043903, 1832.0009208327108], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819998/1000000 [12:06:20<1:52:43, 26.62it/s]global step 820000, trans_decision ep_re 1707.5163234921954

{"global_step": 820000, "eval_re": [1432.0065147974847, 1265.4172917605483, 
1544.0728741457194, 2371.836007617489, 1920.0520652468158, 1847.3327218255192, 
1318.6984649099902, 1429.5581413612117, 1707.6090178897553, 2238.58013536742], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829998/1000000 [12:15:10<1:46:07, 26.70it/s]global step 830000, trans_decision ep_re 1556.369115869085

{"global_step": 830000, "eval_re": [1301.2937499726147, 2066.5557810866835, 
2280.5659197254017, 1215.6469633209076, 1246.133252735488, 1255.9928509801318, 
1357.7758205333444, 1393.105837856254, 2251.2423435123374, 1195.3786389676861], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839999/1000000 [12:24:00<1:41:19, 26.32it/s]global step 840000, trans_decision ep_re 1714.3845005029962

{"global_step": 840000, "eval_re": [1853.8585709531249, 1401.0486013753223, 
1610.4655887921222, 1901.8118272998274, 1984.8515824674907, 2423.7550311454115, 
1395.4411740658293, 1300.2279147773668, 1758.4027044479146, 1513.982009705551], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849998/1000000 [12:33:00<1:33:17, 26.80it/s]global step 850000, trans_decision ep_re 1480.0878869639803

{"global_step": 850000, "eval_re": [1575.1773255528688, 1666.7536942477375, 
1406.6375434503072, 1359.8948796049613, 1286.629529042743, 1288.1471679467065, 
1215.7397821097552, 1223.9758835486502, 1672.7707747700965, 2105.1522893659753],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859998/1000000 [12:41:50<1:27:35, 26.64it/s]global step 860000, trans_decision ep_re 1386.898999947838

{"global_step": 860000, "eval_re": [1621.3367340984664, 1429.036099826879, 
1354.766436720572, 1594.0230850144515, 1155.7177765884633, 1376.6805354323285, 
1162.292844331921, 1267.4546462565404, 1607.6567273360567, 1300.025113872704], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [12:50:40<1:21:20, 26.64it/s]global step 870000, trans_decision ep_re 1492.3164567284143

{"global_step": 870000, "eval_re": [1166.4506516102153, 1399.2082791668065, 
1147.759091454833, 2074.175262769596, 1290.0608466867861, 1258.8919714209305, 
1944.931055371218, 1199.790087001206, 1700.9879195912429, 1740.9094022113086], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [12:59:40<1:15:42, 26.42it/s]global step 880000, trans_decision ep_re 1364.9085070713804

{"global_step": 880000, "eval_re": [1568.049427264532, 1191.6683179031509, 
1499.2661834952423, 1405.980760976257, 1197.0260345774832, 1168.1776808039997, 
1156.9794118047007, 1368.8072512702447, 1487.0987839499555, 1606.0312186682372],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889998/1000000 [13:08:30<1:09:19, 26.45it/s]global step 890000, trans_decision ep_re 1420.8355763760696

{"global_step": 890000, "eval_re": [1468.7718210731089, 1182.1113776795214, 
1358.686083946283, 1194.9705603874022, 1437.196776661896, 1595.9921557567575, 
1564.5309145191493, 1421.5659415273617, 1535.0730301212307, 1449.4571020879862],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [13:17:30<1:04:08, 25.98it/s]global step 900000, trans_decision ep_re 1666.5754995420752

{"global_step": 900000, "eval_re": [1170.6097153921223, 1279.742394340155, 
1581.4599902600708, 2194.232000431005, 1918.461778956727, 1392.6561851893728, 
2753.062809484274, 1731.4906051014316, 1103.8164542290888, 1540.2230620365065], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [13:26:30<57:38, 26.02it/s]global step 910000, trans_decision ep_re 1297.184766868136

{"global_step": 910000, "eval_re": [1299.934589056856, 1229.513687293302, 
1132.0077810300313, 1135.5876865007224, 1740.1352289046463, 1213.6557719794102, 
1185.698304238827, 1215.8773985898517, 1597.5325440853533, 1221.9046770023588], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [13:35:30<51:26, 25.92it/s]global step 920000, trans_decision ep_re 1576.5400223830275

{"global_step": 920000, "eval_re": [1585.6909889493886, 1255.9261570354709, 
1461.0874441538874, 1767.5710683947395, 1887.6959401590598, 2121.5474766364136, 
1613.2597771661326, 1143.062509774827, 1385.4792368386115, 1544.0796247217436], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [13:44:30<45:15, 25.78it/s]global step 930000, trans_decision ep_re 1522.169571456603

{"global_step": 930000, "eval_re": [1094.8086620748902, 1273.86952243114, 
2162.709145075597, 1307.789361862512, 1335.7859958914728, 2568.7098176080294, 
1690.7016815692905, 1453.0647717065306, 1128.6317599016995, 1205.6249964448675],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [13:53:30<38:16, 26.13it/s]global step 940000, trans_decision ep_re 1528.6634291024056

{"global_step": 940000, "eval_re": [1221.8193832979725, 1308.1459459493158, 
1944.1728275843857, 1270.0124344934027, 1574.6514867587978, 1712.8250024438407, 
2271.1413999727947, 1363.8272026042655, 1368.209843807102, 1251.8287641121779], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [14:02:20<31:50, 26.18it/s]global step 950000, trans_decision ep_re 1385.9693117115946

{"global_step": 950000, "eval_re": [1353.4102549996155, 1325.7842666510755, 
1352.066796368335, 1293.6146448731029, 1231.4377296093644, 1252.2762011092977, 
1392.7427443858048, 1211.0277810607488, 1981.8263697122736, 1465.5063283463294],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [14:11:20<25:25, 26.23it/s]global step 960000, trans_decision ep_re 1572.221343308182

{"global_step": 960000, "eval_re": [1580.3451061923524, 1127.100550370855, 
1592.4278668682316, 1333.6227131719054, 1726.4815876651119, 1142.6376332977916, 
2260.672221012652, 1250.7813543534378, 1835.8304827093361, 1872.313917440145], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969998/1000000 [14:20:20<18:48, 26.58it/s]global step 970000, trans_decision ep_re 1478.0988892573816

{"global_step": 970000, "eval_re": [1484.2968962625673, 1104.5735791643413, 
1270.555730409194, 1717.073127140192, 1315.96770385053, 1515.388480826743, 
1580.9768817267927, 1497.0213193559173, 1559.7758208889345, 1735.359352948603], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979999/1000000 [14:29:10<12:35, 26.48it/s]global step 980000, trans_decision ep_re 1373.5779941180062

{"global_step": 980000, "eval_re": [1275.0317843607052, 1446.5505409167697, 
1812.0557068289165, 1566.0591593078555, 1255.4849898690468, 1313.4972002237605, 
1318.1922234679623, 1265.4684664717274, 1201.8993740249998, 1281.5404957083192],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [14:38:10<06:30, 25.60it/s]global step 990000, trans_decision ep_re 1398.910330969243

{"global_step": 990000, "eval_re": [1444.9263082831203, 1201.5223433267897, 
2046.879980167269, 1475.1105619066893, 1223.5318801506733, 1322.4209019737232, 
1223.4299016388895, 1407.9369505391946, 1066.538114609821, 1576.8063670962606], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999998/1000000 [14:47:10<00:00, 26.40it/s]global step 1000000, trans_decision ep_re 1385.144114364319

{"global_step": 1000000, "eval_re": [1459.9818140664156, 1504.7861620254532, 
1438.0483638438745, 1854.6488506107912, 1624.748123554048, 1284.1876360848994, 
1486.3629236202773, 1142.909923939836, 1225.0963025678695, 830.6710433297226], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [14:47:37<00:00, 18.78it/s]
