
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 24,
    'delayspec': 'markov(ord(15,1), ord(3,5,3,shift=22), [[124, 1], [1, 19]])',
    'noise': 0.15
}
✓ setup
Created Delay Process: Markovian(Categorical(0.938,0.0625), 
Categorical(0.273,0.455,0.273,shift=22), [[0.992, 0.008], [0.05, 0.95]])
  1%|          | 9999/1000000 [04:40<10:50:49, 25.35it/s]global step 10000, trans_decision ep_re -180.26967387325502

{"global_step": 10000, "eval_re": [-185.89745274909305, -210.23777840486792, 
-197.6539237414552, -179.80919256478285, -178.67593251866356, 
-161.1586174112444, -176.96150633299928, -171.4732571521006, 
-176.74965243536147, -164.07942542198188], "eval_len": [1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19999/1000000 [13:50<10:46:22, 25.27it/s]global step 20000, trans_decision ep_re 515.0191899208023

{"global_step": 20000, "eval_re": [600.7005705922242, 524.9091279816203, 
490.77835507495985, 490.70237990524714, 652.3135098966476, 458.78062990386684, 
608.4047820394767, 402.7297167768414, 418.89318199575297, 501.9796450413857], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29997/1000000 [22:50<10:34:21, 25.48it/s]global step 30000, trans_decision ep_re 859.1325333735616

{"global_step": 30000, "eval_re": [1035.7233980008584, 720.3156739393705, 
882.4720611145643, 896.5721571993745, 987.1652330074337, 988.9996910706222, 
1065.9004949347402, 673.9577354209066, 671.245126121485, 668.9737629262612], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39999/1000000 [32:00<10:27:29, 25.50it/s]global step 40000, trans_decision ep_re 1009.7223424094915

{"global_step": 40000, "eval_re": [859.2282953020789, 886.0044426862726, 
955.1912594275757, 1093.9286641296596, 1175.6612382861445, 1294.128441308546, 
804.041454925793, 1066.409565035993, 898.5254589971569, 1064.1046039956939], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [41:10<10:29:40, 25.15it/s]global step 50000, trans_decision ep_re 879.1687591267142

{"global_step": 50000, "eval_re": [860.4860791764276, 867.1351830053725, 
808.0006478776228, 744.3782665399402, 1017.1422481898154, 853.1002774633668, 
856.9140485224822, 1051.3719574025934, 769.7249200818475, 963.4339630076735], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59999/1000000 [50:20<10:11:00, 25.64it/s]global step 60000, trans_decision ep_re 976.6629911363777

{"global_step": 60000, "eval_re": [770.9026268978467, 779.0267924497338, 
979.8078149261765, 1333.8619458965313, 939.8417244960929, 887.6915588718209, 
923.2436051328541, 913.2209558221383, 879.5662819178741, 1359.4666049527093], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69998/1000000 [59:30<10:08:53, 25.46it/s]global step 70000, trans_decision ep_re 961.2230036544239

{"global_step": 70000, "eval_re": [1057.6085939416494, 928.0352761143705, 
1029.2042044194022, 843.6852500597295, 1019.5153451301632, 914.2133274930163, 
986.8210422772199, 1118.9022313820174, 846.176194738412, 868.0685709882592], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79998/1000000 [1:08:40<9:49:02, 26.03it/s]global step 80000, trans_decision ep_re 1250.5425910913368

{"global_step": 80000, "eval_re": [1024.8655410360223, 1603.4728321715581, 
1234.1514029783887, 1545.3787310964021, 994.6985730062194, 1136.335208768168, 
1229.103185689209, 1411.2498032549854, 855.1755341450652, 1470.9950987673494], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89999/1000000 [1:17:50<9:50:19, 25.69it/s]global step 90000, trans_decision ep_re 1034.442112424322

{"global_step": 90000, "eval_re": [1156.1086051744983, 925.2544671319104, 
1052.8039816689752, 1063.7102212050313, 1018.8685688282177, 957.5935416619915, 
1158.9363642073604, 970.7711377276855, 1035.8612205175993, 1004.5130161199504], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:27:10<9:45:27, 25.62it/s]global step 100000, trans_decision ep_re 1198.8283283216344

{"global_step": 100000, "eval_re": [1526.723289140101, 1624.2209258808439, 
1031.4003216992985, 1270.0508968645752, 1274.8334317292229, 1335.4454439094548, 
1625.6190280550181, 1119.8886809018406, 941.8616574408941, 238.2396075950943], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:36:20<9:31:20, 25.96it/s]global step 110000, trans_decision ep_re 1183.0191379196394

{"global_step": 110000, "eval_re": [915.8887344280457, 1024.5877172475625, 
1886.2872308423919, 1026.3119417705068, 1477.0561329175123, 1026.599445596123, 
1240.4559956823082, 895.8703247082454, 1310.3449625077287, 1026.78889349597], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [1:45:30<9:47:03, 24.98it/s]global step 120000, trans_decision ep_re 1053.9294035676608

{"global_step": 120000, "eval_re": [1289.2762557461156, 789.5726528743005, 
1065.3551001539545, 1007.9692337987036, 1048.634110226957, 1008.484556615475, 
990.9718166644789, 1090.0874191966495, 979.2099121676792, 1269.7329782322931], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129999/1000000 [1:54:40<9:28:20, 25.51it/s]global step 130000, trans_decision ep_re 1103.6447989146457

{"global_step": 130000, "eval_re": [1008.8458845113587, 977.7854148960832, 
1256.5431027447378, 1217.692783064676, 962.6546324602397, 984.6744499874343, 
1339.116845820813, 1208.9587240880464, 946.4096686490947, 1133.7664829239736], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [2:03:50<9:19:02, 25.64it/s]global step 140000, trans_decision ep_re 1011.0438430744898

{"global_step": 140000, "eval_re": [1101.9954130600108, 884.6464533840127, 
917.6879764119261, 863.0558221502315, 835.4436892632216, 1316.1865134291647, 
1130.914925169281, 1146.6486987265685, 948.1190275163467, 965.7399116341356], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [2:13:00<9:11:58, 25.67it/s]global step 150000, trans_decision ep_re 1241.316578733566

{"global_step": 150000, "eval_re": [1065.040573750844, 1307.3459306955897, 
1037.248518848848, 1289.090883500884, 1806.891881715629, 1080.9896490295273, 
1833.7064162200056, 1072.2135321267442, 973.6807056342111, 946.9576958133779], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159998/1000000 [2:22:00<8:56:44, 26.08it/s]global step 160000, trans_decision ep_re 1121.4294171017134

{"global_step": 160000, "eval_re": [992.4161037728614, 1230.701935581716, 
1128.0421001832008, 1321.160184543224, 1044.9326926327815, 1063.9765046548885, 
1170.1741210948412, 968.6002738336543, 1146.3742606622855, 1147.9159940576835], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169998/1000000 [2:31:10<8:51:51, 26.01it/s]global step 170000, trans_decision ep_re 1275.257526604797

{"global_step": 170000, "eval_re": [1499.3367996243608, 1249.115980287191, 
1612.7439965966869, 948.2031921326943, 1178.39999350219, 1096.3928267489132, 
1037.0633897729426, 1143.0501843752127, 2007.3738136777438, 980.8950893300358], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:40:10<8:49:41, 25.80it/s]global step 180000, trans_decision ep_re 1374.9439982851804

{"global_step": 180000, "eval_re": [1170.8689424024901, 1511.2011381447055, 
1346.5201855952798, 1573.7123412611809, 1494.262818811041, 1262.127111605277, 
1463.7561005221248, 1035.5731966413828, 1637.1135754898035, 1254.304572378517], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:49:20<8:40:46, 25.92it/s]global step 190000, trans_decision ep_re 1307.7458416114507

{"global_step": 190000, "eval_re": [1748.4119535730479, 1267.688209222736, 
1305.1475034278844, 1163.591345904699, 1352.2947736207082, 1252.6739995948283, 
948.369920347582, 1111.1987681270123, 1131.3085237187581, 1796.7734185772495], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [2:58:20<8:35:40, 25.86it/s]global step 200000, trans_decision ep_re 1664.1123909665628

{"global_step": 200000, "eval_re": [1828.8269748445873, 1573.19736086909, 
1807.13256975564, 1660.2899072945218, 1787.2931459124234, 1703.600622880727, 
1757.0033814386445, 1525.1327594763873, 1664.8501855128904, 1333.7970016807158],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209999/1000000 [3:07:30<8:30:18, 25.80it/s]global step 210000, trans_decision ep_re 1233.291456636946

{"global_step": 210000, "eval_re": [1253.5991228911948, 1394.3206466514837, 
1470.3951089894847, 957.5099396060751, 1577.3630887973989, 1279.689882105735, 
1018.877494858493, 1298.1413331272008, 1047.4431998526252, 1035.5747494897678], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219999/1000000 [3:16:30<8:23:37, 25.81it/s]global step 220000, trans_decision ep_re 1159.1406222573826

{"global_step": 220000, "eval_re": [2096.868290662506, 1084.8324442879004, 
864.1141982027389, 1028.649023509805, 992.6151115238431, 1337.0518481464017, 
950.1203734112149, 960.5970749141853, 1206.136337408034, 1070.4215205071955], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229998/1000000 [3:25:40<8:08:09, 26.29it/s]global step 230000, trans_decision ep_re 1367.5457446522405

{"global_step": 230000, "eval_re": [1496.18258710536, 1430.0723264173323, 
1325.7077078218279, 1260.8124292913203, 1121.8737246413205, 1400.1159136367507, 
1250.0559132456958, 1485.1167065224472, 1353.0153825849961, 1552.5047552553551],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239999/1000000 [3:34:40<8:08:18, 25.94it/s]global step 240000, trans_decision ep_re 1168.952826969199

{"global_step": 240000, "eval_re": [1115.51517638487, 1126.6024855976762, 
1257.2039576504685, 987.4590632925377, 1020.2320986176021, 1102.7529482046234, 
1371.3453004502207, 1029.6365507799019, 1274.4787556215335, 1404.3019330925526],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249998/1000000 [3:43:40<7:57:11, 26.19it/s]global step 250000, trans_decision ep_re 1000.9912060831086

{"global_step": 250000, "eval_re": [962.7033529714575, 998.4984766113793, 
453.7049311745482, 906.7608983055623, 983.4235792193508, 1261.985344818457, 
1070.95988798857, 968.9671486127801, 1401.5528846784923, 1001.355556450488], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [3:52:40<7:48:58, 26.30it/s]global step 260000, trans_decision ep_re 1039.0771728539107

{"global_step": 260000, "eval_re": [966.414538198152, 995.6130392695056, 
1237.9795493948604, 1046.6471247976192, 890.7215380382917, 1213.4424614254658, 
973.2262837749829, 1094.8375156678808, 910.6754734439482, 1061.2142045284013], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269997/1000000 [4:01:40<7:48:03, 25.99it/s]global step 270000, trans_decision ep_re 1355.7808100080936

{"global_step": 270000, "eval_re": [864.4311812321042, 1084.6235817459892, 
1436.189269286135, 1189.1339327371572, 1857.015388553654, 1262.3336456309992, 
945.1950564359546, 2078.4958380495727, 1727.1800034347077, 1113.2102029746632], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [4:10:40<7:39:04, 26.14it/s]global step 280000, trans_decision ep_re 1393.090923838185

{"global_step": 280000, "eval_re": [1574.5165852926948, 1423.2023981665247, 
1400.6069929603418, 1750.3439205379777, 1135.7122433847842, 953.149917837189, 
1456.6627730309015, 1581.4717591573096, 1195.2436295493717, 1459.999018464756], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289998/1000000 [4:19:40<7:26:36, 26.50it/s]global step 290000, trans_decision ep_re 1346.7188352468586

{"global_step": 290000, "eval_re": [965.228546483757, 1262.1571395193102, 
1034.922448234378, 1189.1771340359048, 926.2211802822231, 1258.4627479928015, 
1282.3365437926884, 2079.900190817785, 2455.6999396763704, 1013.0824816333635], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [4:28:40<7:28:34, 26.01it/s]global step 300000, trans_decision ep_re 1244.9706418042101

{"global_step": 300000, "eval_re": [1371.1440594025198, 947.732805169663, 
1086.3000052153077, 1812.353946148272, 887.790564576799, 1132.766006204216, 
1339.6372576268977, 1189.8481327012191, 1676.6806821223674, 1005.4529588748395],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [4:37:40<7:18:00, 26.26it/s]global step 310000, trans_decision ep_re 1045.6032122337037

{"global_step": 310000, "eval_re": [1125.221010358942, 999.5897648686802, 
1069.9330731061814, 926.0926897057765, 903.4423857355614, 1142.4087104591576, 
1038.6153879056135, 1092.0227323422735, 1003.6169923051004, 1155.0893755497518],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [4:46:40<7:17:35, 25.90it/s]global step 320000, trans_decision ep_re 1377.0490639514483

{"global_step": 320000, "eval_re": [1463.9255215268536, 916.8903980655997, 
1314.9937016207157, 1596.7735986669986, 1401.6840659599918, 1553.8531827734853, 
1256.8459467660591, 1739.0448364444892, 1148.2345967149743, 1378.2447909753155],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [4:55:40<7:06:15, 26.20it/s]global step 330000, trans_decision ep_re 1227.526258924596

{"global_step": 330000, "eval_re": [1178.2874534406064, 1255.2162650110986, 
2018.574787092597, 1765.172280872412, 1035.9197527180575, 1036.4863820140688, 
934.8009032035396, 1275.4613186973158, 873.2080088328053, 902.1354373634588], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339998/1000000 [5:04:50<6:56:30, 26.41it/s]global step 340000, trans_decision ep_re 1567.4460163237422

{"global_step": 340000, "eval_re": [1536.8827974705268, 1222.2149670759682, 
1843.3569773371785, 1010.8343938081841, 1447.5078544200703, 2239.700489719463, 
1399.8095733778835, 1781.2215110778402, 1436.6972859191505, 1756.2343130311579],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349997/1000000 [5:13:50<6:55:16, 26.09it/s]global step 350000, trans_decision ep_re 1437.581341584042

{"global_step": 350000, "eval_re": [1550.9604669724135, 1019.7792493521608, 
2040.1250060352793, 1178.7761194801467, 1356.4106486645906, 1019.0021814907648, 
2025.9418459581766, 1001.5803172150842, 1475.896415887238, 1707.341164784565], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359999/1000000 [5:22:40<6:46:14, 26.26it/s]global step 360000, trans_decision ep_re 1413.2820684826709

{"global_step": 360000, "eval_re": [985.5145825895864, 1414.6247337390955, 
1206.401634375922, 1521.1995519788495, 1458.2915522872042, 1373.535196936664, 
1419.0830711164544, 1565.2837160064344, 1057.427332418887, 2131.459313377612], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369998/1000000 [5:31:40<6:33:10, 26.71it/s]global step 370000, trans_decision ep_re 1164.1946939514382

{"global_step": 370000, "eval_re": [980.054261647901, 1398.7047784633269, 
1423.8398876151277, 1055.2161457596744, 1417.8097767908632, 1006.4938040208098, 
861.6929003340729, 997.1780669379667, 1045.7603252378988, 1455.1969927067396], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379999/1000000 [5:40:30<6:34:20, 26.20it/s]global step 380000, trans_decision ep_re 1122.4808283162972

{"global_step": 380000, "eval_re": [1413.2656666454238, 297.1817623873557, 
1394.993039068362, 1108.9700573363875, 1030.7312040622369, 1578.8270473295986, 
988.0160508739426, 1007.7479395501599, 1557.944073124656, 847.1314427848495], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389998/1000000 [5:49:30<6:20:28, 26.72it/s]global step 390000, trans_decision ep_re 1072.3069186308887

{"global_step": 390000, "eval_re": [914.8155790201216, 1292.9976495127412, 
935.6866811979788, 865.0921859492705, 951.6216566484869, 1524.4961612078673, 
1409.271662318526, 972.5473570984989, 857.8276500789323, 998.712603276463], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399999/1000000 [5:58:30<6:20:36, 26.27it/s]global step 400000, trans_decision ep_re 1280.8311937477256

{"global_step": 400000, "eval_re": [1102.786055370535, 1263.1637612098486, 
1422.5173416635016, 1271.7706693649063, 1038.1015080203895, 1453.4810850593071, 
1587.6386796852241, 1701.0700970080816, 1038.7924108580453, 928.990329237419], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409999/1000000 [6:07:30<6:17:11, 26.07it/s]global step 410000, trans_decision ep_re 1186.0195185474172

{"global_step": 410000, "eval_re": [1139.2792373359794, 1008.7736347522982, 
962.1439745510788, 1175.2743312811895, 1253.1983768955438, 1737.400915307739, 
994.770475722874, 1137.3235290698108, 1195.6328319656886, 1256.3978785919699], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419998/1000000 [6:16:20<6:05:14, 26.47it/s]global step 420000, trans_decision ep_re 1339.5701881675936

{"global_step": 420000, "eval_re": [1751.944015394324, 1982.8658592404042, 
1161.7643249014843, 1122.9222270975029, 949.1671192184704, 1478.659597662831, 
1315.8541846451737, 972.7938808754964, 1729.2368653065714, 930.4938073336762], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429998/1000000 [6:25:20<5:57:34, 26.57it/s]global step 430000, trans_decision ep_re 1194.0793841127252

{"global_step": 430000, "eval_re": [1620.538615154424, 1009.4676606475064, 
825.0726569716052, 1123.451660700873, 1061.1951138828679, 952.5315872443407, 
951.5285300760011, 1865.1630451850544, 1180.5007616952487, 1351.3442095693297], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [6:34:10<5:55:58, 26.22it/s]global step 440000, trans_decision ep_re 1139.6021862437822

{"global_step": 440000, "eval_re": [1344.9722711439786, 1145.189422430791, 
2001.7758212575557, 1056.6743864982652, 1005.1033030094385, 970.7769423456797, 
1005.3650995749974, 971.6029048485867, 926.938721329923, 967.6229899986038], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449998/1000000 [6:43:10<5:47:08, 26.41it/s]global step 450000, trans_decision ep_re 1324.6157090410775

{"global_step": 450000, "eval_re": [1174.199787669076, 1439.2315476794338, 
1529.3682829206282, 1073.6872330471529, 1427.4029690337477, 1334.7640686110528, 
957.4614963431928, 1348.4224033612097, 870.2245302146152, 2091.394771530668], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459998/1000000 [6:52:10<5:42:19, 26.29it/s]global step 460000, trans_decision ep_re 1170.9842223590938

{"global_step": 460000, "eval_re": [955.6461292848792, 938.2889543894809, 
1353.9748075338532, 1385.741308097483, 1065.851872077163, 1300.039242519432, 
992.3033706873842, 990.6136405088865, 1445.209542985715, 1282.1733555066617], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469997/1000000 [7:01:10<5:38:19, 26.11it/s]global step 470000, trans_decision ep_re 1256.3439306883624

{"global_step": 470000, "eval_re": [1915.4301819021991, 1158.8188963596028, 
995.6586971578389, 1145.4132329866256, 1364.4297748998472, 938.6273268126382, 
1097.8122923733386, 1766.8242708049515, 1102.2369760299691, 1078.187657556613], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479999/1000000 [7:10:00<5:30:39, 26.21it/s]global step 480000, trans_decision ep_re 1307.8656512569194

{"global_step": 480000, "eval_re": [1325.4468203199858, 993.2612294127144, 
948.6516095062339, 1020.3477456788279, 1199.2726833456043, 1043.8255450301767, 
1046.769848474351, 1822.4562489712278, 2232.590246217539, 1446.0345356125351], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489998/1000000 [7:19:00<5:20:31, 26.52it/s]global step 490000, trans_decision ep_re 1181.723853496766

{"global_step": 490000, "eval_re": [1097.5899829137104, 1157.096718631803, 
1619.9362868706748, 893.1295680848144, 937.9017781563434, 1195.8187587822715, 
1435.131642915567, 810.2831137778869, 1016.885149034009, 1653.4655358005757], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499998/1000000 [7:28:00<5:11:55, 26.72it/s]global step 500000, trans_decision ep_re 1025.6887421994816

{"global_step": 500000, "eval_re": [897.52070438173, 1087.048651621777, 
1324.336482456474, 1031.758959578911, 848.047201387016, 1182.874432371238, 
985.0789399940077, 918.8870003886725, 870.7314766987175, 1110.6035731162724], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509998/1000000 [7:37:00<5:12:40, 26.12it/s]global step 510000, trans_decision ep_re 1166.00587339223

{"global_step": 510000, "eval_re": [1350.0292640611242, 1140.917110020904, 
1569.0595721487932, 1110.1270233902596, 1068.383351168786, 936.1908393601595, 
1224.4688197953865, 1077.7505547084763, 1262.4319269707198, 920.7002722976879], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519999/1000000 [7:45:50<5:05:39, 26.17it/s]global step 520000, trans_decision ep_re 1275.9281370659087

{"global_step": 520000, "eval_re": [1204.429438183211, 1007.3751144665634, 
1118.1635407715853, 1552.3895319868043, 1069.9002488817061, 1834.9979483414024, 
1035.777468761905, 955.063141328908, 1159.2667852108536, 1821.9181527261467], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529998/1000000 [7:54:50<4:55:47, 26.48it/s]global step 530000, trans_decision ep_re 1287.128860211843

{"global_step": 530000, "eval_re": [1311.3962791211368, 1078.8665612523034, 
1221.7651892973322, 1133.5654859932083, 1359.3144436594253, 1560.4293943464834, 
1024.263128969058, 1573.927392024963, 1382.180838685568, 1225.5798887689507], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539998/1000000 [8:03:50<4:47:51, 26.63it/s]global step 540000, trans_decision ep_re 1069.700341537401

{"global_step": 540000, "eval_re": [917.3121544312054, 1537.4726079562893, 
1119.460830559298, 978.3924074656693, 1042.1479393743411, 943.3015807620337, 
1098.2210544183345, 904.412019597693, 1092.9560595514727, 1063.326761257674], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549999/1000000 [8:12:40<4:45:53, 26.23it/s]global step 550000, trans_decision ep_re 1048.908185426215

{"global_step": 550000, "eval_re": [1016.2220298156001, 998.6826387113241, 
1038.7951533039109, 1242.8568368807144, 1131.419673255399, 978.1712103099769, 
1040.8770640209964, 1011.0414813392724, 1031.8618827528094, 999.1538838721441], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559998/1000000 [8:21:40<4:35:58, 26.57it/s]global step 560000, trans_decision ep_re 1109.8484927205054

{"global_step": 560000, "eval_re": [1446.3635835597465, 1023.2118574419095, 
993.9350886907888, 1192.4486954498193, 1203.713233772599, 1064.5194231955068, 
903.0789322148075, 1080.5772951628496, 1032.2122306502074, 1158.42458706682], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569998/1000000 [8:30:40<4:29:43, 26.57it/s]global step 570000, trans_decision ep_re 1198.0431439373695

{"global_step": 570000, "eval_re": [1176.1286590408158, 1019.7450723436841, 
907.9243885165878, 989.8938289136798, 1499.0540791138585, 1699.271786497339, 
1090.5300518447575, 1459.528633546993, 954.2919546490015, 1184.0629849069799], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579998/1000000 [8:39:30<4:22:39, 26.65it/s]global step 580000, trans_decision ep_re 1105.212050301315

{"global_step": 580000, "eval_re": [984.0608719821621, 288.5907859015381, 
1790.2326833574705, 1086.5796455352322, 946.5742496259911, 1138.144975996735, 
950.4254471205809, 1042.4792566384388, 1374.929618907705, 1450.1029679472965], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589998/1000000 [8:48:30<4:16:59, 26.59it/s]global step 590000, trans_decision ep_re 1190.3407818902865

{"global_step": 590000, "eval_re": [1794.016614086906, 1240.3699833559708, 
1451.661930300887, 688.7367114371343, 982.4044953110758, 1425.3226660350006, 
1002.0031528794605, 1012.8733776881066, 1146.3624978885523, 1159.6563899197722],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599997/1000000 [8:57:30<4:15:54, 26.05it/s]global step 600000, trans_decision ep_re 1122.308709181862

{"global_step": 600000, "eval_re": [1064.0518528778246, 1196.2543236116956, 
985.0727177538984, 996.0626745543957, 1006.5153624871358, 1002.7634428747886, 
1358.5744747439526, 983.2357328113147, 1240.7751720349925, 1389.781338068621], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609999/1000000 [9:06:20<4:08:35, 26.15it/s]global step 610000, trans_decision ep_re 1197.2699085565132

{"global_step": 610000, "eval_re": [998.0487821734266, 1028.1676087101027, 
978.1066298033332, 1506.5984946682092, 1092.1481764656855, 1175.9817078505623, 
1416.045827907754, 976.6402589953135, 1277.9612598522967, 1523.0003391384487], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619998/1000000 [9:15:20<4:00:03, 26.38it/s]global step 620000, trans_decision ep_re 1191.2585813291757

{"global_step": 620000, "eval_re": [975.924266468929, 1218.3120518464116, 
1033.7434843461065, 1430.3776829539806, 1042.3000968470762, 926.2690234263408, 
1074.1064527591623, 1581.962290742685, 1362.4288523272605, 1267.161611573805], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [9:24:20<3:55:53, 26.14it/s]global step 630000, trans_decision ep_re 1216.775880676108

{"global_step": 630000, "eval_re": [1075.2185190897803, 1036.976402203559, 
940.9140394846722, 1643.126680086948, 1788.4912164892974, 1255.373842980717, 
1008.4227125934747, 1217.2913804278246, 1157.2035834318328, 1044.7404299729774],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [9:33:10<3:48:50, 26.22it/s]global step 640000, trans_decision ep_re 1334.670125708813

{"global_step": 640000, "eval_re": [1510.8200802748322, 1038.823613911796, 
1099.9082163301034, 1379.6892723413237, 890.1513783386984, 1163.690936182283, 
2038.4413230236953, 1303.9636328497504, 1601.6226570142678, 1319.59014682138], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649998/1000000 [9:42:10<3:39:09, 26.62it/s]global step 650000, trans_decision ep_re 1252.0344834288371

{"global_step": 650000, "eval_re": [1235.295827440444, 1052.7557365247758, 
1092.8386879235386, 1513.1272288269056, 1378.1894358244006, 910.5685586361767, 
1913.7199641884715, 1211.2087533849635, 913.8658646065472, 1298.7747769321493], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659999/1000000 [9:51:10<3:35:18, 26.32it/s]global step 660000, trans_decision ep_re 1405.0569937803577

{"global_step": 660000, "eval_re": [1319.1181228516768, 1312.0726580271041, 
1939.8335778755911, 1171.9048913716827, 1273.1009985329779, 1061.9293072555142, 
1674.0923424968478, 1190.3452557655817, 1517.1971665299388, 1590.9756170966605],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669999/1000000 [10:00:00<3:29:31, 26.25it/s]global step 670000, trans_decision ep_re 1334.095671488009

{"global_step": 670000, "eval_re": [1005.5030705481366, 1408.9006010104024, 
1315.7622104248226, 2032.7867618108714, 64.09553793528363, 1763.6927076788875, 
1465.0986674806654, 1268.6474017017015, 1058.8935660687137, 1957.576190220604], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679998/1000000 [10:09:00<3:19:59, 26.67it/s]global step 680000, trans_decision ep_re 1096.2963435278048

{"global_step": 680000, "eval_re": [993.0897400561709, 1220.0060483358643, 
1306.5294473404217, 1066.5795249134326, 1092.6234380247809, 1398.1000346282365, 
1076.594754188675, 841.9516080747634, 1051.6083655760067, 915.8804741396974], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689999/1000000 [10:17:50<3:16:35, 26.28it/s]global step 690000, trans_decision ep_re 1092.8871291755027

{"global_step": 690000, "eval_re": [1387.9776047108344, 953.4082762202553, 
1254.103386606769, 1096.0500881517266, 895.0655194789614, 1252.8569826734047, 
1076.0890555759763, 915.6778329554701, 927.7686975508249, 1169.8738478308042], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699998/1000000 [10:26:50<3:07:56, 26.60it/s]global step 700000, trans_decision ep_re 1122.9567264289592

{"global_step": 700000, "eval_re": [980.3190623360241, 1071.6061807155284, 
962.9609354305575, 1169.9458761945732, 1417.158180388578, 1202.460759119148, 
1159.2720259490002, 872.5244154091957, 1316.7424382662687, 1076.577390480718], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [10:35:50<3:04:08, 26.25it/s]global step 710000, trans_decision ep_re 1200.928418922408

{"global_step": 710000, "eval_re": [1513.480164199179, 1406.4044376998152, 
1095.0490918905227, 1503.7268947307982, 1018.328948706529, 923.3605369511511, 
957.2462624790362, 950.7116062201052, 1401.8147312560413, 1239.1615150909013], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719998/1000000 [10:44:50<2:56:26, 26.45it/s]global step 720000, trans_decision ep_re 1211.3721922095515

{"global_step": 720000, "eval_re": [1224.825370888952, 912.1094660501373, 
1126.8802284806093, 1398.795732969147, 1287.8260565081816, 1398.1646039630587, 
1187.3824064346543, 899.6884272270088, 1108.893700946873, 1569.1559286268914], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [10:53:40<2:51:21, 26.26it/s]global step 730000, trans_decision ep_re 1178.2475989706097

{"global_step": 730000, "eval_re": [970.7305790119486, 1179.3355106354209, 
1058.2463624026916, 1087.5558313118022, 1296.5650521754387, 1297.6560731182688, 
1371.034495011139, 1368.07535013151, 1046.7658124918198, 1106.510923416058], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739998/1000000 [11:02:40<2:42:36, 26.65it/s]global step 740000, trans_decision ep_re 1328.6318773835728

{"global_step": 740000, "eval_re": [1079.2541108928779, 1800.7782005569725, 
1224.2374676408615, 1398.0456642359854, 1338.9963333454887, 1529.8922998360717, 
1099.8886313180326, 1213.0619353375141, 1566.6806020371835, 1035.4835286347388],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [11:11:40<2:38:42, 26.25it/s]global step 750000, trans_decision ep_re 1272.323355640917

{"global_step": 750000, "eval_re": [1138.1162227995435, 1210.6070621386054, 
1613.1883141658507, 914.6188741049959, 999.1229593838976, 1218.9705925189094, 
1245.6153129219165, 1866.8645540689583, 1145.8365840313988, 1370.2930802750934],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759998/1000000 [11:20:30<2:30:48, 26.52it/s]global step 760000, trans_decision ep_re 1253.2629660005646

{"global_step": 760000, "eval_re": [1706.7656587363167, 1114.2387848953629, 
1024.0119438270506, 957.0231247807651, 1521.221075461745, 928.916442409899, 
1429.2833899279674, 1030.6602822727853, 1457.1991333937942, 1363.309824299958], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769998/1000000 [11:29:30<2:24:29, 26.53it/s]global step 770000, trans_decision ep_re 1390.1763989710084

{"global_step": 770000, "eval_re": [1818.8746026461972, 1410.5067308818818, 
1862.1178865690783, 1451.6745760229765, 1079.1801034828857, 1674.4569563741131, 
1109.433114289185, 899.8957025465762, 1242.8278169525229, 1352.7964999446676], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779998/1000000 [11:38:30<2:17:35, 26.65it/s]global step 780000, trans_decision ep_re 1205.8783353701633

{"global_step": 780000, "eval_re": [1128.330527788547, 1505.0031926012648, 
1279.3211043770236, 1005.4608586220819, 1417.2445564434076, 1333.1499702988801, 
1205.5012334900428, 1081.3253945743945, 1198.1451270436319, 905.30138846236], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789998/1000000 [11:47:20<2:11:23, 26.64it/s]global step 790000, trans_decision ep_re 1157.0857491490249

{"global_step": 790000, "eval_re": [898.1797179633801, 877.8920000222663, 
1573.377582472973, 928.5596969242484, 1110.3575331444315, 1539.4671206525636, 
905.6650698064827, 1245.2545107869316, 1257.8983365997562, 1234.2059231172173], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799998/1000000 [11:56:20<2:05:46, 26.50it/s]global step 800000, trans_decision ep_re 1230.859208138319

{"global_step": 800000, "eval_re": [1076.2827128139725, 1093.491781091891, 
1007.6980503737046, 1272.9350508774903, 1089.8840929090711, 1345.8680041937967, 
1837.2381867600243, 1603.7788822276518, 1000.1738412199087, 981.2414789156782], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809998/1000000 [12:05:20<1:59:32, 26.49it/s]global step 810000, trans_decision ep_re 1086.3667582023577

{"global_step": 810000, "eval_re": [1216.3729222768802, 1044.6061989542163, 
1203.9773121688863, 1147.686885820791, 938.633819370584, 1039.2883823604961, 
1192.5783039219793, 912.7267524477105, 1228.6248941325066, 939.1721105695267], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819999/1000000 [12:14:10<1:55:07, 26.06it/s]global step 820000, trans_decision ep_re 1188.6355544817513

{"global_step": 820000, "eval_re": [924.6256852088858, 800.0738878946278, 
1721.608246007495, 1021.4370361865349, 1410.01788373527, 1248.7664140578088, 
869.5300904601057, 1468.1115106657867, 1165.0781371871035, 1257.1066534138959], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829998/1000000 [12:23:10<1:47:01, 26.48it/s]global step 830000, trans_decision ep_re 1207.9860034678538

{"global_step": 830000, "eval_re": [1282.9929902085141, 926.3240188679295, 
1214.9466365442725, 906.3476812933384, 968.3998116644972, 1087.785565014103, 
1709.6454355158714, 1757.6166631123162, 1342.664239397439, 883.1369930602568], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [12:32:10<1:41:01, 26.40it/s]global step 840000, trans_decision ep_re 1230.8332749934586

{"global_step": 840000, "eval_re": [1139.8841119224348, 1101.5373249211864, 
1189.3999993525567, 1153.549665544355, 993.5096386074341, 2041.424273731878, 
936.184801577455, 1245.0556866805073, 986.2788680471167, 1521.5083795496628], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849998/1000000 [12:41:10<1:34:32, 26.44it/s]global step 850000, trans_decision ep_re 1004.7590684709121

{"global_step": 850000, "eval_re": [850.9679929566133, 1068.686493674449, 
855.7637806601991, 1187.7189833895945, 1045.635468381487, 869.072588277221, 
1051.913491563956, 1071.327929252516, 970.8722042570326, 1075.6317522960503], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [12:50:10<1:29:46, 25.99it/s]global step 860000, trans_decision ep_re 1082.3944565656184

{"global_step": 860000, "eval_re": [981.2870816005035, 1123.3711958370939, 
1165.6227315340439, 958.7346384206003, 980.7655068555704, 1232.1271602756601, 
869.0845417345951, 849.7842372501316, 1599.2617963792716, 1063.905675768714], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869998/1000000 [12:59:10<1:22:16, 26.33it/s]global step 870000, trans_decision ep_re 1177.2616409221075

{"global_step": 870000, "eval_re": [1245.484863872064, 1019.0693017946409, 
1068.3234079975884, 935.523700210514, 1583.2970110456783, 996.3891775316963, 
1181.0547000109696, 1020.3359911788209, 1565.379253428256, 1157.759002150845], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [13:08:10<1:16:59, 25.98it/s]global step 880000, trans_decision ep_re 1374.5225914055995

{"global_step": 880000, "eval_re": [1061.6752316087527, 1788.6605997026604, 
867.8342492102594, 812.2390161190408, 1427.0652895249805, 1876.879351859559, 
1230.187259332373, 1757.711512689233, 1583.70779896958, 1339.2656050395576], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889999/1000000 [13:17:20<1:11:16, 25.72it/s]global step 890000, trans_decision ep_re 1407.0386859799662

{"global_step": 890000, "eval_re": [1763.3661326921892, 1434.5835938882897, 
1154.2703849958, 1416.924194214239, 1431.860673566696, 1444.9015262651485, 
1635.0739372151659, 1591.2398842112618, 1170.6050373907776, 1027.5614953600937],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899998/1000000 [13:26:20<1:04:27, 25.86it/s]global step 900000, trans_decision ep_re 1125.0789670455447

{"global_step": 900000, "eval_re": [1209.25060630199, 949.4757422618889, 
922.33210860937, 1445.946412527459, 1501.2569102007321, 1039.3059065024859, 
1360.4268938272744, 836.4700939490665, 1128.542795969238, 857.7822003059413], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909997/1000000 [13:35:30<58:31, 25.63it/s]global step 910000, trans_decision ep_re 1205.7047122528095

{"global_step": 910000, "eval_re": [1187.2942034402029, 1121.8222034390558, 
1450.3903140992757, 1459.3706036554886, 958.1229492965479, 1509.208923133255, 
1042.9352858082214, 932.042786771272, 1128.75847749234, 1267.1013753924362], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [13:44:30<51:36, 25.84it/s]global step 920000, trans_decision ep_re 1457.580010746037

{"global_step": 920000, "eval_re": [1819.0953388887106, 1953.3962018006177, 
1687.4239462849973, 1702.665128422395, 882.6062442832865, 1209.4634424874907, 
1318.6428760567187, 1216.5567773657078, 1357.674130356964, 1428.2760215134817], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [13:53:40<45:33, 25.61it/s]global step 930000, trans_decision ep_re 1092.5272994004117

{"global_step": 930000, "eval_re": [987.0215909169807, 926.2130085113965, 
961.2962816362536, 1437.7340382474297, 1286.5511399959512, 1049.2519881843032, 
1015.1068347466673, 1095.451385935055, 1164.552792328473, 1002.0939335016056], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [14:02:40<38:40, 25.86it/s]global step 940000, trans_decision ep_re 1209.0926505406273

{"global_step": 940000, "eval_re": [763.1174944663258, 1127.8733748572465, 
829.718652040263, 1383.1318276798015, 1406.2504724545138, 1632.5000768646682, 
992.636789927227, 1530.9073024554225, 1319.2083548861956, 1105.5821597746094], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [14:11:40<32:14, 25.85it/s]global step 950000, trans_decision ep_re 1404.0339465466745

{"global_step": 950000, "eval_re": [1869.4612824427065, 899.9560031632632, 
925.9575104093633, 1707.1149135753267, 1449.1084503929173, 1369.4554602009268, 
1714.4602080595826, 1635.2086819532587, 978.2231124233655, 1491.3938428460335], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959998/1000000 [14:20:50<25:37, 26.01it/s]global step 960000, trans_decision ep_re 1255.4873118854111

{"global_step": 960000, "eval_re": [1270.0608876063895, 1113.3210507279118, 
854.5878087638005, 1590.4026269708957, 1327.3506929620926, 1033.3002247554755, 
1091.8642459978369, 1281.6457289705627, 1459.987946622665, 1532.3519054764795], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [14:30:00<19:18, 25.89it/s]global step 970000, trans_decision ep_re 1203.6095816828151

{"global_step": 970000, "eval_re": [1274.8000537883433, 1128.0240291556843, 
1229.2996055080328, 867.6247903903169, 1387.1131523569125, 862.7286428663524, 
1230.5454898211106, 1637.646251406733, 1426.3820144091226, 991.9317871255441], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [14:39:00<12:50, 25.97it/s]global step 980000, trans_decision ep_re 1168.860128602538

{"global_step": 980000, "eval_re": [1005.058413789419, 1382.9944194044813, 
1106.1874665827822, 943.1611913143845, 1197.6895647366584, 1352.1643834706733, 
935.7672295739387, 937.0389025424321, 1526.670102098274, 1301.8696125123327], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989998/1000000 [14:48:00<06:23, 26.06it/s]global step 990000, trans_decision ep_re 1301.31410453243

{"global_step": 990000, "eval_re": [1065.9165987925714, 1905.0742258974356, 
1025.663580855722, 1600.0629108212906, 911.993731665869, 1219.225861800522, 
971.286528365916, 952.9128825265017, 1613.183338713338, 1747.821385885134], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [14:57:10<00:00, 25.94it/s]global step 1000000, trans_decision ep_re 1436.022533170825

{"global_step": 1000000, "eval_re": [1372.132503904241, 1296.8800768806934, 
1799.8057419133315, 1152.5552721974807, 1660.3843351234839, 1314.1164802753515, 
1234.3430739288092, 1349.0510568717878, 1761.6712593251145, 1419.2855312879576],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [14:57:37<00:00, 18.57it/s]
