
{
    'exp_name': 'VDPO',
    'env': 'HalfCheetah-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.15
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9996/1000000 [03:30<8:20:58, 32.94it/s]global step 10000, trans_decision ep_re -217.59145657104395

{"global_step": 10000, "eval_re": [-206.9314739816278, -195.06705671170425, 
-219.93072592497018, -202.93627537855636, -222.82791107505, -242.64968600360473,
-217.35792335230803, -223.95143856151574, -217.3136987291982, 
-226.94837599190436], "eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [10:40<8:14:49, 33.01it/s]global step 20000, trans_decision ep_re 54.6570032431664

{"global_step": 20000, "eval_re": [-55.28308204615971, 60.613387372202304, 
-39.76532004027931, 31.156388111219616, 98.57451365805812, 226.21870004600677, 
75.20294202858142, -21.382936989359198, 59.06085379224499, 112.17458649914902], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  3%|▎         | 29999/1000000 [17:40<8:10:51, 32.94it/s]global step 30000, trans_decision ep_re 378.24534382704286

{"global_step": 30000, "eval_re": [339.3422056034316, 398.574952024257, 
461.3344323426337, 422.05583105408795, 359.5458644137969, 303.70482581481997, 
380.79225752776847, 379.3692481703996, 344.3766416111604, 393.3571797080732], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  4%|▍         | 39997/1000000 [24:40<8:05:35, 32.95it/s]global step 40000, trans_decision ep_re 677.542702182579

{"global_step": 40000, "eval_re": [685.1700990900827, 708.6189268722079, 
686.7406308630444, 745.1378906585066, 594.884029424893, 742.3613077853341, 
655.0939552665247, 733.2701836511242, 591.6072753309012, 632.5427228831715], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  5%|▍         | 49997/1000000 [31:50<8:02:53, 32.79it/s]global step 50000, trans_decision ep_re 648.4476120806346

{"global_step": 50000, "eval_re": [710.1958223820791, -451.86588926418335, 
745.9960484811206, 807.1474471960582, 884.1454511281844, 775.8177269727222, 
835.5509985033647, 851.2944690046857, 635.6835208127021, 690.5105255896129], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [38:50<7:55:20, 32.96it/s]global step 60000, trans_decision ep_re 803.9236009303437

{"global_step": 60000, "eval_re": [753.0938814578766, 828.8138267419229, 
776.5803926199274, 843.6219000821116, 804.5209996374513, 708.3567372029239, 
951.440642245048, 768.0054063217823, 811.770103150857, 793.0321198435358], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  7%|▋         | 69999/1000000 [45:50<7:50:32, 32.94it/s]global step 70000, trans_decision ep_re 734.5236864377023

{"global_step": 70000, "eval_re": [852.2196536696591, 817.0185802327534, 
803.9322335338398, 775.6045796426654, 719.5340451209249, 543.2786238299404, 
806.7503763284993, 822.26629786481, 505.8213236049597, 698.8111505489717], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  8%|▊         | 79997/1000000 [53:00<7:46:47, 32.85it/s]global step 80000, trans_decision ep_re 722.3123045062518

{"global_step": 80000, "eval_re": [863.7725398365635, 779.3197033099481, 
783.5554835991579, 767.0595995117804, 519.9401540115182, 815.3429819358477, 
293.5254010923324, 954.210137468743, 776.3416965828039, 670.0553477138242], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

  9%|▉         | 89998/1000000 [1:00:00<7:39:30, 33.01it/s]global step 90000, trans_decision ep_re 828.6804236433054

{"global_step": 90000, "eval_re": [864.0318500758656, 779.2039963215793, 
862.6913469370008, 870.7620291189537, 878.155664262126, 735.2095736291204, 
867.3417029825276, 836.75124196688, 831.6143883119256, 761.0424428270753], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 10%|▉         | 99999/1000000 [1:07:00<7:35:24, 32.94it/s]global step 100000, trans_decision ep_re 863.6128198161889

{"global_step": 100000, "eval_re": [911.2619832385298, 881.6483029013609, 
817.8331235008303, 886.4320494847324, 1014.1977194616732, 743.9745514543572, 
794.3723346147854, 903.5892691648197, 909.3230543122376, 773.4958100285633], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 11%|█         | 109998/1000000 [1:14:10<7:30:28, 32.93it/s]global step 110000, trans_decision ep_re 804.7379647920864

{"global_step": 110000, "eval_re": [915.9610649946885, 818.1552385888472, 
811.6370510478206, 775.8610116616865, 778.7482458902513, 876.1571181529314, 
936.2291045225793, 588.9515476987532, 822.2445401731858, 723.4347251901212], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 12%|█▏        | 119997/1000000 [1:21:10<7:26:32, 32.84it/s]global step 120000, trans_decision ep_re 921.4209129286344

{"global_step": 120000, "eval_re": [907.2838232060521, 923.3806737333651, 
938.279016713969, 834.2561856393356, 850.2199494553519, 1063.316257725845, 
814.6540833476887, 1058.5717925827198, 956.5364323588173, 867.7109145232007], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 13%|█▎        | 129997/1000000 [1:28:10<7:20:52, 32.89it/s]global step 130000, trans_decision ep_re 996.704297538992

{"global_step": 130000, "eval_re": [1063.850998809651, 669.3511098894056, 
1059.5366651087647, 1055.6469477631397, 948.9700432807073, 1238.2470059577272, 
1092.196523410263, 980.1629082022397, 944.9860481422355, 914.0947248257879], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 14%|█▍        | 139997/1000000 [1:35:20<7:14:32, 32.99it/s]global step 140000, trans_decision ep_re 1004.604820197771

{"global_step": 140000, "eval_re": [861.4259771883611, 864.6163690689423, 
932.2429464351676, 943.6613919814205, 938.6550046591746, 1084.7793481626636, 
1019.3712717913958, 1132.0106155183319, 1027.9996445470028, 1241.28563262525], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 15%|█▍        | 149999/1000000 [1:42:20<7:10:39, 32.90it/s]global step 150000, trans_decision ep_re 1118.0315455738626

{"global_step": 150000, "eval_re": [1099.048076349249, 1032.6503914257125, 
1143.9292354635286, 951.0976585201935, 1425.6462273729103, 980.8912354318705, 
1021.2695193682697, 1159.1438986842115, 1129.19493667264, 1237.444276450042], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 16%|█▌        | 159997/1000000 [1:49:20<7:05:19, 32.92it/s]global step 160000, trans_decision ep_re 1492.5370900233986

{"global_step": 160000, "eval_re": [1261.4499984109104, 1877.3311616676667, 
1571.6559322314226, 1264.378686972114, 1199.5771042990716, 1518.3309546675414, 
2120.9274019555764, 1027.3437803338225, 1865.8712567567216, 1218.50462293914], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 17%|█▋        | 169997/1000000 [1:56:30<6:59:29, 32.98it/s]global step 170000, trans_decision ep_re 1386.8193215838783

{"global_step": 170000, "eval_re": [941.5019125560431, 1845.9550697937661, 
1454.036190671861, 1185.8903602368412, 1136.0748766051986, 1772.832791512718, 
1473.3199262612181, 1590.555966369775, 1138.2428675792132, 1329.783254252149], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 18%|█▊        | 179997/1000000 [2:03:30<6:54:41, 32.96it/s]global step 180000, trans_decision ep_re 1444.1795252467705

{"global_step": 180000, "eval_re": [1295.5472618979904, 1176.6991292766495, 
1601.6507310172865, 2163.0969809829035, 2251.7536172621008, 1053.3308840241832, 
1255.710920349086, 1300.962138844526, 1116.1660099815192, 1226.8775788314572], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 19%|█▉        | 189997/1000000 [2:10:30<6:50:00, 32.93it/s]global step 190000, trans_decision ep_re 1279.3637598857003

{"global_step": 190000, "eval_re": [1568.0436531104413, 1042.7148861896792, 
1091.8959980158527, 1388.0917942616181, 1276.497819513288, 2434.1020317560174, 
779.0222452170437, 1053.378420112675, 1097.6676196422295, 1062.223131038158], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 20%|█▉        | 199999/1000000 [2:17:40<6:45:13, 32.90it/s]global step 200000, trans_decision ep_re 1304.7333927001073

{"global_step": 200000, "eval_re": [1286.6629303873635, 1136.5321284619922, 
988.5556888773611, 1672.5162413051119, 1438.1415878046803, 1311.9282463077436, 
1159.0081014618092, 1139.1288625181153, 1333.622415758112, 1581.2377241187821], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 21%|██        | 209997/1000000 [2:24:40<6:40:38, 32.86it/s]global step 210000, trans_decision ep_re 1896.9069401797642

{"global_step": 210000, "eval_re": [2278.358372736709, 2059.453904380339, 
1800.5100494820022, 2098.950752678097, 1250.8372286981817, 2346.007943748567, 
2116.233933118674, 2083.868471278526, 983.7723688969937, 1951.0763767795559], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 22%|██▏       | 219997/1000000 [2:31:40<6:34:36, 32.94it/s]global step 220000, trans_decision ep_re 1288.7565647247234

{"global_step": 220000, "eval_re": [1132.3303412188145, 968.6489525796985, 
1211.2493791398851, 1896.561347059082, 1721.2468283771389, 1573.1386557900096, 
1039.0047767708552, 1228.246908683803, 1057.886901131845, 1059.2515564961002], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 23%|██▎       | 229997/1000000 [2:38:40<6:29:39, 32.93it/s]global step 230000, trans_decision ep_re 1259.782532939508

{"global_step": 230000, "eval_re": [1122.7871174774273, 982.7066278604211, 
1042.3459439759538, 1098.7366120677248, 1574.8919893094348, 1869.9568111307808, 
1070.383180599387, 1266.2397041237855, 965.9963023607587, 1603.7810404894083], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 24%|██▍       | 239998/1000000 [2:45:50<6:24:35, 32.94it/s]global step 240000, trans_decision ep_re 1562.7543971735479

{"global_step": 240000, "eval_re": [1798.873636248273, 1628.6102044028305, 
1167.6861025312944, 1428.2769825879018, 1160.1760020544925, 1571.9150728122613, 
1267.2289736776777, 1922.628913629921, 1724.9029782563964, 1957.2451055344318], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 25%|██▍       | 249997/1000000 [2:52:50<6:20:18, 32.87it/s]global step 250000, trans_decision ep_re 1562.3877133716346

{"global_step": 250000, "eval_re": [1258.4467450434286, 1213.0350540157253, 
1864.1341352082095, 2240.581405018473, 1966.2366920019315, 1054.073703208319, 
1253.0634568662451, 1702.9968411924485, 1852.8943502082884, 1218.4147509532784],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 26%|██▌       | 259999/1000000 [2:59:50<6:10:18, 33.31it/s]global step 260000, trans_decision ep_re 1317.990845335214

{"global_step": 260000, "eval_re": [1626.1043458671088, 1112.5982143827555, 
1009.4916840408265, 943.7397034952397, 1463.9598282554264, 1612.225861134886, 
1789.545270423697, 1365.1717795645566, 1264.2689325281578, 992.8028336594872], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 27%|██▋       | 269999/1000000 [3:06:50<6:02:47, 33.54it/s]global step 270000, trans_decision ep_re 1632.4805157249425

{"global_step": 270000, "eval_re": [1595.6278173313924, 1606.7559364992185, 
1635.7872778070541, 1206.7526008194225, 1558.059393942142, 1699.398842244939, 
1608.86672734909, 1863.638276154773, 1759.962213082838, 1789.9560720185539], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 28%|██▊       | 279999/1000000 [3:13:50<6:02:42, 33.08it/s]global step 280000, trans_decision ep_re 1342.2212281937486

{"global_step": 280000, "eval_re": [2081.7748015789425, 1278.621070066145, 
1476.7027818232302, 1283.80779702115, 1791.966701474967, 1629.0893979052373, 
1494.6771687259593, 943.4276255956689, 253.11168597582213, 1189.0332517703637], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 29%|██▉       | 289999/1000000 [3:20:50<5:53:56, 33.43it/s]global step 290000, trans_decision ep_re 1527.0018964740134

{"global_step": 290000, "eval_re": [1763.3009183033114, 1373.6664928477871, 
1275.4224322770638, 2050.338212965359, 1004.6820177108627, 1652.7769967808904, 
1031.8059596793676, 1852.0317780520372, 1693.7718415379998, 1572.2223145854553],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 30%|██▉       | 299999/1000000 [3:27:50<5:53:08, 33.04it/s]global step 300000, trans_decision ep_re 1396.4788866612155

{"global_step": 300000, "eval_re": [1415.645623564554, 1090.8921799792051, 
1259.6619543923293, 1763.0260113874826, 1443.4609654433268, 1555.183351705511, 
941.3388741470199, 1423.7082724993154, 1343.2027853024458, 1728.6688481909634], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 31%|███       | 309999/1000000 [3:34:50<5:45:37, 33.27it/s]global step 310000, trans_decision ep_re 1420.3553458066117

{"global_step": 310000, "eval_re": [1073.6976612090054, 1271.7854194115405, 
2419.069549502962, 1546.3041959052632, 1000.7732290513919, 1628.1812138202872, 
1264.7285651077032, 1212.6464935209042, 1174.8389185906062, 1611.5282119464512],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 32%|███▏      | 319999/1000000 [3:41:50<5:45:09, 32.84it/s]global step 320000, trans_decision ep_re 1448.891781040066

{"global_step": 320000, "eval_re": [1200.2665603209086, 2162.5757562305544, 
1993.563533195417, 1026.8780357459852, 1378.6578785673944, 936.3128075111713, 
1007.3415618012934, 2029.8657349404793, 1622.1774821370589, 1131.2784599504], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 33%|███▎      | 329999/1000000 [3:48:50<5:38:09, 33.02it/s]global step 330000, trans_decision ep_re 1570.979364528307

{"global_step": 330000, "eval_re": [976.5590182871874, 1302.1661746470447, 
2362.570462202961, 1852.1155383429034, 1055.568007096456, 1172.5669317882687, 
1804.5727981733235, 1491.0448157254582, 1716.859911683931, 1975.7699873355373], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 34%|███▍      | 339999/1000000 [3:55:50<5:34:37, 32.87it/s]global step 340000, trans_decision ep_re 1395.7347645731254

{"global_step": 340000, "eval_re": [1808.1932422898778, 1557.6007617872333, 
1020.1003876729909, 1608.4464107064475, 1701.5124873957886, 1147.5382692289081, 
982.5357076894484, 1169.810020819625, 1542.9865681212605, 1418.623790019673], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 35%|███▍      | 349997/1000000 [4:03:00<5:36:10, 32.22it/s]global step 350000, trans_decision ep_re 1285.1901370377518

{"global_step": 350000, "eval_re": [986.6473210959606, 1262.73459928408, 
1709.5072459791354, 1364.657542490547, 1113.714854137267, 1462.8092477449063, 
1147.3000599285292, 959.1158802496672, 1471.1687127150662, 1374.2459067523614], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 36%|███▌      | 359997/1000000 [4:10:00<5:27:28, 32.57it/s]global step 360000, trans_decision ep_re 1486.2881952866003

{"global_step": 360000, "eval_re": [1413.1355178391284, 1256.3270372221625, 
1110.9444257610114, 1714.66787440397, 1529.509888732211, 1538.538019507904, 
1032.9389468582876, 1832.3052995350363, 1872.558592296159, 1561.9563507101348], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 37%|███▋      | 369997/1000000 [4:17:10<5:23:31, 32.46it/s]global step 370000, trans_decision ep_re 1618.675957992075

{"global_step": 370000, "eval_re": [2167.316392130869, 1215.8760115370396, 
1504.5928554735458, 1928.3132274783873, 2037.094594511989, 1308.6839037380269, 
1201.687819543474, 1389.8153503143344, 1693.4280029391698, 1739.9514222539106], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 38%|███▊      | 379997/1000000 [4:24:10<5:15:17, 32.77it/s]global step 380000, trans_decision ep_re 1491.8721523663237

{"global_step": 380000, "eval_re": [1755.882009289853, 981.4917962226945, 
1582.739584122227, 1154.7450273285203, 1144.2515872337744, 1983.8775599341952, 
1616.8717521927285, 1571.8489083991058, 1479.8277327834483, 1647.1855661566917],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 39%|███▉      | 389997/1000000 [4:31:20<5:10:22, 32.76it/s]global step 390000, trans_decision ep_re 1342.106039976302

{"global_step": 390000, "eval_re": [1444.098689648066, 1544.6834194564658, 
1061.9611231459553, 1043.0215519114372, 1149.705491648078, 1009.4206998208107, 
1268.6202865545222, 1247.5014499629272, 1652.8333053569975, 1999.2143822577623],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 40%|███▉      | 399998/1000000 [4:38:20<5:10:48, 32.17it/s]global step 400000, trans_decision ep_re 1287.5956678168727

{"global_step": 400000, "eval_re": [1250.420744979577, 1614.722044945388, 
1248.1703712098856, 983.9084835810877, 1507.2552885129674, 1219.9404732852304, 
1080.0920064798217, 1224.6208472631495, 1558.1624277515918, 1188.663990160029], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 41%|████      | 409997/1000000 [4:45:20<4:58:54, 32.90it/s]global step 410000, trans_decision ep_re 1321.8844696862564

{"global_step": 410000, "eval_re": [1661.955517570347, 1674.6696469204953, 
1085.4896085881117, 1072.31197886408, 1300.8859763548217, 1524.8374642487906, 
1066.3932950279445, 1028.9195891580014, 1484.6506282956511, 1318.73099183432], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 42%|████▏     | 419997/1000000 [4:52:30<5:00:01, 32.22it/s]global step 420000, trans_decision ep_re 1473.8438138587717

{"global_step": 420000, "eval_re": [1220.5415275073667, 1359.6847167764824, 
1643.8874236594258, 1776.4146035648225, 1154.2839625011306, 1168.5052087888666, 
1841.7860033702354, 1433.6380605129978, 1844.350000028991, 1295.3466318773974], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 43%|████▎     | 429997/1000000 [4:59:40<4:48:01, 32.98it/s]global step 430000, trans_decision ep_re 1411.0481522097534

{"global_step": 430000, "eval_re": [1589.6496951649917, 1261.9897162536972, 
1140.1247835771924, 1538.1898943798126, 934.4943535560669, 1610.5153680939175, 
1254.0261719897176, 1901.9346033030693, 1800.6623046982613, 1078.8946310808071],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 44%|████▍     | 439999/1000000 [5:06:40<4:41:49, 33.12it/s]global step 440000, trans_decision ep_re 1481.0756401206222

{"global_step": 440000, "eval_re": [1588.4232235873665, 1674.4916277399252, 
1294.182063920159, 1511.3054596216848, 1522.9664800538333, 1426.384513961897, 
1499.5442727748746, 1655.1523635994622, 1312.5373243892782, 1325.7690715577435],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 45%|████▍     | 449999/1000000 [5:13:50<4:39:48, 32.76it/s]global step 450000, trans_decision ep_re 1272.9641014958756

{"global_step": 450000, "eval_re": [1556.6065701399425, 1000.8842829795753, 
1558.7590303405636, 1397.9848367036527, 1238.2909487436273, 1313.4733723411532, 
1218.3600777905492, 1030.116651904037, 1282.098581255226, 1133.0666627604323], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 46%|████▌     | 459999/1000000 [5:20:50<4:34:21, 32.80it/s]global step 460000, trans_decision ep_re 1280.9065346065458

{"global_step": 460000, "eval_re": [1185.6119844439427, 1630.633057609641, 
1615.4817775972324, 1074.05135297897, 1144.8641249057807, 910.683414848574, 
1118.8413042139036, 1542.783314865701, 1174.49615680163, 1411.6188578000847], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 47%|████▋     | 469998/1000000 [5:28:00<4:33:16, 32.32it/s]global step 470000, trans_decision ep_re 1315.6283068773798

{"global_step": 470000, "eval_re": [1487.3817058723719, 1435.8527740921024, 
927.131135717987, 1354.0847064916939, 1018.3126187749762, 1665.8039343962516, 
1279.0695069548378, 1397.0155700936068, 1313.152831684313, 1278.4782846956582], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 48%|████▊     | 479997/1000000 [5:35:00<4:24:09, 32.81it/s]global step 480000, trans_decision ep_re 1514.6601545286335

{"global_step": 480000, "eval_re": [1211.5038536326465, 990.120743677384, 
1725.7073457692895, 1412.716901301783, 1737.2221300036522, 1502.495462077098, 
1627.8985001865785, 1717.5118948839681, 1746.15162255071, 1475.2730912032252], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 49%|████▉     | 489997/1000000 [5:42:10<4:18:04, 32.94it/s]global step 490000, trans_decision ep_re 1562.170874450163

{"global_step": 490000, "eval_re": [1160.2135325946633, 1470.238668814204, 
1983.2435750506138, 1186.3981890292664, 1787.5429630680958, 1578.1327072867593, 
1168.398037159721, 1831.380329749633, 1396.5257052651073, 2059.6350364835666], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 50%|████▉     | 499997/1000000 [5:49:10<4:14:38, 32.73it/s]global step 500000, trans_decision ep_re 1334.1212144404947

{"global_step": 500000, "eval_re": [1473.4623667642072, 1330.266427116178, 
1023.5477826715642, 1523.3674286625424, 1096.597920135574, 1005.5620513847472, 
1698.075690232218, 1268.457069207821, 1245.4457201570665, 1676.429688073029], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 51%|█████     | 509997/1000000 [5:56:20<4:10:41, 32.58it/s]global step 510000, trans_decision ep_re 1355.0228914415416

{"global_step": 510000, "eval_re": [1008.2073074199818, 1361.735400305215, 
1238.7547854676336, 1412.6490881349534, 1065.295656427384, 2217.690197280318, 
1229.075441875108, 1832.5161845261339, 1004.3882471208925, 1179.9166058577944], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 52%|█████▏    | 519997/1000000 [6:03:30<4:07:39, 32.30it/s]global step 520000, trans_decision ep_re 1236.6653614089855

{"global_step": 520000, "eval_re": [1196.1094997110629, 1244.235931491317, 
1197.032789480293, 1332.6184051156822, 1054.492272475887, 1191.2896737120436, 
1612.0504551455049, 978.8077918377728, 1460.7843145517352, 1099.232480568556], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 53%|█████▎    | 529997/1000000 [6:10:30<4:01:47, 32.40it/s]global step 530000, trans_decision ep_re 1521.556529504513

{"global_step": 530000, "eval_re": [1457.3031841058973, 1001.6505495582617, 
1813.3204489081163, 1925.6859353980915, 1267.9893020510028, 1810.670012710606, 
1772.500390160552, 1564.023903901882, 1163.3362720830028, 1439.0852961677167], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 54%|█████▍    | 539997/1000000 [6:17:40<3:53:55, 32.77it/s]global step 540000, trans_decision ep_re 1445.4618402513684

{"global_step": 540000, "eval_re": [1113.1345620266186, 1390.1317527121787, 
1779.5657572712814, 1603.1559737769921, 1693.1236467994593, 1458.4621294905037, 
1261.4437795272029, 1926.0514244453336, 971.7657126655616, 1257.7836637985517], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 55%|█████▍    | 549997/1000000 [6:24:40<3:51:47, 32.36it/s]global step 550000, trans_decision ep_re 1220.9392326845732

{"global_step": 550000, "eval_re": [975.9779748986272, 1041.8412437372222, 
1248.3393421118428, 1836.8924180241338, 1050.28270577035, 926.8143179848121, 
1123.5757089942997, 1529.8491330668326, 1430.2035838198972, 1045.615898437716], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 56%|█████▌    | 559997/1000000 [6:31:50<3:49:13, 31.99it/s]global step 560000, trans_decision ep_re 1368.6100478654794

{"global_step": 560000, "eval_re": [1496.15927080368, 1469.9972766547614, 
1874.263916202272, 1393.308155453528, 1075.6912721042056, 1593.597162161184, 
1140.7935185520555, 986.1714325727318, 994.0185132818491, 1662.0999608685252], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 57%|█████▋    | 569997/1000000 [6:39:00<3:37:13, 32.99it/s]global step 570000, trans_decision ep_re 1154.388048129541

{"global_step": 570000, "eval_re": [1027.1789727092184, 1211.9991816957893, 
1242.1010718588702, 1143.279656939746, 1028.1069166075781, 1157.3190096197995, 
1744.210074579797, 1087.903703353807, 938.3595361734544, 963.4223577573492], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 58%|█████▊    | 579997/1000000 [6:46:00<3:35:16, 32.52it/s]global step 580000, trans_decision ep_re 1183.091099235034

{"global_step": 580000, "eval_re": [1029.5353538661307, 1155.2868977001062, 
1186.0070977468217, 1122.2126221391436, 1416.5991948081412, 1094.7491068049903, 
1035.109747967314, 1155.837735047464, 1584.7092278796301, 1050.8640083905968], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 59%|█████▉    | 589997/1000000 [6:53:10<3:28:16, 32.81it/s]global step 590000, trans_decision ep_re 1262.9683234952229

{"global_step": 590000, "eval_re": [923.428526995576, 1006.5314746558272, 
1107.9161182300354, 1420.9058073102153, 1470.4647352858426, 1087.816131020909, 
1576.3606511952898, 966.5637252710715, 1673.6785393593823, 1396.0175256280802], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 60%|█████▉    | 599997/1000000 [7:00:10<3:22:00, 33.00it/s]global step 600000, trans_decision ep_re 1302.9119917033165

{"global_step": 600000, "eval_re": [1720.9086527736683, 1309.5591055410275, 
1291.1695640134276, 1595.2004381705015, 1079.9902500588785, 962.5867195883625, 
1321.0366261496276, 826.9152040763325, 1109.1194441022353, 1812.6339125591062], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 61%|██████    | 609997/1000000 [7:07:20<3:19:05, 32.65it/s]global step 610000, trans_decision ep_re 1180.7587944887687

{"global_step": 610000, "eval_re": [1469.5400140809056, 990.7010918167067, 
945.0033100740577, 1120.6363325817117, 1326.1850157081556, 1037.3079110450856, 
1424.4039345414137, 1301.9162890168209, 1250.3572846476584, 941.5367613751704], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 62%|██████▏   | 619997/1000000 [7:14:30<3:12:56, 32.83it/s]global step 620000, trans_decision ep_re 1200.2415770953116

{"global_step": 620000, "eval_re": [867.9512317862257, 1161.1033729507246, 
1312.0848512882606, 1352.4139829427038, 1014.2952653715773, 1599.3646049252163, 
1062.4780397663178, 1203.8182678264411, 1372.6462734676727, 1056.259880627977], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 63%|██████▎   | 629997/1000000 [7:21:40<3:06:42, 33.03it/s]global step 630000, trans_decision ep_re 1415.3453294026526

{"global_step": 630000, "eval_re": [984.0484551964632, 1302.7004480274088, 
2094.835007344013, 1013.8834227675463, 1601.1278614557837, 1635.7043322819818, 
1047.2073320726452, 1674.1211376350936, 1369.441378764515, 1430.383918481076], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 64%|██████▍   | 639999/1000000 [7:28:40<3:03:57, 32.62it/s]global step 640000, trans_decision ep_re 1324.2950149625556

{"global_step": 640000, "eval_re": [1268.1539068784539, 1617.5906329192962, 
1160.8458854027415, 1778.0250964576837, 1146.656978548371, 919.1495129068699, 
1255.5409552959734, 1438.6837941839024, 1202.2291355478187, 1456.0742514844444],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 65%|██████▍   | 649997/1000000 [7:35:50<2:57:01, 32.95it/s]global step 650000, trans_decision ep_re 1451.038046586919

{"global_step": 650000, "eval_re": [1460.6693511553262, 1456.9188006453953, 
1609.040935378963, 1473.7373744477763, 1837.9257691549187, 1191.688372268604, 
1432.1943449158566, 1584.3217897542233, 944.9306808773817, 1518.9530472707474], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [7:42:50<2:53:05, 32.74it/s]global step 660000, trans_decision ep_re 1437.2027260840434

{"global_step": 660000, "eval_re": [1755.4278729693206, 1039.0944785394104, 
1616.7225389070236, 1753.486486848835, 1362.76736733186, 1702.4414351735759, 
1735.1392082999987, 992.6681957326631, 1320.7062253088793, 1093.573451728868], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 67%|██████▋   | 669997/1000000 [7:50:00<2:47:10, 32.90it/s]global step 670000, trans_decision ep_re 1119.0699244104546

{"global_step": 670000, "eval_re": [979.6589150967523, 923.748616793008, 
1167.147488864943, 994.4128999659705, 1215.374508666103, 1098.6926392933317, 
840.8507578169009, 1783.9860276807947, 1046.7605852663626, 1140.06680466038], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 68%|██████▊   | 679999/1000000 [7:57:00<2:42:04, 32.91it/s]global step 680000, trans_decision ep_re 1185.8155464252548

{"global_step": 680000, "eval_re": [1183.292778201532, 1158.4162673474643, 
1408.9260808336874, 1303.0828430177485, 954.3076077488224, 1108.3940518293039, 
1016.7855601631735, 895.5577795606492, 1246.6345021331292, 1582.7579934170399], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 69%|██████▉   | 689997/1000000 [8:04:00<2:36:57, 32.92it/s]global step 690000, trans_decision ep_re 1214.1519570822798

{"global_step": 690000, "eval_re": [1849.84899854002, 1350.2807351282468, 
1639.459939839581, 981.1854966702011, 1057.6543154264261, 1148.3468145924521, 
942.2359676664103, 953.3462999701982, 1064.6581343200126, 1154.50286866925], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 70%|██████▉   | 699999/1000000 [8:11:00<2:33:06, 32.66it/s]global step 700000, trans_decision ep_re 1467.4912049729326

{"global_step": 700000, "eval_re": [1715.192274514089, 2107.2835408832266, 
1690.859910322852, 1610.386681460214, 1498.9687639294752, 1133.8723743740438, 
1059.0794869848844, 1165.7898433256687, 1034.6993072284997, 1658.7798667063735],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 71%|███████   | 709999/1000000 [8:18:00<2:27:16, 32.82it/s]global step 710000, trans_decision ep_re 1460.0912295290022

{"global_step": 710000, "eval_re": [1321.203565384532, 1765.631778499341, 
1106.3996578923504, 1670.3712800851279, 1489.7668103881845, 1717.849652366233, 
1876.5905460058862, 974.3160446311867, 1650.1297275319926, 1028.653232505187], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 72%|███████▏  | 719999/1000000 [8:25:10<2:21:32, 32.97it/s]global step 720000, trans_decision ep_re 1280.0008270858318

{"global_step": 720000, "eval_re": [1100.100549425154, 1160.4951472590092, 
1075.6140334372053, 1438.7167574712043, 890.803024037079, 1342.0504980915243, 
1325.8194594497822, 1652.3294925109021, 1734.7049833703352, 1079.3743258061222],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 73%|███████▎  | 729999/1000000 [8:32:10<2:16:37, 32.94it/s]global step 730000, trans_decision ep_re 1410.443244552384

{"global_step": 730000, "eval_re": [1378.929615531547, 1774.501413277475, 
1061.2110606938747, 1563.5636961220291, 1088.4951586288641, 1192.5619016209039, 
1261.8743513855834, 2042.3334963579257, 1579.6510950633779, 1161.3106568422563],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 74%|███████▍  | 739999/1000000 [8:39:10<2:11:26, 32.97it/s]global step 740000, trans_decision ep_re 1425.323227661249

{"global_step": 740000, "eval_re": [1474.046340282131, 1599.6304377940246, 
932.6325391464607, 1428.714358418059, 1594.0476183214885, 1135.0829029715226, 
1857.7091219359222, 981.4421502064471, 1547.268431101384, 1702.6583764350487], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 75%|███████▍  | 749999/1000000 [8:46:10<2:06:56, 32.82it/s]global step 750000, trans_decision ep_re 931.5379895272108

{"global_step": 750000, "eval_re": [969.7625008228052, 1211.7191292382186, 
1109.3957550150494, -243.5846932604024, 1109.1148698053062, 981.392872245186, 
1382.4827724640534, 1083.2859193784657, 468.9812454655342, 1242.8295240978919], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 76%|███████▌  | 759997/1000000 [8:53:10<2:01:29, 32.92it/s]global step 760000, trans_decision ep_re 1344.6737654259414

{"global_step": 760000, "eval_re": [1102.888553613454, 1135.4820262852934, 
1075.4989816218729, 1581.4007527848353, 1438.2193766171583, 1266.6101911292874, 
1614.1708948674614, 1410.5031342100744, 1577.2615332054343, 1244.7022099245419],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 77%|███████▋  | 769999/1000000 [9:00:20<1:55:53, 33.08it/s]global step 770000, trans_decision ep_re 1289.3616735604942

{"global_step": 770000, "eval_re": [1289.2754288907836, 1050.926962880584, 
993.1821698632945, 1478.1928797628475, 1123.6100699369463, 1310.6882938969775, 
1372.3949939426732, 1044.4271076538023, 1693.1615525366053, 1537.7572762404295],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 78%|███████▊  | 779999/1000000 [9:07:20<1:51:02, 33.02it/s]global step 780000, trans_decision ep_re 1522.843618149635

{"global_step": 780000, "eval_re": [1209.191644641672, 1882.5857252273506, 
1548.1269433137277, 1125.1685135868902, 1577.3662086197166, 1618.7816814576388, 
1273.8238200735739, 1847.167377074209, 1464.424779267479, 1681.7994882340897], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 79%|███████▉  | 789999/1000000 [9:14:20<1:46:29, 32.87it/s]global step 790000, trans_decision ep_re 1264.7104376380594

{"global_step": 790000, "eval_re": [1028.2157866791708, 1684.8231285704214, 
1017.405993849707, 1495.7987536693927, 1063.3243780733399, 943.7374929161663, 
1847.8511032011677, 910.571804286161, 1693.0107134068744, 962.3652217281915], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 80%|███████▉  | 799997/1000000 [9:21:20<1:41:13, 32.93it/s]global step 800000, trans_decision ep_re 1266.5851697076678

{"global_step": 800000, "eval_re": [732.5888390762805, 1691.3408284197928, 
1465.182447988217, 1006.9601697223719, 1072.1351323863723, 1095.3034114619636, 
1775.3857097286373, 997.1271749384968, 1228.665990618201, 1601.1619927363429], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 81%|████████  | 809998/1000000 [9:28:20<1:37:05, 32.62it/s]global step 810000, trans_decision ep_re 1321.0334713772534

{"global_step": 810000, "eval_re": [881.9160923034269, 1366.878958387403, 
1721.7781044395597, 1407.8782498186715, 1342.2942348732397, 1257.0804181265885, 
1449.4574477230542, 1353.656982368697, 1128.4124104312466, 1300.9818153006474], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 82%|████████▏ | 819997/1000000 [9:35:20<1:31:07, 32.92it/s]global step 820000, trans_decision ep_re 1272.8238500921536

{"global_step": 820000, "eval_re": [1124.9731094421218, 1265.7620285389085, 
1434.8259967027989, 1182.1725143838091, 1696.8113825279956, 1101.0148938616758, 
1503.4908703603942, 937.1056668125602, 1341.9120498225955, 1140.1699884686777], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 83%|████████▎ | 829999/1000000 [9:42:30<1:26:05, 32.91it/s]global step 830000, trans_decision ep_re 1138.99472860776

{"global_step": 830000, "eval_re": [1356.6778160448737, 889.7592251721788, 
1115.840394318068, 1308.2697727655275, 1039.5483632473727, 1267.2386454932496, 
926.5860795336815, 900.7001196162322, 1020.5631898486668, 1564.763680037747], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 84%|████████▍ | 839998/1000000 [9:49:30<1:20:46, 33.02it/s]global step 840000, trans_decision ep_re 1453.5815069644523

{"global_step": 840000, "eval_re": [1501.7900954115426, 1753.1919732888227, 
1006.7174169390568, 1248.9226993750303, 1882.6221898589938, 1754.5181708303214, 
1609.8675651869999, 1052.5372460051956, 1239.999217959164, 1485.6484947893941], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 85%|████████▍ | 849999/1000000 [9:56:30<1:16:07, 32.84it/s]global step 850000, trans_decision ep_re 1149.4506676535614

{"global_step": 850000, "eval_re": [949.8955263131928, 1288.0555664378198, 
947.9816239985049, 1049.257078341954, 972.2762692837651, 1257.8814092641564, 
1617.9128469660498, 1013.3999539488965, 946.8328357226006, 1451.0135662586736], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 86%|████████▌ | 859997/1000000 [10:03:30<1:10:35, 33.06it/s]global step 860000, trans_decision ep_re 1236.3672659014717

{"global_step": 860000, "eval_re": [945.3766547741, 1378.5943803664, 
1210.3338058306895, 1302.9774519608118, 1080.604655342834, 1101.4271280460757, 
1553.9567093014489, 1391.2139675784922, 1237.6253543198372, 1161.562551494027], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 87%|████████▋ | 869999/1000000 [10:10:30<1:06:00, 32.82it/s]global step 870000, trans_decision ep_re 1242.8749561234276

{"global_step": 870000, "eval_re": [896.5290804776577, 1997.9899496668017, 
1132.6559173342253, 1188.2439150576395, 1352.7454345782608, 1055.3274384163174, 
909.930738662361, 1233.968198419731, 1448.9982978680569, 1212.360590753225], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 88%|████████▊ | 879997/1000000 [10:17:30<1:00:52, 32.86it/s]global step 880000, trans_decision ep_re 1193.57477724081

{"global_step": 880000, "eval_re": [1054.413014183056, 960.6976221942182, 
1193.3254821362088, 1343.3665672301129, 1288.5470184816072, 944.2665561647214, 
1238.8911577444408, 1500.400617991792, 1043.032486536917, 1368.8072497450235], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 89%|████████▉ | 889997/1000000 [10:24:40<55:37, 32.96it/s]global step 890000, trans_decision ep_re 1362.5143714819353

{"global_step": 890000, "eval_re": [1749.024720811158, 1070.7645690468755, 
1008.9334676953455, 1016.1646523571408, 1487.1372420843472, 1316.3987711599675, 
1418.9608633265836, 1579.5181797179814, 1390.1660342246162, 1588.0752143953382],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 90%|████████▉ | 899999/1000000 [10:31:40<51:45, 32.20it/s]global step 900000, trans_decision ep_re 1352.608040941812

{"global_step": 900000, "eval_re": [1162.0457559787012, 1451.3699707240837, 
1454.5115216419072, 1577.6118404024164, 1283.4976347966558, 1156.4500883367903, 
1552.405775745589, 1319.3503912498206, 1452.4080933170621, 1116.4293372250938], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 91%|█████████ | 909998/1000000 [10:38:40<45:37, 32.87it/s]global step 910000, trans_decision ep_re 1496.7645194438512

{"global_step": 910000, "eval_re": [1950.625413124506, 1383.867783247329, 
1012.1067036298228, 1894.7856037768634, 1250.827093445074, 939.3235017727259, 
1672.1684671445814, 1708.5665147475256, 1567.4805631606014, 1587.893550389481], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 92%|█████████▏| 919997/1000000 [10:45:40<40:30, 32.92it/s]global step 920000, trans_decision ep_re 1096.5337731516925

{"global_step": 920000, "eval_re": [879.1236567603959, 1344.7061834255271, 
1057.0431575454936, 1119.1878325842515, 1167.3565564844582, 967.2594098878202, 
1336.9216711131673, 1090.466647782019, 216.8781097714778, 1786.3945061623133], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 93%|█████████▎| 929999/1000000 [10:52:50<35:23, 32.97it/s]global step 930000, trans_decision ep_re 1214.2471050403367

{"global_step": 930000, "eval_re": [1175.1075998471085, 1154.6883371949614, 
831.6161183630904, 1324.0503328142972, 1310.0931971126352, 1351.4850157989142, 
1404.9253685313415, 919.1534320422783, 1268.0985012231206, 1403.2531474756174], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 94%|█████████▍| 939999/1000000 [10:59:50<30:18, 32.99it/s]global step 940000, trans_decision ep_re 1165.3920092943304

{"global_step": 940000, "eval_re": [858.5753529292322, 1595.7875430202048, 
167.75206327386968, 1463.6737352349378, 1355.9358924639039, 969.428325083786, 
995.2545363825873, 1278.330897703253, 1386.3369929640276, 1582.844753887502], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 95%|█████████▍| 949999/1000000 [11:06:50<25:18, 32.93it/s]global step 950000, trans_decision ep_re 1282.1621010687295

{"global_step": 950000, "eval_re": [1668.5437452884198, 1073.05760926538, 
1585.0714317080376, 1622.1023605359455, 899.0873243629667, 882.9677314515291, 
1198.112178681025, 990.2349448848631, 1720.181067421636, 1182.2626170874917], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959999/1000000 [11:13:50<20:16, 32.88it/s]global step 960000, trans_decision ep_re 1360.25207441672

{"global_step": 960000, "eval_re": [1176.6293672903535, 1462.6263626566708, 
1035.998728576307, 1125.3848551961478, 1447.7152555043092, 1396.5538407149381, 
1647.2104880069376, 1344.52381253474, 1484.5397884231495, 1481.3382452636454], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 97%|█████████▋| 969999/1000000 [11:21:00<15:13, 32.85it/s]global step 970000, trans_decision ep_re 1200.2311792811308

{"global_step": 970000, "eval_re": [1058.1496160610172, 1176.2117563516322, 
1169.141301108733, 975.9963622939404, 1547.5973553548758, 1498.3139347222175, 
966.962573193837, 943.8306562226555, 1029.363031795869, 1636.7452057065316], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 98%|█████████▊| 979997/1000000 [11:28:00<10:06, 33.00it/s]global step 980000, trans_decision ep_re 1421.5628145203968

{"global_step": 980000, "eval_re": [1011.4261964399641, 1506.421633965624, 
1213.4817206870835, 1477.9382122167453, 1564.3088126707312, 1629.277629902101, 
959.0229515421142, 1609.9988630207865, 1468.2524215341016, 1775.4997032247156], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

 99%|█████████▉| 989999/1000000 [11:35:00<05:04, 32.87it/s]global step 990000, trans_decision ep_re 1398.6073319774928

{"global_step": 990000, "eval_re": [1276.883942934666, 1308.2358621460216, 
1555.6668364421225, 1419.228750512571, 1422.9769980662422, 1562.9787402788495, 
1124.6781804203822, 1029.9500251466277, 1725.0243846139313, 1560.4495992135128],
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|█████████▉| 999999/1000000 [11:42:10<00:00, 32.95it/s]global step 1000000, trans_decision ep_re 1342.0557311081234

{"global_step": 1000000, "eval_re": [1704.8980983963359, 946.8214484552756, 
1273.2881661139834, 955.6961701569422, 1381.101096144917, 1801.0759191764694, 
1359.7380791641806, 1173.0936744721573, 1513.330983289175, 1311.5136757117934], 
"eval_len": [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]}

100%|██████████| 1000000/1000000 [11:42:29<00:00, 23.73it/s]
