
{
    'exp_name': 'VDPO',
    'env': 'Ant-v4',
    'gamma': 0.99,
    'device': device(type='cuda'),
    'seed': 0,
    'buffer_size': 1000000,
    'total_timesteps': 1000000,
    'learn_start': 5000,
    'batch_size': 256,
    'actor_lr': 0.0003,
    'critic_lr': 0.001,
    'alpha_lr': 0.001,
    'target_update_factor': 0.005,
    'actor_freq': 2,
    'target_freq': 1,
    'eval_freq': 10000,
    'eval_num': 10,
    'kl_freq': 1000,
    'belief_lr': 0.0003,
    'embedding_dim': 256,
    'n_steps': 3,
    'delay': 16,
    'delayspec': 'MM1Queue_a033_s075::mm1queue(0.33, 0.75)',
    'noise': 0.1
}
✓ setup
Created Delay Process: MM1Queue(0.33, 0.75)
  1%|          | 9997/1000000 [03:40<8:42:07, 31.60it/s]global step 10000, trans_decision ep_re 416.69806260805933

{"global_step": 10000, "eval_re": [457.2099350986088, 550.0685210504727, 
502.5504184051391, 25.512805735315478, 24.0443278502715, 551.7322939409007, 
523.3595427544964, 502.4075592412454, 548.8643397778438, 481.23088222629985], 
"eval_len": [1000, 1000, 1000, 23, 22, 1000, 1000, 1000, 1000, 1000]}

  2%|▏         | 19997/1000000 [10:50<8:30:03, 32.02it/s]global step 20000, trans_decision ep_re 196.4960754852463

{"global_step": 20000, "eval_re": [367.4467041054409, 482.9501050523127, 
211.15884467258002, 44.39713197049264, 28.677184447229763, 56.694421745132324, 
296.9373344414754, 41.418527202232234, 88.89596244749298, 346.38453876807375], 
"eval_len": [1000, 1000, 370, 67, 44, 200, 433, 53, 123, 1000]}

  3%|▎         | 29997/1000000 [17:50<8:21:45, 32.22it/s]global step 30000, trans_decision ep_re 548.6767130239831

{"global_step": 30000, "eval_re": [606.6399822432932, 586.0082739458934, 
645.0345242123741, 697.7481971723389, 199.36861270833535, 711.3573374524808, 
628.2829477128788, 507.085261328072, 242.85033266200938, 662.3916608021555], 
"eval_len": [1000, 1000, 1000, 1000, 427, 1000, 1000, 1000, 685, 1000]}

  4%|▍         | 39999/1000000 [25:10<8:22:01, 31.87it/s]global step 40000, trans_decision ep_re 273.2415866295304

{"global_step": 40000, "eval_re": [628.7630143238198, 715.3898549565024, 
20.887447959993864, 175.72094183278244, 339.3453201159141, 35.793289984548466, 
304.8338737346112, 77.77249545200534, 131.3587394469793, 302.5508884881468], 
"eval_len": [1000, 1000, 30, 194, 438, 39, 1000, 81, 1000, 388]}

  5%|▍         | 49997/1000000 [32:10<8:20:30, 31.63it/s]global step 50000, trans_decision ep_re 483.1635697625623

{"global_step": 50000, "eval_re": [608.8899717377188, 304.1596217387369, 
788.4122260502428, 281.7683238728395, 104.30881791206579, 267.9195382083022, 
331.47492045037154, 483.5300305880944, 834.3327509696921, 826.8394960975597], 
"eval_len": [1000, 317, 833, 322, 98, 332, 366, 1000, 1000, 1000]}

  6%|▌         | 59997/1000000 [39:20<8:11:20, 31.89it/s]global step 60000, trans_decision ep_re 346.9089548218576

{"global_step": 60000, "eval_re": [384.55139174877297, 143.90746442956745, 
675.8960358736006, 77.21036241559902, 628.5999513179115, 95.61435920441588, 
651.273789917934, 79.70275330238569, 493.0722141332228, 239.26122587516602], 
"eval_len": [366, 124, 590, 75, 1000, 148, 1000, 62, 1000, 230]}

  7%|▋         | 69997/1000000 [46:20<8:05:09, 31.95it/s]global step 70000, trans_decision ep_re 490.21500991958476

{"global_step": 70000, "eval_re": [118.60286525271243, 50.464113976898524, 
217.2026347000155, 1016.2161664916972, 174.91882056809027, 1222.1303895553292, 
319.91715817458004, 502.2803307568866, 448.5750912446162, 831.8425284750211], 
"eval_len": [127, 52, 199, 1000, 223, 1000, 216, 402, 383, 1000]}

  8%|▊         | 79997/1000000 [53:20<8:07:25, 31.46it/s]global step 80000, trans_decision ep_re 519.1832889832015

{"global_step": 80000, "eval_re": [253.1917675061608, 657.2412735745219, 
814.8350098924755, 671.6190606029145, 825.5359289262419, 171.11036289997142, 
80.51709922446068, 1112.2226049054275, 158.59673424839875, 446.96304805144274], 
"eval_len": [196, 1000, 633, 1000, 1000, 168, 109, 1000, 164, 303]}

  9%|▉         | 89997/1000000 [1:00:20<7:55:08, 31.92it/s]global step 90000, trans_decision ep_re 664.8736110932075

{"global_step": 90000, "eval_re": [1086.9041734464627, 615.0937345922738, 
380.36325241394127, 729.1259611122439, 109.17519495002999, 608.6656282648855, 
152.45561527406187, 806.6692565808979, 1445.4088441089891, 714.8744501882893], 
"eval_len": [1000, 468, 339, 733, 127, 439, 120, 1000, 1000, 453]}

 10%|▉         | 99997/1000000 [1:07:30<7:49:33, 31.95it/s]global step 100000, trans_decision ep_re 360.75554223390884

{"global_step": 100000, "eval_re": [839.733098609172, 74.18834337572927, 
213.44623836426695, 163.7471794462315, 937.8573955543934, 236.7513261742507, 
132.11639973383726, 567.95679886331, 179.28590377789843, 262.4727384399991], 
"eval_len": [535, 85, 139, 94, 720, 218, 99, 1000, 145, 207]}

 11%|█         | 109997/1000000 [1:14:30<7:45:51, 31.84it/s]global step 110000, trans_decision ep_re 581.6587968987557

{"global_step": 110000, "eval_re": [715.5462119121862, 167.23075014012053, 
773.5095928511363, 318.4066277462784, 929.7816303338145, 591.8296452920588, 
154.03177794196444, 620.1744262051915, 470.48856957444417, 1075.5887369903621], 
"eval_len": [1000, 111, 1000, 233, 1000, 467, 142, 1000, 412, 1000]}

 12%|█▏        | 119997/1000000 [1:21:30<7:45:57, 31.48it/s]global step 120000, trans_decision ep_re 389.4868206578848

{"global_step": 120000, "eval_re": [1017.9210370315161, 248.69125585323067, 
281.27424492540274, 79.4829030759638, 667.5796326742405, 796.9937947003492, 
305.55189147230277, 76.46675648499175, 335.19040179983716, 85.71628856101265], 
"eval_len": [1000, 235, 211, 68, 1000, 651, 284, 97, 273, 67]}

 13%|█▎        | 129999/1000000 [1:28:30<7:34:58, 31.87it/s]global step 130000, trans_decision ep_re 645.2127599807884

{"global_step": 130000, "eval_re": [298.88198716874246, 70.34720649465999, 
323.55528127530863, 1093.737631734072, 1380.9477283213405, 365.50056719283555, 
1028.616394435866, 1386.0008480454155, 96.52717182505694, 408.01278331458764], 
"eval_len": [294, 60, 281, 702, 950, 236, 763, 1000, 62, 259]}

 14%|█▍        | 139997/1000000 [1:35:30<7:29:06, 31.91it/s]global step 140000, trans_decision ep_re 356.44134690422237

{"global_step": 140000, "eval_re": [456.63628501878844, 484.37145766360754, 
61.76981818210721, 95.03937423224343, 27.88068697636545, 160.7364987891661, 
1065.6358154274944, 26.924952612685832, 196.43541813711306, 988.9831620026526], 
"eval_len": [357, 285, 84, 84, 41, 116, 661, 35, 139, 610]}

 15%|█▍        | 149997/1000000 [1:42:30<7:17:53, 32.35it/s]global step 150000, trans_decision ep_re 476.5845595589103

{"global_step": 150000, "eval_re": [214.53534677876902, 900.0067754133535, 
671.1780995572498, 894.9154044249648, 507.295251942809, 53.420512273194255, 
692.5396163236577, 445.8267830597811, 195.1356539233277, 190.99215189199714], 
"eval_len": [137, 623, 1000, 1000, 378, 58, 442, 294, 120, 134]}

 16%|█▌        | 159997/1000000 [1:49:20<7:18:49, 31.90it/s]global step 160000, trans_decision ep_re 751.3575159968698

{"global_step": 160000, "eval_re": [1422.4160294785174, 845.4209717081665, 
285.8444371034048, 442.75211807667125, 437.7340754741504, 301.6664200899191, 
786.9164598646843, 855.1465619729988, 742.8996661099353, 1392.7784200902495], 
"eval_len": [1000, 1000, 212, 286, 290, 210, 1000, 493, 433, 1000]}

 17%|█▋        | 169997/1000000 [1:56:30<7:12:51, 31.96it/s]global step 170000, trans_decision ep_re 624.5183051916954

{"global_step": 170000, "eval_re": [133.27773936038358, 162.624374296133, 
353.5324326923833, 741.6492495156107, 175.42163086243642, 557.2377306582542, 
1172.8421628678186, 1302.405125114053, 1259.1960305232253, 386.99657602665536], 
"eval_len": [96, 110, 258, 467, 136, 367, 768, 816, 730, 229]}

 18%|█▊        | 179997/1000000 [2:03:20<7:03:30, 32.27it/s]global step 180000, trans_decision ep_re 1129.9805012839354

{"global_step": 180000, "eval_re": [950.778740594291, 505.80732053319383, 
987.2415116060887, 1633.9146474025051, 801.1493466990335, 657.4532667610921, 
1342.6668235744903, 1716.9788582940464, 1619.8719480366462, 1083.9425493379663],
"eval_len": [562, 311, 633, 1000, 544, 1000, 1000, 1000, 1000, 604]}

 19%|█▉        | 189997/1000000 [2:10:30<6:58:13, 32.28it/s]global step 190000, trans_decision ep_re 315.3210212474138

{"global_step": 190000, "eval_re": [97.45215998910503, 770.009362645058, 
281.927036221518, 44.33781152364585, 67.39694484513872, 208.54525448054898, 
613.4465503248651, 223.26225659385972, 66.02925573128802, 780.8035801191103], 
"eval_len": [83, 513, 200, 46, 49, 186, 369, 216, 64, 511]}

 20%|█▉        | 199997/1000000 [2:17:20<6:58:18, 31.87it/s]global step 200000, trans_decision ep_re 657.7433399087096

{"global_step": 200000, "eval_re": [828.1558868177228, 600.0840013923442, 
1150.9386442225982, 852.9934090901191, 907.2144872123055, 261.4607150442327, 
680.2840149127994, 185.2910634619682, 935.117142012259, 175.89403492074572], 
"eval_len": [542, 430, 744, 1000, 543, 173, 456, 110, 618, 116]}

 21%|██        | 209997/1000000 [2:24:20<6:47:10, 32.34it/s]global step 210000, trans_decision ep_re 603.5062370879066

{"global_step": 210000, "eval_re": [176.81814207327665, 1470.4828019190568, 
143.51054450078183, 199.03967127010839, 144.7318927149033, 1465.487836586068, 
102.68645499049595, 678.5443871013807, 1244.3502224751353, 409.4104172478587], 
"eval_len": [123, 1000, 120, 164, 114, 1000, 106, 431, 829, 331]}

 22%|██▏       | 219997/1000000 [2:31:10<6:42:50, 32.27it/s]global step 220000, trans_decision ep_re 413.5685896008994

{"global_step": 220000, "eval_re": [238.28446259387476, 192.35890194323343, 
878.4766552259425, 179.29050972367025, 396.7909221552412, 148.60651275736058, 
530.529480744459, 112.2284990929842, 451.58222396792877, 1007.5377278042994], 
"eval_len": [208, 150, 1000, 117, 257, 114, 277, 80, 292, 612]}

 23%|██▎       | 229997/1000000 [2:38:00<6:36:46, 32.34it/s]global step 230000, trans_decision ep_re 498.34776175479254

{"global_step": 230000, "eval_re": [36.01248065590876, 737.1445986895262, 
210.29532494242002, 744.6502800375754, 186.5838084118447, 408.7627345698741, 
971.8129839104737, 449.36867425902386, 227.32538056488238, 1011.5213515063969], 
"eval_len": [53, 448, 126, 498, 160, 305, 679, 328, 154, 627]}

 24%|██▍       | 239997/1000000 [2:45:00<6:33:57, 32.15it/s]global step 240000, trans_decision ep_re 444.00214597522717

{"global_step": 240000, "eval_re": [663.1312168036499, 138.09750884073426, 
373.07301474767365, 265.9046511303807, 987.2684063654707, 289.74554553476764, 
358.8013132172235, 178.49222660947217, 294.16254962343095, 891.3450268794686], 
"eval_len": [418, 91, 200, 179, 535, 218, 230, 146, 176, 550]}

 25%|██▍       | 249997/1000000 [2:51:50<6:26:58, 32.30it/s]global step 250000, trans_decision ep_re 829.1173666932616

{"global_step": 250000, "eval_re": [569.818584369855, 1413.6526227923262, 
245.82155681630516, 194.20079186369347, 1385.9089475077412, 763.1069015539306, 
671.8809094292857, 1380.1831673990555, 1415.780291890622, 250.81989330980184], 
"eval_len": [396, 946, 173, 174, 1000, 600, 480, 922, 916, 280]}

 26%|██▌       | 259997/1000000 [2:58:50<6:23:28, 32.16it/s]global step 260000, trans_decision ep_re 1036.4188225858456

{"global_step": 260000, "eval_re": [221.92504125389866, 1889.9873438270804, 
915.5636919847148, 172.51850275776889, 879.5396116471404, 411.1382149786971, 
1366.117765308358, 1808.8424536491232, 1216.4443885423161, 1482.1112119093593], 
"eval_len": [184, 1000, 614, 99, 514, 236, 848, 1000, 726, 1000]}

 27%|██▋       | 269997/1000000 [3:05:50<6:19:26, 32.06it/s]global step 270000, trans_decision ep_re 903.3768056846553

{"global_step": 270000, "eval_re": [876.6082394008207, 695.1074366354458, 
127.90551377582584, 454.00099902679955, 84.78432665327429, 1816.9068265542282, 
1185.8745485927823, 1436.3918672220466, 1568.1148951180649, 788.0734038672662], 
"eval_len": [1000, 1000, 88, 268, 61, 1000, 691, 781, 1000, 463]}

 28%|██▊       | 279997/1000000 [3:13:00<6:16:20, 31.89it/s]global step 280000, trans_decision ep_re 868.6627571228748

{"global_step": 280000, "eval_re": [1622.161291813812, 963.5346577671805, 
95.24049492090151, 1680.0344532829902, 149.43669190625963, 566.5050832815599, 
392.1423013168101, 661.4986433339783, 1432.318811692754, 1123.7551419125016], 
"eval_len": [1000, 613, 49, 1000, 110, 392, 268, 405, 768, 670]}

 29%|██▉       | 289997/1000000 [3:20:00<6:05:30, 32.38it/s]global step 290000, trans_decision ep_re 1102.4162396212955

{"global_step": 290000, "eval_re": [1144.3139267698757, 526.6268867156282, 
1100.4233399907052, 1681.254494050531, 491.25440284454714, 1450.6476358920147, 
1348.4288396661502, 1535.6860573177983, 1547.2674030340336, 198.25940993167282],
"eval_len": [701, 331, 637, 1000, 347, 1000, 1000, 1000, 1000, 135]}

 30%|██▉       | 299997/1000000 [3:27:00<6:05:26, 31.93it/s]global step 300000, trans_decision ep_re 794.5902433626247

{"global_step": 300000, "eval_re": [689.5886072431865, 995.6428918949101, 
228.1296454507005, 493.5848623720538, 1072.7927289204715, 313.3627430505033, 
455.6237174052759, 1720.1876455517988, 216.62352794601998, 1760.366063791326], 
"eval_len": [388, 1000, 136, 294, 737, 215, 299, 1000, 129, 1000]}

 31%|███       | 309997/1000000 [3:34:00<6:03:00, 31.68it/s]global step 310000, trans_decision ep_re 1319.5493492451524

{"global_step": 310000, "eval_re": [1561.9112985634163, 836.7213811370747, 
981.0791428304498, 1970.2589884073345, 1826.2788975860929, 1765.6278267754628, 
125.99461287948917, 1419.7968001596666, 1641.8279189954444, 1065.9966251170938],
"eval_len": [1000, 1000, 557, 1000, 1000, 1000, 76, 810, 921, 1000]}

 32%|███▏      | 319997/1000000 [3:41:10<5:50:07, 32.37it/s]global step 320000, trans_decision ep_re 1043.1246622092656

{"global_step": 320000, "eval_re": [408.4200135047466, 887.5556077329477, 
885.6649682045526, 833.7666645835476, 944.2208711985404, 1207.7680162472338, 
1648.4382627330012, 1835.0983104294255, 83.59002839129592, 1696.7238790673644], 
"eval_len": [252, 450, 1000, 423, 559, 713, 1000, 1000, 67, 1000]}

 33%|███▎      | 329997/1000000 [3:48:10<5:46:06, 32.26it/s]global step 330000, trans_decision ep_re 344.8510297538663

{"global_step": 330000, "eval_re": [139.4623790563476, 1070.5498004515828, 
986.1507190980127, 216.80570274155852, 141.886484361098, 124.01373346093065, 
303.99114082752806, 108.60390162520747, 316.0649892223615, 40.98144669403594], 
"eval_len": [118, 553, 1000, 131, 96, 87, 157, 117, 175, 37]}

 34%|███▍      | 339997/1000000 [3:55:10<5:46:20, 31.76it/s]global step 340000, trans_decision ep_re 1108.5134551648812

{"global_step": 340000, "eval_re": [1694.4806293980712, 72.93675155297144, 
1375.7182291623117, 1341.150843611596, 1749.1863645797198, 345.0698690171901, 
790.3934908926485, 1252.9473936077175, 853.9969817360457, 1609.253998090542], 
"eval_len": [1000, 66, 701, 757, 1000, 192, 1000, 705, 1000, 1000]}

 35%|███▍      | 349997/1000000 [4:02:10<5:37:57, 32.06it/s]global step 350000, trans_decision ep_re 1116.8732698588224

{"global_step": 350000, "eval_re": [1614.2590461203558, 477.97703449703226, 
721.7896808668703, 1439.6295718219292, 802.5221279335072, 494.5239267892907, 
1438.9286335231031, 805.309687568257, 1682.777513393173, 1691.015476074704], 
"eval_len": [988, 262, 437, 1000, 1000, 282, 1000, 516, 1000, 1000]}

 36%|███▌      | 359997/1000000 [4:09:20<5:31:44, 32.15it/s]global step 360000, trans_decision ep_re 596.0497566525835

{"global_step": 360000, "eval_re": [645.9228951436007, 745.5048312771773, 
828.0415573717058, 221.6145876579594, 296.2128033050673, 1537.3675025440302, 
496.7520153784219, 112.94342013843657, 560.8356844912215, 515.3022692182141], 
"eval_len": [375, 1000, 442, 115, 160, 859, 302, 63, 290, 356]}

 37%|███▋      | 369997/1000000 [4:16:20<5:29:56, 31.82it/s]global step 370000, trans_decision ep_re 1224.0102738609764

{"global_step": 370000, "eval_re": [1835.2664667896404, 1336.2544390702103, 
1773.1810552070842, 565.1192570422864, 1654.149534049917, 657.2667136237033, 
391.741058688909, 1766.4648754942882, 608.1515621892778, 1652.5077764544476], 
"eval_len": [1000, 751, 1000, 297, 1000, 400, 216, 1000, 373, 992]}

 38%|███▊      | 379997/1000000 [4:23:20<5:17:44, 32.52it/s]global step 380000, trans_decision ep_re 603.4536230500264

{"global_step": 380000, "eval_re": [239.46925503880664, 1004.3447567707823, 
1377.385973029118, 552.9961445914125, 840.0372415891252, 643.6931981823412, 
541.0847630099698, 287.121423545138, 524.4930946543128, 23.91038008925805], 
"eval_len": [202, 716, 863, 320, 576, 418, 345, 194, 326, 30]}

 39%|███▉      | 389997/1000000 [4:30:20<5:16:25, 32.13it/s]global step 390000, trans_decision ep_re 650.6348351364162

{"global_step": 390000, "eval_re": [356.48943072741906, 1122.6175463045208, 
1053.7903263035164, 332.2533080720719, 904.3937487544129, 180.45220939311244, 
783.0106836054338, 197.63488459876353, 940.3602073327514, 635.3460062721604], 
"eval_len": [1000, 633, 1000, 1000, 503, 77, 424, 1000, 513, 385]}

 40%|███▉      | 399997/1000000 [4:37:20<5:14:37, 31.78it/s]global step 400000, trans_decision ep_re 821.5725848981043

{"global_step": 400000, "eval_re": [998.2669860428807, 117.54290622418794, 
1108.4559874541155, 918.3248599658065, 813.7599082626532, 353.5695170697865, 
924.7444295700063, 596.1656535892597, 675.4842620823631, 1709.4113387199845], 
"eval_len": [1000, 79, 670, 566, 546, 226, 506, 391, 359, 1000]}

 41%|████      | 409997/1000000 [4:44:20<5:04:33, 32.29it/s]global step 410000, trans_decision ep_re 509.041128443657

{"global_step": 410000, "eval_re": [142.78622720392877, 569.8959826041083, 
517.811584200567, 1733.761075734043, 668.1234349877562, 372.24185354180275, 
152.6765805794962, 109.81813380812363, 41.380960214094955, 781.9154515626494], 
"eval_len": [98, 388, 266, 1000, 507, 231, 68, 58, 35, 446]}

 42%|████▏     | 419997/1000000 [4:51:20<5:01:26, 32.07it/s]global step 420000, trans_decision ep_re 1081.1917067383552

{"global_step": 420000, "eval_re": [2037.8067408299123, 1293.3104051181977, 
1595.4364607487348, 809.4802356825346, 1559.3214869471244, 1123.0206913003121, 
-266.13504984718895, 1121.0727791630268, 246.9562213546808, 1291.647096086217], 
"eval_len": [1000, 795, 1000, 544, 971, 1000, 1000, 667, 154, 792]}

 43%|████▎     | 429997/1000000 [4:58:20<4:54:31, 32.26it/s]global step 430000, trans_decision ep_re 1102.5629694890915

{"global_step": 430000, "eval_re": [1380.6693885870247, 99.38294920033391, 
1746.8323249668435, 292.50769540100896, 899.3767716491963, 1832.0796447707182, 
1842.5035532346358, 997.4053380100913, 1596.873479577634, 337.99854949342796], 
"eval_len": [744, 69, 1000, 228, 1000, 1000, 984, 570, 850, 200]}

 44%|████▍     | 439997/1000000 [5:05:30<4:48:46, 32.32it/s]global step 440000, trans_decision ep_re 714.853363069977

{"global_step": 440000, "eval_re": [626.0560592130564, 824.5241301552879, 
717.5223854066066, 1018.0440004669148, 833.095924273451, 713.5682648720945, 
892.4556274656711, 831.4937226706372, 296.1200409886005, 395.65347518744926], 
"eval_len": [1000, 1000, 1000, 606, 481, 413, 1000, 1000, 151, 179]}

 45%|████▍     | 449997/1000000 [5:12:40<4:48:17, 31.80it/s]global step 450000, trans_decision ep_re 718.6119743107571

{"global_step": 450000, "eval_re": [230.8740504485251, 1816.7959132636665, 
412.5069737674444, 479.40117134662205, 821.0579270721929, 877.8417306477226, 
252.94008811298895, 1200.987787232606, 1011.6668104358091, 82.04729077999177], 
"eval_len": [148, 1000, 215, 296, 444, 1000, 155, 668, 583, 78]}

 46%|████▌     | 459997/1000000 [5:19:20<4:43:11, 31.78it/s]global step 460000, trans_decision ep_re 528.6402112607517

{"global_step": 460000, "eval_re": [63.49333852320011, 785.8042891689186, 
1550.9309230060835, 63.984412071475994, 455.0794368476959, 669.608462111531, 
29.205311125586032, 1472.0916129390332, 126.3095144574815, 69.89481235651186], 
"eval_len": [44, 455, 1000, 42, 289, 339, 33, 762, 102, 49]}

 47%|████▋     | 469999/1000000 [5:26:30<4:31:50, 32.49it/s]global step 470000, trans_decision ep_re 503.6735174662922

{"global_step": 470000, "eval_re": [626.47745621483, 133.63700405752493, 
882.6724432109567, 750.6386042516855, 252.65926915754434, 702.2136866554243, 
100.39849945245429, 614.8712273993048, 546.1951877901186, 426.97179647307905], 
"eval_len": [1000, 89, 1000, 1000, 166, 1000, 84, 429, 321, 347]}

 48%|████▊     | 479997/1000000 [5:33:30<4:27:46, 32.37it/s]global step 480000, trans_decision ep_re 751.7403750659921

{"global_step": 480000, "eval_re": [423.5486397024322, 39.30913140494985, 
470.86382012579827, 1732.8143770651789, 2020.9423212679674, 1609.3236231684507, 
210.5850334155897, 560.4951026686929, 80.46719178343048, 369.05451005743214], 
"eval_len": [221, 29, 287, 1000, 1000, 1000, 141, 319, 49, 247]}

 49%|████▉     | 489997/1000000 [5:40:30<4:21:20, 32.52it/s]global step 490000, trans_decision ep_re 702.2714682670814

{"global_step": 490000, "eval_re": [581.1661626093492, 280.416915091879, 
252.0069106964192, 1043.2933197377301, 534.0082700143316, 588.5831536386805, 
253.58089605071106, 1804.7868085356924, 972.5363756147595, 712.3358706812619], 
"eval_len": [334, 150, 216, 609, 275, 313, 158, 900, 1000, 333]}

 50%|████▉     | 499997/1000000 [5:47:20<4:17:14, 32.40it/s]global step 500000, trans_decision ep_re 705.1067075334761

{"global_step": 500000, "eval_re": [745.3870833599038, 814.5858438732988, 
1230.6151344204773, 1245.2087387980866, 197.5923088295758, 1430.5118581373163, 
378.2813061073587, 778.061285571374, 125.69006430202606, 105.13345193534393], 
"eval_len": [417, 1000, 780, 1000, 130, 1000, 235, 1000, 97, 99]}

 51%|█████     | 509997/1000000 [5:54:30<4:19:28, 31.47it/s]global step 510000, trans_decision ep_re 656.4335359343129

{"global_step": 510000, "eval_re": [1176.5546768648705, 1385.1678563109326, 
668.2843689626792, 771.1826233426376, 63.36059314136751, 446.3546626356368, 
1210.5322512579025, 285.27950578949475, 92.24626338707674, 465.37255765053146], 
"eval_len": [717, 1000, 371, 1000, 40, 253, 1000, 170, 53, 285]}

 52%|█████▏    | 519997/1000000 [6:01:30<4:05:54, 32.53it/s]global step 520000, trans_decision ep_re 606.7759346529602

{"global_step": 520000, "eval_re": [60.6206096607205, 1360.7996505511055, 
89.04362320495984, 998.6112643083994, 358.58594539601586, 662.881414998644, 
1328.3255061325965, 761.4047252018725, 222.06500239415445, 225.42160468113343], 
"eval_len": [42, 822, 64, 1000, 251, 1000, 1000, 435, 169, 154]}

 53%|█████▎    | 529997/1000000 [6:08:20<4:03:01, 32.23it/s]global step 530000, trans_decision ep_re 752.7079440752545

{"global_step": 530000, "eval_re": [851.8381431629278, 1030.920386893241, 
127.33663824046471, 104.52085339855678, 1934.6182493743324, 887.7042153083082, 
673.8696383559576, 895.7555283129954, 230.53333712696065, 789.9824505788007], 
"eval_len": [1000, 535, 60, 68, 1000, 1000, 385, 1000, 130, 402]}

 54%|█████▍    | 539997/1000000 [6:15:30<4:00:45, 31.84it/s]global step 540000, trans_decision ep_re 1182.8522966972648

{"global_step": 540000, "eval_re": [93.04989290306919, 894.7329560179858, 
869.3667449576473, 1366.4388236291468, 1826.971100435154, 1109.0165249767024, 
1711.2793154051792, 1860.9199640459858, 1270.0803176339805, 826.6673269677981], 
"eval_len": [82, 481, 465, 890, 1000, 1000, 1000, 1000, 681, 431]}

 55%|█████▍    | 549997/1000000 [6:22:30<3:56:12, 31.75it/s]global step 550000, trans_decision ep_re 606.3763405213642

{"global_step": 550000, "eval_re": [97.35711200198443, 605.6435818904821, 
789.8971463146372, 359.60483489263424, 340.3801709339053, 1326.2639970507762, 
551.0613622674077, 1129.52064322509, 454.40804982733755, 409.62650680938776], 
"eval_len": [66, 370, 1000, 198, 217, 700, 308, 616, 218, 268]}

 56%|█████▌    | 559997/1000000 [6:29:30<3:46:09, 32.43it/s]global step 560000, trans_decision ep_re 803.610760683335

{"global_step": 560000, "eval_re": [1899.8750011851216, 341.49411093921157, 
321.696080715926, 822.4915391130819, 748.1540000610464, 838.6826137283223, 
1509.0882180551498, 236.31630642514577, 150.61055889297538, 1167.699177717371], 
"eval_len": [1000, 232, 212, 481, 1000, 1000, 1000, 165, 101, 691]}

 57%|█████▋    | 569997/1000000 [6:36:30<3:44:39, 31.90it/s]global step 570000, trans_decision ep_re 1109.8299908850154

{"global_step": 570000, "eval_re": [1171.2718479595449, 101.84440809620719, 
910.3360190222529, 688.0088110189646, 1164.9142746405257, 1336.7387859976402, 
1854.963081207984, 480.43544468960073, 1813.0567016946356, 1576.7305345227978], 
"eval_len": [653, 61, 552, 405, 1000, 1000, 1000, 308, 1000, 852]}

 58%|█████▊    | 579997/1000000 [6:43:30<3:36:36, 32.32it/s]global step 580000, trans_decision ep_re 1212.2736904719873

{"global_step": 580000, "eval_re": [1435.0383420884632, 830.4480254601732, 
1962.5718366756805, 165.96149748691107, 796.117586005812, 1983.7301753798058, 
1692.7430919106469, 1197.8261509521906, 868.0209952032505, 1190.2792035569407], 
"eval_len": [816, 1000, 1000, 98, 400, 1000, 1000, 562, 1000, 743]}

 59%|█████▉    | 589997/1000000 [6:50:40<3:30:52, 32.41it/s]global step 590000, trans_decision ep_re 558.6621495949094

{"global_step": 590000, "eval_re": [56.825322024704235, 1562.0950085794984, 
1095.8347616336405, 90.27465112918344, 504.22525362450597, 751.1021775096558, 
372.81939604152564, 175.8589543416284, 207.25341320287353, 770.3325578618781], 
"eval_len": [38, 1000, 630, 67, 343, 1000, 208, 140, 115, 433]}

 60%|█████▉    | 599997/1000000 [6:57:30<3:28:16, 32.01it/s]global step 600000, trans_decision ep_re 552.9129756645245

{"global_step": 600000, "eval_re": [44.86792875017763, 773.618245559236, 
243.09703958723398, 70.44371614723227, 857.5634612707397, 1053.9677346125, 
1149.3580272454958, 408.08590149170476, 173.63867674225096, 754.4890252386731], 
"eval_len": [29, 1000, 147, 72, 1000, 1000, 671, 215, 146, 483]}

 61%|██████    | 609996/1000000 [7:04:30<3:20:57, 32.35it/s]global step 610000, trans_decision ep_re 869.5521763321965

{"global_step": 610000, "eval_re": [585.4628755599913, 1114.994467043858, 
1824.6291665790104, 1852.8128262238633, 141.91215117433714, 974.5084490465092, 
1156.3564562703045, 405.75527273616166, 311.7688708245478, 327.32122786338147], 
"eval_len": [345, 591, 1000, 1000, 95, 1000, 1000, 213, 228, 200]}

 62%|██████▏   | 619997/1000000 [7:11:40<3:16:24, 32.25it/s]global step 620000, trans_decision ep_re 490.4590295823508

{"global_step": 620000, "eval_re": [370.1159501544245, 411.6555641903929, 
174.20742590295976, 1126.7232356321394, 440.82629159636537, 541.1069913021183, 
216.29769169701206, 564.188796364978, 663.9942835792516, 395.4740654038671], 
"eval_len": [212, 246, 94, 1000, 231, 330, 126, 307, 415, 174]}

 63%|██████▎   | 629997/1000000 [7:18:30<3:13:28, 31.87it/s]global step 630000, trans_decision ep_re 574.1589798895521

{"global_step": 630000, "eval_re": [329.58090488096695, 513.6561445257247, 
924.5181386050991, 844.7711185705215, 1698.055566579165, 58.39802051059238, 
45.64915638909679, 389.5523417371016, 587.6401799402548, 349.7682271569987], 
"eval_len": [253, 345, 1000, 1000, 1000, 53, 41, 201, 454, 206]}

 64%|██████▍   | 639997/1000000 [7:25:30<3:03:41, 32.66it/s]global step 640000, trans_decision ep_re 975.2361017631517

{"global_step": 640000, "eval_re": [833.5124415373003, 103.5252247807015, 
907.7700688406396, 187.71426833752452, 808.1848097447061, 1486.0502967278815, 
693.2131093149698, 1836.9779556179933, 1209.3748533096266, 1686.0379894201747], 
"eval_len": [406, 61, 513, 105, 446, 1000, 384, 1000, 675, 1000]}

 65%|██████▍   | 649997/1000000 [7:32:30<3:02:22, 31.99it/s]global step 650000, trans_decision ep_re 691.1348297819208

{"global_step": 650000, "eval_re": [718.58462966364, 651.0342425043302, 
380.87198208344256, 1450.3742332578872, 273.4683790679736, 255.32947322469914, 
562.2268222167189, 917.44328145656, 803.5232943055094, 898.4919600384476], 
"eval_len": [333, 381, 226, 1000, 138, 132, 310, 1000, 1000, 1000]}

 66%|██████▌   | 659997/1000000 [7:39:30<2:58:15, 31.79it/s]global step 660000, trans_decision ep_re 449.50372295078415

{"global_step": 660000, "eval_re": [399.0566203331896, 314.9807890268731, 
369.53744298336, 888.855457557191, 164.35493274719235, 717.3309962138314, 
533.8045700462903, 291.3900434521565, 200.87860457372705, 614.8477725740299], 
"eval_len": [213, 241, 233, 1000, 110, 1000, 317, 204, 105, 369]}

 67%|██████▋   | 669997/1000000 [7:46:30<2:51:48, 32.01it/s]global step 670000, trans_decision ep_re 757.8933416395819

{"global_step": 670000, "eval_re": [196.93827653796083, 1102.0502954794747, 
705.2463888541897, 827.3172090419793, 954.1380526797369, 1442.984287249846, 
297.2793831068866, 1406.4416469964117, 429.89314596215115, 216.64473048718304], 
"eval_len": [96, 1000, 417, 505, 1000, 817, 152, 819, 295, 1000]}

 68%|██████▊   | 679997/1000000 [7:53:30<2:45:29, 32.23it/s]global step 680000, trans_decision ep_re 839.3831121869131

{"global_step": 680000, "eval_re": [673.895095729343, 1280.0574317621977, 
458.70491802924687, 76.7645872378214, 357.06836475624056, 1733.3633749393105, 
-5.649972221836039, 1510.3408904586033, 596.0705262584523, 1713.2159049197512], 
"eval_len": [356, 794, 250, 87, 181, 1000, 1000, 1000, 321, 1000]}

 69%|██████▉   | 689997/1000000 [8:00:30<2:42:33, 31.78it/s]global step 690000, trans_decision ep_re 743.3697449654403

{"global_step": 690000, "eval_re": [781.9235938050381, 1906.123297252376, 
791.0084500162146, 551.9381810602277, 812.3110823062477, 159.61683083184388, 
559.0549520063233, 150.3770878414274, 1234.9643339195695, 486.3796406151339], 
"eval_len": [463, 1000, 399, 354, 1000, 94, 1000, 91, 751, 301]}

 70%|██████▉   | 699997/1000000 [8:07:30<2:35:03, 32.24it/s]global step 700000, trans_decision ep_re 1111.079693156945

{"global_step": 700000, "eval_re": [390.1784550011944, 671.4619329682085, 
1609.6459476085888, 1336.990438110695, 1603.0275483921168, 1769.5010771405096, 
586.0847980939848, 1127.8150987266044, 428.05188367937416, 1588.0397518481725], 
"eval_len": [219, 349, 1000, 775, 1000, 1000, 358, 660, 257, 1000]}

 71%|███████   | 709997/1000000 [8:14:40<2:30:33, 32.10it/s]global step 710000, trans_decision ep_re 552.6822856470145

{"global_step": 710000, "eval_re": [407.4333175802772, 191.2835233329189, 
1618.472482084248, 150.54094807839883, 648.2202856889262, 131.46765857336987, 
13.034991280585967, 837.2263042539596, 173.00112556310376, 1356.1422200343566], 
"eval_len": [231, 97, 919, 88, 401, 110, 20, 1000, 108, 801]}

 72%|███████▏  | 719997/1000000 [8:21:40<2:25:39, 32.04it/s]global step 720000, trans_decision ep_re 723.2807197666554

{"global_step": 720000, "eval_re": [136.2222603951771, 1307.9262008356848, 
133.42766095942665, 152.19816191776113, 1298.8408562365719, 755.5523070625663, 
816.5349088382914, 657.8554178243385, 1554.2716658883226, 419.97775770841304], 
"eval_len": [110, 740, 77, 130, 784, 453, 450, 330, 854, 202]}

 73%|███████▎  | 729999/1000000 [8:28:40<2:21:31, 31.80it/s]global step 730000, trans_decision ep_re 830.2819749662931

{"global_step": 730000, "eval_re": [1638.27039434271, 869.911664623621, 
414.6735694416464, 958.6775179894548, 146.37762437927137, 421.2619388748904, 
932.8071352001081, 1143.0603926570045, 731.652333794536, 1046.127178359687], 
"eval_len": [1000, 506, 1000, 525, 104, 260, 1000, 635, 403, 584]}

 74%|███████▍  | 739997/1000000 [8:35:40<2:14:51, 32.13it/s]global step 740000, trans_decision ep_re 770.1730007957717

{"global_step": 740000, "eval_re": [101.53381203835406, 340.7848004913718, 
720.1877086178112, 114.84337263947586, 1683.4872287168607, 1789.3414538427737, 
249.07576676327884, 1744.3878818479905, 580.8612355579801, 377.2267474418207], 
"eval_len": [81, 224, 452, 63, 1000, 1000, 131, 1000, 1000, 206]}

 75%|███████▍  | 749997/1000000 [8:42:40<2:09:54, 32.07it/s]global step 750000, trans_decision ep_re 758.0959353873728

{"global_step": 750000, "eval_re": [125.45168258038338, 544.4934745429131, 
1521.7115888888702, 1350.4957281536208, 419.0444703462041, 910.2716800354214, 
601.1077377408666, 219.01111650393833, 350.2001499135817, 1539.1717251679288], 
"eval_len": [114, 1000, 1000, 843, 285, 1000, 1000, 125, 200, 1000]}

 76%|███████▌  | 759997/1000000 [8:49:40<2:04:56, 32.02it/s]global step 760000, trans_decision ep_re 803.298961456185

{"global_step": 760000, "eval_re": [842.5567210875442, 1011.7105310318159, 
876.1457338210744, 1179.8253518819604, 939.7978870763427, 919.0383498988747, 
538.2681637617958, 116.00255078374694, 151.158493629939, 1458.485831588756], 
"eval_len": [529, 670, 528, 795, 608, 570, 353, 82, 102, 1000]}

 77%|███████▋  | 769999/1000000 [8:56:40<1:58:54, 32.24it/s]global step 770000, trans_decision ep_re 1066.958378939029

{"global_step": 770000, "eval_re": [1526.3422435208895, 1899.2187404729796, 
975.9589804875266, 1928.6597778924408, 882.8749807459053, 615.2186778790998, 
928.5969411047165, 1550.6189406323522, 59.394305396529724, 302.70020125784845], 
"eval_len": [877, 1000, 505, 1000, 521, 1000, 536, 1000, 57, 163]}

 78%|███████▊  | 779996/1000000 [9:03:50<1:55:00, 31.88it/s]global step 780000, trans_decision ep_re 433.08710069123515

{"global_step": 780000, "eval_re": [344.39736428965455, 998.9847501005434, 
110.7309752279998, 1127.9931131533203, 216.82934323787597, 152.2717406732434, 
1019.1605504627875, 138.7490362433707, 52.52797709575241, 169.22615642780463], 
"eval_len": [215, 1000, 82, 1000, 109, 84, 1000, 91, 41, 95]}

 79%|███████▉  | 789997/1000000 [9:10:50<1:48:49, 32.16it/s]global step 790000, trans_decision ep_re 852.0202977972087

{"global_step": 790000, "eval_re": [628.2802667113515, 1154.5237800598409, 
996.5603288611658, 626.9945680077392, 375.36378991384595, 519.3190295303666, 
1264.436353815176, 188.15655014268953, 1227.188854068216, 1539.3794568616963], 
"eval_len": [1000, 1000, 577, 1000, 251, 316, 766, 125, 738, 1000]}

 80%|███████▉  | 799997/1000000 [9:17:50<1:43:03, 32.35it/s]global step 800000, trans_decision ep_re 823.3578350575432

{"global_step": 800000, "eval_re": [203.26736284823687, 1561.9363965644739, 
1856.9378178227887, 110.98739649798442, 467.9168118822982, 1371.3913416054322, 
75.29325089365273, 629.5813036906264, 994.9491698073067, 961.3174989626314], 
"eval_len": [121, 1000, 1000, 80, 288, 1000, 75, 1000, 1000, 1000]}

 81%|████████  | 809997/1000000 [9:25:00<1:38:46, 32.06it/s]global step 810000, trans_decision ep_re 763.1256114176288

{"global_step": 810000, "eval_re": [863.500008276747, 1094.7469857209496, 
100.61268722316437, 991.7574066394751, 358.44036892082386, 1432.2159584972433, 
699.1427219862165, 394.9509791973351, 196.56770816891307, 1499.3212895454196], 
"eval_len": [611, 732, 58, 1000, 239, 848, 455, 270, 134, 1000]}

 82%|████████▏ | 819997/1000000 [9:32:00<1:31:12, 32.89it/s]global step 820000, trans_decision ep_re 871.7341851727781

{"global_step": 820000, "eval_re": [342.4812941561469, 693.0181619420185, 
566.7493561706985, 479.5570613859557, 1377.9999600358092, 1569.0660576090156, 
1724.724621681623, 131.35332118039506, 1224.9910100267482, 607.4010075393702], 
"eval_len": [210, 1000, 379, 288, 951, 1000, 1000, 110, 1000, 380]}

 83%|████████▎ | 829997/1000000 [9:39:00<1:29:24, 31.69it/s]global step 830000, trans_decision ep_re 1269.3182522225445

{"global_step": 830000, "eval_re": [1641.2136412690531, 212.27897302973366, 
1572.5274368515259, 155.41462448452393, 824.0471587131796, 1747.035984547741, 
1550.0945441966485, 1656.0417028880881, 1695.6685839586466, 1638.859872286304], 
"eval_len": [1000, 134, 1000, 103, 1000, 1000, 991, 982, 1000, 1000]}

 84%|████████▍ | 839997/1000000 [9:46:00<1:20:59, 32.92it/s]global step 840000, trans_decision ep_re 841.62675264824

{"global_step": 840000, "eval_re": [888.4765560642999, 171.8992334207329, 
557.1367620460778, 85.65323605219656, 770.1956606927557, 1306.4025016457933, 
1519.2474411675773, 976.3068099412806, 1034.08813684583, 1106.861188605855], 
"eval_len": [577, 104, 320, 94, 462, 1000, 853, 555, 520, 602]}

 85%|████████▍ | 849997/1000000 [9:53:00<1:16:23, 32.73it/s]global step 850000, trans_decision ep_re 559.5886791527302

{"global_step": 850000, "eval_re": [65.18565874385449, 280.0087559365826, 
83.06148928481801, 805.7040711138226, 1102.0181946196333, 877.8588609656782, 
128.83830499202463, 773.4670407128715, 1412.5519246099657, 67.19249054805033], 
"eval_len": [49, 140, 74, 484, 753, 473, 83, 402, 724, 41]}

 86%|████████▌ | 859997/1000000 [9:59:50<1:12:40, 32.10it/s]global step 860000, trans_decision ep_re 356.08784652599434

{"global_step": 860000, "eval_re": [888.5968151024839, 425.9247001024072, 
134.53858105495038, 175.07545286503276, 130.67630140420832, 301.4345193857552, 
238.48018337234367, 25.21294555714088, 104.21078760204578, 1136.7281788135751], 
"eval_len": [518, 221, 96, 113, 72, 233, 124, 28, 52, 1000]}

 87%|████████▋ | 869997/1000000 [10:06:40<1:06:57, 32.36it/s]global step 870000, trans_decision ep_re 851.0851238237001

{"global_step": 870000, "eval_re": [567.3126375392322, 138.62533576075225, 
277.652009453537, 73.36141591910736, 636.734246678465, 1779.6271208072046, 
1816.7933476799542, 1649.7407522072465, 1511.0486736582002, 59.95569853330155], 
"eval_len": [285, 118, 187, 51, 381, 1000, 1000, 1000, 910, 56]}

 88%|████████▊ | 879997/1000000 [10:13:40<1:02:21, 32.07it/s]global step 880000, trans_decision ep_re 437.60028727360276

{"global_step": 880000, "eval_re": [457.5840487256093, 402.5928544669712, 
758.9611176442571, 76.3415196421595, 610.6678932088065, 48.165912394517576, 
393.4677096321754, 984.3658381841591, 101.6841055663591, 542.1718732710134], 
"eval_len": [324, 212, 480, 62, 327, 42, 230, 665, 57, 1000]}

 89%|████████▉ | 889997/1000000 [10:20:30<56:04, 32.69it/s]global step 890000, trans_decision ep_re 562.557083375938

{"global_step": 890000, "eval_re": [326.386914925717, 913.8008475474213, 
55.272056167128675, 181.8030545187276, 1785.1844465164795, 102.09245400051387, 
651.1307830849646, 211.47947127378373, 720.3561158702624, 678.064689854381], 
"eval_len": [181, 533, 65, 110, 1000, 72, 1000, 137, 391, 1000]}

 90%|████████▉ | 899997/1000000 [10:27:11<51:37, 32.28it/s]global step 900000, trans_decision ep_re 431.62769810400994

{"global_step": 900000, "eval_re": [446.147676366397, 51.88196914445814, 
470.7899277656453, 94.98191105244338, 1337.2676884989344, 52.54884127455845, 
1486.3185947439672, 63.40617974752222, 215.03977141356333, 97.89442103261025], 
"eval_len": [325, 49, 282, 70, 1000, 37, 1000, 44, 145, 63]}

 91%|█████████ | 909999/1000000 [10:34:20<46:53, 31.99it/s]global step 910000, trans_decision ep_re 847.3705866411004

{"global_step": 910000, "eval_re": [1623.9001609445615, 99.5655799993604, 
112.87668994844152, 1837.172462667223, 358.6136785203661, 435.21992893099326, 
1519.7883565674229, 714.0644127952767, 100.04680783358585, 1672.4577882037747], 
"eval_len": [1000, 99, 111, 1000, 201, 1000, 1000, 1000, 76, 1000]}

 92%|█████████▏| 919997/1000000 [10:41:20<41:13, 32.35it/s]global step 920000, trans_decision ep_re 476.3230506617282

{"global_step": 920000, "eval_re": [836.3951842512179, 529.7469401856885, 
422.9280361643918, 764.8597778396552, 32.732820475860095, 663.7018926688844, 
337.5025203846915, 249.36916012372774, 823.3207851477378, 102.6733893754276], 
"eval_len": [1000, 330, 278, 366, 43, 1000, 181, 175, 1000, 89]}

 93%|█████████▎| 929997/1000000 [10:48:10<36:18, 32.13it/s]global step 930000, trans_decision ep_re 814.8580078372279

{"global_step": 930000, "eval_re": [679.9183594149912, 1809.229462045513, 
1042.5884352906935, 420.864677472672, 910.2790031541957, 694.1576655716624, 
1048.7157293760383, 653.8844537884054, 176.9898432121764, 711.9524490459313], 
"eval_len": [1000, 1000, 1000, 277, 1000, 1000, 596, 416, 122, 475]}

 94%|█████████▍| 939997/1000000 [10:55:10<30:47, 32.48it/s]global step 940000, trans_decision ep_re 309.31278819088595

{"global_step": 940000, "eval_re": [488.3709180880219, 62.3932465270207, 
657.9876666625908, 199.01927840622795, 290.6286033735419, 163.76991411852057, 
567.884029728989, 73.71408619085044, 400.76682780369845, 188.59331100939744], 
"eval_len": [302, 52, 430, 180, 157, 117, 414, 58, 237, 99]}

 95%|█████████▍| 949997/1000000 [11:02:00<25:42, 32.42it/s]global step 950000, trans_decision ep_re -324.46431561179986

{"global_step": 950000, "eval_re": [-334.31151673505354, 52.18379108277331, 
37.539093319383, 82.74163743181045, 192.90097939930203, -518.9671630340549, 
-907.7787408731375, -712.8325747443448, -860.1618158447645, -275.9568461199124],
"eval_len": [1000, 88, 61, 78, 159, 1000, 1000, 1000, 1000, 1000]}

 96%|█████████▌| 959997/1000000 [11:09:00<20:36, 32.36it/s]global step 960000, trans_decision ep_re 981.0409033744854

{"global_step": 960000, "eval_re": [1096.7117299589825, 794.2436472665968, 
1571.1644994355254, 1181.1031543946283, 296.6280833191483, 641.3455860083917, 
1530.8220657745353, 565.0849746191985, 1259.7491307486214, 873.5561622192254], 
"eval_len": [681, 411, 997, 687, 219, 331, 1000, 383, 768, 1000]}

 97%|█████████▋| 969997/1000000 [11:16:00<15:30, 32.25it/s]global step 970000, trans_decision ep_re 827.4277271377092

{"global_step": 970000, "eval_re": [730.1215510791565, 141.3746249171475, 
1471.5522501054384, 1089.798257359272, 351.83850250544043, 1286.2348760450868, 
253.56287099772726, 600.299055077283, 1575.0087439701783, 774.4865393203613], 
"eval_len": [1000, 79, 1000, 711, 242, 1000, 171, 400, 1000, 457]}

 98%|█████████▊| 979998/1000000 [11:23:00<10:14, 32.56it/s]global step 980000, trans_decision ep_re 1179.3592367760918

{"global_step": 980000, "eval_re": [675.9194141031942, 1688.3914050353553, 
1793.335834957655, 1656.262839069975, 377.3535634978781, 1020.0895014107322, 
1576.3169275103248, 710.6557985721074, 532.7690829379587, 1762.498000665739], 
"eval_len": [1000, 1000, 1000, 1000, 207, 570, 1000, 419, 323, 1000]}

 99%|█████████▉| 989997/1000000 [11:30:10<05:10, 32.21it/s]global step 990000, trans_decision ep_re 771.0201397737261

{"global_step": 990000, "eval_re": [1039.8432519981518, 1695.1247853465793, 
738.842354935013, 481.42760804322586, 1337.359577636324, 30.662982650439332, 
96.47462969191594, 1430.389436930181, 77.44872110915688, 782.6280493962743], 
"eval_len": [578, 1000, 433, 279, 778, 32, 56, 799, 83, 1000]}

100%|█████████▉| 999997/1000000 [11:37:10<00:00, 32.20it/s]global step 1000000, trans_decision ep_re 945.2162427407663

{"global_step": 1000000, "eval_re": [31.63132328238247, 1525.301421382545, 
722.9809902409373, 44.71503464066752, 1719.6728409665227, 1720.5858235963588, 
1084.2086143765416, 90.74921159215765, 1033.805277497339, 1478.5118898322098], 
"eval_len": [38, 840, 466, 40, 1000, 987, 1000, 61, 1000, 1000]}

100%|██████████| 1000000/1000000 [11:37:21<00:00, 23.90it/s]
